first (part 2: other directories)

2026-04-16 15:11:25 +03:00
parent 7814e0bc6b
commit 2f0b85a0c7
203 changed files with 59989 additions and 0 deletions
@@ -0,0 +1,182 @@
+"""
+VQWEN v3 Shared-Contract Backtest
+=================================
+
+Evaluates the retrained VQWEN models on the temporal validation slice using
+the exact same pre-match feature contract as training/runtime.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import pickle
+import sys
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import psycopg2
+from dotenv import load_dotenv
+
+# Directory that contains this script.
+AI_DIR = Path(__file__).resolve().parent
+# Parent of the script directory; it is put on sys.path below.
+ENGINE_DIR = AI_DIR.parent
+# Parent of ENGINE_DIR; _load_env() looks for a ".env" file here first.
+REPO_DIR = ENGINE_DIR.parent
+# Directory the pickled models are read from and the summary JSON is written to.
+MODELS_DIR = ENGINE_DIR / "models" / "vqwen"
+
+# Make ENGINE_DIR importable before the project imports that follow
+# (presumably where the "features" package lives -- TODO confirm).
+if str(ENGINE_DIR) not in sys.path:
+    sys.path.insert(0, str(ENGINE_DIR))
+
+from features.vqwen_contract import FEATURE_COLUMNS  # noqa: E402
+from train_vqwen_v3 import (  # noqa: E402
+    _enrich_pre_match_context,
+    _fetch_dataframe,
+    _prepare_features,
+    _temporal_split,
+    load_top_league_ids,
+)
+
+
+def _load_env() -> None:
+    load_dotenv(REPO_DIR / ".env", override=False)
+    load_dotenv(ENGINE_DIR / ".env", override=False)
+
+
+def get_clean_dsn() -> str:
+    _load_env()
+    raw = os.getenv("DATABASE_URL", "").strip().strip('"').strip("'")
+    if not raw:
+        raise RuntimeError("DATABASE_URL is missing.")
+    return raw.split("?", 1)[0]
+
+
+def _accuracy(y_true: np.ndarray, y_pred: np.ndarray) -> float:
+    if len(y_true) == 0:
+        return 0.0
+    return float((y_true == y_pred).mean())
+
+
+def _binary_metrics(prob: np.ndarray, y_true: np.ndarray) -> tuple[float, float]:
+    pred = (prob >= 0.5).astype(int)
+    acc = _accuracy(y_true, pred)
+    brier = float(np.mean((prob - y_true) ** 2)) if len(y_true) else 1.0
+    return acc, brier
+
+
+def _multiclass_brier(prob: np.ndarray, y_true: np.ndarray, n_classes: int = 3) -> float:
+    if len(y_true) == 0:
+        return 1.0
+    target = np.zeros((len(y_true), n_classes), dtype=np.float64)
+    target[np.arange(len(y_true)), y_true.astype(int)] = 1.0
+    return float(np.mean(np.sum((prob - target) ** 2, axis=1)))
+
+
+def _band_label(probability: float) -> str:
+    if probability >= 0.70:
+        return "HIGH"
+    if probability >= 0.60:
+        return "MEDIUM"
+    if probability >= 0.50:
+        return "LOW"
+    return "NO_BET"
+
+
+def _summarize_bands(
+    name: str,
+    confidence: np.ndarray,
+    is_correct: np.ndarray,
+) -> list[str]:
+    lines: list[str] = []
+    for band in ("HIGH", "MEDIUM", "LOW"):
+        mask = np.array([_band_label(float(p)) == band for p in confidence], dtype=bool)
+        count = int(mask.sum())
+        accuracy = float(is_correct[mask].mean()) if count else 0.0
+        avg_conf = float(confidence[mask].mean()) if count else 0.0
+        lines.append(
+            f"{name} {band:<6} count={count:<4} accuracy={accuracy*100:5.1f}% avg_conf={avg_conf*100:5.1f}%"
+        )
+    return lines
+
+
+def run_v3_backtest() -> None:
+    print("VQWEN v3 SHARED-CONTRACT BACKTEST")
+    print("=" * 60)
+
+    league_ids = load_top_league_ids()
+    dsn = get_clean_dsn()
+
+    with psycopg2.connect(dsn) as conn:
+        with conn.cursor() as cur:
+            df = _fetch_dataframe(cur, league_ids)
+            df = _enrich_pre_match_context(cur, df)
+            df = _prepare_features(df)
+
+    train_df, valid_df = _temporal_split(df)
+    print(f"Toplam ornek: {len(df)} | Train: {len(train_df)} | Valid: {len(valid_df)}")
+
+    with (MODELS_DIR / "vqwen_ms.pkl").open("rb") as handle:
+        model_ms = pickle.load(handle)
+    with (MODELS_DIR / "vqwen_ou25.pkl").open("rb") as handle:
+        model_ou25 = pickle.load(handle)
+    with (MODELS_DIR / "vqwen_btts.pkl").open("rb") as handle:
+        model_btts = pickle.load(handle)
+
+    X_valid = valid_df[FEATURE_COLUMNS]
+    y_ms = valid_df["t_ms"].to_numpy(dtype=np.int64)
+    y_ou25 = valid_df["t_ou"].to_numpy(dtype=np.int64)
+    y_btts = valid_df["t_btts"].to_numpy(dtype=np.int64)
+
+    ms_prob = np.asarray(model_ms.predict(X_valid), dtype=np.float64)
+    ou25_prob = np.asarray(model_ou25.predict(X_valid), dtype=np.float64).reshape(-1)
+    btts_prob = np.asarray(model_btts.predict(X_valid), dtype=np.float64).reshape(-1)
+
+    ms_pred = np.argmax(ms_prob, axis=1)
+    ms_conf = np.max(ms_prob, axis=1)
+    ms_correct = (ms_pred == y_ms).astype(np.int64)
+
+    ou25_pred = (ou25_prob >= 0.5).astype(np.int64)
+    ou25_conf = np.where(ou25_prob >= 0.5, ou25_prob, 1.0 - ou25_prob)
+    ou25_correct = (ou25_pred == y_ou25).astype(np.int64)
+
+    btts_pred = (btts_prob >= 0.5).astype(np.int64)
+    btts_conf = np.where(btts_prob >= 0.5, btts_prob, 1.0 - btts_prob)
+    btts_correct = (btts_pred == y_btts).astype(np.int64)
+
+    ms_acc = _accuracy(y_ms, ms_pred)
+    ou25_acc, ou25_brier = _binary_metrics(ou25_prob, y_ou25)
+    btts_acc, btts_brier = _binary_metrics(btts_prob, y_btts)
+    ms_brier = _multiclass_brier(ms_prob, y_ms)
+
+    print("\nGenel metrikler")
+    print(f"MS accuracy   : {ms_acc*100:.2f}% | multiclass_brier={ms_brier:.4f}")
+    print(f"OU25 accuracy : {ou25_acc*100:.2f}% | brier={ou25_brier:.4f}")
+    print(f"BTTS accuracy : {btts_acc*100:.2f}% | brier={btts_brier:.4f}")
+
+    print("\nConfidence band")
+    for line in _summarize_bands("MS", ms_conf, ms_correct):
+        print(line)
+    for line in _summarize_bands("OU25", ou25_conf, ou25_correct):
+        print(line)
+    for line in _summarize_bands("BTTS", btts_conf, btts_correct):
+        print(line)
+
+    summary = {
+        "validation_samples": int(len(valid_df)),
+        "metrics": {
+            "ms_accuracy": round(ms_acc, 4),
+            "ms_brier": round(ms_brier, 4),
+            "ou25_accuracy": round(ou25_acc, 4),
+            "ou25_brier": round(ou25_brier, 4),
+            "btts_accuracy": round(btts_acc, 4),
+            "btts_brier": round(btts_brier, 4),
+        },
+    }
+    (MODELS_DIR / "vqwen_backtest_v3_summary.json").write_text(
+        json.dumps(summary, indent=2),
+        encoding="utf-8",
+    )
+    print("\nKaydedildi: vqwen_backtest_v3_summary.json")
+
+
+# Run the backtest only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    run_v3_backtest()