This commit is contained in:
@@ -0,0 +1,182 @@
|
||||
"""
|
||||
VQWEN v3 Shared-Contract Backtest
|
||||
=================================
|
||||
|
||||
Evaluates the retrained VQWEN models on the temporal validation slice using
|
||||
the exact same pre-match feature contract as training/runtime.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import pickle
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import psycopg2
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Directory layout, derived from the resolved location of this file:
#   AI_DIR     -> directory containing this script
#   ENGINE_DIR -> its parent
#   REPO_DIR   -> the parent of ENGINE_DIR
#   MODELS_DIR -> <ENGINE_DIR>/models/vqwen
AI_DIR = Path(__file__).resolve().parent
ENGINE_DIR = AI_DIR.parent
REPO_DIR = ENGINE_DIR.parent
MODELS_DIR = ENGINE_DIR.joinpath("models", "vqwen")

# Put ENGINE_DIR at the front of the import path exactly once.
# NOTE(review): presumably needed so the project-local imports that follow
# (e.g. ``features.vqwen_contract``) resolve when run as a script -- confirm.
if str(ENGINE_DIR) not in sys.path:
    sys.path.insert(0, str(ENGINE_DIR))
|
||||
|
||||
from features.vqwen_contract import FEATURE_COLUMNS # noqa: E402
|
||||
from train_vqwen_v3 import ( # noqa: E402
|
||||
_enrich_pre_match_context,
|
||||
_fetch_dataframe,
|
||||
_prepare_features,
|
||||
_temporal_split,
|
||||
load_top_league_ids,
|
||||
)
|
||||
|
||||
|
||||
def _load_env() -> None:
    """Load ``.env`` files from the repository root, then the engine directory.

    Both files are loaded with ``override=False``, i.e. without asking
    python-dotenv to overwrite variables that are already set.
    """
    for base_dir in (REPO_DIR, ENGINE_DIR):
        load_dotenv(base_dir / ".env", override=False)
|
||||
|
||||
|
||||
def get_clean_dsn() -> str:
    """Return ``DATABASE_URL`` with wrapping quotes and any query string removed.

    The ``.env`` files are loaded first, then the variable is read from the
    process environment.

    Returns:
        The portion of the URL before the first ``?``.

    Raises:
        RuntimeError: If ``DATABASE_URL`` is unset or empty after trimming.
    """
    _load_env()

    dsn = os.getenv("DATABASE_URL", "")
    # Trim in the same order each time: whitespace, double quotes, single quotes.
    dsn = dsn.strip()
    dsn = dsn.strip('"')
    dsn = dsn.strip("'")

    if dsn:
        # Drop everything from the first "?" onwards (the URL query part).
        base, _sep, _query = dsn.partition("?")
        return base

    raise RuntimeError("DATABASE_URL is missing.")
|
||||
|
||||
|
||||
def _accuracy(y_true: np.ndarray, y_pred: np.ndarray) -> float:
|
||||
if len(y_true) == 0:
|
||||
return 0.0
|
||||
return float((y_true == y_pred).mean())
|
||||
|
||||
|
||||
def _binary_metrics(prob: np.ndarray, y_true: np.ndarray) -> tuple[float, float]:
    """Score binary probabilities against their labels.

    Probabilities are turned into hard predictions with an inclusive 0.5
    threshold for the accuracy figure. The Brier score is the mean squared
    difference between ``prob`` and ``y_true``; it is reported as ``1.0`` when
    ``y_true`` is empty.

    Returns:
        ``(accuracy, brier)``.
    """
    hard_labels = (prob >= 0.5).astype(int)

    if len(y_true):
        brier = float(np.mean(np.square(prob - y_true)))
    else:
        brier = 1.0

    return _accuracy(y_true, hard_labels), brier
|
||||
|
||||
|
||||
def _multiclass_brier(prob: np.ndarray, y_true: np.ndarray, n_classes: int = 3) -> float:
|
||||
if len(y_true) == 0:
|
||||
return 1.0
|
||||
target = np.zeros((len(y_true), n_classes), dtype=np.float64)
|
||||
target[np.arange(len(y_true)), y_true.astype(int)] = 1.0
|
||||
return float(np.mean(np.sum((prob - target) ** 2, axis=1)))
|
||||
|
||||
|
||||
def _band_label(probability: float) -> str:
|
||||
if probability >= 0.70:
|
||||
return "HIGH"
|
||||
if probability >= 0.60:
|
||||
return "MEDIUM"
|
||||
if probability >= 0.50:
|
||||
return "LOW"
|
||||
return "NO_BET"
|
||||
|
||||
|
||||
def _summarize_bands(
    name: str,
    confidence: np.ndarray,
    is_correct: np.ndarray,
) -> list[str]:
    """Build one report line per confidence band (HIGH, MEDIUM, LOW).

    Args:
        name: Market label placed at the start of every line.
        confidence: Per-prediction confidence values, banded via ``_band_label``.
        is_correct: Per-prediction 0/1 correctness flags, aligned with
            ``confidence``.

    Returns:
        Three formatted strings, in HIGH, MEDIUM, LOW order, each showing the
        band's sample count, accuracy and mean confidence. A band with no
        samples reports 0 for both percentages. Predictions that fall in the
        ``NO_BET`` band are not reported.
    """
    # Label every prediction once up front instead of once per band.
    # ``dtype=str`` keeps the comparison below element-wise even when
    # ``confidence`` is empty.
    labels = np.array([_band_label(float(p)) for p in confidence], dtype=str)

    lines: list[str] = []
    for band in ("HIGH", "MEDIUM", "LOW"):
        mask = labels == band
        count = int(mask.sum())
        accuracy = float(is_correct[mask].mean()) if count else 0.0
        avg_conf = float(confidence[mask].mean()) if count else 0.0
        lines.append(
            f"{name} {band:<6} count={count:<4} accuracy={accuracy*100:5.1f}% avg_conf={avg_conf*100:5.1f}%"
        )
    return lines
|
||||
|
||||
|
||||
def _load_model(filename: str):
    """Unpickle one trained model artifact stored under ``MODELS_DIR``."""
    # NOTE(security): unpickling executes arbitrary code embedded in the file.
    # Only load artifacts produced by this project's own training pipeline.
    with (MODELS_DIR / filename).open("rb") as handle:
        return pickle.load(handle)


def run_v3_backtest() -> None:
    """Score the saved VQWEN models on the validation split and save a summary.

    Steps:
        1. Fetch, enrich and prepare the match frame through the helpers
           imported from ``train_vqwen_v3``.
        2. Split it with ``_temporal_split`` and keep the validation part.
        3. Load the MS, OU25 and BTTS models from ``MODELS_DIR``.
        4. Print accuracy / Brier metrics and per-confidence-band accuracy.
        5. Write the headline metrics to
           ``MODELS_DIR / "vqwen_backtest_v3_summary.json"``.

    Raises:
        RuntimeError: Propagated from ``get_clean_dsn`` when ``DATABASE_URL``
            is not configured.
    """
    print("VQWEN v3 SHARED-CONTRACT BACKTEST")
    print("=" * 60)

    league_ids = load_top_league_ids()
    dsn = get_clean_dsn()

    conn = psycopg2.connect(dsn)
    try:
        # In psycopg2, ``with conn`` only commits or rolls back the
        # transaction; it does not close the connection. The ``finally``
        # below is what actually releases it.
        with conn, conn.cursor() as cur:
            df = _fetch_dataframe(cur, league_ids)
            df = _enrich_pre_match_context(cur, df)
    finally:
        conn.close()

    # Takes only the frame, so it runs after the connection is released.
    df = _prepare_features(df)

    train_df, valid_df = _temporal_split(df)
    print(f"Toplam ornek: {len(df)} | Train: {len(train_df)} | Valid: {len(valid_df)}")

    model_ms = _load_model("vqwen_ms.pkl")
    model_ou25 = _load_model("vqwen_ou25.pkl")
    model_btts = _load_model("vqwen_btts.pkl")

    X_valid = valid_df[FEATURE_COLUMNS]
    y_ms = valid_df["t_ms"].to_numpy(dtype=np.int64)
    y_ou25 = valid_df["t_ou"].to_numpy(dtype=np.int64)
    y_btts = valid_df["t_btts"].to_numpy(dtype=np.int64)

    # NOTE(review): assumes ``model_ms.predict`` returns a 2-D per-class
    # probability matrix (the argmax/max below use axis=1) and that the binary
    # models return one probability per row -- confirm against training code.
    ms_prob = np.asarray(model_ms.predict(X_valid), dtype=np.float64)
    ou25_prob = np.asarray(model_ou25.predict(X_valid), dtype=np.float64).reshape(-1)
    btts_prob = np.asarray(model_btts.predict(X_valid), dtype=np.float64).reshape(-1)

    # Multiclass market: confidence is the probability of the predicted class.
    ms_pred = np.argmax(ms_prob, axis=1)
    ms_conf = np.max(ms_prob, axis=1)
    ms_correct = (ms_pred == y_ms).astype(np.int64)

    # Binary markets: confidence is the probability of whichever side was
    # picked, so it is always >= 0.5.
    ou25_pred = (ou25_prob >= 0.5).astype(np.int64)
    ou25_conf = np.where(ou25_prob >= 0.5, ou25_prob, 1.0 - ou25_prob)
    ou25_correct = (ou25_pred == y_ou25).astype(np.int64)

    btts_pred = (btts_prob >= 0.5).astype(np.int64)
    btts_conf = np.where(btts_prob >= 0.5, btts_prob, 1.0 - btts_prob)
    btts_correct = (btts_pred == y_btts).astype(np.int64)

    ms_acc = _accuracy(y_ms, ms_pred)
    ou25_acc, ou25_brier = _binary_metrics(ou25_prob, y_ou25)
    btts_acc, btts_brier = _binary_metrics(btts_prob, y_btts)
    ms_brier = _multiclass_brier(ms_prob, y_ms)

    print("\nGenel metrikler")
    print(f"MS accuracy : {ms_acc*100:.2f}% | multiclass_brier={ms_brier:.4f}")
    print(f"OU25 accuracy : {ou25_acc*100:.2f}% | brier={ou25_brier:.4f}")
    print(f"BTTS accuracy : {btts_acc*100:.2f}% | brier={btts_brier:.4f}")

    print("\nConfidence band")
    for line in _summarize_bands("MS", ms_conf, ms_correct):
        print(line)
    for line in _summarize_bands("OU25", ou25_conf, ou25_correct):
        print(line)
    for line in _summarize_bands("BTTS", btts_conf, btts_correct):
        print(line)

    summary = {
        "validation_samples": int(len(valid_df)),
        "metrics": {
            "ms_accuracy": round(ms_acc, 4),
            "ms_brier": round(ms_brier, 4),
            "ou25_accuracy": round(ou25_acc, 4),
            "ou25_brier": round(ou25_brier, 4),
            "btts_accuracy": round(btts_acc, 4),
            "btts_brier": round(btts_brier, 4),
        },
    }
    (MODELS_DIR / "vqwen_backtest_v3_summary.json").write_text(
        json.dumps(summary, indent=2),
        encoding="utf-8",
    )
    print("\nKaydedildi: vqwen_backtest_v3_summary.json")
|
||||
|
||||
|
||||
# Script entry point: run the backtest only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    run_v3_backtest()
|
||||
Reference in New Issue
Block a user