@@ -0,0 +1,806 @@
|
||||
"""
|
||||
V25 Backtest + Calibration Training Script
==========================================
Runs a full backtest on historical football matches, measures model accuracy
by market / confidence band / league, and trains isotonic calibration models
for MS, OU15, OU25, and BTTS markets.

Usage:
    venv/bin/python scripts/run_backtest_and_calibrate.py
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import pickle
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
# ---------------------------------------------------------------------------
# Path setup — works whether executed from ai-engine/ or project root
# ---------------------------------------------------------------------------
# Absolute directory that contains this script file.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Parent of the script directory; base for models/, reports/ and the
# project-local packages imported below.
AI_ENGINE_DIR = os.path.dirname(SCRIPT_DIR)
# Prepend so the project-local `data` and `models` packages resolve no matter
# which working directory the script is launched from.
sys.path.insert(0, AI_ENGINE_DIR)
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from models.calibration import Calibrator
|
||||
|
||||
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# JSON file listing the qualified league IDs; lives one level above AI_ENGINE_DIR.
QUALIFIED_LEAGUES_PATH = os.path.join(AI_ENGINE_DIR, "..", "qualified_leagues.json")
# Output directories (created below).
CALIBRATION_DIR = os.path.join(AI_ENGINE_DIR, "models", "calibration")
REPORTS_DIR = os.path.join(AI_ENGINE_DIR, "reports")
MAX_MATCHES = 3000  # target upper bound
PROGRESS_INTERVAL = 100  # print every N matches

# NOTE: import-time side effect — both directories are created as soon as this
# module is loaded, not only when main() runs.
os.makedirs(CALIBRATION_DIR, exist_ok=True)
os.makedirs(REPORTS_DIR, exist_ok=True)
|
||||
|
||||
# Mapping: Turkish odds category name -> {selection name -> internal feature key}.
# The keys must match the names stored in the odds tables exactly (including the
# decimal comma). English glosses: "Maç Sonucu" = match result, "Alt/Üst" =
# under/over, "Karşılıklı Gol" = both teams to score ("Var"/"Yok" = yes/no),
# "1. Yarı" = first half.
ODDS_CATEGORY_MAP = {
    "Maç Sonucu": {
        "1": "odds_ms_h",
        "X": "odds_ms_d",
        "2": "odds_ms_a",
    },
    "1,5 Alt/Üst": {
        "Üst": "odds_ou15_o",
        "Alt": "odds_ou15_u",
    },
    "2,5 Alt/Üst": {
        "Üst": "odds_ou25_o",
        "Alt": "odds_ou25_u",
    },
    "3,5 Alt/Üst": {
        "Üst": "odds_ou35_o",
        "Alt": "odds_ou35_u",
    },
    "0,5 Alt/Üst": {
        "Üst": "odds_ou05_o",
        "Alt": "odds_ou05_u",
    },
    "Karşılıklı Gol": {
        "Var": "odds_btts_y",
        "Yok": "odds_btts_n",
    },
    "1. Yarı Sonucu": {
        "1": "odds_ht_ms_h",
        "X": "odds_ht_ms_d",
        "2": "odds_ht_ms_a",
    },
    "1. Yarı 0,5 Alt/Üst": {
        "Üst": "odds_ht_ou05_o",
        "Alt": "odds_ht_ou05_u",
    },
    "1. Yarı 1,5 Alt/Üst": {
        "Üst": "odds_ht_ou15_o",
        "Alt": "odds_ht_ou15_u",
    },
}
|
||||
|
||||
# Top 5 leagues reported individually in the accuracy breakdown; every other
# league is grouped under "Other". Matching is done on the league *name*
# string returned by the matches query, not on league_id.
# NOTE(review): names such as "Serie A" or "Bundesliga" may exist in more than
# one country — confirm the qualified league list cannot produce collisions.
TOP5_LEAGUE_NAMES = {
    "Premier League",
    "La Liga",
    "Bundesliga",
    "Serie A",
    "Ligue 1",
}
|
||||
|
||||
# ============================================================================
|
||||
# STEP 1 — Load qualified league IDs
|
||||
# ============================================================================
|
||||
|
||||
def load_qualified_leagues() -> List[str]:
    """Load the qualified league IDs from ``QUALIFIED_LEAGUES_PATH``.

    Returns:
        One ``str`` per entry of the JSON document, in file order. The IDs are
        stringified so they can be passed as a text array to the SQL queries.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    path = os.path.abspath(QUALIFIED_LEAGUES_PATH)
    # Read as UTF-8 explicitly instead of relying on the platform's locale
    # default encoding.
    with open(path, "r", encoding="utf-8") as f:
        leagues = json.load(f)
    print(f"[Step 1] Loaded {len(leagues)} qualified league IDs.")
    return [str(lid) for lid in leagues]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# STEP 1b — Fetch matches + pre-computed features in batch
|
||||
# ============================================================================
|
||||
|
||||
def fetch_matches(conn, league_ids: List[str]) -> pd.DataFrame:
    """
    Single batch query: matches + football_ai_features + league name.
    Only returns matches that also have odds data (inner join on odd_categories).
    Returns a DataFrame with one row per match.

    Args:
        conn: Open psycopg2 connection; it is not closed here.
        league_ids: League IDs used for the ``m.league_id = ANY(...)`` filter.

    Returns:
        At most ``MAX_MATCHES`` finished ('FT') matches with non-null scores,
        ordered by ``mst_utc`` descending. The DataFrame is empty (and has no
        columns) when nothing matches.
    """
    print("[Step 1b] Fetching matches with pre-computed features and odds ...")

    # The context manager closes the cursor even when execute()/fetchall()
    # raises, so a failed query no longer leaks it.
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(
            """
            SELECT
                m.id AS match_id,
                m.league_id,
                l.name AS league_name,
                m.score_home,
                m.score_away,
                m.mst_utc,
                -- From football_ai_features
                f.home_elo AS home_overall_elo,
                f.away_elo AS away_overall_elo,
                f.elo_diff,
                f.home_home_elo,
                f.away_away_elo,
                f.home_form_elo,
                f.away_form_elo,
                f.home_goals_avg_5 AS home_goals_avg,
                f.away_goals_avg_5 AS away_goals_avg,
                f.home_conceded_avg_5 AS home_conceded_avg,
                f.away_conceded_avg_5 AS away_conceded_avg,
                f.home_clean_sheet_rate,
                f.away_clean_sheet_rate,
                f.home_scoring_rate,
                f.away_scoring_rate,
                f.home_win_streak AS home_winning_streak,
                f.away_win_streak AS away_winning_streak,
                f.home_avg_possession,
                f.away_avg_possession,
                f.home_avg_shots_on_target,
                f.away_avg_shots_on_target,
                f.home_shot_conversion,
                f.away_shot_conversion,
                f.home_avg_corners,
                f.away_avg_corners,
                f.h2h_total AS h2h_total_matches,
                f.h2h_home_win_rate,
                f.h2h_avg_goals,
                f.h2h_over25_rate,
                f.h2h_btts_rate,
                f.league_avg_goals,
                f.league_home_win_pct AS league_home_win_rate,
                f.league_over25_pct AS league_ou25_rate,
                f.referee_avg_cards AS referee_cards_total,
                f.referee_home_bias,
                f.referee_avg_goals,
                f.missing_players_impact AS home_missing_impact,
                f.implied_home,
                f.implied_draw,
                f.implied_away
            FROM matches m
            JOIN football_ai_features f ON f.match_id = m.id
            -- Only matches that have odds data
            JOIN (SELECT DISTINCT match_id FROM odd_categories WHERE sport = 'football') oc
                ON oc.match_id = m.id
            LEFT JOIN leagues l ON l.id = m.league_id
            WHERE m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.league_id = ANY(%s)
            ORDER BY m.mst_utc DESC
            LIMIT %s
            """,
            (league_ids, MAX_MATCHES),
        )
        rows = cur.fetchall()

    # RealDictCursor rows are dict-like; copy into plain dicts for pandas.
    df = pd.DataFrame([dict(r) for r in rows])
    print(f"[Step 1b] Fetched {len(df)} matches with features + odds coverage.")
    return df
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# STEP 1c — Fetch all odds for the matched match IDs in one query
|
||||
# ============================================================================
|
||||
|
||||
def fetch_odds_bulk(conn, match_ids: List[str]) -> Dict[str, Dict[str, float]]:
    """
    Returns {match_id: {feature_key: odd_value, ...}} for all known categories.

    Args:
        conn: Open psycopg2 connection; it is not closed here.
        match_ids: Match IDs whose odds should be loaded.

    Returns:
        A plain dict keyed by match_id. Only selections listed in
        ``ODDS_CATEGORY_MAP`` with a numeric value strictly greater than 1.0
        are kept; matches without any such selection are absent from the dict.
    """
    print(f"[Step 1c] Fetching odds for {len(match_ids)} matches ...")

    # Raw string: the regex below contains `\.`, which is an invalid escape
    # sequence in a normal string literal (SyntaxWarning on Python 3.12+).
    query = r"""
        SELECT oc.match_id, oc.name AS cat_name, os.name AS sel_name, os.odd_value
        FROM odd_categories oc
        JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
        WHERE oc.match_id = ANY(%s)
          AND oc.name = ANY(%s)
          AND oc.sport = 'football'
          AND os.odd_value IS NOT NULL
          AND os.odd_value ~ '^[0-9]+(\.[0-9]+)?$'
        """

    # The context manager closes the cursor even if the query fails.
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(query, (match_ids, list(ODDS_CATEGORY_MAP)))
        rows = cur.fetchall()

    # Build nested dict: match_id -> {feature_key -> value}
    odds_map: Dict[str, Dict[str, float]] = defaultdict(dict)
    for r in rows:
        feat_key = ODDS_CATEGORY_MAP.get(r["cat_name"], {}).get(r["sel_name"])
        if feat_key is None:
            continue  # selection name we do not map to a feature
        try:
            val = float(r["odd_value"])
        except (TypeError, ValueError):
            continue  # defensive: the SQL regex should already guarantee numeric text
        if val > 1.0:
            # Keep the first row encountered for each feature key.
            # NOTE(review): the query has no ORDER BY, so when a match has
            # several rows for the same selection, which one is "first" is up
            # to the database — confirm whether duplicates can occur.
            odds_map[r["match_id"]].setdefault(feat_key, val)

    print(f"[Step 1c] Odds loaded for {len(odds_map)} matches.")
    return dict(odds_map)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# STEP 2 — Build 114-feature vector per match
|
||||
# ============================================================================
|
||||
|
||||
def load_feature_cols() -> List[str]:
    """Load the model's feature column names.

    Reads ``models/v25/feature_cols.json`` under ``AI_ENGINE_DIR`` and returns
    the parsed JSON document unchanged.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    path = os.path.join(AI_ENGINE_DIR, "models", "v25", "feature_cols.json")
    # Read as UTF-8 explicitly instead of relying on the platform's locale
    # default encoding.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
|
||||
|
||||
|
||||
def build_feature_vector(
    match_row: pd.Series,
    odds: Dict[str, float],
    feature_cols: List[str],
) -> Dict[str, float]:
    """
    Construct the full feature dict for one match.
    Falls back to 0.0 for any missing feature.

    Args:
        match_row: One row of the matches DataFrame (column name -> value).
        odds: Odds for this match keyed by feature name (e.g. ``odds_ms_h``).
        feature_cols: Feature names expected by the model; every one of them
            is present in the result.

    Returns:
        Dict keyed by feature name. Besides ``feature_cols`` it may contain
        odds keys, ``implied_*`` and ``form_elo_diff`` even when those are not
        listed in ``feature_cols`` (they are written unconditionally).
    """
    # Local import: needed only for the timezone-aware timestamp conversion.
    from datetime import timezone

    feat: Dict[str, float] = {col: 0.0 for col in feature_cols}

    # ---- Direct columns from match row ----
    # Row column name and feature name are identical for all of these.
    direct_cols = (
        "home_overall_elo",
        "away_overall_elo",
        "elo_diff",
        "home_home_elo",
        "away_away_elo",
        "home_form_elo",
        "away_form_elo",
        "home_goals_avg",
        "away_goals_avg",
        "home_conceded_avg",
        "away_conceded_avg",
        "home_clean_sheet_rate",
        "away_clean_sheet_rate",
        "home_scoring_rate",
        "away_scoring_rate",
        "home_winning_streak",
        "away_winning_streak",
        "home_avg_possession",
        "away_avg_possession",
        "home_avg_shots_on_target",
        "away_avg_shots_on_target",
        "home_shot_conversion",
        "away_shot_conversion",
        "home_avg_corners",
        "away_avg_corners",
        "h2h_total_matches",
        "h2h_home_win_rate",
        "h2h_avg_goals",
        "h2h_over25_rate",
        "h2h_btts_rate",
        "league_avg_goals",
        "league_home_win_rate",
        "league_ou25_rate",
        "referee_cards_total",
        "referee_home_bias",
        "referee_avg_goals",
        "home_missing_impact",
        "implied_home",
        "implied_draw",
        "implied_away",
    )
    for col in direct_cols:
        if col not in feat or col not in match_row.index:
            continue
        val = match_row.get(col)
        # pd.isna covers None, float NaN and pd.NA / NaT; the previous
        # float-only NaN check let pd.NA through and crashed in float().
        if pd.isna(val):
            continue
        feat[col] = float(val)

    # ---- Derived elo features ----
    # Only computed when both form ELOs are non-zero (0.0 means "missing").
    if feat.get("home_form_elo", 0) and feat.get("away_form_elo", 0):
        feat["form_elo_diff"] = feat["home_form_elo"] - feat["away_form_elo"]

    # ---- Odds features from relational tables ----
    odds_features = [
        "odds_ms_h", "odds_ms_d", "odds_ms_a",
        "odds_ht_ms_h", "odds_ht_ms_d", "odds_ht_ms_a",
        "odds_ou05_o", "odds_ou05_u",
        "odds_ou15_o", "odds_ou15_u",
        "odds_ou25_o", "odds_ou25_u",
        "odds_ou35_o", "odds_ou35_u",
        "odds_ht_ou05_o", "odds_ht_ou05_u",
        "odds_ht_ou15_o", "odds_ht_ou15_u",
        "odds_btts_y", "odds_btts_n",
    ]
    for ok in odds_features:
        if ok in odds:
            feat[ok] = odds[ok]
            # Presence flags are set only if the model declares them.
            presence_key = f"{ok}_present"
            if presence_key in feat:
                feat[presence_key] = 1.0

    # Recompute implied probabilities from the 1X2 odds whenever all three are
    # available. This overwrites any implied_* values copied from the row.
    if feat.get("odds_ms_h", 0) > 1 and feat.get("odds_ms_d", 0) > 1 and feat.get("odds_ms_a", 0) > 1:
        raw_h = 1.0 / feat["odds_ms_h"]
        raw_d = 1.0 / feat["odds_ms_d"]
        raw_a = 1.0 / feat["odds_ms_a"]
        total = raw_h + raw_d + raw_a
        if total > 0:
            # Normalise so the three probabilities sum to 1 (removes the margin).
            feat["implied_home"] = raw_h / total
            feat["implied_draw"] = raw_d / total
            feat["implied_away"] = raw_a / total

    # ---- Derived match metadata ----
    mst = match_row.get("mst_utc")
    if mst is not None:
        try:
            ts_s = int(mst) / 1000  # stored as epoch ms
            # Timezone-aware replacement for the deprecated
            # datetime.utcfromtimestamp(); yields the same UTC month.
            dt = datetime.fromtimestamp(ts_s, tz=timezone.utc)
        except (TypeError, ValueError, OverflowError, OSError):
            # Best effort: an unparsable timestamp leaves the date features at 0.0.
            dt = None
        if dt is not None:
            if "match_month" in feat:
                feat["match_month"] = float(dt.month)
            # Season markers: Aug-Oct = start, April-May = end
            if "is_season_start" in feat:
                feat["is_season_start"] = 1.0 if dt.month in (8, 9, 10) else 0.0
            if "is_season_end" in feat:
                feat["is_season_end"] = 1.0 if dt.month in (4, 5) else 0.0

    # ---- Interaction features ----
    if "attack_vs_defense_home" in feat:
        feat["attack_vs_defense_home"] = feat.get("home_goals_avg", 0) - feat.get("away_conceded_avg", 0)
    if "attack_vs_defense_away" in feat:
        feat["attack_vs_defense_away"] = feat.get("away_goals_avg", 0) - feat.get("home_conceded_avg", 0)
    if "form_momentum_interaction" in feat:
        # NOTE(review): nothing in this function populates *_momentum_score,
        # so this evaluates to 0 here — confirm that matches training.
        feat["form_momentum_interaction"] = (
            feat.get("home_momentum_score", 0) * feat.get("home_goals_avg", 0)
            - feat.get("away_momentum_score", 0) * feat.get("away_goals_avg", 0)
        )
    if "elo_form_consistency" in feat:
        feat["elo_form_consistency"] = feat.get("elo_diff", 0) * feat.get("home_goals_avg", 0)

    return feat
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# STEP 3 — Run V25 predictions
|
||||
# ============================================================================
|
||||
|
||||
def load_predictor():
    """Obtain the V25 predictor and report progress on stdout.

    The ``models.v25_ensemble`` import happens at call time rather than at
    module import. Returns whatever ``get_v25_predictor()`` returns.
    """
    from models.v25_ensemble import get_v25_predictor

    print("[Step 3] Loading V25 predictor ...")
    predictor = get_v25_predictor()
    print("[Step 3] V25 predictor ready.")
    return predictor
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# STEP 4 — Compute actual outcomes from scores
|
||||
# ============================================================================
|
||||
|
||||
def compute_actuals(score_home: int, score_away: int) -> Dict[str, Any]:
    """Derive the realised outcome of each market from the final score.

    Returns a dict with:
        ms_actual:   "1" (home win), "X" (draw) or "2" (away win)
        ou15_actual: "Over" when at least 2 goals were scored, else "Under"
        ou25_actual: "Over" when at least 3 goals were scored, else "Under"
        btts_actual: "Yes" when both sides scored, else "No"
    """
    if score_home > score_away:
        result = "1"
    elif score_home == score_away:
        result = "X"
    else:
        result = "2"

    goals = score_home + score_away
    both_scored = score_home > 0 and score_away > 0

    outcome: Dict[str, Any] = {}
    outcome["ms_actual"] = result
    outcome["ou15_actual"] = "Over" if goals >= 2 else "Under"
    outcome["ou25_actual"] = "Over" if goals >= 3 else "Under"
    outcome["btts_actual"] = "Yes" if both_scored else "No"
    return outcome
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# STEP 5 — Accuracy helpers
|
||||
# ============================================================================
|
||||
|
||||
def confidence_band(prob: float) -> str:
    """Map a pick probability to its reporting band label.

    Bands are half-open on the right: [0, 0.50) -> "<50%",
    [0.50, 0.65) -> "50-65%", [0.65, 0.75) -> "65-75%", otherwise "75%+".
    """
    upper_bounds = ((0.50, "<50%"), (0.65, "50-65%"), (0.75, "65-75%"))
    for upper, label in upper_bounds:
        if prob < upper:
            return label
    return "75%+"
|
||||
|
||||
|
||||
def pick_from_ms(home_prob: float, draw_prob: float, away_prob: float) -> Tuple[str, float]:
    """Choose the most likely 1X2 outcome.

    Returns ``(label, probability)`` where label is "1", "X" or "2". On a tie
    the earliest of home, draw, away wins.
    """
    candidates = (("1", home_prob), ("X", draw_prob), ("2", away_prob))
    label, prob = max(candidates, key=lambda pair: pair[1])
    return label, prob
|
||||
|
||||
|
||||
def pick_from_binary(yes_prob: float, no_prob: float, yes_label: str, no_label: str) -> Tuple[str, float]:
    """Choose the likelier side of a two-way market.

    Returns ``(label, probability)`` of the chosen side; the "yes" side wins
    when the two probabilities are equal.
    """
    return (yes_label, yes_prob) if yes_prob >= no_prob else (no_label, no_prob)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# MAIN
|
||||
# ============================================================================
|
||||
|
||||
def main():
    """Run the backtest end to end and train the calibration models.

    Steps:
        1.  Load qualified league IDs, fetch matches and their odds from the DB.
        2.  Build a feature vector per match and run the V25 predictor.
        3.  Compare picks with actual results and print accuracy by market,
            confidence band, league group and pick direction.
        4.  Train one calibration model per market via ``Calibrator``.
        5.  Write the JSON report to ``REPORTS_DIR/backtest_results.json`` and
            print a final summary table.

    Matches whose scores cannot be parsed or whose prediction raises are
    skipped and counted; the first few prediction errors are printed so a
    broken predictor is not hidden behind a silent "all skipped" run.

    NOTE(review): whether the backtested matches overlap the predictor's
    training data, and whether ``Calibrator.train_calibration`` holds out data
    for its metrics, cannot be seen from this file — confirm before trusting
    the reported numbers as out-of-sample.
    """
    # Local import: only needed for the timezone-aware "now" below.
    from datetime import timezone

    def _utc_now_iso() -> str:
        # Replacement for the deprecated datetime.utcnow(); dropping tzinfo
        # keeps the exact same naive ISO string in the log and the report.
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    max_logged_errors = 5  # cap on individually printed prediction failures

    t_start = time.time()
    print("=" * 70)
    print(" V25 Backtest + Calibration Training")
    print(f" Run at: {_utc_now_iso()} UTC")
    print("=" * 70)

    # ------------------------------------------------------------------
    # Step 1 — Load qualified leagues
    # ------------------------------------------------------------------
    league_ids = load_qualified_leagues()

    # ------------------------------------------------------------------
    # Step 1b — Fetch matches with features
    # ------------------------------------------------------------------
    conn = psycopg2.connect(get_clean_dsn())
    try:
        matches_df = fetch_matches(conn, league_ids)

        if matches_df.empty:
            print("[ERROR] No matches found. Check DB connection and league IDs.")
            return

        match_ids = matches_df["match_id"].tolist()

        # ------------------------------------------------------------------
        # Step 1c — Fetch odds in bulk
        # ------------------------------------------------------------------
        odds_map = fetch_odds_bulk(conn, match_ids)
    finally:
        # The connection is only needed for the two bulk queries.
        conn.close()

    # ------------------------------------------------------------------
    # Step 2 — Build feature vectors
    # ------------------------------------------------------------------
    print(f"\n[Step 2] Building feature vectors for {len(matches_df)} matches ...")
    feature_cols = load_feature_cols()

    # ------------------------------------------------------------------
    # Step 3 — Load V25 predictor
    # ------------------------------------------------------------------
    predictor = load_predictor()

    # ------------------------------------------------------------------
    # Main loop — predict each match, collect results
    # ------------------------------------------------------------------
    print("\n[Loop] Running predictions ...")

    # Storage for calibration training
    calib_data: Dict[str, List[Tuple[float, int]]] = {
        "ms_home": [],  # (prob, 1 if home win)
        "ms_draw": [],
        "ms_away": [],
        "ou15": [],
        "ou25": [],
        "btts": [],
    }

    # Storage for accuracy reporting
    records = []

    skipped = 0
    processed = 0
    prediction_errors = 0

    for _, row in matches_df.iterrows():
        match_id = row["match_id"]

        # Validate scores
        try:
            score_home = int(row.get("score_home"))
            score_away = int(row.get("score_away"))
        except (TypeError, ValueError):
            skipped += 1
            continue

        # Build features
        match_odds = odds_map.get(match_id, {})
        feat = build_feature_vector(row, match_odds, feature_cols)

        # Run predictions
        try:
            home_prob, draw_prob, away_prob = predictor.predict_ms(feat)
            over25_prob, under25_prob = predictor.predict_ou25(feat)
            btts_yes_prob, btts_no_prob = predictor.predict_btts(feat)

            # ou15 is loaded via predict_market (returns np.ndarray for binary)
            # NOTE(review): element 0 is taken as the "over" probability —
            # confirm against predict_market's output layout.
            ou15_arr = predictor.predict_market("ou15", feat)
            if ou15_arr is not None and len(ou15_arr) > 0:
                over15_prob = float(ou15_arr[0])
                under15_prob = 1.0 - over15_prob
            else:
                # No OU15 output: fall back to a coin flip. These 0.5 values
                # also end up in the OU15 calibration data.
                over15_prob = 0.5
                under15_prob = 0.5
        except Exception as e:
            # Best effort: one failing match must not abort the whole run,
            # but the failure is reported instead of being dropped silently.
            skipped += 1
            prediction_errors += 1
            if prediction_errors <= max_logged_errors:
                print(f"  [WARN] Prediction failed for match {match_id}: "
                      f"{type(e).__name__}: {e}")
            continue

        # Compute actuals
        actuals = compute_actuals(score_home, score_away)

        # MS picks
        ms_pick, ms_conf = pick_from_ms(home_prob, draw_prob, away_prob)
        ms_correct = int(ms_pick == actuals["ms_actual"])

        # OU15
        ou15_pick, ou15_conf = pick_from_binary(over15_prob, under15_prob, "Over", "Under")
        ou15_correct = int(ou15_pick == actuals["ou15_actual"])

        # OU25
        ou25_pick, ou25_conf = pick_from_binary(over25_prob, under25_prob, "Over", "Under")
        ou25_correct = int(ou25_pick == actuals["ou25_actual"])

        # BTTS
        btts_pick, btts_conf = pick_from_binary(btts_yes_prob, btts_no_prob, "Yes", "No")
        btts_correct = int(btts_pick == actuals["btts_actual"])

        # Collect calibration data
        calib_data["ms_home"].append((home_prob, int(actuals["ms_actual"] == "1")))
        calib_data["ms_draw"].append((draw_prob, int(actuals["ms_actual"] == "X")))
        calib_data["ms_away"].append((away_prob, int(actuals["ms_actual"] == "2")))
        calib_data["ou15"].append((over15_prob, int(actuals["ou15_actual"] == "Over")))
        calib_data["ou25"].append((over25_prob, int(actuals["ou25_actual"] == "Over")))
        calib_data["btts"].append((btts_yes_prob, int(actuals["btts_actual"] == "Yes")))

        # Determine league group
        league_name = str(row.get("league_name", "Other") or "Other")
        league_group = league_name if league_name in TOP5_LEAGUE_NAMES else "Other"

        records.append({
            "match_id": match_id,
            "league_name": league_name,
            "league_group": league_group,
            "score_home": score_home,
            "score_away": score_away,
            # MS
            "ms_pick": ms_pick,
            "ms_actual": actuals["ms_actual"],
            "ms_conf": ms_conf,
            "ms_conf_band": confidence_band(ms_conf),
            "ms_correct": ms_correct,
            "ms_home_prob": home_prob,
            "ms_draw_prob": draw_prob,
            "ms_away_prob": away_prob,
            # OU15
            "ou15_pick": ou15_pick,
            "ou15_actual": actuals["ou15_actual"],
            "ou15_conf": ou15_conf,
            "ou15_conf_band": confidence_band(ou15_conf),
            "ou15_correct": ou15_correct,
            "ou15_over_prob": over15_prob,
            # OU25
            "ou25_pick": ou25_pick,
            "ou25_actual": actuals["ou25_actual"],
            "ou25_conf": ou25_conf,
            "ou25_conf_band": confidence_band(ou25_conf),
            "ou25_correct": ou25_correct,
            "ou25_over_prob": over25_prob,
            # BTTS
            "btts_pick": btts_pick,
            "btts_actual": actuals["btts_actual"],
            "btts_conf": btts_conf,
            "btts_conf_band": confidence_band(btts_conf),
            "btts_correct": btts_correct,
            "btts_yes_prob": btts_yes_prob,
        })

        processed += 1
        if processed % PROGRESS_INTERVAL == 0:
            elapsed = time.time() - t_start
            print(f" [Progress] {processed}/{len(matches_df)} matches | "
                  f"skipped={skipped} | elapsed={elapsed:.1f}s")

    print(f"\n[Loop] Done. Processed={processed}, Skipped={skipped}")
    if prediction_errors:
        print(f"[Loop] {prediction_errors} of the skipped matches failed during "
              f"prediction (first {min(prediction_errors, max_logged_errors)} shown above).")

    if not records:
        print("[ERROR] No records to analyze. Exiting.")
        return

    results_df = pd.DataFrame(records)

    # ------------------------------------------------------------------
    # Step 5 — Accuracy report
    # ------------------------------------------------------------------
    print("\n" + "=" * 70)
    print(" ACCURACY REPORT")
    print("=" * 70)

    markets = [
        ("MS", "ms_correct", "ms_conf", "ms_conf_band", "ms_pick"),
        ("OU15", "ou15_correct", "ou15_conf", "ou15_conf_band", "ou15_pick"),
        ("OU25", "ou25_correct", "ou25_conf", "ou25_conf_band", "ou25_pick"),
        ("BTTS", "btts_correct", "btts_conf", "btts_conf_band", "btts_pick"),
    ]

    summary: Dict[str, Any] = {
        "generated_at": _utc_now_iso(),
        "matches_processed": processed,
        "matches_skipped": skipped,
        "markets": {},
    }

    # Loop-invariant report axes, computed once for all markets.
    bands = ["<50%", "50-65%", "65-75%", "75%+"]
    league_groups = list(results_df["league_group"].unique())
    # Sort: named leagues first, then Other
    named = sorted([g for g in league_groups if g != "Other"])
    ordered = named + (["Other"] if "Other" in league_groups else [])

    for market_label, correct_col, conf_col, band_col, pick_col in markets:
        print(f"\n--- {market_label} ---")
        sub = results_df[[correct_col, conf_col, band_col, pick_col, "league_group"]].copy()
        total = len(sub)
        overall_acc = sub[correct_col].mean() * 100
        print(f" Overall accuracy: {overall_acc:.1f}% ({sub[correct_col].sum()}/{total})")

        market_summary = {
            "overall_accuracy": round(overall_acc, 2),
            "total_matches": total,
            "by_confidence_band": {},
            "by_league": {},
            "by_pick_direction": {},
        }

        # By confidence band
        print(" By confidence band:")
        for band in bands:
            mask = sub[band_col] == band
            n = mask.sum()
            if n > 0:
                acc = sub.loc[mask, correct_col].mean() * 100
                mean_conf = sub.loc[mask, conf_col].mean() * 100
                print(f" {band:8s}: {acc:5.1f}% acc | {n:4d} matches | "
                      f"mean_conf={mean_conf:.1f}%")
                market_summary["by_confidence_band"][band] = {
                    "accuracy": round(acc, 2),
                    "count": int(n),
                    "mean_confidence": round(mean_conf, 2),
                }

        # By league group
        print(" By league:")
        for lg in ordered:
            mask = sub["league_group"] == lg
            n = mask.sum()
            if n > 0:
                acc = sub.loc[mask, correct_col].mean() * 100
                print(f" {lg[:20]:20s}: {acc:5.1f}% ({n} matches)")
                market_summary["by_league"][lg] = {
                    "accuracy": round(acc, 2),
                    "count": int(n),
                }

        # By pick direction
        print(" By pick direction:")
        for pick_val in sorted(sub[pick_col].unique()):
            mask = sub[pick_col] == pick_val
            n = mask.sum()
            if n > 0:
                acc = sub.loc[mask, correct_col].mean() * 100
                mean_conf = sub.loc[mask, conf_col].mean() * 100
                print(f" {pick_val:8s}: {acc:5.1f}% acc | {n:4d} matches | "
                      f"mean_conf={mean_conf:.1f}%")
                market_summary["by_pick_direction"][pick_val] = {
                    "accuracy": round(acc, 2),
                    "count": int(n),
                    "mean_confidence": round(mean_conf, 2),
                }

        summary["markets"][market_label] = market_summary

    # ------------------------------------------------------------------
    # Step 6 — Train calibration models
    # ------------------------------------------------------------------
    print("\n" + "=" * 70)
    print(" CALIBRATION TRAINING")
    print("=" * 70)

    calibrator = Calibrator()

    calibration_results: Dict[str, Dict] = {}

    # One calibration model per key of calib_data (the key is the market name
    # handed to the calibrator).
    for market_key, pairs in calib_data.items():
        if len(pairs) < 100:
            print(f"[Calib] {market_key}: only {len(pairs)} samples — skipping.")
            continue

        probs = np.array([p for p, _ in pairs])
        actuals_bin = np.array([a for _, a in pairs])

        # Build a tiny DataFrame to use Calibrator.train_calibration
        calib_df = pd.DataFrame({
            "prob": probs,
            "actual": actuals_bin,
        })

        metrics = calibrator.train_calibration(
            df=calib_df,
            market=market_key,
            prob_col="prob",
            actual_col="actual",
            min_samples=100,
            save=True,
        )
        calibration_results[market_key] = metrics.to_dict()
        print(f" [Calib] {market_key}: Brier={metrics.brier_score:.4f} | "
              f"ECE={metrics.calibration_error:.4f} | n={metrics.sample_count}")

    # ------------------------------------------------------------------
    # Step 7 — Save results
    # ------------------------------------------------------------------
    output_path = os.path.join(REPORTS_DIR, "backtest_results.json")
    full_report = {
        **summary,
        "calibration": calibration_results,
        "runtime_seconds": round(time.time() - t_start, 1),
    }

    # Explicit encoding so the report does not depend on the locale default.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(full_report, f, indent=2)
    print(f"\n[Step 7] Report saved to {output_path}")

    # ------------------------------------------------------------------
    # Final summary table
    # ------------------------------------------------------------------
    print("\n" + "=" * 70)
    print(" FINAL SUMMARY TABLE")
    print("=" * 70)
    print(f"{'Market':<8} {'Overall Acc':>12} {'Matches':>8} "
          f"{'Best Band (acc)':>18}")
    print("-" * 70)
    for market_label, _, _, _, _ in markets:
        market_stats = summary["markets"].get(market_label, {})
        overall = market_stats.get("overall_accuracy", 0)
        total_m = market_stats.get("total_matches", 0)
        bands_d = market_stats.get("by_confidence_band", {})
        # Find best accuracy band with >= 50 matches
        best_band = "-"
        best_acc = 0.0
        for band, bdata in bands_d.items():
            if bdata["count"] >= 50 and bdata["accuracy"] > best_acc:
                best_acc = bdata["accuracy"]
                best_band = f"{band} ({best_acc:.1f}%)"
        print(f"{market_label:<8} {overall:>11.1f}% {total_m:>8d} {best_band:>18s}")

    elapsed_total = time.time() - t_start
    print(f"\nTotal runtime: {elapsed_total:.1f}s")
    print("=" * 70)
|
||||
|
||||
|
||||
# Script entry point: run the backtest only when executed directly, not when
# the module is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user