main
Deploy Iddaai Backend / build-and-deploy (push) Successful in 37s

This commit is contained in:
2026-05-17 02:17:22 +03:00
parent 17ace9bd12
commit 94c7a4481a
53 changed files with 29602 additions and 7832 deletions
@@ -0,0 +1,806 @@
"""
V25 Backtest + Calibration Training Script
==========================================
Runs a full backtest on historical football matches, measures model accuracy
by market / confidence band / league, and trains isotonic calibration models
for MS, OU15, OU25, and BTTS markets.
Usage:
venv/bin/python scripts/run_backtest_and_calibrate.py
"""
from __future__ import annotations
import os
import sys
import json
import pickle
import time
from collections import defaultdict
from datetime import datetime
from typing import Dict, List, Optional, Tuple, Any
import numpy as np
import pandas as pd
import psycopg2
from psycopg2.extras import RealDictCursor
# ---------------------------------------------------------------------------
# Path setup — works whether executed from ai-engine/ or project root
# ---------------------------------------------------------------------------
# SCRIPT_DIR is the directory holding this file; AI_ENGINE_DIR is its parent.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
AI_ENGINE_DIR = os.path.dirname(SCRIPT_DIR)
# Must run before the two project imports below: it puts AI_ENGINE_DIR first
# on the module search path so that `data` and `models` resolve from there.
sys.path.insert(0, AI_ENGINE_DIR)
from data.db import get_clean_dsn
from models.calibration import Calibrator
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# JSON file with the league IDs to backtest; lives one level above AI_ENGINE_DIR.
QUALIFIED_LEAGUES_PATH = os.path.join(AI_ENGINE_DIR, "..", "qualified_leagues.json")
# NOTE(review): presumably where Calibrator persists trained models — this
# script only creates the directory; confirm against models/calibration.
CALIBRATION_DIR = os.path.join(AI_ENGINE_DIR, "models", "calibration")
# Output directory for the JSON backtest report.
REPORTS_DIR = os.path.join(AI_ENGINE_DIR, "reports")
MAX_MATCHES = 3000 # target upper bound (used as the SQL LIMIT when fetching matches)
PROGRESS_INTERVAL = 100 # print every N matches
# Import-time side effect: both output directories are created as soon as this
# module is loaded, before any function runs.
os.makedirs(CALIBRATION_DIR, exist_ok=True)
os.makedirs(REPORTS_DIR, exist_ok=True)
# Two-level lookup used when loading odds:
#   Turkish odds-category name -> selection name -> internal feature key.
# "Üst" is the over side and "Alt" the under side of a goal line
# (feature keys ending in _o / _u respectively).
ODDS_CATEGORY_MAP = {
    # Full-time match result.
    "Maç Sonucu": {"1": "odds_ms_h", "X": "odds_ms_d", "2": "odds_ms_a"},
    # Full-time goal lines.
    "1,5 Alt/Üst": {"Üst": "odds_ou15_o", "Alt": "odds_ou15_u"},
    "2,5 Alt/Üst": {"Üst": "odds_ou25_o", "Alt": "odds_ou25_u"},
    "3,5 Alt/Üst": {"Üst": "odds_ou35_o", "Alt": "odds_ou35_u"},
    "0,5 Alt/Üst": {"Üst": "odds_ou05_o", "Alt": "odds_ou05_u"},
    # Both teams to score ("Var" = yes, "Yok" = no).
    "Karşılıklı Gol": {"Var": "odds_btts_y", "Yok": "odds_btts_n"},
    # First-half result and first-half goal lines.
    "1. Yarı Sonucu": {"1": "odds_ht_ms_h", "X": "odds_ht_ms_d", "2": "odds_ht_ms_a"},
    "1. Yarı 0,5 Alt/Üst": {"Üst": "odds_ht_ou05_o", "Alt": "odds_ht_ou05_u"},
    "1. Yarı 1,5 Alt/Üst": {"Üst": "odds_ht_ou15_o", "Alt": "odds_ht_ou15_u"},
}
# League names that get their own row in the per-league accuracy breakdown.
# Membership is tested against the league *name*, so any league sharing one of
# these names is grouped with it.
TOP5_LEAGUE_NAMES = {"Premier League", "La Liga", "Bundesliga", "Serie A", "Ligue 1"}
# ============================================================================
# STEP 1 — Load qualified league IDs
# ============================================================================
def load_qualified_leagues() -> List[str]:
    """Load the league IDs to backtest from ``QUALIFIED_LEAGUES_PATH``.

    Returns:
        Every element of the JSON document converted to ``str``, in the order
        the document yields them.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    path = os.path.abspath(QUALIFIED_LEAGUES_PATH)
    # JSON is UTF-8 by specification; do not depend on the locale's default.
    with open(path, "r", encoding="utf-8") as f:
        leagues = json.load(f)
    print(f"[Step 1] Loaded {len(leagues)} qualified league IDs.")
    return [str(lid) for lid in leagues]
# ============================================================================
# STEP 1b — Fetch matches + pre-computed features in batch
# ============================================================================
def fetch_matches(conn, league_ids: List[str]) -> pd.DataFrame:
    """Fetch finished matches with their pre-computed features in one query.

    Joins ``matches`` with ``football_ai_features`` and the league name, and
    keeps only matches that have at least one football row in
    ``odd_categories``. Only full-time matches with both scores present in the
    given leagues are returned, newest first, capped at ``MAX_MATCHES``.

    Args:
        conn: Open psycopg2 connection.
        league_ids: League IDs to include (passed as an array parameter).

    Returns:
        DataFrame with one row per result row of the query. An empty result
        yields an empty DataFrame without columns.
        NOTE(review): "one row per match" holds only if football_ai_features
        has at most one row per match_id — confirm against the schema.
    """
    print("[Step 1b] Fetching matches with pre-computed features and odds ...")
    # The context manager closes the cursor even when execute/fetchall raises.
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(
            """
            SELECT
                m.id AS match_id,
                m.league_id,
                l.name AS league_name,
                m.score_home,
                m.score_away,
                m.mst_utc,
                -- From football_ai_features
                f.home_elo AS home_overall_elo,
                f.away_elo AS away_overall_elo,
                f.elo_diff,
                f.home_home_elo,
                f.away_away_elo,
                f.home_form_elo,
                f.away_form_elo,
                f.home_goals_avg_5 AS home_goals_avg,
                f.away_goals_avg_5 AS away_goals_avg,
                f.home_conceded_avg_5 AS home_conceded_avg,
                f.away_conceded_avg_5 AS away_conceded_avg,
                f.home_clean_sheet_rate,
                f.away_clean_sheet_rate,
                f.home_scoring_rate,
                f.away_scoring_rate,
                f.home_win_streak AS home_winning_streak,
                f.away_win_streak AS away_winning_streak,
                f.home_avg_possession,
                f.away_avg_possession,
                f.home_avg_shots_on_target,
                f.away_avg_shots_on_target,
                f.home_shot_conversion,
                f.away_shot_conversion,
                f.home_avg_corners,
                f.away_avg_corners,
                f.h2h_total AS h2h_total_matches,
                f.h2h_home_win_rate,
                f.h2h_avg_goals,
                f.h2h_over25_rate,
                f.h2h_btts_rate,
                f.league_avg_goals,
                f.league_home_win_pct AS league_home_win_rate,
                f.league_over25_pct AS league_ou25_rate,
                f.referee_avg_cards AS referee_cards_total,
                f.referee_home_bias,
                f.referee_avg_goals,
                f.missing_players_impact AS home_missing_impact,
                f.implied_home,
                f.implied_draw,
                f.implied_away
            FROM matches m
            JOIN football_ai_features f ON f.match_id = m.id
            -- Only matches that have odds data
            JOIN (SELECT DISTINCT match_id FROM odd_categories WHERE sport = 'football') oc
                ON oc.match_id = m.id
            LEFT JOIN leagues l ON l.id = m.league_id
            WHERE m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.league_id = ANY(%s)
            ORDER BY m.mst_utc DESC
            LIMIT %s
            """,
            (league_ids, MAX_MATCHES),
        )
        rows = cur.fetchall()
    # RealDictCursor rows are dict-like; plain dicts keep DataFrame
    # construction independent of the row class.
    df = pd.DataFrame([dict(r) for r in rows])
    print(f"[Step 1b] Fetched {len(df)} matches with features + odds coverage.")
    return df
# ============================================================================
# STEP 1c — Fetch all odds for the matched match IDs in one query
# ============================================================================
def fetch_odds_bulk(conn, match_ids: List[str]) -> Dict[str, Dict[str, float]]:
    """Fetch the odds of all mapped categories for the given matches at once.

    Args:
        conn: Open psycopg2 connection.
        match_ids: IDs of the matches whose odds should be loaded.

    Returns:
        ``{match_id: {feature_key: odd_value}}`` restricted to the categories
        and selections listed in ``ODDS_CATEGORY_MAP``. Only values strictly
        greater than 1.0 are kept; matches without any such value are absent.
    """
    print(f"[Step 1c] Fetching odds for {len(match_ids)} matches ...")
    if not match_ids:
        # Nothing to look up — skip the round trip to the database.
        print("[Step 1c] Odds loaded for 0 matches.")
        return {}

    known_cats = list(ODDS_CATEGORY_MAP)
    # Raw string: the regex contains `\.`, which is an invalid escape sequence
    # in a normal Python string literal.
    query = r"""
        SELECT oc.match_id, oc.name AS cat_name, os.name AS sel_name, os.odd_value
        FROM odd_categories oc
        JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
        WHERE oc.match_id = ANY(%s)
          AND oc.name = ANY(%s)
          AND oc.sport = 'football'
          AND os.odd_value IS NOT NULL
          AND os.odd_value ~ '^[0-9]+(\.[0-9]+)?$'
    """
    # The context manager closes the cursor even when execute/fetchall raises.
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(query, (match_ids, known_cats))
        rows = cur.fetchall()

    # Build nested dict: match_id -> {feature_key -> value}
    odds_map: Dict[str, Dict[str, float]] = defaultdict(dict)
    for r in rows:
        feat_key = ODDS_CATEGORY_MAP.get(r["cat_name"], {}).get(r["sel_name"])
        if feat_key is None:
            continue
        try:
            val = float(r["odd_value"])
        except (TypeError, ValueError):
            # The SQL regex already filters non-numeric values; this is only a
            # safety net, so the row is skipped.
            continue
        if val > 1.0:
            # Keep the first value encountered for each feature key.
            # NOTE(review): the query has no ORDER BY, so which duplicate comes
            # first is not defined — add an explicit ordering if a specific
            # bookmaker or the latest value is intended.
            odds_map[r["match_id"]].setdefault(feat_key, val)
    print(f"[Step 1c] Odds loaded for {len(odds_map)} matches.")
    return dict(odds_map)
# ============================================================================
# STEP 2 — Build 114-feature vector per match
# ============================================================================
def load_feature_cols() -> List[str]:
    """Return the parsed content of ``models/v25/feature_cols.json``.

    The path is resolved relative to ``AI_ENGINE_DIR``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    path = os.path.join(AI_ENGINE_DIR, "models", "v25", "feature_cols.json")
    # JSON is UTF-8 by specification; do not depend on the locale's default.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
def build_feature_vector(
    match_row: pd.Series,
    odds: Dict[str, float],
    feature_cols: List[str],
) -> Dict[str, float]:
    """Construct the feature dict for one match.

    Every name in ``feature_cols`` starts at 0.0 and is overwritten when a
    value is available from the match row, the odds, or a derived formula.

    Args:
        match_row: One match row; direct features are read by column name and
            ``mst_utc`` is read as the kick-off time in epoch milliseconds.
        odds: ``{feature_key: odd_value}`` for this match (may be empty).
        feature_cols: Feature names the returned dict is initialised with.

    Returns:
        Dict of feature name to float. ``form_elo_diff``, the odds keys and the
        ``implied_*`` keys are written whenever their inputs are available,
        even if they are not listed in ``feature_cols``.
    """
    # Local import: only the names used inside this function are needed here.
    from datetime import timezone

    feat: Dict[str, float] = {col: 0.0 for col in feature_cols}

    # ---- Direct columns from match row ----
    # The row column and the feature share the same name for all of these.
    direct_cols = (
        "home_overall_elo", "away_overall_elo", "elo_diff",
        "home_home_elo", "away_away_elo",
        "home_form_elo", "away_form_elo",
        "home_goals_avg", "away_goals_avg",
        "home_conceded_avg", "away_conceded_avg",
        "home_clean_sheet_rate", "away_clean_sheet_rate",
        "home_scoring_rate", "away_scoring_rate",
        "home_winning_streak", "away_winning_streak",
        "home_avg_possession", "away_avg_possession",
        "home_avg_shots_on_target", "away_avg_shots_on_target",
        "home_shot_conversion", "away_shot_conversion",
        "home_avg_corners", "away_avg_corners",
        "h2h_total_matches", "h2h_home_win_rate", "h2h_avg_goals",
        "h2h_over25_rate", "h2h_btts_rate",
        "league_avg_goals", "league_home_win_rate", "league_ou25_rate",
        "referee_cards_total", "referee_home_bias", "referee_avg_goals",
        "home_missing_impact",
        "implied_home", "implied_draw", "implied_away",
    )
    for col in direct_cols:
        if col not in feat or col not in match_row.index:
            continue
        val = match_row.get(col)
        # pd.isna covers None, float NaN, pd.NA and NaT; the 0.0 default stays.
        if val is None or pd.isna(val):
            continue
        feat[col] = float(val)

    # ---- Derived elo features ----
    # Only when both form ratings are non-zero (0.0 means "missing" here).
    if feat.get("home_form_elo", 0) and feat.get("away_form_elo", 0):
        feat["form_elo_diff"] = feat["home_form_elo"] - feat["away_form_elo"]

    # ---- Odds features from relational tables ----
    odds_features = (
        "odds_ms_h", "odds_ms_d", "odds_ms_a",
        "odds_ht_ms_h", "odds_ht_ms_d", "odds_ht_ms_a",
        "odds_ou05_o", "odds_ou05_u",
        "odds_ou15_o", "odds_ou15_u",
        "odds_ou25_o", "odds_ou25_u",
        "odds_ou35_o", "odds_ou35_u",
        "odds_ht_ou05_o", "odds_ht_ou05_u",
        "odds_ht_ou15_o", "odds_ht_ou15_u",
        "odds_btts_y", "odds_btts_n",
    )
    for odds_key in odds_features:
        if odds_key not in odds:
            continue
        feat[odds_key] = odds[odds_key]
        presence_key = f"{odds_key}_present"
        if presence_key in feat:
            feat[presence_key] = 1.0

    # Whenever all three match-result odds are present, the implied
    # probabilities are recomputed from them (normalised to remove the
    # bookmaker margin) and replace any values copied from the match row.
    if feat.get("odds_ms_h", 0) > 1 and feat.get("odds_ms_d", 0) > 1 and feat.get("odds_ms_a", 0) > 1:
        raw_h = 1.0 / feat["odds_ms_h"]
        raw_d = 1.0 / feat["odds_ms_d"]
        raw_a = 1.0 / feat["odds_ms_a"]
        total = raw_h + raw_d + raw_a
        if total > 0:
            feat["implied_home"] = raw_h / total
            feat["implied_draw"] = raw_d / total
            feat["implied_away"] = raw_a / total

    # ---- Derived match metadata ----
    mst = match_row.get("mst_utc")
    if mst is not None:
        try:
            # stored as epoch ms; converted as an aware UTC datetime
            kickoff = datetime.fromtimestamp(int(mst) / 1000, tz=timezone.utc)
        except (TypeError, ValueError, OverflowError, OSError):
            # Unparseable or out-of-range timestamp: keep the 0.0 defaults.
            kickoff = None
        if kickoff is not None:
            if "match_month" in feat:
                feat["match_month"] = float(kickoff.month)
            # Season markers: Aug-Oct = start, April-May = end
            if "is_season_start" in feat:
                feat["is_season_start"] = 1.0 if kickoff.month in (8, 9, 10) else 0.0
            if "is_season_end" in feat:
                feat["is_season_end"] = 1.0 if kickoff.month in (4, 5) else 0.0

    # ---- Interaction features ----
    if "attack_vs_defense_home" in feat:
        feat["attack_vs_defense_home"] = feat.get("home_goals_avg", 0) - feat.get("away_conceded_avg", 0)
    if "attack_vs_defense_away" in feat:
        feat["attack_vs_defense_away"] = feat.get("away_goals_avg", 0) - feat.get("home_conceded_avg", 0)
    if "form_momentum_interaction" in feat:
        # NOTE(review): the momentum scores are never populated in this
        # function, so they are 0.0 here and this interaction evaluates to 0.
        feat["form_momentum_interaction"] = (
            feat.get("home_momentum_score", 0) * feat.get("home_goals_avg", 0)
            - feat.get("away_momentum_score", 0) * feat.get("away_goals_avg", 0)
        )
    if "elo_form_consistency" in feat:
        feat["elo_form_consistency"] = feat.get("elo_diff", 0) * feat.get("home_goals_avg", 0)
    return feat
# ============================================================================
# STEP 3 — Run V25 predictions
# ============================================================================
def load_predictor():
    """Return the V25 predictor obtained from ``get_v25_predictor()``.

    The import is done inside the function, so ``models.v25_ensemble`` is only
    loaded when a predictor is actually requested.
    """
    from models.v25_ensemble import get_v25_predictor

    print("[Step 3] Loading V25 predictor ...")
    predictor = get_v25_predictor()
    print("[Step 3] V25 predictor ready.")
    return predictor
# ============================================================================
# STEP 4 — Compute actual outcomes from scores
# ============================================================================
def compute_actuals(score_home: int, score_away: int) -> Dict[str, Any]:
    """Derive the settled outcome of each market from the final score.

    Args:
        score_home: Goals scored by the home team.
        score_away: Goals scored by the away team.

    Returns:
        Dict with ``ms_actual`` ("1" home win, "X" draw, "2" away win),
        ``ou15_actual`` / ``ou25_actual`` ("Over" or "Under" for the 1.5 and
        2.5 total-goal lines) and ``btts_actual`` ("Yes" when both teams
        scored, otherwise "No").
    """
    total_goals = score_home + score_away
    if score_home > score_away:
        result = "1"
    elif score_home == score_away:
        result = "X"
    else:
        result = "2"
    both_scored = score_home > 0 and score_away > 0
    return {
        "ms_actual": result,
        # Over 1.5 needs at least 2 goals, over 2.5 at least 3.
        "ou15_actual": "Over" if total_goals >= 2 else "Under",
        "ou25_actual": "Over" if total_goals >= 3 else "Under",
        "btts_actual": "Yes" if both_scored else "No",
    }
# ============================================================================
# STEP 5 — Accuracy helpers
# ============================================================================
def confidence_band(prob: float) -> str:
    """Return the reporting band label for a pick probability.

    Bands are half-open on the right: ``<50%`` is [.., 0.50), ``50-65%`` is
    [0.50, 0.65), ``65-75%`` is [0.65, 0.75) and ``75%+`` is everything else.

    NOTE: NaN compares false against every threshold and therefore ends up in
    ``75%+``.
    """
    upper_bounds = ((0.50, "<50%"), (0.65, "50-65%"), (0.75, "65-75%"))
    for upper, label in upper_bounds:
        if prob < upper:
            return label
    return "75%+"
def pick_from_ms(home_prob: float, draw_prob: float, away_prob: float) -> Tuple[str, float]:
    """Return the most likely match result and its probability.

    Labels are "1" (home), "X" (draw) and "2" (away). On a tie the earlier
    label in that order wins, because ``max`` keeps the first maximum.
    """
    candidates = (("1", home_prob), ("X", draw_prob), ("2", away_prob))
    label, prob = max(candidates, key=lambda item: item[1])
    return label, prob
def pick_from_binary(yes_prob: float, no_prob: float, yes_label: str, no_label: str) -> Tuple[str, float]:
    """Return the label and probability of the likelier side of a two-way market.

    The "yes" side wins ties (``yes_prob >= no_prob``).
    """
    yes_wins = yes_prob >= no_prob
    return (yes_label, yes_prob) if yes_wins else (no_label, no_prob)
# ============================================================================
# MAIN
# ============================================================================
# At most this many individual prediction failures are printed; the rest are
# only counted, so a systematic failure cannot flood the console.
_MAX_LOGGED_PREDICTION_ERRORS = 5
# Minimum number of (probability, outcome) pairs required to fit a calibrator.
_MIN_CALIBRATION_SAMPLES = 100
# Minimum band size for a confidence band to count as "best band".
_MIN_BEST_BAND_MATCHES = 50
# Band labels in ascending order of confidence.
_CONFIDENCE_BANDS = ("<50%", "50-65%", "65-75%", "75%+")
# (label, correct column, confidence column, band column, pick column)
_REPORT_MARKETS = [
    ("MS", "ms_correct", "ms_conf", "ms_conf_band", "ms_pick"),
    ("OU15", "ou15_correct", "ou15_conf", "ou15_conf_band", "ou15_pick"),
    ("OU25", "ou25_correct", "ou25_conf", "ou25_conf_band", "ou25_pick"),
    ("BTTS", "btts_correct", "btts_conf", "btts_conf_band", "btts_pick"),
]


def _utc_now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string without an offset."""
    from datetime import timezone

    # Aware "now" with tzinfo dropped: same text as the deprecated utcnow().
    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()


def _predict_markets(predictor, feat: Dict[str, float]) -> Dict[str, float]:
    """Run all four market predictions for one feature dict."""
    home_prob, draw_prob, away_prob = predictor.predict_ms(feat)
    over25_prob, under25_prob = predictor.predict_ou25(feat)
    btts_yes_prob, btts_no_prob = predictor.predict_btts(feat)
    # ou15 is read via predict_market; the first element is taken as P(over).
    ou15_arr = predictor.predict_market("ou15", feat)
    if ou15_arr is not None and len(ou15_arr) > 0:
        over15_prob = float(ou15_arr[0])
        under15_prob = 1.0 - over15_prob
    else:
        # No ou15 output: fall back to a coin flip.
        # NOTE(review): these 0.5 values also enter the calibration data.
        over15_prob = 0.5
        under15_prob = 0.5
    return {
        "home": home_prob, "draw": draw_prob, "away": away_prob,
        "over15": over15_prob, "under15": under15_prob,
        "over25": over25_prob, "under25": under25_prob,
        "btts_yes": btts_yes_prob, "btts_no": btts_no_prob,
    }


def _run_backtest(
    matches_df: pd.DataFrame,
    odds_map: Dict[str, Dict[str, float]],
    feature_cols: List[str],
    predictor,
    t_start: float,
) -> Tuple[List[Dict[str, Any]], Dict[str, List[Tuple[float, int]]], int]:
    """Predict every match and collect per-match records and calibration pairs.

    Returns:
        ``(records, calib_data, skipped)`` where ``records`` has one dict per
        successfully predicted match, ``calib_data`` maps each calibration
        market to ``(probability, outcome)`` pairs, and ``skipped`` counts the
        matches dropped for invalid scores or prediction errors.
    """
    calib_data: Dict[str, List[Tuple[float, int]]] = {
        "ms_home": [],  # (prob, 1 if home win)
        "ms_draw": [],
        "ms_away": [],
        "ou15": [],
        "ou25": [],
        "btts": [],
    }
    records: List[Dict[str, Any]] = []
    skipped = 0
    failures_logged = 0
    total_matches = len(matches_df)

    for _, row in matches_df.iterrows():
        match_id = row["match_id"]

        # Validate scores
        try:
            score_home = int(row.get("score_home"))
            score_away = int(row.get("score_away"))
        except (TypeError, ValueError):
            skipped += 1
            continue

        feat = build_feature_vector(row, odds_map.get(match_id, {}), feature_cols)

        # Best effort per match: one failing prediction must not abort the
        # run, but the first few failures are shown so they are not hidden.
        try:
            probs = _predict_markets(predictor, feat)
        except Exception as exc:
            skipped += 1
            if failures_logged < _MAX_LOGGED_PREDICTION_ERRORS:
                failures_logged += 1
                print(f" [WARN] Prediction failed for match {match_id}: {exc!r}")
            continue

        actuals = compute_actuals(score_home, score_away)

        ms_pick, ms_conf = pick_from_ms(probs["home"], probs["draw"], probs["away"])
        ou15_pick, ou15_conf = pick_from_binary(probs["over15"], probs["under15"], "Over", "Under")
        ou25_pick, ou25_conf = pick_from_binary(probs["over25"], probs["under25"], "Over", "Under")
        btts_pick, btts_conf = pick_from_binary(probs["btts_yes"], probs["btts_no"], "Yes", "No")

        # Collect calibration data: raw probability vs. binary outcome
        calib_data["ms_home"].append((probs["home"], int(actuals["ms_actual"] == "1")))
        calib_data["ms_draw"].append((probs["draw"], int(actuals["ms_actual"] == "X")))
        calib_data["ms_away"].append((probs["away"], int(actuals["ms_actual"] == "2")))
        calib_data["ou15"].append((probs["over15"], int(actuals["ou15_actual"] == "Over")))
        calib_data["ou25"].append((probs["over25"], int(actuals["ou25_actual"] == "Over")))
        calib_data["btts"].append((probs["btts_yes"], int(actuals["btts_actual"] == "Yes")))

        # Determine league group: named top leagues individually, rest pooled
        league_name = str(row.get("league_name", "Other") or "Other")
        league_group = league_name if league_name in TOP5_LEAGUE_NAMES else "Other"

        records.append({
            "match_id": match_id,
            "league_name": league_name,
            "league_group": league_group,
            "score_home": score_home,
            "score_away": score_away,
            # MS
            "ms_pick": ms_pick,
            "ms_actual": actuals["ms_actual"],
            "ms_conf": ms_conf,
            "ms_conf_band": confidence_band(ms_conf),
            "ms_correct": int(ms_pick == actuals["ms_actual"]),
            "ms_home_prob": probs["home"],
            "ms_draw_prob": probs["draw"],
            "ms_away_prob": probs["away"],
            # OU15
            "ou15_pick": ou15_pick,
            "ou15_actual": actuals["ou15_actual"],
            "ou15_conf": ou15_conf,
            "ou15_conf_band": confidence_band(ou15_conf),
            "ou15_correct": int(ou15_pick == actuals["ou15_actual"]),
            "ou15_over_prob": probs["over15"],
            # OU25
            "ou25_pick": ou25_pick,
            "ou25_actual": actuals["ou25_actual"],
            "ou25_conf": ou25_conf,
            "ou25_conf_band": confidence_band(ou25_conf),
            "ou25_correct": int(ou25_pick == actuals["ou25_actual"]),
            "ou25_over_prob": probs["over25"],
            # BTTS
            "btts_pick": btts_pick,
            "btts_actual": actuals["btts_actual"],
            "btts_conf": btts_conf,
            "btts_conf_band": confidence_band(btts_conf),
            "btts_correct": int(btts_pick == actuals["btts_actual"]),
            "btts_yes_prob": probs["btts_yes"],
        })

        processed = len(records)
        if processed % PROGRESS_INTERVAL == 0:
            elapsed = time.time() - t_start
            print(f" [Progress] {processed}/{total_matches} matches | "
                  f"skipped={skipped} | elapsed={elapsed:.1f}s")

    return records, calib_data, skipped


def _report_market(
    results_df: pd.DataFrame,
    market_label: str,
    correct_col: str,
    conf_col: str,
    band_col: str,
    pick_col: str,
) -> Dict[str, Any]:
    """Print and return the accuracy breakdown of one market."""
    print(f"\n--- {market_label} ---")
    sub = results_df[[correct_col, conf_col, band_col, pick_col, "league_group"]].copy()
    total = len(sub)
    overall_acc = sub[correct_col].mean() * 100
    print(f" Overall accuracy: {overall_acc:.1f}% ({sub[correct_col].sum()}/{total})")
    market_summary: Dict[str, Any] = {
        "overall_accuracy": round(float(overall_acc), 2),
        "total_matches": total,
        "by_confidence_band": {},
        "by_league": {},
        "by_pick_direction": {},
    }

    # By confidence band
    print(" By confidence band:")
    for band in _CONFIDENCE_BANDS:
        mask = sub[band_col] == band
        n = int(mask.sum())
        if n == 0:
            continue
        acc = float(sub.loc[mask, correct_col].mean() * 100)
        mean_conf = float(sub.loc[mask, conf_col].mean() * 100)
        print(f" {band:8s}: {acc:5.1f}% acc | {n:4d} matches | "
              f"mean_conf={mean_conf:.1f}%")
        market_summary["by_confidence_band"][band] = {
            "accuracy": round(acc, 2),
            "count": n,
            "mean_confidence": round(mean_conf, 2),
        }

    # By league group — named leagues alphabetically, then the pooled bucket
    print(" By league:")
    league_groups = list(sub["league_group"].unique())
    ordered = sorted(g for g in league_groups if g != "Other")
    if "Other" in league_groups:
        ordered.append("Other")
    for lg in ordered:
        mask = sub["league_group"] == lg
        n = int(mask.sum())
        if n == 0:
            continue
        acc = float(sub.loc[mask, correct_col].mean() * 100)
        print(f" {lg[:20]:20s}: {acc:5.1f}% ({n} matches)")
        market_summary["by_league"][lg] = {
            "accuracy": round(acc, 2),
            "count": n,
        }

    # By pick direction
    print(" By pick direction:")
    for pick_val in sorted(sub[pick_col].unique()):
        mask = sub[pick_col] == pick_val
        n = int(mask.sum())
        if n == 0:
            continue
        acc = float(sub.loc[mask, correct_col].mean() * 100)
        mean_conf = float(sub.loc[mask, conf_col].mean() * 100)
        print(f" {pick_val:8s}: {acc:5.1f}% acc | {n:4d} matches | "
              f"mean_conf={mean_conf:.1f}%")
        market_summary["by_pick_direction"][pick_val] = {
            "accuracy": round(acc, 2),
            "count": n,
            "mean_confidence": round(mean_conf, 2),
        }
    return market_summary


def _train_calibration_models(
    calib_data: Dict[str, List[Tuple[float, int]]],
) -> Dict[str, Dict]:
    """Fit one calibration model per market that has enough samples."""
    calibrator = Calibrator()
    calibration_results: Dict[str, Dict] = {}
    for market_key, pairs in calib_data.items():
        if len(pairs) < _MIN_CALIBRATION_SAMPLES:
            print(f"[Calib] {market_key}: only {len(pairs)} samples — skipping.")
            continue
        # Build a tiny DataFrame to use Calibrator.train_calibration
        calib_df = pd.DataFrame({
            "prob": np.array([p for p, _ in pairs]),
            "actual": np.array([a for _, a in pairs]),
        })
        metrics = calibrator.train_calibration(
            df=calib_df,
            market=market_key,
            prob_col="prob",
            actual_col="actual",
            min_samples=_MIN_CALIBRATION_SAMPLES,
            save=True,
        )
        calibration_results[market_key] = metrics.to_dict()
        print(f" [Calib] {market_key}: Brier={metrics.brier_score:.4f} | "
              f"ECE={metrics.calibration_error:.4f} | n={metrics.sample_count}")
    return calibration_results


def _print_summary_table(summary: Dict[str, Any]) -> None:
    """Print one line per market with its overall accuracy and best band."""
    print("\n" + "=" * 70)
    print(" FINAL SUMMARY TABLE")
    print("=" * 70)
    print(f"{'Market':<8} {'Overall Acc':>12} {'Matches':>8} "
          f"{'Best Band (acc)':>18}")
    print("-" * 70)
    for market_label, _, _, _, _ in _REPORT_MARKETS:
        ms = summary["markets"].get(market_label, {})
        overall = ms.get("overall_accuracy", 0)
        total_m = ms.get("total_matches", 0)
        # Best band = highest accuracy among bands with enough matches
        best_band = "-"
        best_acc = 0.0
        for band, bdata in ms.get("by_confidence_band", {}).items():
            if bdata["count"] >= _MIN_BEST_BAND_MATCHES and bdata["accuracy"] > best_acc:
                best_acc = bdata["accuracy"]
                best_band = f"{band} ({best_acc:.1f}%)"
        print(f"{market_label:<8} {overall:>11.1f}% {total_m:>8d} {best_band:>18s}")


def main():
    """Run the backtest, print the accuracy report and train calibrators.

    Steps: load league IDs, fetch matches and odds, predict every match,
    report accuracy per market, fit calibration models, and write the combined
    report to ``REPORTS_DIR/backtest_results.json``. Returns early (after
    printing an error) when no matches or no usable predictions are available.
    """
    t_start = time.time()
    print("=" * 70)
    print(" V25 Backtest + Calibration Training")
    print(f" Run at: {_utc_now_iso()} UTC")
    print("=" * 70)

    # Step 1 — Load qualified leagues
    league_ids = load_qualified_leagues()

    # Step 1b / 1c — Fetch matches and their odds; the connection is only
    # needed for these two queries and is always closed afterwards.
    conn = psycopg2.connect(get_clean_dsn())
    try:
        matches_df = fetch_matches(conn, league_ids)
        if matches_df.empty:
            print("[ERROR] No matches found. Check DB connection and league IDs.")
            return
        odds_map = fetch_odds_bulk(conn, matches_df["match_id"].tolist())
    finally:
        conn.close()

    # Step 2 / 3 — Feature columns and predictor
    print(f"\n[Step 2] Building feature vectors for {len(matches_df)} matches ...")
    feature_cols = load_feature_cols()
    predictor = load_predictor()

    # Main loop — predict each match, collect results
    print("\n[Loop] Running predictions ...")
    records, calib_data, skipped = _run_backtest(
        matches_df, odds_map, feature_cols, predictor, t_start
    )
    processed = len(records)
    print(f"\n[Loop] Done. Processed={processed}, Skipped={skipped}")
    if not records:
        print("[ERROR] No records to analyze. Exiting.")
        return
    results_df = pd.DataFrame(records)

    # Step 5 — Accuracy report
    print("\n" + "=" * 70)
    print(" ACCURACY REPORT")
    print("=" * 70)
    summary: Dict[str, Any] = {
        "generated_at": _utc_now_iso(),
        "matches_processed": processed,
        "matches_skipped": skipped,
        "markets": {},
    }
    for market_label, correct_col, conf_col, band_col, pick_col in _REPORT_MARKETS:
        summary["markets"][market_label] = _report_market(
            results_df, market_label, correct_col, conf_col, band_col, pick_col
        )

    # Step 6 — Train calibration models
    print("\n" + "=" * 70)
    print(" CALIBRATION TRAINING")
    print("=" * 70)
    calibration_results = _train_calibration_models(calib_data)

    # Step 7 — Save results
    output_path = os.path.join(REPORTS_DIR, "backtest_results.json")
    full_report = {
        **summary,
        "calibration": calibration_results,
        "runtime_seconds": round(time.time() - t_start, 1),
    }
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(full_report, f, indent=2)
    print(f"\n[Step 7] Report saved to {output_path}")

    # Final summary table
    _print_summary_table(summary)
    elapsed_total = time.time() - t_start
    print(f"\nTotal runtime: {elapsed_total:.1f}s")
    print("=" * 70)


if __name__ == "__main__":
    main()