"""
Calibration Backfill Script
============================
Runs V25 model against historical matches (using pre-computed ai_features + odds)
to generate calibration training data, then trains isotonic calibration models.

Usage:
    python ai-engine/scripts/backfill_calibration.py
    python ai-engine/scripts/backfill_calibration.py --limit 5000
    python ai-engine/scripts/backfill_calibration.py --min-samples 50
"""

import argparse
import json
import os
import sys
import time
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
import psycopg2
from psycopg2.extras import RealDictCursor
from dotenv import load_dotenv

# The project packages live one directory above this script; the path must be
# extended before they can be imported.
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, AI_ENGINE_DIR)

from models.v25_ensemble import V25Predictor
from models.calibration import get_calibrator

load_dotenv()


# Goal line for each supported over/under market code.
_OU_LINES = {"OU15": 1.5, "OU25": 2.5, "OU35": 3.5}

# 1X2-style pick -> calibrator key ("0" is accepted as an alias for a draw).
_MS_KEYS = {"1": "ms_home", "x": "ms_draw", "0": "ms_draw", "2": "ms_away"}
_HT_KEYS = {"1": "ht_home", "x": "ht_draw", "0": "ht_draw", "2": "ht_away"}

# Models queried for every match, in the order their samples are emitted.
_MODEL_KEYS = ("ms", "ou25", "btts", "ou15", "ou35", "ht_result")
_THREE_WAY_PICKS = ("1", "X", "2")


def _normalize_pick(pick) -> str:
    """Return *pick* as a stripped, case-folded string ("" for falsy input)."""
    return str(pick or "").strip().casefold()


def _is_over_pick(p: str) -> bool:
    """Return True when the normalized pick *p* names the "over" side."""
    return "over" in p or "üst" in p or "ust" in p


def _resolve_three_way(p: str, home, away) -> Optional[int]:
    """Resolve a 1 / X (or 0) / 2 pick against a home/away score pair."""
    if p == "1":
        return int(home > away)
    if p in {"x", "0"}:
        return int(home == away)
    if p == "2":
        return int(away > home)
    return None


def _result_symbol(home, away) -> str:
    """Return "1", "2" or "x" for a home win, away win or draw."""
    if home > away:
        return "1"
    if away > home:
        return "2"
    return "x"


def resolve_actual(market, pick, score_home, score_away, ht_home, ht_away):
    """Return 1/0 for whether *pick* on *market* won, or None if undecidable.

    Args:
        market: Market code (case-insensitive): MS, OU15, OU25, OU35, BTTS,
            HT, HTFT or DC.
        pick: Selection label, e.g. "1", "X", "Over 2.5", "Yes", "1/X", "1-X".
        score_home, score_away: Full-time goals; None makes the result None.
        ht_home, ht_away: Half-time goals; required for HT and HTFT.

    Returns:
        1 if the pick was correct, 0 if not, None when the market or pick is
        not recognised or the required scores are missing.
    """
    if score_home is None or score_away is None:
        return None

    market = (market or "").upper()
    p = _normalize_pick(pick)

    if market == "MS":
        return _resolve_three_way(p, score_home, score_away)

    if market in _OU_LINES:
        line = _OU_LINES[market]
        total = score_home + score_away
        if _is_over_pick(p):
            return int(total > line)
        if "under" in p or "alt" in p:
            return int(total < line)
        return None

    if market == "BTTS":
        both = score_home > 0 and score_away > 0
        if "yes" in p or "var" in p:
            return int(both)
        if "no" in p or "yok" in p:
            return int(not both)
        return None

    if market == "HT":
        if ht_home is None or ht_away is None:
            return None
        return _resolve_three_way(p, ht_home, ht_away)

    if market == "HTFT":
        if ht_home is None or ht_away is None:
            return None
        parts = [part.strip() for part in p.split("/")]
        # A well-formed pick has exactly one "/"; anything else is
        # unresolvable rather than an error.
        if len(parts) != 2:
            return None
        # Accept "0" as a draw alias, as the MS and HT markets do.
        ht_p, ft_p = ("x" if part == "0" else part for part in parts)
        ht_actual = _result_symbol(ht_home, ht_away)
        ft_actual = _result_symbol(score_home, score_away)
        return int(ht_p == ht_actual and ft_p == ft_actual)

    if market == "DC":
        norm = p.replace("-", "").upper()
        if norm == "1X":
            return int(score_home >= score_away)
        if norm == "X2":
            return int(score_away >= score_home)
        if norm == "12":
            return int(score_home != score_away)
        return None

    return None


def calibrator_key(market, pick):
    """Return the calibration-model key for (*market*, *pick*), or None.

    Only the "over" side of OU markets and the "yes" side of BTTS have a
    calibrator; DC and HTFT share one key each regardless of the pick.
    """
    m = (market or "").upper()
    p = _normalize_pick(pick)

    if m == "MS":
        return _MS_KEYS.get(p)
    if m == "HT":
        return _HT_KEYS.get(p)
    if m == "DC":
        return "dc"
    if m == "HTFT":
        return "ht_ft"
    if m in _OU_LINES:
        # Same "over" spellings as resolve_actual, so every resolvable over
        # pick also maps to a key ("OU25" -> "ou25").
        return m.lower() if _is_over_pick(p) else None
    if m == "BTTS":
        return "btts" if ("yes" in p or "var" in p) else None
    return None


def get_conn():
    """Open a psycopg2 connection to DATABASE_URL using RealDictCursor.

    Raises:
        ValueError: if DATABASE_URL is unset or empty once the "?schema="
            suffix has been removed.
    """
    # Drop the "?schema=" suffix (and everything after it) before handing
    # the URL to psycopg2.
    db_url = os.getenv("DATABASE_URL", "").split("?schema=")[0]
    if not db_url:
        raise ValueError("DATABASE_URL not set")
    return psycopg2.connect(db_url, cursor_factory=RealDictCursor)


# Odds categories whose selection name maps directly to an odds key.
ODD_CAT_MAP = {
    "maç sonucu": {"1": "ms_h", "0": "ms_d", "x": "ms_d", "2": "ms_a"},
    "1. yarı sonucu": {"1": "ht_ms_h", "0": "ht_ms_d", "x": "ht_ms_d", "2": "ht_ms_a"},
}

# Odds categories whose selection name is matched by keyword substring.
ODD_CAT_KEYWORD_MAP = {
    "karşılıklı gol": {"var": "btts_y", "yok": "btts_n"},
    "0,5 alt/üst": {"alt": "ou05_u", "üst": "ou05_o"},
    "1,5 alt/üst": {"alt": "ou15_u", "üst": "ou15_o"},
    "2,5 alt/üst": {"alt": "ou25_u", "üst": "ou25_o"},
    "3,5 alt/üst": {"alt": "ou35_u", "üst": "ou35_o"},
    "ilk yarı 0,5 alt/üst": {"alt": "ht_ou05_u", "üst": "ht_ou05_o"},
    "ilk yarı 1,5 alt/üst": {"alt": "ht_ou15_u", "üst": "ht_ou15_o"},
}


def load_matches(cur, limit: int) -> List[Dict]:
    """Fetch up to *limit* finished football matches that have ai_features.

    Rows are ordered by ``mst_utc`` descending and carry the match id plus
    full-time and half-time scores.
    """
    cur.execute(
        """
        SELECT m.id, m.score_home, m.score_away,
               m.ht_score_home, m.ht_score_away
        FROM matches m
        JOIN football_ai_features f ON f.match_id = m.id
        WHERE m.status = 'FT'
          AND m.sport = 'football'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
        ORDER BY m.mst_utc DESC
        LIMIT %s
        """,
        (limit,),
    )
    return cur.fetchall()


def load_ai_features_batch(cur, match_ids: List[str]) -> Dict[str, Dict]:
    """Load feature rows for *match_ids*, keyed by ``str(match_id)``.

    Columns are aliased in SQL to the names used as feature lookups by
    run_backfill; features with no stored source column are filled with
    constant zeros.
    """
    if not match_ids:
        return {}

    # Only "%s" placeholders are interpolated into the SQL text; the ids
    # themselves are passed as bound parameters.
    ph = ",".join(["%s"] * len(match_ids))
    # NOTE(review): h2h_draw_rate is computed as
    # (1.0 - h2h_home_win_rate - 0.33), which looks like a placeholder
    # approximation -- confirm it matches what the model was trained on.
    cur.execute(
        f"""
        SELECT match_id,
               home_elo AS home_overall_elo,
               away_elo AS away_overall_elo,
               elo_diff,
               home_home_elo,
               away_away_elo,
               home_form_elo,
               away_form_elo,
               (home_form_elo - away_form_elo) AS form_elo_diff,
               home_goals_avg_5 AS home_goals_avg,
               home_conceded_avg_5 AS home_conceded_avg,
               away_goals_avg_5 AS away_goals_avg,
               away_conceded_avg_5 AS away_conceded_avg,
               home_clean_sheet_rate,
               away_clean_sheet_rate,
               home_scoring_rate,
               away_scoring_rate,
               home_win_streak AS home_winning_streak,
               away_win_streak AS away_winning_streak,
               0 AS home_unbeaten_streak,
               0 AS away_unbeaten_streak,
               h2h_total AS h2h_total_matches,
               h2h_home_win_rate,
               (1.0 - h2h_home_win_rate - 0.33) AS h2h_draw_rate,
               h2h_avg_goals,
               h2h_btts_rate,
               h2h_over25_rate,
               home_avg_possession,
               away_avg_possession,
               home_avg_shots_on_target,
               away_avg_shots_on_target,
               home_shot_conversion,
               away_shot_conversion,
               0.0 AS home_avg_corners,
               0.0 AS away_avg_corners,
               implied_home,
               implied_draw,
               implied_away,
               league_avg_goals,
               0.0 AS league_zero_goal_rate,
               0.0 AS home_xga,
               0.0 AS away_xga,
               0.0 AS upset_atmosphere,
               0.0 AS upset_motivation,
               0.0 AS upset_fatigue,
               0.0 AS upset_potential,
               referee_home_bias,
               referee_avg_goals,
               referee_avg_cards AS referee_cards_total,
               0.0 AS referee_avg_yellow,
               0.0 AS referee_experience,
               0.0 AS home_momentum_score,
               0.0 AS away_momentum_score,
               0.0 AS momentum_diff,
               0.0 AS home_squad_quality,
               0.0 AS away_squad_quality,
               0.0 AS squad_diff,
               0 AS home_key_players,
               0 AS away_key_players,
               missing_players_impact AS home_missing_impact,
               0.0 AS away_missing_impact,
               home_goals_avg_5 AS home_goals_form,
               away_goals_avg_5 AS away_goals_form
        FROM football_ai_features
        WHERE match_id IN ({ph})
        """,
        match_ids,
    )
    return {str(row["match_id"]): dict(row) for row in cur.fetchall()}


def load_odds_batch(cur, match_ids: List[str]) -> Dict[str, Dict[str, float]]:
    """Load odds for *match_ids* as ``{match_id: {odds_key: value}}``.

    Category and selection names are mapped to odds keys through
    ODD_CAT_MAP (exact selection) and ODD_CAT_KEYWORD_MAP (keyword in
    selection). Missing or non-positive odds are ignored. A match with odds
    rows but no recognised category still gets an (empty) entry.
    """
    if not match_ids:
        return {}

    ph = ",".join(["%s"] * len(match_ids))
    cur.execute(
        f"""
        SELECT oc.match_id, oc.name AS cat_name,
               os.name AS sel_name, os.odd_value
        FROM odd_selections os
        JOIN odd_categories oc ON os.odd_category_db_id = oc.db_id
        WHERE oc.match_id IN ({ph})
        """,
        match_ids,
    )

    odds: Dict[str, Dict[str, float]] = {}
    for row in cur.fetchall():
        mid = str(row["match_id"])
        cat = (row["cat_name"] or "").lower().strip()
        sel = (row["sel_name"] or "").strip().lower()
        val = float(row["odd_value"]) if row["odd_value"] else 0
        if val <= 0:
            continue

        match_odds = odds.setdefault(mid, {})
        if cat in ODD_CAT_MAP:
            key = ODD_CAT_MAP[cat].get(sel)
            if key:
                match_odds[key] = val
        else:
            # Keyword categories: the first keyword found in the selection
            # name decides the odds key.
            for keyword, key in ODD_CAT_KEYWORD_MAP.get(cat, {}).items():
                if keyword in sel:
                    match_odds[key] = val
                    break
    return odds


MARKETS_TO_PREDICT = [
    ("MS", "1", lambda p: p[0]),
    ("MS", "X", lambda p: p[1]),
    ("MS", "2", lambda p: p[2]),
    ("OU25", "Over 2.5", lambda p: p[0]),
    ("BTTS", "Yes", lambda p: p[0]),
    ("OU15", "Over 1.5", lambda p: p[0]),
    ("OU35", "Over 3.5", lambda p: p[0]),
    ("HT", "1", lambda p: p[0]),
    ("HT", "X", lambda p: p[1]),
    ("HT", "2", lambda p: p[2]),
]


def _build_feature_dict(feature_cols, feat_row: Dict, odds_row: Dict) -> Dict[str, float]:
    """Assemble the model input for one match from its feature and odds rows.

    Stored (non-NULL) features win; ``odds_*`` columns fall back to the odds
    row, ``*_present`` columns become 1.0/0.0 flags, and everything else
    defaults to 0.0. When all three match-result odds are available, the
    implied probabilities are recomputed from them (normalised to sum to 1).
    """
    feat_dict: Dict[str, float] = {}
    for col in feature_cols:
        if col in feat_row and feat_row[col] is not None:
            feat_dict[col] = float(feat_row[col])
        elif col.startswith("odds_") and not col.endswith("_present"):
            odds_key = col.replace("odds_", "")
            feat_dict[col] = float(odds_row.get(odds_key, 0))
        elif col.endswith("_present"):
            odds_key = col.replace("_present", "").replace("odds_", "")
            feat_dict[col] = 1.0 if odds_row.get(odds_key, 0) > 0 else 0.0
        else:
            feat_dict[col] = 0.0

    # Match-result odds from the odds table override whatever was stored.
    for odds_key in ("ms_h", "ms_d", "ms_a"):
        if odds_row.get(odds_key, 0) > 0:
            feat_dict[f"odds_{odds_key}"] = odds_row[odds_key]

    ms_h = feat_dict.get("odds_ms_h", 0)
    ms_d = feat_dict.get("odds_ms_d", 0)
    ms_a = feat_dict.get("odds_ms_a", 0)
    if ms_h > 0 and ms_d > 0 and ms_a > 0:
        raw_sum = 1 / ms_h + 1 / ms_d + 1 / ms_a
        feat_dict["implied_home"] = (1 / ms_h) / raw_sum
        feat_dict["implied_draw"] = (1 / ms_d) / raw_sum
        feat_dict["implied_away"] = (1 / ms_a) / raw_sum
    return feat_dict


def _iter_match_samples(predictor, mid: str, feat_dict: Dict[str, float], match_row: Dict):
    """Yield one calibration sample dict per resolvable (market, pick).

    Each available model in _MODEL_KEYS is queried through
    ``predictor.predict_market``; its probabilities are paired with the
    actual outcome derived from the scores in *match_row*.
    """
    sh = match_row["score_home"]
    sa = match_row["score_away"]
    ht_h = match_row.get("ht_score_home")
    ht_a = match_row.get("ht_score_away")

    for model_key in _MODEL_KEYS:
        if model_key not in predictor.models:
            continue
        probs = predictor.predict_market(model_key, feat_dict)
        if probs is None:
            continue

        if model_key == "ms":
            candidates = [("MS", "1", probs[0]), ("MS", "X", probs[1]), ("MS", "2", probs[2])]
        elif model_key == "ht_result":
            if ht_h is None or ht_a is None:
                continue
            candidates = [("HT", "1", probs[0]), ("HT", "X", probs[1]), ("HT", "2", probs[2])]
        elif model_key in ("ou25", "ou15", "ou35"):
            # NOTE(review): probs[0] is used as the "over" probability --
            # confirm against predict_market's output order.
            over_prob = float(probs[0]) if len(probs) > 0 else 0.5
            candidates = [(model_key.upper(), "Over", over_prob)]
        else:  # btts
            yes_prob = float(probs[0]) if len(probs) > 0 else 0.5
            candidates = [("BTTS", "Yes", yes_prob)]

        for market, pick, prob in candidates:
            actual = resolve_actual(market, pick, sh, sa, ht_h, ht_a)
            key = calibrator_key(market, pick)
            if actual is not None and key:
                yield {
                    "match_id": mid,
                    "market": market,
                    "pick": pick,
                    "key": key,
                    "raw_prob": float(prob),
                    "actual": int(actual),
                }


def _train_calibrators(df: pd.DataFrame, min_samples: int) -> Dict[str, Any]:
    """Train and save one calibration model per key in *df*.

    Keys with fewer than *min_samples* usable rows (deduplicated per match,
    probabilities strictly inside (0, 1)) are reported as skipped.
    """
    calibrator = get_calibrator()
    results: Dict[str, Any] = {}
    for key in sorted(df["key"].unique()):
        sub = df[df["key"] == key].copy()
        sub = sub.drop_duplicates(subset=["match_id", "key"], keep="first")
        sub = sub.dropna(subset=["raw_prob", "actual"])
        sub = sub[(sub["raw_prob"] > 0.0) & (sub["raw_prob"] < 1.0)]

        n = len(sub)
        if n < min_samples:
            results[key] = {"status": "skipped", "samples": n}
            continue

        metrics = calibrator.train_calibration(
            df=sub,
            market=key,
            prob_col="raw_prob",
            actual_col="actual",
            min_samples=min_samples,
            save=True,
        )
        results[key] = {
            "status": "trained",
            "samples": metrics.sample_count,
            "brier": round(metrics.brier_score, 4),
            "ece": round(metrics.calibration_error, 4),
            "mean_predicted": round(metrics.mean_predicted, 4),
            "mean_actual": round(metrics.mean_actual, 4),
        }
    return results


def _print_results(results: Dict[str, Any]) -> None:
    """Print the per-key calibration summary table."""
    print("\n" + "=" * 70)
    print("CALIBRATION RESULTS")
    print("=" * 70)
    print(f"{'market':<14} {'status':<10} {'n':<8} {'brier':<9} {'ece':<8} {'pred_avg':<9} {'actual_avg'}")
    print("-" * 70)
    for key, info in sorted(results.items()):
        if info["status"] == "trained":
            print(
                f"{key:<14} {'OK':<10} {info['samples']:<8} "
                f"{info['brier']:<9.4f} {info['ece']:<8.4f} "
                f"{info['mean_predicted']:<9.4f} {info['mean_actual']}"
            )
        else:
            print(f"{key:<14} {'SKIP':<10} {info['samples']:<8}")
    print("=" * 70)


def run_backfill(args):
    """Generate calibration samples from historical matches and train models.

    Args:
        args: Parsed CLI namespace providing ``limit`` (max matches to load)
            and ``min_samples`` (minimum rows per key to train a calibrator).
    """
    print("=" * 70)
    print("CALIBRATION BACKFILL")
    print("=" * 70)

    conn = get_conn()
    try:
        cur = conn.cursor(cursor_factory=RealDictCursor)
        try:
            t0 = time.time()
            print(f"Loading matches (limit={args.limit})...")
            matches = load_matches(cur, args.limit)
            print(f" Found {len(matches)} finished matches with ai_features")
            match_ids = [str(m["id"]) for m in matches]
            match_map = {str(m["id"]): m for m in matches}

            print("Loading ai_features...")
            features_map = load_ai_features_batch(cur, match_ids)
            print(f" Loaded features for {len(features_map)} matches")

            print("Loading odds...")
            odds_map = load_odds_batch(cur, match_ids)
            print(f" Loaded odds for {len(odds_map)} matches")
            print(f"Data loading: {time.time() - t0:.1f}s")
        finally:
            cur.close()
    finally:
        # Everything is fetched into memory above, so the connection is
        # released before the long prediction/training phase -- and is
        # closed even if loading fails.
        conn.close()

    print("\nLoading V25 model...")
    predictor = V25Predictor()
    predictor.load_models()
    feature_cols = predictor.FEATURE_COLS

    samples: List[Dict[str, Any]] = []
    skipped = 0    # matches without features + matches whose prediction failed
    errors = 0     # prediction failures only; throttles the error output
    processed = 0

    print(f"\nRunning predictions on {len(match_ids)} matches...")
    t1 = time.time()
    for i, mid in enumerate(match_ids):
        if mid not in features_map:
            skipped += 1
            continue

        feat_dict = _build_feature_dict(feature_cols, features_map[mid], odds_map.get(mid, {}))

        try:
            for sample in _iter_match_samples(predictor, mid, feat_dict, match_map[mid]):
                samples.append(sample)
            processed += 1
        except Exception as e:
            # Best-effort batch job: one failing match must not abort the
            # run. Only the first few failures are printed.
            skipped += 1
            errors += 1
            if errors <= 5:
                print(f" Error on {mid}: {e}")

        if (i + 1) % 5000 == 0:
            elapsed = time.time() - t1
            rate = (i + 1) / elapsed
            print(f" Processed {i+1}/{len(match_ids)} ({rate:.0f} matches/s)")

    elapsed = time.time() - t1
    print(f"\nPrediction complete: {processed} matches, {skipped} skipped, {elapsed:.1f}s")

    if not samples:
        print("No calibration samples generated!")
        return

    df = pd.DataFrame(samples)
    print(f"\nTotal calibration samples: {len(df)}")
    print(f"Unique matches: {df['match_id'].nunique()}")
    print("\nPer-key counts:")
    for key, count in df["key"].value_counts().items():
        print(f" {key:<14} {count}")

    print(f"\nTraining isotonic calibration models (min_samples={args.min_samples})...")
    results = _train_calibrators(df, args.min_samples)
    _print_results(results)

    total_time = time.time() - t0
    print(f"\nTotal time: {total_time:.1f}s")
    print(f"Calibration models saved to: {os.path.join(AI_ENGINE_DIR, 'models', 'calibration')}/")


def main():
    """Parse command-line arguments and run the backfill."""
    parser = argparse.ArgumentParser(description="Backfill calibration from historical matches")
    parser.add_argument("--limit", type=int, default=50000,
                        help="Max matches to process (default: 50000)")
    parser.add_argument("--min-samples", type=int, default=100,
                        help="Min samples per market for calibration (default: 100)")
    args = parser.parse_args()
    run_backfill(args)


if __name__ == "__main__":
    main()