511 lines
19 KiB
Python
511 lines
19 KiB
Python
"""
Calibration Backfill Script
============================
Runs the V25 model against historical matches (using pre-computed ai_features + odds)
to generate calibration training data, then trains isotonic calibration models.

Usage:
    python ai-engine/scripts/backfill_calibration.py
    python ai-engine/scripts/backfill_calibration.py --limit 5000
    python ai-engine/scripts/backfill_calibration.py --min-samples 50
"""
|
||
|
||
import argparse
|
||
import json
|
||
import os
|
||
import sys
|
||
import time
|
||
from typing import Any, Dict, List, Optional, Tuple
|
||
|
||
import numpy as np
|
||
import pandas as pd
|
||
import psycopg2
|
||
from psycopg2.extras import RealDictCursor
|
||
from dotenv import load_dotenv
|
||
|
||
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||
sys.path.insert(0, AI_ENGINE_DIR)
|
||
|
||
from models.v25_ensemble import V25Predictor
|
||
from models.calibration import get_calibrator
|
||
|
||
load_dotenv()
|
||
|
||
|
||
def _normalize_pick(pick) -> str:
    """Return *pick* as a trimmed, case-folded string suitable for comparisons.

    ``None`` maps to the empty string. Every other value is stringified first,
    so a numeric draw code ``0`` becomes ``"0"`` rather than collapsing to
    ``""`` the way a plain ``pick or ""`` would.
    """
    if pick is None:
        return ""
    return str(pick).strip().casefold()
|
||
|
||
|
||
def _match_outcome(home, away) -> str:
    """Return "1" for a home win, "2" for an away win and "x" for a draw."""
    if home > away:
        return "1"
    if away > home:
        return "2"
    return "x"


def resolve_actual(market, pick, score_home, score_away, ht_home, ht_away):
    """Grade a pick against the final (and, where needed, half-time) score.

    Args:
        market: Market code, compared case-insensitively. Handled values are
            MS, OU15, OU25, OU35, BTTS, HT, HTFT and DC.
        pick: Selection label; it is normalised with ``_normalize_pick``.
            Draws may be written as "x" or "0" in MS, HT, HTFT and DC picks.
        score_home: Full-time home goals.
        score_away: Full-time away goals.
        ht_home: Half-time home goals, or None when unknown.
        ht_away: Half-time away goals, or None when unknown.

    Returns:
        1 when the pick won, 0 when it lost, and None when it cannot be
        graded: missing full-time score, missing half-time score for HT/HTFT,
        an unknown market, or a pick label that is not recognised.
    """
    if score_home is None or score_away is None:
        return None

    market = (market or "").upper()
    p = _normalize_pick(pick)

    if market == "MS":
        side = "x" if p == "0" else p
        if side not in {"1", "x", "2"}:
            return None
        return int(side == _match_outcome(score_home, score_away))

    ou_line = {"OU15": 1.5, "OU25": 2.5, "OU35": 3.5}.get(market)
    if ou_line is not None:
        total = score_home + score_away
        if "over" in p or "üst" in p or "ust" in p:
            return int(total > ou_line)
        if "under" in p or "alt" in p:
            return int(total < ou_line)
        return None

    if market == "BTTS":
        both = score_home > 0 and score_away > 0
        if "yes" in p or "var" in p:
            return int(both)
        if "no" in p or "yok" in p:
            return int(not both)
        return None

    if market == "HT":
        if ht_home is None or ht_away is None:
            return None
        side = "x" if p == "0" else p
        if side not in {"1", "x", "2"}:
            return None
        return int(side == _match_outcome(ht_home, ht_away))

    if market == "HTFT":
        if ht_home is None or ht_away is None:
            return None
        # Exactly two halves are required; anything else (no slash, or extra
        # slashes) is ungradable rather than an unpacking error.
        halves = [part.strip() for part in p.split("/")]
        if len(halves) != 2:
            return None
        # Accept "0" as a draw here too, as the MS and HT branches do.
        ht_pick, ft_pick = ("x" if half == "0" else half for half in halves)
        return int(
            ht_pick == _match_outcome(ht_home, ht_away)
            and ft_pick == _match_outcome(score_home, score_away)
        )

    if market == "DC":
        # "1-X", "1X", "1-0" and "10" all normalise to "1x".
        norm = p.replace("-", "").replace("0", "x")
        if norm == "1x":
            return int(score_home >= score_away)
        if norm == "x2":
            return int(score_away >= score_home)
        if norm == "12":
            return int(score_home != score_away)
        return None

    return None
|
||
|
||
|
||
def calibrator_key(market, pick):
    """Map a (market, pick) pair to the name of its calibration model.

    Args:
        market: Market code, compared case-insensitively.
        pick: Selection label; it is normalised with ``_normalize_pick``.

    Returns:
        One of "ms_home"/"ms_draw"/"ms_away", "ht_home"/"ht_draw"/"ht_away",
        "dc", "ht_ft", "ou15"/"ou25"/"ou35" or "btts"; or None when there is
        no calibrator for the pair. Over/under markets only map their "over"
        side and BTTS only its "yes" side, so the opposite picks return None.
    """
    m = (market or "").upper()
    p = _normalize_pick(pick)

    if m in ("MS", "HT"):
        suffix = {"1": "home", "x": "draw", "0": "draw", "2": "away"}.get(p)
        return f"{m.lower()}_{suffix}" if suffix else None

    if m == "DC":
        return "dc"

    if m == "HTFT":
        return "ht_ft"

    if m in ("OU15", "OU25", "OU35"):
        # "ust" (ASCII spelling of "üst") is accepted so that every over pick
        # that resolve_actual can grade also receives a key.
        is_over = any(token in p for token in ("over", "üst", "ust"))
        return m.lower() if is_over else None

    if m == "BTTS":
        return "btts" if ("yes" in p or "var" in p) else None

    return None
|
||
|
||
|
||
def _strip_schema_param(db_url: str) -> str:
    """Return *db_url* without a ``schema`` query parameter.

    Only the ``schema`` parameter is removed; any other query parameters
    (for example ``sslmode``) are kept verbatim and in their original order,
    wherever ``schema`` appears in the query string.
    """
    base, sep, query = db_url.partition("?")
    if not sep:
        return db_url
    kept = [
        param
        for param in query.split("&")
        if param and param.split("=", 1)[0] != "schema"
    ]
    return f"{base}?{'&'.join(kept)}" if kept else base


def get_conn():
    """Open a psycopg2 connection to the database named by ``DATABASE_URL``.

    The ``schema`` query parameter is stripped from the URL before it is
    handed to psycopg2.
    NOTE(review): presumably ``schema`` is an ORM-style extension that the
    PostgreSQL driver rejects — confirm against the deployment's URL format.

    Returns:
        A psycopg2 connection whose default cursor factory is RealDictCursor.

    Raises:
        ValueError: If ``DATABASE_URL`` is unset or empty.
    """
    db_url = os.getenv("DATABASE_URL", "")
    if not db_url:
        raise ValueError("DATABASE_URL not set")
    return psycopg2.connect(_strip_schema_param(db_url), cursor_factory=RealDictCursor)
|
||
|
||
|
||
# Odds categories whose selection name is matched exactly (after lowercasing).
# Maps lowercase category name -> {selection name -> odds key}; a draw may be
# labelled either "0" or "x".
ODD_CAT_MAP = {
    category: {
        "1": f"{prefix}_h",
        "0": f"{prefix}_d",
        "x": f"{prefix}_d",
        "2": f"{prefix}_a",
    }
    for category, prefix in (
        ("maç sonucu", "ms"),
        ("1. yarı sonucu", "ht_ms"),
    )
}

# Odds categories whose selection is recognised by a keyword contained in the
# selection name. Maps lowercase category name -> {keyword -> odds key}.
ODD_CAT_KEYWORD_MAP = {
    "karşılıklı gol": {"var": "btts_y", "yok": "btts_n"},
    **{
        f"{label} alt/üst": {"alt": f"{code}_u", "üst": f"{code}_o"}
        for label, code in (
            ("0,5", "ou05"),
            ("1,5", "ou15"),
            ("2,5", "ou25"),
            ("3,5", "ou35"),
            ("ilk yarı 0,5", "ht_ou05"),
            ("ilk yarı 1,5", "ht_ou15"),
        )
    },
}
|
||
|
||
|
||
def load_matches(cur, limit: int) -> List[Dict]:
    """Fetch the most recent finished football matches that have AI features.

    Args:
        cur: Open database cursor used to run the query.
        limit: Maximum number of rows to return.

    Returns:
        The rows from ``cur.fetchall()``: match id, full-time score and
        half-time score, ordered by ``mst_utc`` descending. Only matches with
        status 'FT', sport 'football', a non-null full-time score and a row
        in ``football_ai_features`` are selected.
    """
    query = """
        SELECT m.id, m.score_home, m.score_away,
               m.ht_score_home, m.ht_score_away
        FROM matches m
        JOIN football_ai_features f ON f.match_id = m.id
        WHERE m.status = 'FT'
          AND m.sport = 'football'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
        ORDER BY m.mst_utc DESC
        LIMIT %s
    """
    params = (limit,)
    cur.execute(query, params)
    rows = cur.fetchall()
    return rows
|
||
|
||
|
||
def load_ai_features_batch(cur, match_ids: List[str]) -> Dict[str, Dict]:
    """Load pre-computed AI features for a batch of matches.

    Columns of ``football_ai_features`` are aliased in SQL to the names used
    as keys of the returned row dicts; several entries are filled with
    constant 0 / 0.0 literals directly in the query.

    NOTE(review): ``h2h_draw_rate`` is derived as
    ``1.0 - h2h_home_win_rate - 0.33``, which goes negative whenever the home
    win rate exceeds 0.67, and ``home_missing_impact`` is taken from the
    single ``missing_players_impact`` column while ``away_missing_impact`` is
    a constant 0.0 — confirm both are intended before changing them.

    Args:
        cur: Open database cursor used to run the query.
        match_ids: Match identifiers to look up.

    Returns:
        Mapping of ``str(match_id)`` to that match's feature row as a plain
        dict. An empty ``match_ids`` returns ``{}`` without running a query.
    """
    if not match_ids:
        return {}

    placeholders = ",".join(["%s"] * len(match_ids))
    query = f"""
        SELECT match_id,
               home_elo AS home_overall_elo,
               away_elo AS away_overall_elo,
               elo_diff,
               home_home_elo, away_away_elo,
               home_form_elo, away_form_elo,
               (home_form_elo - away_form_elo) AS form_elo_diff,
               home_goals_avg_5 AS home_goals_avg,
               home_conceded_avg_5 AS home_conceded_avg,
               away_goals_avg_5 AS away_goals_avg,
               away_conceded_avg_5 AS away_conceded_avg,
               home_clean_sheet_rate, away_clean_sheet_rate,
               home_scoring_rate, away_scoring_rate,
               home_win_streak AS home_winning_streak,
               away_win_streak AS away_winning_streak,
               0 AS home_unbeaten_streak,
               0 AS away_unbeaten_streak,
               h2h_total AS h2h_total_matches,
               h2h_home_win_rate,
               (1.0 - h2h_home_win_rate - 0.33) AS h2h_draw_rate,
               h2h_avg_goals,
               h2h_btts_rate, h2h_over25_rate,
               home_avg_possession, away_avg_possession,
               home_avg_shots_on_target, away_avg_shots_on_target,
               home_shot_conversion, away_shot_conversion,
               0.0 AS home_avg_corners, 0.0 AS away_avg_corners,
               implied_home, implied_draw, implied_away,
               league_avg_goals,
               0.0 AS league_zero_goal_rate,
               0.0 AS home_xga, 0.0 AS away_xga,
               0.0 AS upset_atmosphere, 0.0 AS upset_motivation,
               0.0 AS upset_fatigue, 0.0 AS upset_potential,
               referee_home_bias, referee_avg_goals,
               referee_avg_cards AS referee_cards_total,
               0.0 AS referee_avg_yellow,
               0.0 AS referee_experience,
               0.0 AS home_momentum_score, 0.0 AS away_momentum_score,
               0.0 AS momentum_diff,
               0.0 AS home_squad_quality, 0.0 AS away_squad_quality,
               0.0 AS squad_diff,
               0 AS home_key_players, 0 AS away_key_players,
               missing_players_impact AS home_missing_impact,
               0.0 AS away_missing_impact,
               home_goals_avg_5 AS home_goals_form,
               away_goals_avg_5 AS away_goals_form
        FROM football_ai_features
        WHERE match_id IN ({placeholders})
    """
    cur.execute(query, match_ids)

    features_by_match: Dict[str, Dict] = {}
    for record in cur.fetchall():
        features_by_match[str(record["match_id"])] = dict(record)
    return features_by_match
|
||
|
||
|
||
def load_odds_batch(cur, match_ids: List[str]) -> Dict[str, Dict[str, float]]:
    """Load bookmaker odds for a batch of matches, keyed by odds code.

    Each odds row is mapped to a code through ``ODD_CAT_MAP`` (exact
    selection name) or ``ODD_CAT_KEYWORD_MAP`` (first keyword contained in
    the selection name). Rows with a missing or non-positive odd value are
    ignored, and rows whose category or selection is not recognised
    contribute no code.

    Args:
        cur: Open database cursor used to run the query.
        match_ids: Match identifiers to look up.

    Returns:
        Mapping of ``str(match_id)`` to ``{odds code: decimal odd}``. A match
        that has valid odds rows but no recognised category still gets an
        (empty) entry. An empty ``match_ids`` returns ``{}`` without running
        a query.
    """
    if not match_ids:
        return {}

    def _fold(text) -> str:
        # str.lower() turns the Turkish capital "İ" into "i" followed by
        # U+0307 (combining dot above); drop that mark so a category stored
        # as "İlk Yarı ..." matches the plain "ilk yarı ..." lookup keys.
        return (text or "").lower().replace("\u0307", "").strip()

    placeholders = ",".join(["%s"] * len(match_ids))
    cur.execute(f"""
        SELECT oc.match_id, oc.name AS cat_name,
               os.name AS sel_name, os.odd_value
        FROM odd_selections os
        JOIN odd_categories oc ON os.odd_category_db_id = oc.db_id
        WHERE oc.match_id IN ({placeholders})
    """, match_ids)

    odds: Dict[str, Dict[str, float]] = {}
    for row in cur.fetchall():
        val = float(row["odd_value"]) if row["odd_value"] else 0
        if val <= 0:
            continue

        match_odds = odds.setdefault(str(row["match_id"]), {})
        cat = _fold(row["cat_name"])
        sel = _fold(row["sel_name"])

        exact_map = ODD_CAT_MAP.get(cat)
        if exact_map is not None:
            key = exact_map.get(sel)
            if key:
                match_odds[key] = val
            continue

        # Categories are matched by exact name, so a direct lookup replaces
        # scanning every pattern; the first matching keyword wins.
        for keyword, key in ODD_CAT_KEYWORD_MAP.get(cat, {}).items():
            if keyword in sel:
                match_odds[key] = val
                break

    return odds
|
||
|
||
|
||
# (market code, pick label, extractor) triples. Each extractor takes a
# probability sequence and returns the entry at the index belonging to that
# pick: indices 0/1/2 for the three-way MS and HT picks, index 0 otherwise.
MARKETS_TO_PREDICT = [
    # Full-time result
    ("MS", "1", lambda probs: probs[0]),
    ("MS", "X", lambda probs: probs[1]),
    ("MS", "2", lambda probs: probs[2]),
    # Goal-based markets
    ("OU25", "Over 2.5", lambda probs: probs[0]),
    ("BTTS", "Yes", lambda probs: probs[0]),
    ("OU15", "Over 1.5", lambda probs: probs[0]),
    ("OU35", "Over 3.5", lambda probs: probs[0]),
    # Half-time result
    ("HT", "1", lambda probs: probs[0]),
    ("HT", "X", lambda probs: probs[1]),
    ("HT", "2", lambda probs: probs[2]),
]
|
||
|
||
|
||
# (model key in ``predictor.models``, market code, pick labels in the order of
# the model's probability vector). Single-pick markets read index 0 only.
_MODEL_MARKETS = (
    ("ms", "MS", ("1", "X", "2")),
    ("ou25", "OU25", ("Over",)),
    ("btts", "BTTS", ("Yes",)),
    ("ou15", "OU15", ("Over",)),
    ("ou35", "OU35", ("Over",)),
    ("ht_result", "HT", ("1", "X", "2")),
)


def _build_feature_dict(feature_cols, feat_row, odds_row):
    """Assemble the model input for one match from its feature and odds rows."""
    feat_dict = {}
    for col in feature_cols:
        value = feat_row.get(col)
        if value is not None:
            feat_dict[col] = float(value)
        elif col.startswith("odds_") and not col.endswith("_present"):
            # Raw odds column: fall back to the loaded odds, 0 when absent.
            feat_dict[col] = float(odds_row.get(col.replace("odds_", ""), 0))
        elif col.endswith("_present"):
            # Availability flag for the corresponding odds code.
            odds_key = col.replace("_present", "").replace("odds_", "")
            feat_dict[col] = 1.0 if odds_row.get(odds_key, 0) > 0 else 0.0
        else:
            feat_dict[col] = 0.0

    # Loaded 1X2 odds always override whatever the feature row carried.
    for code in ("ms_h", "ms_d", "ms_a"):
        if odds_row.get(code, 0) > 0:
            feat_dict[f"odds_{code}"] = odds_row[code]

    ms_h = feat_dict.get("odds_ms_h", 0)
    ms_d = feat_dict.get("odds_ms_d", 0)
    ms_a = feat_dict.get("odds_ms_a", 0)
    if ms_h > 0 and ms_d > 0 and ms_a > 0:
        # Implied probabilities with the bookmaker margin normalised away.
        raw_sum = 1 / ms_h + 1 / ms_d + 1 / ms_a
        feat_dict["implied_home"] = (1 / ms_h) / raw_sum
        feat_dict["implied_draw"] = (1 / ms_d) / raw_sum
        feat_dict["implied_away"] = (1 / ms_a) / raw_sum
    return feat_dict


def _iter_match_samples(predictor, mid, feat_dict, match_row):
    """Yield one calibration sample per gradable (market, pick) of a match."""
    sh = match_row["score_home"]
    sa = match_row["score_away"]
    ht_h = match_row.get("ht_score_home")
    ht_a = match_row.get("ht_score_away")

    for model_key, market, picks in _MODEL_MARKETS:
        if model_key not in predictor.models:
            continue
        # Half-time picks cannot be graded without a half-time score, so the
        # prediction is skipped up front.
        if market == "HT" and (ht_h is None or ht_a is None):
            continue

        probs = predictor.predict_market(model_key, feat_dict)
        if probs is None:
            continue

        if len(picks) == 1:
            pick_probs = [(picks[0], float(probs[0]) if len(probs) > 0 else 0.5)]
        else:
            pick_probs = [(pick, float(probs[idx])) for idx, pick in enumerate(picks)]

        for pick, prob in pick_probs:
            actual = resolve_actual(market, pick, sh, sa, ht_h, ht_a)
            key = calibrator_key(market, pick)
            if actual is not None and key:
                yield {
                    "match_id": mid,
                    "market": market,
                    "pick": pick,
                    "key": key,
                    "raw_prob": prob,
                    "actual": int(actual),
                }


def _train_calibrators(df, min_samples):
    """Train and save one calibration model per key; return per-key results."""
    calibrator = get_calibrator()
    results: Dict[str, Any] = {}

    for key in sorted(df["key"].unique()):
        sub = df[df["key"] == key].copy()
        sub = sub.drop_duplicates(subset=["match_id", "key"], keep="first")
        sub = sub.dropna(subset=["raw_prob", "actual"])
        # Probabilities of exactly 0 or 1 are excluded from training.
        sub = sub[(sub["raw_prob"] > 0.0) & (sub["raw_prob"] < 1.0)]

        n = len(sub)
        if n < min_samples:
            results[key] = {"status": "skipped", "samples": n}
            continue

        metrics = calibrator.train_calibration(
            df=sub,
            market=key,
            prob_col="raw_prob",
            actual_col="actual",
            min_samples=min_samples,
            save=True,
        )
        results[key] = {
            "status": "trained",
            "samples": metrics.sample_count,
            "brier": round(metrics.brier_score, 4),
            "ece": round(metrics.calibration_error, 4),
            "mean_predicted": round(metrics.mean_predicted, 4),
            "mean_actual": round(metrics.mean_actual, 4),
        }
    return results


def _print_results(results):
    """Print the per-key calibration results table."""
    print("\n" + "=" * 70)
    print("CALIBRATION RESULTS")
    print("=" * 70)
    print(f"{'market':<14} {'status':<10} {'n':<8} {'brier':<9} {'ece':<8} {'pred_avg':<9} {'actual_avg'}")
    print("-" * 70)
    for key, info in sorted(results.items()):
        if info["status"] == "trained":
            print(
                f"{key:<14} {'OK':<10} {info['samples']:<8} "
                f"{info['brier']:<9.4f} {info['ece']:<8.4f} "
                f"{info['mean_predicted']:<9.4f} {info['mean_actual']}"
            )
        else:
            print(f"{key:<14} {'SKIP':<10} {info['samples']:<8}")
    print("=" * 70)


def run_backfill(args):
    """Generate calibration samples from historical matches and train calibrators.

    Steps: load finished matches, their AI features and odds from the
    database; run every loaded V25 market model on each match; grade each
    predicted pick against the real score; then train and save one
    calibration model per calibrator key that has at least
    ``args.min_samples`` usable samples. Progress and results are printed.

    The database connection is closed as soon as loading finishes (and on
    any loading error), because prediction and training do not use it.
    A match whose prediction raises is counted as skipped and the run
    continues; only the first five such errors are printed.

    Args:
        args: Parsed options providing ``limit`` (maximum matches to load)
            and ``min_samples`` (minimum samples per calibrator key).
    """
    print("=" * 70)
    print("CALIBRATION BACKFILL")
    print("=" * 70)

    conn = get_conn()
    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            t0 = time.time()
            print(f"Loading matches (limit={args.limit})...")
            matches = load_matches(cur, args.limit)
            print(f"  Found {len(matches)} finished matches with ai_features")

            match_ids = [str(m["id"]) for m in matches]
            match_map = {str(m["id"]): m for m in matches}

            print("Loading ai_features...")
            features_map = load_ai_features_batch(cur, match_ids)
            print(f"  Loaded features for {len(features_map)} matches")

            print("Loading odds...")
            odds_map = load_odds_batch(cur, match_ids)
            print(f"  Loaded odds for {len(odds_map)} matches")
    finally:
        conn.close()

    print(f"Data loading: {time.time() - t0:.1f}s")

    print("\nLoading V25 model...")
    predictor = V25Predictor()
    predictor.load_models()
    feature_cols = predictor.FEATURE_COLS

    samples: List[Dict[str, Any]] = []
    missing_features = 0
    errors = 0
    processed = 0

    print(f"\nRunning predictions on {len(match_ids)} matches...")
    t1 = time.time()

    for i, mid in enumerate(match_ids):
        feat_row = features_map.get(mid)
        if feat_row is None:
            missing_features += 1
            continue

        feat_dict = _build_feature_dict(feature_cols, feat_row, odds_map.get(mid, {}))

        try:
            # Appended one by one so that samples produced before a failing
            # market are kept, exactly as when everything was inline.
            for sample in _iter_match_samples(predictor, mid, feat_dict, match_map[mid]):
                samples.append(sample)
            processed += 1
        except Exception as e:  # best effort: one bad match must not abort the run
            errors += 1
            # Throttle on the error count alone; matches skipped for missing
            # features must not use up the five-message budget.
            if errors <= 5:
                print(f"  Error on {mid}: {e}")

        if (i + 1) % 5000 == 0:
            elapsed = time.time() - t1
            rate = (i + 1) / elapsed if elapsed > 0 else 0.0
            print(f"  Processed {i+1}/{len(match_ids)} ({rate:.0f} matches/s)")

    elapsed = time.time() - t1
    skipped = missing_features + errors
    print(f"\nPrediction complete: {processed} matches, {skipped} skipped, {elapsed:.1f}s")

    if not samples:
        print("No calibration samples generated!")
        return

    df = pd.DataFrame(samples)
    print(f"\nTotal calibration samples: {len(df)}")
    print(f"Unique matches: {df['match_id'].nunique()}")
    print("\nPer-key counts:")
    for key, count in df["key"].value_counts().items():
        print(f"  {key:<14} {count}")

    print(f"\nTraining isotonic calibration models (min_samples={args.min_samples})...")
    results = _train_calibrators(df, args.min_samples)
    _print_results(results)

    total_time = time.time() - t0
    print(f"\nTotal time: {total_time:.1f}s")
    print(f"Calibration models saved to: {os.path.join(AI_ENGINE_DIR, 'models', 'calibration')}/")
|
||
|
||
|
||
def main():
    """Parse the command-line options and hand them to ``run_backfill``."""
    cli = argparse.ArgumentParser(description="Backfill calibration from historical matches")
    cli.add_argument(
        "--limit",
        type=int,
        default=50000,
        help="Max matches to process (default: 50000)",
    )
    cli.add_argument(
        "--min-samples",
        type=int,
        default=100,
        help="Min samples per market for calibration (default: 100)",
    )
    options = cli.parse_args()
    run_backfill(options)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|