# iddaai-be/ai-engine/scripts/backfill_calibration.py

"""
Calibration Backfill Script
============================
Runs V25 model against historical matches (using pre-computed ai_features + odds)
to generate calibration training data, then trains isotonic calibration models.

Usage:
    python ai-engine/scripts/backfill_calibration.py
    python ai-engine/scripts/backfill_calibration.py --limit 5000
    python ai-engine/scripts/backfill_calibration.py --min-samples 50
"""

import argparse
import json
import os
import sys
import time
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
import psycopg2
from psycopg2.extras import RealDictCursor
from dotenv import load_dotenv

# Absolute path of the directory two levels above this file, i.e. the parent
# of the directory that contains this script.
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Put that directory first on sys.path so the ``models`` package imported
# below is searched for there before anywhere else.
sys.path.insert(0, AI_ENGINE_DIR)

from models.v25_ensemble import V25Predictor
from models.calibration import get_calibrator

# NOTE(review): presumably populates os.environ (e.g. DATABASE_URL, read by
# get_conn) from a local .env file — confirm against python-dotenv's docs.
load_dotenv()


def _normalize_pick(pick) -> str:
    return str(pick or "").strip().casefold()


def resolve_actual(market, pick, score_home, score_away, ht_home, ht_away):
    """Return 1 if *pick* won for the given scores, 0 if it lost, else ``None``.

    Args:
        market: Market code, case-insensitive: ``MS``, ``OU15``/``OU25``/
            ``OU35``, ``BTTS``, ``HT``, ``HTFT`` or ``DC``.
        pick: Selection within the market (e.g. ``"1"``, ``"X"``, ``"Over"``,
            ``"Var"``, ``"1/X"``, ``"1-X"``). ``"0"`` is accepted as a draw
            symbol wherever ``"x"`` is.
        score_home: Full-time goals of the home side.
        score_away: Full-time goals of the away side.
        ht_home: Half-time goals of the home side, or ``None``.
        ht_away: Half-time goals of the away side, or ``None``.

    Returns:
        ``1`` or ``0`` for a resolved pick. ``None`` when the full-time score
        is missing, when a half-time market lacks the half-time score, or when
        the market/pick is not recognised (including ``HTFT`` picks that do not
        consist of exactly two ``/``-separated parts).
    """
    if score_home is None or score_away is None:
        return None
    market = (market or "").upper()
    p = _normalize_pick(pick)
    total = score_home + score_away

    def outcome(home, away):
        # Result symbol of a score line: "1" home win, "2" away win, "x" draw.
        if home > away:
            return "1"
        if away > home:
            return "2"
        return "x"

    def canon(symbol):
        # Both "x" and "0" denote a draw; compare on a single spelling.
        symbol = symbol.strip()
        return "x" if symbol == "0" else symbol

    if market == "MS":
        symbol = canon(p)
        if symbol not in {"1", "x", "2"}:
            return None
        return int(symbol == outcome(score_home, score_away))

    if market in {"OU15", "OU25", "OU35"}:
        line = {"OU15": 1.5, "OU25": 2.5, "OU35": 3.5}[market]
        if "over" in p or "üst" in p or "ust" in p:
            return int(total > line)
        if "under" in p or "alt" in p:
            return int(total < line)
        return None

    if market == "BTTS":
        both = score_home > 0 and score_away > 0
        if "yes" in p or "var" in p:
            return int(both)
        if "no" in p or "yok" in p:
            return int(not both)
        return None

    if market == "HT":
        if ht_home is None or ht_away is None:
            return None
        symbol = canon(p)
        if symbol not in {"1", "x", "2"}:
            return None
        return int(symbol == outcome(ht_home, ht_away))

    if market == "HTFT":
        parts = p.split("/")
        # A pick that is not exactly "<ht>/<ft>" cannot be resolved.
        if ht_home is None or ht_away is None or len(parts) != 2:
            return None
        ht_pick, ft_pick = canon(parts[0]), canon(parts[1])
        return int(
            ht_pick == outcome(ht_home, ht_away)
            and ft_pick == outcome(score_home, score_away)
        )

    if market == "DC":
        # Drop the separator and fold the "0" draw symbol into "x".
        norm = p.replace("-", "").replace("0", "x")
        if norm == "1x":
            return int(score_home >= score_away)
        if norm == "x2":
            return int(score_away >= score_home)
        if norm == "12":
            return int(score_home != score_away)
        return None

    return None


def calibrator_key(market, pick):
    """Map a market/pick pair to the name of its calibration model.

    Args:
        market: Market code, case-insensitive.
        pick: Selection within the market.

    Returns:
        The calibrator key, or ``None`` when no key exists for the pair:
        * ``MS`` / ``HT``: ``<market>_home``, ``<market>_draw`` or
          ``<market>_away`` for picks ``1``, ``x``/``0`` and ``2``.
        * ``OU15`` / ``OU25`` / ``OU35``: ``ou15`` / ``ou25`` / ``ou35`` for
          "over" picks only (``over``, ``üst`` or the ASCII spelling ``ust``,
          the same spellings ``resolve_actual`` recognises).
        * ``BTTS``: ``btts`` for "yes" picks only (``yes`` or ``var``).
        * ``DC`` and ``HTFT``: ``dc`` and ``ht_ft`` regardless of the pick.
    """
    m = (market or "").upper()
    p = _normalize_pick(pick)

    if m in ("MS", "HT"):
        prefix = m.lower()
        if p == "1":
            return f"{prefix}_home"
        if p in {"x", "0"}:
            return f"{prefix}_draw"
        if p == "2":
            return f"{prefix}_away"
        return None

    if m == "DC":
        return "dc"

    if m in ("OU15", "OU25", "OU35"):
        if "over" in p or "üst" in p or "ust" in p:
            return m.lower()
        return None

    if m == "BTTS":
        if "yes" in p or "var" in p:
            return "btts"
        return None

    if m == "HTFT":
        return "ht_ft"

    return None


def get_conn():
    """Open a psycopg2 connection described by the ``DATABASE_URL`` variable.

    Everything from the first ``?schema=`` onwards is cut off the URL before
    it is handed to ``psycopg2.connect``. The connection is created with
    ``RealDictCursor`` as its cursor factory.

    Raises:
        ValueError: If the variable is unset or nothing is left of the URL
            after the cut.
    """
    dsn = os.getenv("DATABASE_URL", "").partition("?schema=")[0]
    if dsn == "":
        raise ValueError("DATABASE_URL not set")
    return psycopg2.connect(dsn, cursor_factory=RealDictCursor)


# Categories whose selection name must equal a result symbol exactly.
# Maps lowercase category name -> {lowercase selection name -> odds key};
# both "0" and "x" stand for the draw.
ODD_CAT_MAP = {
    category: {
        "1": f"{stem}_h",
        "0": f"{stem}_d",
        "x": f"{stem}_d",
        "2": f"{stem}_a",
    }
    for category, stem in (("maç sonucu", "ms"), ("1. yarı sonucu", "ht_ms"))
}

# Categories whose selection is recognised by a keyword contained in its name.
# Maps lowercase category name -> {keyword -> odds key}. The over/under
# entries are generated per goal line ("05" -> "0,5 alt/üst" -> "ou05_u/_o"),
# first for the full match and then for the first half ("ilk yarı", "ht_").
ODD_CAT_KEYWORD_MAP = {
    "karşılıklı gol": {"var": "btts_y", "yok": "btts_n"},
    **{
        f"{cat_prefix}{line[0]},{line[1]} alt/üst": {
            "alt": f"{key_prefix}ou{line}_u",
            "üst": f"{key_prefix}ou{line}_o",
        }
        for cat_prefix, key_prefix, lines in (
            ("", "", ("05", "15", "25", "35")),
            ("ilk yarı ", "ht_", ("05", "15")),
        )
        for line in lines
    },
}


def load_matches(cur, limit: int) -> List[Dict]:
    """Fetch up to *limit* finished football matches that have AI features.

    Only matches with status ``FT``, sport ``football``, both full-time scores
    present and a row in ``football_ai_features`` are selected, ordered by
    ``mst_utc`` descending.

    Args:
        cur: Database cursor used to run the query.
        limit: Maximum number of rows to return.

    Returns:
        Whatever ``cur.fetchall()`` yields; each row carries ``id``, the
        full-time scores and the half-time scores.
    """
    query = """
        SELECT m.id, m.score_home, m.score_away,
               m.ht_score_home, m.ht_score_away
        FROM matches m
        JOIN football_ai_features f ON f.match_id = m.id
        WHERE m.status = 'FT'
          AND m.sport = 'football'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
        ORDER BY m.mst_utc DESC
        LIMIT %s
    """
    params = (limit,)
    cur.execute(query, params)
    rows = cur.fetchall()
    return rows


def load_ai_features_batch(cur, match_ids: List[str]) -> Dict[str, Dict]:
    """Load the pre-computed feature row of every match in *match_ids*.

    The query renames stored columns to the aliases selected below and fills
    several columns with constant zeros.

    Args:
        cur: Database cursor used to run the query.
        match_ids: Match identifiers; an empty value short-circuits to ``{}``.

    Returns:
        A dict mapping ``str(match_id)`` to a plain-dict copy of its row.
    """
    if not match_ids:
        return {}

    # One "%s" per id so every value is passed as a bound parameter.
    placeholders = ",".join("%s" for _ in match_ids)
    sql = f"""
        SELECT match_id,
               home_elo AS home_overall_elo,
               away_elo AS away_overall_elo,
               elo_diff,
               home_home_elo, away_away_elo,
               home_form_elo, away_form_elo,
               (home_form_elo - away_form_elo) AS form_elo_diff,
               home_goals_avg_5 AS home_goals_avg,
               home_conceded_avg_5 AS home_conceded_avg,
               away_goals_avg_5 AS away_goals_avg,
               away_conceded_avg_5 AS away_conceded_avg,
               home_clean_sheet_rate, away_clean_sheet_rate,
               home_scoring_rate, away_scoring_rate,
               home_win_streak AS home_winning_streak,
               away_win_streak AS away_winning_streak,
               0 AS home_unbeaten_streak,
               0 AS away_unbeaten_streak,
               h2h_total AS h2h_total_matches,
               h2h_home_win_rate,
               (1.0 - h2h_home_win_rate - 0.33) AS h2h_draw_rate,
               h2h_avg_goals,
               h2h_btts_rate, h2h_over25_rate,
               home_avg_possession, away_avg_possession,
               home_avg_shots_on_target, away_avg_shots_on_target,
               home_shot_conversion, away_shot_conversion,
               0.0 AS home_avg_corners, 0.0 AS away_avg_corners,
               implied_home, implied_draw, implied_away,
               league_avg_goals,
               0.0 AS league_zero_goal_rate,
               0.0 AS home_xga, 0.0 AS away_xga,
               0.0 AS upset_atmosphere, 0.0 AS upset_motivation,
               0.0 AS upset_fatigue, 0.0 AS upset_potential,
               referee_home_bias, referee_avg_goals,
               referee_avg_cards AS referee_cards_total,
               0.0 AS referee_avg_yellow,
               0.0 AS referee_experience,
               0.0 AS home_momentum_score, 0.0 AS away_momentum_score,
               0.0 AS momentum_diff,
               0.0 AS home_squad_quality, 0.0 AS away_squad_quality,
               0.0 AS squad_diff,
               0 AS home_key_players, 0 AS away_key_players,
               missing_players_impact AS home_missing_impact,
               0.0 AS away_missing_impact,
               home_goals_avg_5 AS home_goals_form,
               away_goals_avg_5 AS away_goals_form
        FROM football_ai_features
        WHERE match_id IN ({placeholders})
    """
    cur.execute(sql, match_ids)

    features_by_match: Dict[str, Dict] = {}
    for record in cur.fetchall():
        features_by_match[str(record["match_id"])] = dict(record)
    return features_by_match


def load_odds_batch(cur, match_ids: List[str]) -> Dict[str, Dict[str, float]]:
    """Load the odds of *match_ids* keyed by short odds names.

    Category and selection names are lower-cased and matched against
    ``ODD_CAT_MAP`` (selection must equal a symbol) or
    ``ODD_CAT_KEYWORD_MAP`` (selection must contain a keyword). Rows whose
    odd value is missing, unparsable or not greater than zero are ignored.

    Args:
        cur: Database cursor used to run the query.
        match_ids: Match identifiers; an empty value short-circuits to ``{}``.

    Returns:
        A dict mapping ``str(match_id)`` to ``{odds key: odd value}``. A match
        gets an entry as soon as it has one usable odd value, even if none of
        its categories is recognised.
    """
    if not match_ids:
        return {}

    def fold(text) -> str:
        # str.lower() turns "İ" (U+0130) into "i" + U+0307 (combining dot
        # above); drop the mark so names such as "İlk Yarı ..." compare equal
        # to the plain lowercase keys of the lookup tables.
        return (text or "").lower().replace("i\u0307", "i").strip()

    ph = ",".join(["%s"] * len(match_ids))
    cur.execute(f"""
        SELECT oc.match_id, oc.name AS cat_name,
               os.name AS sel_name, os.odd_value
        FROM odd_selections os
        JOIN odd_categories oc ON os.odd_category_db_id = oc.db_id
        WHERE oc.match_id IN ({ph})
    """, match_ids)

    odds: Dict[str, Dict[str, float]] = {}
    for row in cur.fetchall():
        raw_value = row["odd_value"]
        try:
            val = float(raw_value) if raw_value else 0.0
        except (TypeError, ValueError):
            # One malformed value must not abort the whole batch.
            continue
        # "not val > 0" also rejects NaN, which "val <= 0" would let through.
        if not val > 0:
            continue

        match_odds = odds.setdefault(str(row["match_id"]), {})
        cat = fold(row["cat_name"])
        sel = fold(row["sel_name"])

        exact_map = ODD_CAT_MAP.get(cat)
        if exact_map is not None:
            key = exact_map.get(sel)
            if key:
                match_odds[key] = val
            continue

        # Direct lookup instead of scanning every category for equality.
        for keyword, key in ODD_CAT_KEYWORD_MAP.get(cat, {}).items():
            if keyword in sel:
                match_odds[key] = val
    return odds


def _prob_at(index):
    """Return a callable that reads position *index* of a probability sequence."""
    def extract(p):
        return p[index]
    return extract


# (market code, pick label, extractor) triples; the extractor takes a
# model's probability sequence and returns the entry belonging to the pick.
MARKETS_TO_PREDICT = [
    ("MS", "1", _prob_at(0)),
    ("MS", "X", _prob_at(1)),
    ("MS", "2", _prob_at(2)),
    ("OU25", "Over 2.5", _prob_at(0)),
    ("BTTS", "Yes", _prob_at(0)),
    ("OU15", "Over 1.5", _prob_at(0)),
    ("OU35", "Over 3.5", _prob_at(0)),
    ("HT", "1", _prob_at(0)),
    ("HT", "X", _prob_at(1)),
    ("HT", "2", _prob_at(2)),
]


# (model key looked up in ``predictor.models``, market code handed to
# ``resolve_actual`` / ``calibrator_key``), in the order they are evaluated.
_BACKFILL_MODELS = (
    ("ms", "MS"),
    ("ou25", "OU25"),
    ("btts", "BTTS"),
    ("ou15", "OU15"),
    ("ou35", "OU35"),
    ("ht_result", "HT"),
)


def _load_backfill_data(limit):
    """Fetch matches, their features and odds, then release the DB connection."""
    conn = get_conn()
    try:
        cur = conn.cursor(cursor_factory=RealDictCursor)
        try:
            print(f"Loading matches (limit={limit})...")
            matches = load_matches(cur, limit)
            print(f"  Found {len(matches)} finished matches with ai_features")

            match_ids = [str(m["id"]) for m in matches]

            print("Loading ai_features...")
            features_map = load_ai_features_batch(cur, match_ids)
            print(f"  Loaded features for {len(features_map)} matches")

            print("Loading odds...")
            odds_map = load_odds_batch(cur, match_ids)
            print(f"  Loaded odds for {len(odds_map)} matches")
        finally:
            cur.close()
    finally:
        # Closed even when a query fails; nothing after loading needs the DB.
        conn.close()
    return matches, features_map, odds_map


def _build_feature_dict(feature_cols, feat_row, odds_row):
    """Assemble the model input of one match from its feature and odds rows."""
    feat_dict = {}
    for col in feature_cols:
        stored = feat_row.get(col)
        if stored is not None:
            feat_dict[col] = float(stored)
        elif col.startswith("odds_") and not col.endswith("_present"):
            # Raw odds column: take the value from the odds row, 0 if absent.
            feat_dict[col] = float(odds_row.get(col.replace("odds_", ""), 0))
        elif col.endswith("_present"):
            # Presence flag: 1.0 when the corresponding odd is available.
            odds_key = col.replace("_present", "").replace("odds_", "")
            feat_dict[col] = 1.0 if odds_row.get(odds_key, 0) > 0 else 0.0
        else:
            feat_dict[col] = 0.0

    # Loaded match-result odds take precedence over whatever was stored.
    for side in ("ms_h", "ms_d", "ms_a"):
        if odds_row.get(side, 0) > 0:
            feat_dict[f"odds_{side}"] = odds_row[side]

    ms_h = feat_dict.get("odds_ms_h", 0)
    ms_d = feat_dict.get("odds_ms_d", 0)
    ms_a = feat_dict.get("odds_ms_a", 0)
    if ms_h > 0 and ms_d > 0 and ms_a > 0:
        # Inverse odds normalised to sum to 1.
        raw_sum = 1 / ms_h + 1 / ms_d + 1 / ms_a
        feat_dict["implied_home"] = (1 / ms_h) / raw_sum
        feat_dict["implied_draw"] = (1 / ms_d) / raw_sum
        feat_dict["implied_away"] = (1 / ms_a) / raw_sum
    return feat_dict


def _collect_match_samples(predictor, mid, feat_dict, match_row):
    """Predict every available market for one match and label each pick."""
    sh = match_row["score_home"]
    sa = match_row["score_away"]
    ht_h = match_row.get("ht_score_home")
    ht_a = match_row.get("ht_score_away")

    samples = []
    for model_key, market in _BACKFILL_MODELS:
        if model_key not in predictor.models:
            continue
        probs = predictor.predict_market(model_key, feat_dict)
        if probs is None:
            continue

        if model_key in ("ms", "ht_result"):
            if model_key == "ht_result" and (ht_h is None or ht_a is None):
                continue
            picks = [("1", probs[0]), ("X", probs[1]), ("2", probs[2])]
        else:
            # Binary markets: only the first probability is calibrated,
            # labelled "Yes" for BTTS and "Over" for the over/under lines.
            first = float(probs[0]) if len(probs) > 0 else 0.5
            picks = [("Yes" if model_key == "btts" else "Over", first)]

        for pick, prob in picks:
            actual = resolve_actual(market, pick, sh, sa, ht_h, ht_a)
            key = calibrator_key(market, pick)
            if actual is not None and key:
                samples.append({
                    "match_id": mid,
                    "market": market,
                    "pick": pick,
                    "key": key,
                    "raw_prob": float(prob),
                    "actual": int(actual),
                })
    return samples


def _train_calibrators(df, min_samples):
    """Fit and save one calibration model per key that has enough samples."""
    calibrator = get_calibrator()
    results: Dict[str, Any] = {}
    for key in sorted(df["key"].unique()):
        sub = df[df["key"] == key].copy()
        sub = sub.drop_duplicates(subset=["match_id", "key"], keep="first")
        sub = sub.dropna(subset=["raw_prob", "actual"])
        # Keep only probabilities strictly between 0 and 1.
        sub = sub[(sub["raw_prob"] > 0.0) & (sub["raw_prob"] < 1.0)]

        n = len(sub)
        if n < min_samples:
            results[key] = {"status": "skipped", "samples": n}
            continue

        metrics = calibrator.train_calibration(
            df=sub,
            market=key,
            prob_col="raw_prob",
            actual_col="actual",
            min_samples=min_samples,
            save=True,
        )
        results[key] = {
            "status": "trained",
            "samples": metrics.sample_count,
            "brier": round(metrics.brier_score, 4),
            "ece": round(metrics.calibration_error, 4),
            "mean_predicted": round(metrics.mean_predicted, 4),
            "mean_actual": round(metrics.mean_actual, 4),
        }
    return results


def _print_calibration_report(results):
    """Print the per-key training summary table."""
    print("\n" + "=" * 70)
    print("CALIBRATION RESULTS")
    print("=" * 70)
    print(f"{'market':<14} {'status':<10} {'n':<8} {'brier':<9} {'ece':<8} {'pred_avg':<9} {'actual_avg'}")
    print("-" * 70)
    for key, info in sorted(results.items()):
        if info["status"] == "trained":
            print(
                f"{key:<14} {'OK':<10} {info['samples']:<8} "
                f"{info['brier']:<9.4f} {info['ece']:<8.4f} "
                f"{info['mean_predicted']:<9.4f} {info['mean_actual']}"
            )
        else:
            print(f"{key:<14} {'SKIP':<10} {info['samples']:<8}")
    print("=" * 70)


def run_backfill(args):
    """Generate calibration samples from historical matches and train on them.

    Steps: load finished matches with their features and odds, run the V25
    predictor on each match, label every predicted pick with its actual
    outcome, then train and save one calibration model per calibrator key.
    Progress and results are printed to stdout.

    Args:
        args: Object with ``limit`` (maximum number of matches to load) and
            ``min_samples`` (minimum samples a key needs to be trained).

    Notes:
        The database connection is closed as soon as loading finishes, also
        when loading fails. A match whose prediction raises is counted as
        skipped and contributes no samples; only the first five such errors
        are printed.
    """
    print("=" * 70)
    print("CALIBRATION BACKFILL")
    print("=" * 70)

    t0 = time.time()
    matches, features_map, odds_map = _load_backfill_data(args.limit)
    print(f"Data loading: {time.time() - t0:.1f}s")

    print("\nLoading V25 model...")
    predictor = V25Predictor()
    predictor.load_models()
    feature_cols = predictor.FEATURE_COLS

    samples: List[Dict[str, Any]] = []
    processed = 0
    missing = 0
    errors = 0
    total = len(matches)

    print(f"\nRunning predictions on {total} matches...")
    t1 = time.time()

    for i, match_row in enumerate(matches):
        mid = str(match_row["id"])
        feat_row = features_map.get(mid)
        if feat_row is None:
            missing += 1
            continue

        feat_dict = _build_feature_dict(feature_cols, feat_row, odds_map.get(mid, {}))

        try:
            samples.extend(_collect_match_samples(predictor, mid, feat_dict, match_row))
            processed += 1
        except Exception as e:
            # Per-match boundary: a failing match is skipped so the rest of
            # the backfill still runs; errors are counted on their own so the
            # first five are always reported.
            errors += 1
            if errors <= 5:
                print(f"  Error on {mid}: {e}")

        if (i + 1) % 5000 == 0:
            elapsed = time.time() - t1
            rate = (i + 1) / elapsed if elapsed > 0 else 0.0
            print(f"  Processed {i+1}/{total} ({rate:.0f} matches/s)")

    elapsed = time.time() - t1
    skipped = missing + errors
    print(f"\nPrediction complete: {processed} matches, {skipped} skipped, {elapsed:.1f}s")

    if not samples:
        print("No calibration samples generated!")
        return

    df = pd.DataFrame(samples)
    print(f"\nTotal calibration samples: {len(df)}")
    print(f"Unique matches: {df['match_id'].nunique()}")
    print(f"\nPer-key counts:")
    for key, count in df["key"].value_counts().items():
        print(f"  {key:<14} {count}")

    print(f"\nTraining isotonic calibration models (min_samples={args.min_samples})...")
    results = _train_calibrators(df, args.min_samples)
    _print_calibration_report(results)

    total_time = time.time() - t0
    print(f"\nTotal time: {total_time:.1f}s")
    print(f"Calibration models saved to: {os.path.join(AI_ENGINE_DIR, 'models', 'calibration')}/")


def main():
    """Parse the command-line options and run the calibration backfill.

    Options:
        --limit: Maximum number of matches to process (default 50000).
        --min-samples: Minimum samples per market for calibration (default 100).
    """
    cli = argparse.ArgumentParser(description="Backfill calibration from historical matches")
    cli.add_argument(
        "--limit",
        default=50000,
        type=int,
        help="Max matches to process (default: 50000)",
    )
    cli.add_argument(
        "--min-samples",
        default=100,
        type=int,
        help="Min samples per market for calibration (default: 100)",
    )
    run_backfill(cli.parse_args())


# Run the backfill only when the file is executed as a script.
if __name__ == "__main__":
    main()