main

2026-05-12 02:43:02 +03:00
parent f8599bdb9a
commit b6d64b59bf
35 changed files with 1400 additions and 630 deletions
@@ -1,63 +1,48 @@
 """
-Calibration Training Script
-===========================
-Trains Isotonic Regression calibration models for all betting markets.
+Calibration Training Script (REWRITTEN)
+=======================================
+Trains Isotonic Regression calibration models for football markets
+using REAL model predictions + actual match outcomes.

-This script:
-1. Fetches historical match data with predictions and actual results
-2. Trains Isotonic Regression models for each market
-3. Calculates calibration metrics (Brier Score, ECE)
-4. Saves models to ai-engine/models/calibration/
+Data sources (combined):
+  - `predictions` table:        Full bet_summary (many markets per match), joined to `matches` for actual results
+  - `prediction_runs` table:    main_pick + value_pick predictions with resolved outcomes
+
+Per market, fits IsotonicRegression(raw_model_prob → actual_hit) so that
+calibrated_prob mirrors empirical hit rate.

 Usage:
-    # Train on last 90 days of data
-    python3 ai-engine/scripts/train_calibration.py
-    
-    # Train on specific date range
-    python3 ai-engine/scripts/train_calibration.py --start 2026-01-01 --end 2026-02-15
-    
-    # Train only specific markets
-    python3 ai-engine/scripts/train_calibration.py --markets ou25 btts ms_home
+    python ai-engine/scripts/train_calibration.py
+    python ai-engine/scripts/train_calibration.py --min-samples 30
+    python ai-engine/scripts/train_calibration.py --markets ms_home ou25 btts
+
+Notes:
+  * Multi-source data extraction tolerates schema drift in payload JSON.
+  * If a market has fewer than --min-samples points, it is skipped
+    (orchestrator will fall back to the multiplier from market_thresholds.json).
 """

+import argparse
 import os
 import sys
-import json
-import argparse
-import psycopg2
-import pandas as pd
-import numpy as np
-from datetime import datetime, timedelta
-from dotenv import load_dotenv
-from typing import Dict, List, Tuple, Any, Optional
+from typing import Any, Dict, List, Optional
+
+import pandas as pd
+import psycopg2
+from dotenv import load_dotenv

-# Setup path for ai-engine imports
 AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.insert(0, AI_ENGINE_DIR)

-from models.calibration import get_calibrator, SUPPORTED_MARKETS
+from models.calibration import get_calibrator  # noqa: E402

 load_dotenv()


 # =============================================================================
-# CONFIG
-# =============================================================================
-TOP_LEAGUES_PATH = os.path.join(
-    os.path.dirname(os.path.dirname(AI_ENGINE_DIR)),
-    "top_leagues.json"
-)
-
-# Default: last 90 days
-DEFAULT_START_DATE = (datetime.utcnow() - timedelta(days=90)).strftime("%Y-%m-%d")
-DEFAULT_END_DATE = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")
-
-
-# =============================================================================
-# DB CONNECTION
+# DB
 # =============================================================================
 def get_conn():
-    """Get PostgreSQL connection."""
    db_url = os.getenv("DATABASE_URL")
    if not db_url:
        raise ValueError("DATABASE_URL not set")
@@ -66,354 +51,370 @@ def get_conn():
    return psycopg2.connect(db_url)


-def load_top_league_ids() -> List[str]:
-    """Load top league IDs from JSON file."""
-    if not os.path.exists(TOP_LEAGUES_PATH):
-        print(f"[Warning] top_leagues.json not found at {TOP_LEAGUES_PATH}")
-        return []
-    
-    with open(TOP_LEAGUES_PATH, "r") as f:
-        data = json.load(f)
-        
-    # Handle both list and dict formats
-    if isinstance(data, dict):
-        return data.get("football", [])
-    return data
+# =============================================================================
+# OUTCOME RESOLUTION
+# =============================================================================
+def _normalize_pick(pick: Any) -> str:
+    """Return *pick* as a trimmed, case-folded string; falsy values become ""."""
+    text = str(pick) if pick else ""
+    return text.strip().casefold()
+
+
+def _is_over(pick: str) -> bool:
+    """Return True when the normalized pick contains "over", "üst" or "ust"."""
+    folded = _normalize_pick(pick)
+    return any(token in folded for token in ("over", "üst", "ust"))
+
+
+def _is_under(pick: str) -> bool:
+    """Return True when the normalized pick contains "under" or "alt"."""
+    folded = _normalize_pick(pick)
+    return any(token in folded for token in ("under", "alt"))
+
+
+def _is_yes(pick: str) -> bool:
+    """Return True when the normalized pick contains "yes" or "var"."""
+    folded = _normalize_pick(pick)
+    return any(token in folded for token in ("yes", "var"))
+
+
+def resolve_actual(
+    market: str,
+    pick: str,
+    score_home: Optional[int],
+    score_away: Optional[int],
+    ht_home: Optional[int],
+    ht_away: Optional[int],
+) -> Optional[int]:
+    """Return 1 if the (market, pick) hit, 0 if it missed, None if undetermined.
+
+    Args:
+        market: Market code, matched case-insensitively. Handled codes are MS,
+            DC, OU15, OU25, OU35, BTTS, HT, HT_OU05, HT_OU15, OE and HTFT.
+        pick: Selection label; English and Turkish spellings are recognised.
+        score_home: Full-time home goals.
+        score_away: Full-time away goals.
+        ht_home: Half-time home goals, or None when unknown.
+        ht_away: Half-time away goals, or None when unknown.
+
+    Returns:
+        1 or 0 for a resolved selection. None when a required score is
+        missing, the market is not handled, or the pick is not recognised
+        for that market.
+    """
+    if score_home is None or score_away is None:
+        return None
+    market = (market or "").upper()
+    p = _normalize_pick(pick)
+    total = score_home + score_away
+    # Both half-time scores are required. A one-sided half-time score must not
+    # be completed with 0, otherwise HT_OU* would be settled on partial data
+    # while the HT market (below) refuses to settle the same match.
+    ht_known = ht_home is not None and ht_away is not None
+    ht_total = ht_home + ht_away if ht_known else None
+
+    if market == "MS":
+        if p == "1":
+            return int(score_home > score_away)
+        if p in {"x", "0", "x/0"}:
+            return int(score_home == score_away)
+        if p == "2":
+            return int(score_away > score_home)
+        return None
+
+    if market == "DC":
+        # Accept both "1-X" and "1X" spellings.
+        norm = p.replace("-", "").upper()
+        if norm == "1X":
+            return int(score_home >= score_away)
+        if norm == "X2":
+            return int(score_away >= score_home)
+        if norm == "12":
+            return int(score_home != score_away)
+        return None
+
+    if market in {"OU15", "OU25", "OU35"}:
+        line = {"OU15": 1.5, "OU25": 2.5, "OU35": 3.5}[market]
+        if _is_over(p):
+            return int(total > line)
+        if _is_under(p):
+            return int(total < line)
+        return None
+
+    if market == "BTTS":
+        both_scored = score_home > 0 and score_away > 0
+        if _is_yes(p):
+            return int(both_scored)
+        if "no" in p or "yok" in p:
+            return int(not both_scored)
+        return None
+
+    if market == "HT":
+        if not ht_known:
+            return None
+        if p == "1":
+            return int(ht_home > ht_away)
+        if p in {"x", "0"}:
+            return int(ht_home == ht_away)
+        if p == "2":
+            return int(ht_away > ht_home)
+        return None
+
+    if market in {"HT_OU05", "HT_OU15"}:
+        if ht_total is None:
+            return None
+        line = 0.5 if market == "HT_OU05" else 1.5
+        if _is_over(p):
+            return int(ht_total > line)
+        if _is_under(p):
+            return int(ht_total < line)
+        return None
+
+    if market == "OE":
+        if "odd" in p or "tek" in p:
+            return int(total % 2 == 1)
+        if "even" in p or "çift" in p or "cift" in p:
+            return int(total % 2 == 0)
+        return None
+
+    if market == "HTFT":
+        if not ht_known:
+            return None
+        # Exactly one "/" is required. A pick without a slash, or with more
+        # than one ("1/x/2"), is undetermined rather than an unpacking error.
+        halves = [part.strip() for part in p.split("/")]
+        if len(halves) != 2:
+            return None
+        # "0" is a draw alias here too, matching the MS and HT branches.
+        ht_p, ft_p = ("x" if part == "0" else part for part in halves)
+        ht_actual = "1" if ht_home > ht_away else "2" if ht_away > ht_home else "x"
+        ft_actual = "1" if score_home > score_away else "2" if score_away > score_home else "x"
+        return int(ht_p == ht_actual and ft_p == ft_actual)
+
+    return None
+
+
+# =============================================================================
+# CALIBRATOR KEY (must mirror orchestrator._calibrator_key)
+# =============================================================================
+def calibrator_key(market: str, pick: str) -> Optional[str]:
+    """Map a (market, pick) pair to its calibrator key, or None if there is none.
+
+    MS and HT map per outcome (home / draw / away). DC and HTFT use a single
+    key regardless of pick. OU15/OU25/OU35 and BTTS only map their "over" /
+    "yes" side; every other combination yields None.
+    """
+    code = (market or "").upper()
+    selection = _normalize_pick(pick)
+
+    if code in {"MS", "HT"}:
+        outcome = {"1": "home", "x": "draw", "0": "draw", "2": "away"}.get(selection)
+        return f"{code.lower()}_{outcome}" if outcome else None
+
+    if code == "DC":
+        return "dc"
+    if code == "HTFT":
+        return "ht_ft"
+
+    if code in {"OU15", "OU25", "OU35"}:
+        return code.lower() if _is_over(selection) else None
+    if code == "BTTS":
+        return "btts" if _is_yes(selection) else None
+
+    return None


 # =============================================================================
 # DATA EXTRACTION
 # =============================================================================
-def fetch_training_data(
-    cur,
-    start_date: str,
-    end_date: str,
-    league_ids: List[str] = None,
-) -> pd.DataFrame:
+def fetch_predictions_with_outcomes(cur) -> List[Dict[str, Any]]:
    """
-    Fetch match data with odds and results for calibration training.
-    
-    Returns DataFrame with columns:
-    - match_id
-    - home_team, away_team
-    - ms_h, ms_d, ms_a (odds)
-    - score_home, score_away (actual result)
-    - ht_score_home, ht_score_away
-    - ou25_actual, btts_actual, etc.
+    Source 1: `predictions` table joined with `matches` (FT only).
+    Each resolvable bet_summary item becomes a training sample.
+
+    Rows whose payload is not a dict, or whose bet_summary is not a list, are
+    skipped. An item is skipped when raw_confidence is missing or not
+    numeric, when its outcome cannot be resolved, or when no calibrator key
+    maps to its (market, pick).
+
+    Args:
+        cur: Open DB cursor; only execute() and fetchall() are used.
+
+    Returns:
+        List of dicts with keys source, match_id, market, pick, key,
+        raw_prob (raw_confidence / 100) and actual (0 or 1).
    """
-    start_ms = int(datetime.strptime(start_date, "%Y-%m-%d").timestamp() * 1000)
-    end_ms = int(datetime.strptime(end_date, "%Y-%m-%d").timestamp() * 1000) + 86400000  # +1 day
-    
-    # Build league filter
-    league_filter = ""
-    params = [start_ms, end_ms]
-    if league_ids:
-        placeholders = ",".join(["%s"] * len(league_ids))
-        league_filter = f"AND m.league_id IN ({placeholders})"
-        params.extend(league_ids)
-    
-    query = f"""
-    SELECT 
-        m.id as match_id,
-        m.home_team_id,
-        m.away_team_id,
-        m.score_home,
-        m.score_away,
-        m.ht_score_home,
-        m.ht_score_away,
-        m.mst_utc,
-        -- Odds from odd_categories/selections
-        MAX(CASE WHEN oc.name = 'Maç Sonucu' AND os.name = '1' THEN os.odd_value END) as ms_h,
-        MAX(CASE WHEN oc.name = 'Maç Sonucu' AND os.name = 'X' THEN os.odd_value END) as ms_d,
-        MAX(CASE WHEN oc.name = 'Maç Sonucu' AND os.name = '2' THEN os.odd_value END) as ms_a,
-        MAX(CASE WHEN oc.name = '2,5 Alt/Üst' AND os.name = 'Üst' THEN os.odd_value END) as ou25_over,
-        MAX(CASE WHEN oc.name = '2,5 Alt/Üst' AND os.name = 'Alt' THEN os.odd_value END) as ou25_under,
-        MAX(CASE WHEN oc.name = '1,5 Alt/Üst' AND os.name = 'Üst' THEN os.odd_value END) as ou15_over,
-        MAX(CASE WHEN oc.name = '3,5 Alt/Üst' AND os.name = 'Üst' THEN os.odd_value END) as ou35_over,
-        MAX(CASE WHEN oc.name = 'Karşılıklı Gol' AND os.name = 'Var' THEN os.odd_value END) as btts_yes,
-        MAX(CASE WHEN oc.name = 'Karşılıklı Gol' AND os.name = 'Yok' THEN os.odd_value END) as btts_no
-    FROM matches m
-    LEFT JOIN odd_categories oc ON oc.match_id = m.id
-    LEFT JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
-    WHERE m.mst_utc >= %s
-      AND m.mst_utc < %s
-      AND m.status = 'FT'
-      AND m.score_home IS NOT NULL
-      AND m.score_away IS NOT NULL
-      {league_filter}
-    GROUP BY m.id, m.home_team_id, m.away_team_id, m.score_home, m.score_away, 
-             m.ht_score_home, m.ht_score_away, m.mst_utc
-    ORDER BY m.mst_utc DESC
-    """
-    
-    cur.execute(query, params)
+    cur.execute("""
+        SELECT
+            p.match_id,
+            p.prediction_json,
+            m.score_home,
+            m.score_away,
+            m.ht_score_home,
+            m.ht_score_away
+        FROM predictions p
+        JOIN matches m ON m.id = p.match_id
+        WHERE m.sport = 'football'
+          AND m.status = 'FT'
+          AND m.score_home IS NOT NULL
+          AND m.score_away IS NOT NULL
+    """)
    rows = cur.fetchall()
-    columns = [desc[0] for desc in cur.description]
-    
-    df = pd.DataFrame(rows, columns=columns)
-    print(f"[Data] Fetched {len(df)} matches from {start_date} to {end_date}")
-    
-    return df
+    samples: List[Dict[str, Any]] = []
+    for match_id, payload, sh, sa, ht_h, ht_a in rows:
+        if not isinstance(payload, dict):
+            continue
+        bet_summary = payload.get("bet_summary")
+        if not isinstance(bet_summary, list):
+            continue
+        for item in bet_summary:
+            if not isinstance(item, dict):
+                continue
+            market = str(item.get("market") or "")
+            pick = str(item.get("pick") or "")
+            raw_conf = item.get("raw_confidence")
+            if raw_conf is None:
+                continue
+            try:
+                # The payload JSON is not schema-checked: a non-numeric
+                # confidence (e.g. "72%" or a nested object) must skip this
+                # item, not abort the whole training run.
+                raw_prob = float(raw_conf) / 100.0
+            except (TypeError, ValueError):
+                continue
+            actual = resolve_actual(market, pick, sh, sa, ht_h, ht_a)
+            if actual is None:
+                continue
+            key = calibrator_key(market, pick)
+            if not key:
+                continue
+            samples.append({
+                "source": "predictions",
+                "match_id": match_id,
+                "market": market,
+                "pick": pick,
+                "key": key,
+                "raw_prob": raw_prob,
+                "actual": int(actual),
+            })
+    return samples


-def calculate_actual_outcomes(df: pd.DataFrame) -> pd.DataFrame:
+def fetch_prediction_runs_with_outcomes(cur) -> List[Dict[str, Any]]:
    """
-    Calculate actual binary outcomes for each market.
-    
-    Adds columns:
-    - ms_home_actual: 1 if home won, 0 otherwise
-    - ms_draw_actual: 1 if draw, 0 otherwise
-    - ms_away_actual: 1 if away won, 0 otherwise
-    - ou25_over_actual: 1 if total goals > 2.5, 0 otherwise
-    - ou15_over_actual: 1 if total goals > 1.5, 0 otherwise
-    - ou35_over_actual: 1 if total goals > 3.5, 0 otherwise
-    - btts_yes_actual: 1 if both teams scored, 0 otherwise
+    Source 2: `prediction_runs` table with resolved settlement.
+    Each resolvable main_pick / value_pick becomes a training sample.
+
+    The probability comes from raw_confidence (a percentage) when present,
+    otherwise from calibrated_probability or probability (a fraction). A pick
+    is skipped when none of these is present or the value is not numeric,
+    when its outcome cannot be resolved, or when no calibrator key maps to
+    its (market, pick).
+
+    Args:
+        cur: Open DB cursor; only execute() and fetchall() are used.
+
+    Returns:
+        List of dicts with keys source ("runs.main_pick" or
+        "runs.value_pick"), match_id, market, pick, key, raw_prob and
+        actual (0 or 1).
    """
-    # Total goals
-    df["total_goals"] = df["score_home"] + df["score_away"]
-    df["ht_total_goals"] = df["ht_score_home"].fillna(0) + df["ht_score_away"].fillna(0)
-    
-    # Match result outcomes
-    df["ms_home_actual"] = (df["score_home"] > df["score_away"]).astype(int)
-    df["ms_draw_actual"] = (df["score_home"] == df["score_away"]).astype(int)
-    df["ms_away_actual"] = (df["score_home"] < df["score_away"]).astype(int)
-    
-    # Over/Under outcomes
-    df["ou25_over_actual"] = (df["total_goals"] > 2.5).astype(int)
-    df["ou15_over_actual"] = (df["total_goals"] > 1.5).astype(int)
-    df["ou35_over_actual"] = (df["total_goals"] > 3.5).astype(int)
-    
-    # BTTS outcome
-    df["btts_yes_actual"] = ((df["score_home"] > 0) & (df["score_away"] > 0)).astype(int)
-    
-    # Half-Time result
-    df["ht_home_actual"] = (df["ht_score_home"] > df["ht_score_away"]).astype(int)
-    df["ht_draw_actual"] = (df["ht_score_home"] == df["ht_score_away"]).astype(int)
-    df["ht_away_actual"] = (df["ht_score_home"] < df["ht_score_away"]).astype(int)
-    
-    return df
-
-
-def calculate_implied_probabilities(df: pd.DataFrame) -> pd.DataFrame:
-    """
-    Calculate implied probabilities from odds.
-    
-    Adds columns:
-    - ms_home_prob: implied probability from odds
-    - ms_draw_prob
-    - ms_away_prob
-    - ou25_over_prob
-    - etc.
-    """
-    def safe_implied_prob(odd_str: str) -> float:
-        """Convert odds string to implied probability."""
-        if pd.isna(odd_str) or odd_str is None:
-            return np.nan
-        try:
-            odd = float(odd_str)
-            if odd <= 1.0:
-                return np.nan
-            return 1.0 / odd
-        except (ValueError, TypeError):
-            return np.nan
-    
-    # Match result implied probabilities
-    df["ms_home_prob"] = df["ms_h"].apply(safe_implied_prob)
-    df["ms_draw_prob"] = df["ms_d"].apply(safe_implied_prob)
-    df["ms_away_prob"] = df["ms_a"].apply(safe_implied_prob)
-    
-    # Over/Under implied probabilities
-    df["ou25_over_prob"] = df["ou25_over"].apply(safe_implied_prob)
-    df["ou15_over_prob"] = df["ou15_over"].apply(safe_implied_prob)
-    df["ou35_over_prob"] = df["ou35_over"].apply(safe_implied_prob)
-    
-    # BTTS implied probabilities
-    df["btts_yes_prob"] = df["btts_yes"].apply(safe_implied_prob)
-    
-    # -----------------------------------------------------
-    # CONTEXT-AWARE BUCKETS
-    # Create separate probability and actual columns for odds buckets
-    # ms_home odds: ms_h (note ms_h is the bookmaker odds for home win)
-    # -----------------------------------------------------
-    # Helper to safe-cast to float
-    df['ms_h_num'] = pd.to_numeric(df['ms_h'], errors='coerce')
-    
-    # Bucket 1: Heavy Fav (odds <= 1.40)
-    b1_mask = df['ms_h_num'] <= 1.40
-    df.loc[b1_mask, 'ms_home_heavy_fav_prob'] = df.loc[b1_mask, 'ms_home_prob']
-    df.loc[b1_mask, 'ms_home_heavy_fav_actual'] = df.loc[b1_mask, 'ms_home_actual']
-
-    # Bucket 2: Fav (1.40 < odds <= 1.80)
-    b2_mask = (df['ms_h_num'] > 1.40) & (df['ms_h_num'] <= 1.80)
-    df.loc[b2_mask, 'ms_home_fav_prob'] = df.loc[b2_mask, 'ms_home_prob']
-    df.loc[b2_mask, 'ms_home_fav_actual'] = df.loc[b2_mask, 'ms_home_actual']
-
-    # Bucket 3: Balanced (1.80 < odds <= 2.50)
-    b3_mask = (df['ms_h_num'] > 1.80) & (df['ms_h_num'] <= 2.50)
-    df.loc[b3_mask, 'ms_home_balanced_prob'] = df.loc[b3_mask, 'ms_home_prob']
-    df.loc[b3_mask, 'ms_home_balanced_actual'] = df.loc[b3_mask, 'ms_home_actual']
-
-    # Bucket 4: Underdog (odds > 2.50)
-    b4_mask = df['ms_h_num'] > 2.50
-    df.loc[b4_mask, 'ms_home_underdog_prob'] = df.loc[b4_mask, 'ms_home_prob']
-    df.loc[b4_mask, 'ms_home_underdog_actual'] = df.loc[b4_mask, 'ms_home_actual']
-    
-    return df
+    # NOTE(review): unlike the `predictions` query, this one filters neither
+    # m.sport = 'football' nor m.status = 'FT' -- confirm that a non-null
+    # eventual_outcome already implies a finished football match.
+    cur.execute("""
+        SELECT
+            pr.match_id,
+            pr.payload_summary,
+            m.score_home,
+            m.score_away,
+            m.ht_score_home,
+            m.ht_score_away
+        FROM prediction_runs pr
+        JOIN matches m ON m.id = pr.match_id
+        WHERE pr.eventual_outcome IS NOT NULL
+          AND m.score_home IS NOT NULL
+          AND m.score_away IS NOT NULL
+    """)
+    rows = cur.fetchall()
+    samples: List[Dict[str, Any]] = []
+    for match_id, payload, sh, sa, ht_h, ht_a in rows:
+        if not isinstance(payload, dict):
+            continue
+        for source_key in ("main_pick", "value_pick"):
+            item = payload.get(source_key)
+            if not isinstance(item, dict):
+                continue
+            market = str(item.get("market") or "")
+            pick = str(item.get("pick") or "")
+            # Prefer raw_confidence, fall back to calibrated_probability×100 if raw missing
+            raw_conf = item.get("raw_confidence")
+            try:
+                # The payload JSON is not schema-checked: a non-numeric value
+                # must skip this pick, not abort the whole training run.
+                if raw_conf is None:
+                    cal_prob = item.get("calibrated_probability") or item.get("probability")
+                    if cal_prob is None:
+                        continue
+                    raw_conf = float(cal_prob) * 100.0
+                raw_prob = float(raw_conf) / 100.0
+            except (TypeError, ValueError):
+                continue
+            actual = resolve_actual(market, pick, sh, sa, ht_h, ht_a)
+            if actual is None:
+                continue
+            key = calibrator_key(market, pick)
+            if not key:
+                continue
+            samples.append({
+                "source": f"runs.{source_key}",
+                "match_id": match_id,
+                "market": market,
+                "pick": pick,
+                "key": key,
+                "raw_prob": raw_prob,
+                "actual": int(actual),
+            })
+    return samples


 # =============================================================================
-# MODEL PREDICTIONS (Optional - if you want to calibrate model outputs)
+# TRAINING
 # =============================================================================
-def get_model_predictions(
+def train_per_key(
    df: pd.DataFrame,
-    cur,
-) -> pd.DataFrame:
-    """
-    Get model predictions for each match.
-    
-    This is optional - if you want to calibrate model outputs rather than
-    raw odds-implied probabilities.
-    
-    TODO: Implement if needed. For now, we use odds-implied probabilities
-    as a proxy for model predictions.
-    """
-    # For now, return odds-implied probabilities as "model predictions"
-    # In a full implementation, you would:
-    # 1. Load the V20 predictor
-    # 2. Run predictions for each match
-    # 3. Store raw model probabilities
-    
-    return df
-
-
-# =============================================================================
-# MAIN TRAINING
-# =============================================================================
-def train_calibration_models(
-    df: pd.DataFrame,
-    markets: List[str] = None,
-    min_samples: int = 100,
+    min_samples: int,
+    markets_filter: Optional[List[str]] = None,
 ) -> Dict[str, Any]:
-    """
-    Train calibration models for specified markets.
-    
-    Args:
-        df: DataFrame with probabilities and actual outcomes
-        markets: List of markets to train (default: all supported)
-        min_samples: Minimum samples required per market
-        
-    Returns:
-        Dict with training results
-    """
-    if markets is None:
-        markets = SUPPORTED_MARKETS
-    
+    """Train one calibration model per calibrator key found in ``df["key"]``.
+
+    Args:
+        df: Samples with at least the columns key, match_id, raw_prob and
+            actual.
+        min_samples: Keys with fewer usable rows than this are reported as
+            "skipped" and not passed to the calibrator.
+        markets_filter: When given (and non-empty), only these keys are
+            processed; other keys do not appear in the result at all.
+
+    Returns:
+        Dict mapping key to an info dict. Skipped keys carry status, samples
+        and reason; trained keys carry status, samples, brier, ece,
+        mean_predicted and mean_actual (metrics rounded to 4 decimals).
+    """
    calibrator = get_calibrator()
-    
-    # Define market config: market -> (prob_col, actual_col)
-    market_config = {
-        "ms_home": ("ms_home_prob", "ms_home_actual"),
-        "ms_home_heavy_fav": ("ms_home_heavy_fav_prob", "ms_home_heavy_fav_actual"),
-        "ms_home_fav": ("ms_home_fav_prob", "ms_home_fav_actual"),
-        "ms_home_balanced": ("ms_home_balanced_prob", "ms_home_balanced_actual"),
-        "ms_home_underdog": ("ms_home_underdog_prob", "ms_home_underdog_actual"),
-        "ms_draw": ("ms_draw_prob", "ms_draw_actual"),
-        "ms_away": ("ms_away_prob", "ms_away_actual"),
-        "ou15": ("ou15_over_prob", "ou15_over_actual"),
-        "ou25": ("ou25_over_prob", "ou25_over_actual"),
-        "ou35": ("ou35_over_prob", "ou35_over_actual"),
-        "btts": ("btts_yes_prob", "btts_yes_actual"),
-        "ht_home": ("ht_home_prob", "ht_home_actual"),  # Note: need to add ht probs
-        "ht_draw": ("ht_draw_prob", "ht_draw_actual"),
-        "ht_away": ("ht_away_prob", "ht_away_actual"),
-    }
-    
-    # Filter to requested markets
-    market_config = {k: v for k, v in market_config.items() if k in markets}
-    
-    # Train all markets
-    results = calibrator.train_all_markets(
-        df=df,
-        market_config=market_config,
-        min_samples=min_samples,
-    )
-    
+    results: Dict[str, Any] = {}
+    # Sorted so keys are processed in a stable, alphabetical order.
+    keys = sorted(df["key"].unique())
+
+    for key in keys:
+        if markets_filter and key not in markets_filter:
+            continue
+        sub = df[df["key"] == key]
+        # Keep one row per (match_id, key) to avoid double-counting across
+        # sources; keep="first" retains whichever row appears first in df.
+        # NOTE(review): this runs before the range filter below, so a valid
+        # later duplicate is lost when the first row is out of range.
+        sub = sub.drop_duplicates(subset=["match_id", "key"], keep="first")
+        sub = sub.dropna(subset=["raw_prob", "actual"])
+        # Keep only rows with raw_prob strictly inside (0, 1). Rows at or
+        # beyond the bounds are dropped, not clamped.
+        sub = sub[(sub["raw_prob"] > 0.0) & (sub["raw_prob"] < 1.0)]
+
+        n = len(sub)
+        if n < min_samples:
+            results[key] = {
+                "status": "skipped",
+                "samples": n,
+                "reason": f"need ≥{min_samples}, have {n}",
+            }
+            continue
+
+        # save=True is forwarded to the calibrator; presumably it persists
+        # the fitted model -- confirm in models.calibration.
+        metrics = calibrator.train_calibration(
+            df=sub,
+            market=key,
+            prob_col="raw_prob",
+            actual_col="actual",
+            min_samples=min_samples,
+            save=True,
+        )
+        results[key] = {
+            "status": "trained",
+            "samples": metrics.sample_count,
+            "brier": round(metrics.brier_score, 4),
+            "ece": round(metrics.calibration_error, 4),
+            "mean_predicted": round(metrics.mean_predicted, 4),
+            "mean_actual": round(metrics.mean_actual, 4),
+        }
    return results


-def print_calibration_report(results: Dict[str, Any]):
-    """Print a formatted calibration report."""
-    print("\n" + "=" * 70)
+def print_report(results: Dict[str, Any], total_samples: int) -> None:
+    """Print the per-key training report as a fixed-width table on stdout.
+
+    Args:
+        results: Mapping of calibrator key to an info dict. Every info dict
+            has "status" and "samples"; "trained" entries also have brier,
+            ece, mean_predicted and mean_actual, other entries may have
+            "reason".
+        total_samples: Total sample count shown in the report header.
+    """
+    print("\n" + "=" * 78)
    print("CALIBRATION TRAINING REPORT")
-    print("=" * 70)
-    
-    print(f"\n{'Market':<15} {'Brier':<10} {'ECE':<10} {'Samples':<10} {'Status'}")
-    print("-" * 60)
-    
-    for market, metrics in results.items():
-        status = "✓ Trained" if metrics.sample_count >= 100 else "⚠ Insufficient"
-        print(f"{market:<15} {metrics.brier_score:<10.4f} {metrics.calibration_error:<10.4f} "
-              f"{metrics.sample_count:<10} {status}")
-    
-    print("\n" + "=" * 70)
-    print("Interpretation:")
-    print("  - Brier Score: Lower is better (0 = perfect, 0.25 = random)")
-    print("  - ECE (Expected Calibration Error): Lower is better (0 = perfect)")
-    print("  - Models saved to: ai-engine/models/calibration/")
-    print("=" * 70)
+    print("=" * 78)
+    print(f"Total samples across all markets: {total_samples}")
+    print(f"\n{'market':<14} {'status':<10} {'n':<6} {'brier':<9} {'ece':<8} {'pred_avg':<9} {'actual_avg':<10}")
+    print("-" * 78)
+    for key, info in sorted(results.items()):
+        if info["status"] == "trained":
+            # Column widths must match the header row above (9 and 10 for the
+            # last two columns) so the values line up under their titles.
+            print(
+                f"{key:<14} {'✓ ok':<10} {info['samples']:<6} "
+                f"{info['brier']:<9.4f} {info['ece']:<8.4f} "
+                f"{info['mean_predicted']:<9.3f} {info['mean_actual']:<10.3f}"
+            )
+        else:
+            print(f"{key:<14} {'⊘ skip':<10} {info['samples']:<6} -- {info.get('reason', '')}")
+    print("=" * 78)
+    print("Trained models saved to: ai-engine/models/calibration/")
+    print("Skipped markets fall back to the multiplier in market_thresholds.json.")
+    print("=" * 78)


 # =============================================================================
 # CLI
 # =============================================================================
 def main():
+    """CLI entry point: load samples from both sources, train, print a report.
+
+    Parses --min-samples and --markets, opens a DB connection, collects
+    samples from the `predictions` and `prediction_runs` queries, prints
+    per-source and per-key counts, trains one calibrator per key and prints
+    the report. The cursor and connection are closed in all cases.
+    """
-    parser = argparse.ArgumentParser(description="Train calibration models")
-    parser.add_argument("--start", type=str, default=DEFAULT_START_DATE,
-                        help="Start date (YYYY-MM-DD)")
-    parser.add_argument("--end", type=str, default=DEFAULT_END_DATE,
-                        help="End date (YYYY-MM-DD)")
+    parser = argparse.ArgumentParser(description="Train isotonic calibration on real data")
+    parser.add_argument("--min-samples", type=int, default=30,
+                        help="Minimum samples required per market (default: 30)")
    parser.add_argument("--markets", nargs="+", default=None,
-                        help="Markets to train (default: all)")
-    parser.add_argument("--min-samples", type=int, default=100,
-                        help="Minimum samples per market")
-    parser.add_argument("--top-leagues-only", action="store_true",
-                        help="Only use top leagues data")
-    
+                        help="Limit to specific calibrator keys (e.g., ms_home ou25)")
    args = parser.parse_args()
-    
-    print(f"\n[Calibration Training] {args.start} to {args.end}")
-    
-    # Load top leagues if requested
-    league_ids = None
-    if args.top_leagues_only:
-        league_ids = load_top_league_ids()
-        print(f"[Data] Filtering to {len(league_ids)} top leagues")
-    
-    # Fetch data
+
    conn = get_conn()
    cur = conn.cursor()
-    
    try:
-        df = fetch_training_data(cur, args.start, args.end, league_ids)
-        
-        if len(df) == 0:
-            print("[Error] No data found for the specified date range")
+        s1 = fetch_predictions_with_outcomes(cur)
+        s2 = fetch_prediction_runs_with_outcomes(cur)
+        print(f"[Data] predictions table: {len(s1)} samples")
+        print(f"[Data] prediction_runs:   {len(s2)} samples")
+        # `predictions` samples go first, so they win the keep="first"
+        # de-duplication done later in train_per_key.
+        all_samples = s1 + s2
+        if not all_samples:
+            print("[Error] No training samples available")
            return
-        
-        # Calculate outcomes and probabilities
-        df = calculate_actual_outcomes(df)
-        df = calculate_implied_probabilities(df)
-        
-        # Train models
-        results = train_calibration_models(
-            df=df,
-            markets=args.markets,
-            min_samples=args.min_samples,
-        )
-        
-        # Print report
-        print_calibration_report(results)
-        
+        df = pd.DataFrame(all_samples)
+        print(f"[Data] Combined:          {len(df)} samples")
+        print(f"[Data] Unique matches:    {df['match_id'].nunique()}")
+        print(f"[Data] Per-key counts:")
+        # These counts are taken before de-duplication and range filtering,
+        # so they can exceed the per-key "n" shown in the final report.
+        for key, count in df["key"].value_counts().items():
+            print(f"  {key:<14} {count}")
+
+        results = train_per_key(df, args.min_samples, args.markets)
+        print_report(results, total_samples=len(df))
    finally:
        cur.close()
        conn.close()