# ── FINAL-OUTPUT RECALIBRATION LAYER (V31e) ─────────────────────────────────
# A thin, LAST-STEP per-market map: production calibrated_confidence -> reality.
# Built from a 60-day backtest (scripts/fit_recalibrators.py); inference is a
# pure np.interp over a 99-point monotone grid — NO sklearn needed at runtime.
#
# WHY THIS EXISTS:
#   The upstream chain (temperature scaling T=1.5 -> per-outcome isotonic ->
#   POST_CAL_TRUST blend) crushes high-base-rate binary markets toward 0.5,
#   so "system says 51%" can really hit 70%. MS survives (near-uniform picks),
#   which is why MS is already well-calibrated and OU/HT-OU markets are not.
#
# SAFETY / "DO NO HARM":
#   * Only markets whose fit-time ECE >= 5.0 carry a map (currently OU15, OU35,
#     HT_OU05, HT_OU15). MS and every already-good market have NO map ->
#     recalibrate_conf() returns the input UNCHANGED -> guaranteed no regression.
#   * Out-of-sample validated (fit=older 65%, test=unseen 35%):
#       MS       ECE  1.1 -> 1.3  (flat, safe)
#       HT_OU15      29.2 -> 0.8
#       OU15         19.0 -> 3.3
#       OU35         13.9 -> 4.3
#       HT_OU05      11.5 -> 2.4
#   * Adjusts ONLY the displayed confidence number. All rich analysis payload
#     (probabilities, edges, vetoes, tiers, bands) is preserved untouched, and
#     the pre-recalibration value is kept for audit by the caller.
FINAL_RECALIBRATOR_PATH = os.path.join(CALIBRATION_DIR, "final_recalibrators.json")


class FinalRecalibrator:
    """Per-market final-output recalibration via piecewise-linear interpolation.

    Loads a compact JSON file of lookup grids: ``_meta.grid`` holds the shared
    x axis (calibrated_confidence / 100) and each market entry holds a ``y``
    list of the same length (the reality-aligned value at each grid point).
    Markets absent from the file pass through as identity.

    Attributes:
        grid: Shared x axis as a 1-D float array, or ``None`` in pass-through
            mode (no file, unreadable file, or invalid grid).
        maps: Upper-cased market name -> y array aligned with ``grid``.
        source_path: Path the instance attempted to load from.
    """

    def __init__(self, path: str = FINAL_RECALIBRATOR_PATH):
        self.grid: Optional[np.ndarray] = None
        self.maps: Dict[str, np.ndarray] = {}
        self.source_path = path
        self._load(path)

    @staticmethod
    def _market_key(market: Optional[str]) -> str:
        """Normalise a market name to the upper-case key used in ``maps``."""
        # str() keeps non-string identifiers from raising; None/"" map to "".
        return str(market or "").upper()

    def _load(self, path: str) -> None:
        """Populate ``grid`` and ``maps`` from the JSON file at *path*.

        Never raises. Any problem (missing file, malformed JSON, invalid grid)
        leaves the instance in pass-through mode with ``grid`` set to ``None``
        and ``maps`` empty. An individual market whose ``y`` is missing, has
        the wrong length, or contains non-numeric / non-finite values is
        skipped without affecting the other markets.
        """
        if not os.path.exists(path):
            print(f"[FinalRecalibrator] No map file at {path} — pass-through mode (all markets unchanged)")
            return
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
            meta = data.get("_meta", {})
            grid = meta.get("grid")
            if not grid:
                print("[FinalRecalibrator] Map file missing _meta.grid — pass-through mode")
                return
            grid_arr = np.asarray(grid, dtype=float)
            # np.interp silently returns nonsense unless the x axis is
            # increasing, so refuse the whole file rather than mis-map.
            if (
                grid_arr.ndim != 1
                or not np.all(np.isfinite(grid_arr))
                or np.any(np.diff(grid_arr) <= 0)
            ):
                print("[FinalRecalibrator] _meta.grid must be a finite, strictly increasing "
                      "list — pass-through mode")
                return

            # Build into a local dict and commit only once the file has been
            # fully processed, so a mid-file failure cannot leave partial state.
            loaded: Dict[str, np.ndarray] = {}
            for market, m in data.items():
                if market == "_meta" or not isinstance(m, dict):
                    continue
                y = m.get("y")
                if not isinstance(y, (list, tuple)) or len(y) != len(grid_arr):
                    print(f"[FinalRecalibrator] Skipped {market}: grid/y length mismatch")
                    continue
                try:
                    y_arr = np.asarray(y, dtype=float)
                except (TypeError, ValueError):
                    print(f"[FinalRecalibrator] Skipped {market}: non-numeric y values")
                    continue
                if y_arr.shape != grid_arr.shape or not np.all(np.isfinite(y_arr)):
                    print(f"[FinalRecalibrator] Skipped {market}: non-finite y values")
                    continue
                loaded[str(market).upper()] = y_arr

            self.grid = grid_arr
            self.maps = loaded
            print(f"[FinalRecalibrator] Loaded reality maps for {sorted(self.maps.keys())} "
                  f"(everything else, incl. MS, passes through unchanged)")
        except Exception as e:
            # Deliberate catch-all: this layer is best-effort and must never
            # stop the engine from starting. The failure is reported, not hidden.
            print(f"[FinalRecalibrator] Warning: failed to load {path}: {e} — pass-through mode")
            self.grid = None
            self.maps = {}

    def has_map(self, market: str) -> bool:
        """Return True when *market* has a loaded map that will be applied."""
        # Same condition recalibrate_conf() uses, so the two always agree.
        return self.grid is not None and self._market_key(market) in self.maps

    def recalibrate_conf(self, market: str, calibrated_conf: float) -> float:
        """Map a 0–100 confidence to its reality-aligned value.

        Args:
            market: Market code; matched case-insensitively against the maps.
            calibrated_conf: Confidence on a 0–100 scale.

        Returns:
            The interpolated confidence clamped to [1.0, 99.0] when the market
            has a map. Markets without a map, NaN inputs, and any internal
            failure return *calibrated_conf* unchanged, so this layer can
            never regress production.
        """
        try:
            key = self._market_key(market)
            if self.grid is None or key not in self.maps:
                return calibrated_conf
            x = float(calibrated_conf) / 100.0
            # NaN must be caught before clamping: min(99.0, nan) evaluates to
            # 99.0 in Python, which would report maximum confidence.
            if np.isnan(x):
                return calibrated_conf
            x = min(max(x, 0.0), 1.0)
            y = float(np.interp(x, self.grid, self.maps[key]))
            if not np.isfinite(y):
                return calibrated_conf
            return max(1.0, min(99.0, y * 100.0))
        except Exception:
            # Deliberate catch-all: identity is always a safe answer here.
            return calibrated_conf


# Singleton instance
_final_recalibrator_instance: Optional[FinalRecalibrator] = None


def get_final_recalibrator() -> FinalRecalibrator:
    """Get or create the global FinalRecalibrator instance."""
    global _final_recalibrator_instance
    if _final_recalibrator_instance is None:
        _final_recalibrator_instance = FinalRecalibrator()
    return _final_recalibrator_instance
0.3333, + 0.3333, + 0.3394, + 0.3636, + 0.3636, + 0.3636, + 0.3636, + 0.3636, + 0.3636, + 0.3636, + 0.3636, + 0.3727, + 0.3955, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4, + 0.4583, + 0.6286, + 0.6286, + 0.6286, + 0.6286, + 0.6286, + 0.6286, + 0.6286, + 0.6286, + 0.6286, + 0.6531, + 0.672, + 0.7143, + 0.7262, + 0.7262, + 0.7312, + 0.7406, + 0.7655, + 0.7655, + 0.8495, + 0.8495, + 0.8495, + 0.8495, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0 + ] + }, + "HT_OU15": { + "grid_min": 0.01, + "grid_max": 0.99, + "n": 5200, + "y": [ + 0.4118, + 0.4118, + 0.4118, + 0.4118, + 0.4118, + 0.4118, + 0.4118, + 0.4118, + 0.4118, + 0.4118, + 0.4118, + 0.4118, + 0.4118, + 0.4118, + 0.4118, + 0.4118, + 0.4521, + 0.5385, + 0.5385, + 0.5385, + 0.5848, + 0.6142, + 0.6142, + 0.6142, + 0.6245, + 0.6245, + 0.6245, + 0.6262, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6275, + 0.6452, + 0.6842, + 0.6842, + 0.6842, + 0.6842, + 0.6842, + 0.6842, + 0.8077, + 0.8077, + 0.8077, + 0.8077, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0 + ] + }, + "OU15": { + "grid_min": 0.01, + "grid_max": 0.99, + "n": 2724, + "y": [ + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, 
+ 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.2797, + 0.4352, + 0.6295, + 0.7165, + 0.7174, + 0.7987, + 0.8197, + 0.8197, + 0.8197, + 0.8197, + 0.8197, + 0.8197, + 0.9118, + 0.9276, + 0.9502, + 0.9729, + 0.9955, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0 + ] + }, + "OU35": { + "grid_min": 0.01, + "grid_max": 0.99, + "n": 4277, + "y": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.474, + 0.474, + 0.474, + 0.474, + 0.474, + 0.474, + 0.474, + 0.474, + 0.474, + 0.571, + 0.571, + 0.571, + 0.571, + 0.571, + 0.571, + 0.571, + 0.571, + 0.571, + 0.571, + 0.571, + 0.571, + 0.571, + 0.6222, + 0.6222, + 0.6222, + 0.6222, + 0.6222, + 0.7747, + 0.7747, + 0.7747, + 0.7747, + 0.7747, + 0.7788, + 0.8195, + 0.8333, + 0.8333, + 0.8333, + 0.8333, + 0.8333, + 0.8333, + 0.8333, + 0.8333, + 0.8333, + 0.8333, + 0.8333, + 0.8333, + 0.8333, + 0.8333, + 0.8333, + 0.836, + 0.8624, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889, + 0.8889 + ] + } +} \ No newline at end of file diff --git a/ai-engine/services/orchestrator/market_board.py b/ai-engine/services/orchestrator/market_board.py index cd19b86..2235600 100644 --- a/ai-engine/services/orchestrator/market_board.py +++ b/ai-engine/services/orchestrator/market_board.py @@ -56,7 
+56,7 @@ from services.match_commentary import generate_match_commentary from utils.top_leagues import load_top_league_ids from utils.league_reliability import load_league_reliability from config.config_loader import build_threshold_dict, get_threshold_default -from models.calibration import get_calibrator +from models.calibration import get_calibrator, get_final_recalibrator # ── V30: Post-calibration trust factors ───────────────────────────── # Controls how much to trust isotonic calibrator vs raw model output. @@ -1153,6 +1153,18 @@ class MarketBoardMixin: # overconfidence without destroying probability signal. # The tier system (V31b) is the real profitability gatekeeper. calibrated_conf = max(1.0, min(99.0, raw_conf * 0.92)) + + # ── FINAL-OUTPUT RECALIBRATION (V31e) ────────────────────────── + # Last-step per-market map: "system says X% -> reality is Y%". ONLY + # badly-miscalibrated markets carry a map (fit-ECE >= 5: OU15, OU35, + # HT_OU05, HT_OU15). MS and every already-good market pass through + # UNCHANGED -> guaranteed no regression. Out-of-sample proven (e.g. + # HT_OU15 ECE 29.2->0.8) and identity-safe for MS (1.1->1.3). + # This adjusts ONLY the displayed confidence so users see honest + # probabilities; all analysis below (probabilities, edges, vetoes, + # tiers, bands) is preserved, and the pre-recal value is kept for audit. + pre_recal_conf = calibrated_conf + calibrated_conf = get_final_recalibrator().recalibrate_conf(market, calibrated_conf) min_conf = self.market_min_conf.get(market, 55.0) implied_prob = (1.0 / odd) if odd > 1.0 else 0.0 @@ -1361,6 +1373,7 @@ class MarketBoardMixin: { "raw_confidence": round(raw_conf, 1), "calibrated_confidence": round(calibrated_conf, 1), + "calibrated_confidence_pre_recal": round(pre_recal_conf, 1), "unified_score": round(bgs, 1), "unified_score_label": bgs_label, "min_required_confidence": round(min_conf, 1),