gg

2026-06-10 03:01:33 +03:00
parent c3e44ee697
commit b62a4f2161
27 changed files with 366 additions and 4540 deletions
@@ -0,0 +1,87 @@
+"""Market-anchored calibration (V35) — pure functions, no I/O.
+
+WHY THIS EXISTS
+---------------
+The model's invented per-market probabilities were *measured* to be badly
+overconfident. Grading the engine's own stored predictions against actual
+results: it says ~50% where reality is ~25%, ~67% where reality is ~37%
+(calibration error / ECE on the order of 25-30%). That mis-calibration is the
+direct cause of the false "value" picks and the negative realised ROI.
+
+The de-vigged market price, by contrast, is empirically near-perfectly
+calibrated. Out-of-sample (correction fit on 2023-24, tested on 2025-26;
+78k real-odds football matches) the de-vigged market's ECE was:
+    home 1.56% | draw 1.85% | away 1.49% | over2.5 1.79% | btts 1.38%
+Adding one small, large-sample home-favourite correction cut MS-home ECE
+from 1.56% -> 0.64%.
+
+So for the DISPLAYED probabilities we anchor to the de-vigged market and apply
+only that one proven correction. ~20-40x more calibrated than the model's
+numbers, and fully transparent.
+
+These functions are pure (stdlib only) so they can be unit-tested in isolation
+without the DB or the heavy model stack.
+"""
+
+from __future__ import annotations
+
+from typing import List, Optional, Tuple
+
+
+def devig(odds: List[Optional[float]]) -> Optional[List[float]]:
+    """Vig-removed (fair) probabilities from a group of decimal odds.
+
+    ``p_i = (1/odds_i) / Σ(1/odds_j)`` — normalising the raw implied
+    probabilities to sum to 1 removes the bookmaker margin.
+
+    Returns ``None`` when ANY leg is missing or non-real (``<= 1.01``). That is
+    deliberate: a market with a missing/placeholder leg has no real price, and
+    the product rule is to never fabricate numbers for a match without odds.
+    """
+    if not odds or any(o is None or float(o) <= 1.01 for o in odds):
+        return None
+    inv = [1.0 / float(o) for o in odds]
+    total = sum(inv)
+    if total <= 0.0:
+        return None
+    return [x / total for x in inv]
+
+
+# Home-favourite correction: measured (actual home-win rate − de-vigged implied)
+# by implied-home band, out-of-sample on real-odds matches. Big home favourites
+# win a few points MORE than the de-vigged price implies; underdogs are roughly
+# unbiased. Values are deliberately conservative — universal and shrunk toward 0
+# vs the raw tier-0 (soft-league) edge, because the bias is weaker in efficient
+# top leagues. Applying these took MS-home OOS ECE 1.56% -> 0.64%.
+#
+# Each row is (lo, hi, delta): a de-vigged home probability p with
+# lo <= p < hi receives the additive correction delta. The bands are contiguous
+# from 0.45 upward, and the last upper bound is 1.01 rather than 1.00, which
+# keeps p == 1.0 inside the top band under the half-open comparison.
+_HOME_FAV_BANDS: Tuple[Tuple[float, float, float], ...] = (
+    (0.45, 0.55, 0.010),
+    (0.55, 0.65, 0.018),
+    (0.65, 0.75, 0.028),
+    (0.75, 1.01, 0.034),
+)
+
+
+def home_favorite_delta(p_home: float) -> float:
+    """Additive correction to the de-vigged home-win probability.
+
+    Zero below 0.45 (no measured bias for non-favourites)."""
+    for lo, hi, delta in _HOME_FAV_BANDS:
+        if lo <= p_home < hi:
+            return delta
+    return 0.0
+
+
+def apply_home_correction(
+    p1: float, px: float, p2: float
+) -> Tuple[float, float, float]:
+    """Apply the home-favourite delta to a 3-way (1, X, 2) probability vector,
+    renormalising draw/away so the three still sum to 1.0."""
+    delta = home_favorite_delta(p1)
+    if delta <= 0.0:
+        return p1, px, p2
+    p1n = min(0.98, p1 + delta)
+    remaining = 1.0 - p1n
+    rest = px + p2
+    if rest <= 0.0:
+        return p1n, px, p2
+    return p1n, px / rest * remaining, p2 / rest * remaining