@@ -0,0 +1,87 @@
|
||||
"""Market-anchored calibration (V35) — pure functions, no I/O.
|
||||
|
||||
WHY THIS EXISTS
|
||||
---------------
|
||||
The model's invented per-market probabilities were *measured* to be badly
|
||||
overconfident. Grading the engine's own stored predictions against actual
|
||||
results: it says ~50% where reality is ~25%, ~67% where reality is ~37%
|
||||
(calibration error / ECE on the order of 25-30%). That mis-calibration is the
|
||||
direct cause of the false "value" picks and the negative realised ROI.
|
||||
|
||||
The de-vigged market price, by contrast, is empirically near-perfectly
|
||||
calibrated. Out-of-sample (correction fit on 2023-24, tested on 2025-26;
|
||||
78k real-odds football matches) the de-vigged market's ECE was:
|
||||
home 1.56% | draw 1.85% | away 1.49% | over2.5 1.79% | btts 1.38%
|
||||
Adding one small, large-sample home-favourite correction cut MS-home ECE
|
||||
from 1.56% -> 0.64%.
|
||||
|
||||
So for the DISPLAYED probabilities we anchor to the de-vigged market and apply
|
||||
only that one proven correction. ~20-40x more calibrated than the model's
|
||||
numbers, and fully transparent.
|
||||
|
||||
These functions are pure (stdlib only) so they can be unit-tested in isolation
|
||||
without the DB or the heavy model stack.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
|
||||
def devig(odds: List[Optional[float]]) -> Optional[List[float]]:
    """Vig-removed (fair) probabilities from a group of decimal odds.

    ``p_i = (1/odds_i) / Σ(1/odds_j)`` — normalising the raw implied
    probabilities to sum to 1 removes the bookmaker margin.

    Args:
        odds: Decimal odds for every leg of one market (e.g. 1 / X / 2).

    Returns:
        The fair probabilities, in the same order as ``odds``, or ``None``
        when the group is empty or ANY leg is missing or non-real (``None``,
        ``<= 1.01``, NaN or infinite). That is deliberate: a market with a
        missing/placeholder leg has no real price, and the product rule is to
        never fabricate numbers for a match without odds.
    """
    if not odds:
        return None

    infinity = float("inf")
    prices: List[float] = []
    for leg in odds:
        if leg is None:
            return None
        price = float(leg)
        # Positive range test on purpose: every comparison against NaN is
        # False, so ``price <= 1.01`` alone would let NaN through and the
        # result would be a list of NaNs. This form rejects NaN and +inf
        # together with placeholder prices at or below 1.01.
        if not (1.01 < price < infinity):
            return None
        prices.append(price)

    inv = [1.0 / price for price in prices]
    # Every price is finite and > 1.01, so each term (and the sum) is > 0.
    total = sum(inv)
    return [x / total for x in inv]
|
||||
|
||||
|
||||
# Home-favourite correction: measured (actual home-win rate − de-vigged implied)
# by implied-home band, out-of-sample on real-odds matches. Big home favourites
# win a few points MORE than the de-vigged price implies; underdogs are roughly
# unbiased. Values are deliberately conservative — universal and shrunk toward 0
# vs the raw tier-0 (soft-league) edge, because the bias is weaker in efficient
# top leagues. Applying these took MS-home OOS ECE 1.56% -> 0.64%.
#
# Layout: each entry is ``(lo, hi, delta)``. ``home_favorite_delta`` returns
# ``delta`` for the first band where ``lo <= p_home < hi`` (lower bound
# inclusive, upper bound exclusive), so adjacent bands share an edge without
# overlapping. Anything outside every band gets no correction.
_HOME_FAV_BANDS: Tuple[Tuple[float, float, float], ...] = (
    (0.45, 0.55, 0.010),
    (0.55, 0.65, 0.018),
    (0.65, 0.75, 0.028),
    # Upper bound sits above 1.0 so that p_home == 1.0 still matches this band
    # despite the exclusive ``< hi`` comparison.
    (0.75, 1.01, 0.034),
)
|
||||
|
||||
|
||||
def home_favorite_delta(p_home: float) -> float:
    """Look up the additive correction for a de-vigged home-win probability.

    Walks ``_HOME_FAV_BANDS`` in order and returns the delta of the first band
    whose half-open interval ``[lo, hi)`` contains ``p_home``.

    Returns ``0.0`` when no band matches — i.e. below the lowest band edge,
    where no bias was measured for non-favourites.
    """
    matching_deltas = (
        bump
        for floor, ceiling, bump in _HOME_FAV_BANDS
        if floor <= p_home < ceiling
    )
    return next(matching_deltas, 0.0)
|
||||
|
||||
|
||||
def apply_home_correction(
    p1: float, px: float, p2: float
) -> Tuple[float, float, float]:
    """Apply the home-favourite delta to a 3-way (1, X, 2) probability vector.

    The home probability is raised by ``home_favorite_delta(p1)`` and the
    draw/away probabilities are rescaled proportionally so the three still sum
    to 1.0. The inputs are expected to already be a probability vector
    (non-negative, summing to 1).

    Args:
        p1: De-vigged home-win probability.
        px: De-vigged draw probability.
        p2: De-vigged away-win probability.

    Returns:
        ``(p1, px, p2)`` after correction. The input is returned unchanged
        when no correction applies to ``p1``.
    """
    delta = home_favorite_delta(p1)
    if delta <= 0.0:
        return p1, px, p2

    # The boost is capped at 0.98 so the correction alone never produces a
    # near-certain price. The cap must not turn the boost into a cut, though:
    # an input already above 0.98 keeps its own value (clamped to 1.0 to absorb
    # float round-off) instead of being dragged down to 0.98.
    boosted = min(0.98, p1 + delta)
    p1n = max(boosted, min(p1, 1.0))

    remaining = 1.0 - p1n
    rest = px + p2
    if rest <= 0.0:
        # Nothing to redistribute across draw/away; leave them as given.
        return p1n, px, p2
    # Preserve the draw:away ratio while shrinking both to fit ``remaining``.
    return p1n, px / rest * remaining, p2 / rest * remaining
|
||||
Reference in New Issue
Block a user