88 lines
3.3 KiB
Python
88 lines
3.3 KiB
Python
"""Market-anchored calibration (V35) — pure functions, no I/O.

WHY THIS EXISTS
---------------
The model's invented per-market probabilities were *measured* to be badly
overconfident. Grading the engine's own stored predictions against actual
results: it says ~50% where reality is ~25%, ~67% where reality is ~37%
(calibration error / ECE on the order of 25-30%). That mis-calibration is the
direct cause of the false "value" picks and the negative realised ROI.

The de-vigged market price, by contrast, is empirically near-perfectly
calibrated. Out-of-sample (correction fit on 2023-24, tested on 2025-26;
78k real-odds football matches) the de-vigged market's ECE was:
    home 1.56% | draw 1.85% | away 1.49% | over2.5 1.79% | btts 1.38%
Adding one small, large-sample home-favourite correction cut MS-home ECE
from 1.56% -> 0.64%.

So for the DISPLAYED probabilities we anchor to the de-vigged market and apply
only that one proven correction. ~20-40x more calibrated than the model's
numbers, and fully transparent.

These functions are pure (stdlib only) so they can be unit-tested in isolation
without the DB or the heavy model stack.
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from typing import List, Optional, Tuple
|
||
|
||
|
||
def devig(odds: List[Optional[float]]) -> Optional[List[float]]:
    """Return vig-removed (fair) probabilities for a group of decimal odds.

    ``p_i = (1/odds_i) / Σ(1/odds_j)`` — normalising the raw implied
    probabilities to sum to 1 removes the bookmaker margin.

    Args:
        odds: Decimal odds for every leg of one market (e.g. 1/X/2). Each
            leg is passed through ``float()``.

    Returns:
        The fair probabilities, in the same order as ``odds``, or ``None``
        when the group is empty or ANY leg is missing (``None``), non-real
        (``<= 1.01``) or not a finite number (NaN / infinity). That is
        deliberate: a market with a missing/placeholder leg has no real
        price, and the product rule is to never fabricate numbers for a
        match without odds.

    Raises:
        TypeError, ValueError: if a leg cannot be converted by ``float()``.
    """
    if not odds:
        return None

    implied: List[float] = []
    for leg in odds:
        if leg is None:
            return None
        price = float(leg)
        # NaN fails every comparison, so a plain ``price <= 1.01`` test would
        # let it through and poison the whole result with NaN. The chained
        # comparison rejects NaN, +/-inf and placeholder prices in one go.
        if not 1.01 < price < float("inf"):
            return None
        implied.append(1.0 / price)

    # Every price is finite and > 1.01, so each term (and the sum) is > 0.
    total = sum(implied)
    return [share / total for share in implied]
|
||
|
||
|
||
# Home-favourite correction table.
#
# Each row is ``(band_low, band_high, delta)``: for a de-vigged implied home
# probability inside the band, ``delta`` is the measured gap
# (actual home-win rate − de-vigged implied), taken out-of-sample on
# real-odds matches. Big home favourites win a few points MORE than the
# de-vigged price implies; underdogs are roughly unbiased, so nothing is
# listed below 0.45.
#
# The values are deliberately conservative — universal and shrunk toward 0
# vs the raw tier-0 (soft-league) edge, because the bias is weaker in
# efficient top leagues. Applying these took MS-home OOS ECE 1.56% -> 0.64%.
_HOME_FAV_BANDS: Tuple[Tuple[float, float, float], ...] = (
    (0.45, 0.55, 0.010),  # marginal favourite
    (0.55, 0.65, 0.018),
    (0.65, 0.75, 0.028),
    (0.75, 1.01, 0.034),  # upper bound sits above 1.0 so p == 1.0 is included
)
|
||
|
||
|
||
def home_favorite_delta(p_home: float) -> float:
    """Look up the additive correction for a de-vigged home-win probability.

    Scans ``_HOME_FAV_BANDS`` in order and returns the delta of the first
    half-open band ``[low, high)`` that contains ``p_home``.

    Returns ``0.0`` when no band matches — in particular below 0.45, where
    no bias was measured for non-favourites.
    """
    matching_deltas = (
        bump for low, high, bump in _HOME_FAV_BANDS if low <= p_home < high
    )
    return next(matching_deltas, 0.0)
|
||
|
||
|
||
def apply_home_correction(
    p1: float, px: float, p2: float
) -> Tuple[float, float, float]:
    """Apply the home-favourite delta to a 3-way (1, X, 2) probability vector.

    The home probability is raised by ``home_favorite_delta(p1)`` and the
    draw/away probabilities are rescaled proportionally so the three still
    sum to 1.0.

    Args:
        p1: De-vigged home-win probability.
        px: De-vigged draw probability.
        p2: De-vigged away-win probability.

    Returns:
        ``(p1, px, p2)`` after correction. The input is returned unchanged
        when no correction applies (delta ``<= 0``). When ``px + p2 <= 0``
        there is nothing to rescale, so only the home value is adjusted.
    """
    delta = home_favorite_delta(p1)
    if delta <= 0.0:
        return p1, px, p2

    # The 0.98 cap limits how far the correction may PUSH the home
    # probability; it must never pull an already-higher value down (an
    # upward correction that lowers p1 would be wrong, and with px = p2 = 0
    # it would leave the vector summing to 0.98). The ceiling is therefore
    # 0.98, or p1 itself when p1 is already above it — clamped to 1.0 so the
    # draw/away remainder can never go negative.
    ceiling = max(0.98, min(p1, 1.0))
    p1n = min(ceiling, p1 + delta)

    remaining = 1.0 - p1n
    rest = px + p2
    if rest <= 0.0:
        # No draw/away mass to redistribute.
        return p1n, px, p2
    return p1n, px / rest * remaining, p2 / rest * remaining
|