# iddaai-be/ai-engine/models/market_anchor.py

"""Market-anchored calibration (V35) — pure functions, no I/O.

WHY THIS EXISTS
---------------
The model's invented per-market probabilities were *measured* to be badly
overconfident. Grading the engine's own stored predictions against actual
results: it says ~50% where reality is ~25%, ~67% where reality is ~37%
(calibration error / ECE on the order of 25-30%). That mis-calibration is the
direct cause of the false "value" picks and the negative realised ROI.

The de-vigged market price, by contrast, is empirically near-perfectly
calibrated. Out-of-sample (correction fit on 2023-24, tested on 2025-26;
78k real-odds football matches) the de-vigged market's ECE was:
    home 1.56% | draw 1.85% | away 1.49% | over2.5 1.79% | btts 1.38%
Adding one small, large-sample home-favourite correction cut MS-home ECE
from 1.56% -> 0.64%.

So for the DISPLAYED probabilities we anchor to the de-vigged market and apply
only that one proven correction. ~20-40x more calibrated than the model's
numbers, and fully transparent.

These functions are pure (stdlib only) so they can be unit-tested in isolation
without the DB or the heavy model stack.
"""

from __future__ import annotations

from typing import List, Optional, Tuple


def devig(odds: List[Optional[float]]) -> Optional[List[float]]:
    """Vig-removed (fair) probabilities from a group of decimal odds.

    ``p_i = (1/odds_i) / Σ(1/odds_j)`` — normalising the raw implied
    probabilities to sum to 1 removes the bookmaker margin.

    Args:
        odds: Decimal odds for every leg of one market, in any fixed order
            (e.g. ``[home, draw, away]``). Entries may be ``None``.

    Returns:
        The fair probabilities in the same order as ``odds``, or ``None``
        when the group is empty or ANY leg is missing, non-real
        (``<= 1.01``) or non-finite (NaN / infinity). That is deliberate: a
        market with a missing/placeholder leg has no real price, and the
        product rule is to never fabricate numbers for a match without odds.

    Raises:
        TypeError, ValueError: propagated from ``float()`` when a leg is
            neither ``None`` nor convertible to a float.
    """
    if not odds:
        return None

    prices: List[float] = []
    for leg in odds:
        if leg is None:
            return None
        price = float(leg)
        # The chained comparison is False for NaN and +inf as well as for
        # anything <= 1.01, so one test rejects every non-real price. (A plain
        # ``price <= 1.01`` lets NaN through, which would poison the whole
        # output vector with NaN.)
        if not (1.01 < price < float("inf")):
            return None
        prices.append(price)

    inv = [1.0 / price for price in prices]
    total = sum(inv)
    # Defensive only: every accepted price is finite and > 1.01, so each
    # inverse is strictly positive.
    if total <= 0.0:
        return None
    return [x / total for x in inv]


# Home-favourite correction table.
#
# Each row is ``(lo, hi, delta)``: ``delta`` is the additive correction used
# when the de-vigged implied home-win probability ``p`` satisfies
# ``lo <= p < hi`` (half-open band; see ``home_favorite_delta``). The bands are
# contiguous and ascending, start at 0.45, and the last upper bound is 1.01
# rather than 1.0 so that ``p == 1.0`` still falls inside the top band.
#
# The deltas are the measured (actual home-win rate − de-vigged implied) gap
# by implied-home band, out-of-sample on real-odds matches. Big home favourites
# win a few points MORE than the de-vigged price implies; underdogs are roughly
# unbiased, hence no band below 0.45. Values are deliberately conservative —
# universal and shrunk toward 0 vs the raw tier-0 (soft-league) edge, because
# the bias is weaker in efficient top leagues. Applying these took MS-home OOS
# ECE 1.56% -> 0.64%.
_HOME_FAV_BANDS: Tuple[Tuple[float, float, float], ...] = (
    (0.45, 0.55, 0.010),
    (0.55, 0.65, 0.018),
    (0.65, 0.75, 0.028),
    (0.75, 1.01, 0.034),
)


def home_favorite_delta(p_home: float) -> float:
    """Look up the additive home-favourite correction for ``p_home``.

    Args:
        p_home: De-vigged implied home-win probability.

    Returns:
        The ``delta`` of the first ``_HOME_FAV_BANDS`` row whose half-open
        band ``lo <= p_home < hi`` contains ``p_home``; ``0.0`` when no band
        matches (in particular for anything below 0.45, where no bias was
        measured for non-favourites).
    """
    matching_deltas = (
        bump
        for lower, upper, bump in _HOME_FAV_BANDS
        if lower <= p_home < upper
    )
    # First hit wins; fall back to "no correction" when nothing matches.
    return next(matching_deltas, 0.0)


def apply_home_correction(
    p1: float, px: float, p2: float
) -> Tuple[float, float, float]:
    """Apply the home-favourite delta to a 3-way (1, X, 2) probability vector.

    The home probability is raised by ``home_favorite_delta(p1)`` (never above
    0.98), and draw/away are scaled down proportionally so the three still
    sum to 1.0 for an input that summed to 1.0.

    Args:
        p1: De-vigged home-win probability.
        px: De-vigged draw probability.
        p2: De-vigged away-win probability.

    Returns:
        ``(p1', px', p2')``. The input is returned unchanged when no
        correction applies to ``p1`` or when ``p1`` already sits at or above
        the 0.98 ceiling — the correction only ever raises the home
        probability, it never lowers it.
    """
    delta = home_favorite_delta(p1)
    if delta <= 0.0:
        return p1, px, p2

    p1n = min(0.98, p1 + delta)
    # 0.98 is a ceiling on the boost, not on the market price itself: if the
    # de-vigged price is already at or above it, clamping would push the home
    # probability DOWN (and draw/away up), inverting the measured bias.
    if p1n <= p1:
        return p1, px, p2

    remaining = 1.0 - p1n
    rest = px + p2
    if rest <= 0.0:
        # No draw/away mass to rescale (degenerate input); avoid dividing by 0.
        return p1n, px, p2
    return p1n, px / rest * remaining, p2 / rest * remaining