Files
iddaai-be/ai-engine/models/market_anchor.py
T
fahricansecer 4c137fbab6
Deploy Iddaai Backend / build-and-deploy (push) Successful in 1m7s
wow
2026-06-11 00:25:45 +03:00

259 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Market-anchored calibration (V35) — pure functions, no I/O.
WHY THIS EXISTS
---------------
The model's invented per-market probabilities were *measured* to be badly
overconfident. Grading the engine's own stored predictions against actual
results: it says ~50% where reality is ~25%, ~67% where reality is ~37%
(calibration error / ECE on the order of 25-30%). That mis-calibration is the
direct cause of the false "value" picks and the negative realised ROI.
The de-vigged market price, by contrast, is empirically near-perfectly
calibrated. Out-of-sample (correction fit on 2023-24, tested on 2025-26;
78k real-odds football matches) the de-vigged market's ECE was:
home 1.56% | draw 1.85% | away 1.49% | over2.5 1.79% | btts 1.38%
Adding one small, large-sample home-favourite correction cut MS-home ECE
from 1.56% -> 0.64%.
So for the DISPLAYED probabilities we anchor to the de-vigged market and apply
only that one proven correction. ~20-40x more calibrated than the model's
numbers, and fully transparent.
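The probability arithmetic uses the stdlib only, so it can be unit-tested in
isolation without the heavy model stack. The only I/O is the guarded,
TTL-cached correction-table lookup (a JSON file and an optional, best-effort
DB read); point MARKET_ANCHOR_CORRECTIONS_PATH at a fixture file to run in
file-only mode without the DB.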
"""
from __future__ import annotations
import json
import os
import threading
import time
from typing import Any, Dict, List, Optional, Tuple
def devig(odds: List[Optional[float]]) -> Optional[List[float]]:
    """Vig-removed (fair) probabilities from a group of decimal odds.

    ``p_i = (1/odds_i) / Σ(1/odds_j)`` — normalising the raw implied
    probabilities to sum to 1 removes the bookmaker margin.

    Args:
        odds: Decimal odds for every leg of one market (e.g. 1 / X / 2).

    Returns:
        The fair probabilities in the same order as ``odds``, or ``None`` when
        the group is empty or ANY leg is missing or non-real (``None``,
        ``<= 1.01``, NaN or infinite). That is deliberate: a market with a
        missing/placeholder leg has no real price, and the product rule is to
        never fabricate numbers for a match without odds.
    """
    if not odds:
        return None
    implied: List[float] = []
    for leg in odds:
        if leg is None:
            return None
        price = float(leg)
        # Positive-form range test: NaN fails every comparison, so it is
        # rejected here together with placeholder prices (<= 1.01) and +inf.
        # A plain ``price <= 1.01`` check would let NaN through and poison
        # the whole vector.
        if not (1.01 < price < float("inf")):
            return None
        implied.append(1.0 / price)
    # Every accepted leg contributes a strictly positive implied probability,
    # so the sum cannot be zero.
    total = sum(implied)
    return [x / total for x in implied]
# Home-favourite correction: measured (actual home-win rate minus de-vigged
# implied) by implied-home band, out-of-sample on real-odds matches. Big home
# favourites win a few points MORE than the de-vigged price implies; underdogs
# are roughly unbiased. Values are deliberately conservative — universal and
# shrunk toward 0 vs the raw tier-0 (soft-league) edge, because the bias is
# weaker in efficient top leagues. Applying these took MS-home OOS ECE
# 1.56% -> 0.64%.
#
# These static bands are the BUILT-IN FALLBACK. The live values come from the
# versioned artifact `config/market_anchor_corrections.json`, refreshed by
# `scripts/fit_anchor_corrections.py` (the guarded self-correction loop:
# measure on settled matches -> shrink/clip/min-sample gates -> out-of-sample
# acceptance -> write table). The engine only ever consumes the TABLE — the
# loop never modifies code.
#
# Each entry is (lo, hi, delta): `delta` is the additive correction used when
# lo <= p_home < hi. The top band's upper bound is 1.01 so that p_home == 1.0
# still falls inside it; probabilities below 0.45 match no band.
_HOME_FAV_BANDS: Tuple[Tuple[float, float, float], ...] = (
(0.45, 0.55, 0.010),
(0.55, 0.65, 0.018),
(0.65, 0.75, 0.028),
(0.75, 1.01, 0.034),
)
# Bundled artifact: <directory of this module>/../config/market_anchor_corrections.json
_DEFAULT_CORRECTIONS_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "..",
    "config",
    "market_anchor_corrections.json",
)


def _corrections_path() -> str:
    """Return the correction-file path to read.

    The MARKET_ANCHOR_CORRECTIONS_PATH environment variable wins when it is
    set; otherwise the bundled default location is returned.
    """
    return os.getenv("MARKET_ANCHOR_CORRECTIONS_PATH", _DEFAULT_CORRECTIONS_PATH)
# Serialises refreshes and invalidations of the cached correction table.
_corrections_lock = threading.Lock()
# Last resolved table; None means "no usable artifact", in which case callers
# fall back to the built-in static bands (or to zero for the away side).
_corrections_cache: Optional[Dict[str, Any]] = None
# time.time() of the last resolution attempt; 0.0 forces a refresh on the
# next lookup.
_corrections_ts: float = 0.0
# Re-check sources at most every 10 minutes: the self-correction cron writes a
# new table to app_settings; running engines pick it up WITHOUT a restart.
_CORRECTIONS_TTL_S = 600.0
def _parse_corrections(raw: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Validate a raw correction artifact and convert it to band tuples.

    Args:
        raw: Decoded JSON artifact. Expected shape:
            ``{"version": ..., "corrections": {"ms_home": [band, ...],
            "ms_away": [band, ...]}}`` where each band is a mapping with
            ``lo``, ``hi`` and ``delta``.

    Returns:
        A dict holding a tuple of ``(lo, hi, delta)`` bands for each of
        ``"ms_home"`` / ``"ms_away"`` that is present and non-empty, plus a
        ``"version"`` string; or ``None`` when the artifact has no usable
        bands at all.

    Raises:
        TypeError: ``raw`` or its ``"corrections"`` entry is not a mapping, or
            a band is not subscriptable by key.
        KeyError: a band lacks ``lo``, ``hi`` or ``delta``.
        ValueError: a band value is not numeric, is NaN, or violates
            ``0 <= lo < hi <= 1.01`` / ``|delta| <= 0.10``.
    """
    # Reject mis-shaped artifacts with an exception type the loader handles;
    # calling ``.get`` on a list/None would raise AttributeError instead.
    if not isinstance(raw, dict):
        raise TypeError(
            f"correction artifact must be an object, got {type(raw).__name__}"
        )
    corrections = raw.get("corrections", {})
    if not isinstance(corrections, dict):
        raise TypeError(
            f"'corrections' must be an object, got {type(corrections).__name__}"
        )

    parsed_table: Dict[str, Any] = {}
    for key in ("ms_home", "ms_away"):
        bands = corrections.get(key)
        if not (isinstance(bands, list) and bands):
            continue
        parsed = []
        for b in bands:
            lo = float(b["lo"])
            hi = float(b["hi"])
            delta = float(b["delta"])
            # Both checks are written in positive form so that a NaN in any
            # field fails validation (``abs(nan) > 0.10`` would be False and
            # let a NaN delta through).
            if not (0.0 <= lo < hi <= 1.01) or not (abs(delta) <= 0.10):
                raise ValueError(f"correction band out of range: {b}")
            parsed.append((lo, hi, delta))
        parsed_table[key] = tuple(parsed)
    if not parsed_table:
        return None
    parsed_table["version"] = str(raw.get("version", "?"))
    return parsed_table
def _db_corrections_raw() -> Optional[Dict[str, Any]]:
    """Fetch the correction artifact from app_settings.

    The database is the deployment's shared medium — the ai-engine container
    has no volume mounts, so a host-side cron can only reach the running
    engine through the database. Guarded: any failure → None, never breaks a
    prediction. Disable with MARKET_ANCHOR_DB=0.

    Returns:
        The decoded artifact when the ``market_anchor_corrections`` row exists
        and holds a JSON object; otherwise ``None`` (lookup disabled, DB
        dependencies missing, connection/query failure, missing or empty row,
        invalid JSON, or a payload that is not an object).
    """
    if os.environ.get("MARKET_ANCHOR_DB", "1") == "0":
        return None
    try:
        import psycopg2  # local import: keeps module usable without DB deps
        from data.db import get_clean_dsn

        conn = psycopg2.connect(get_clean_dsn(), connect_timeout=3)
        try:
            with conn.cursor() as cur:
                cur.execute(
                    "SELECT value FROM app_settings"
                    " WHERE key = 'market_anchor_corrections'"
                )
                row = cur.fetchone()
        finally:
            # psycopg2's ``with connection:`` only ends the transaction; the
            # connection itself has to be closed explicitly or it stays open.
            conn.close()
        if row and row[0]:
            value = row[0]
            # A json/jsonb column may already be decoded by the driver; a
            # text column still needs json.loads.
            payload = value if isinstance(value, dict) else json.loads(value)
            return payload if isinstance(payload, dict) else None
    except Exception:
        # Deliberate best-effort boundary: the caller falls back to the
        # bundled file / static bands when the DB is unavailable.
        return None
    return None
# Failures that mean "this source is unusable": I/O errors, invalid JSON
# (json.JSONDecodeError is a ValueError), and artifacts with the wrong shape.
# AttributeError is included because a non-object artifact (e.g. a JSON list,
# or "corrections": null) fails on ``.get`` during parsing and must not
# escape into a prediction.
_UNUSABLE_SOURCE_ERRORS = (
    OSError, ValueError, KeyError, TypeError, AttributeError,
)


def _read_corrections_file(path: str) -> Optional[Dict[str, Any]]:
    """Parse the correction artifact at ``path``; None if unreadable/invalid."""
    try:
        with open(path, "r", encoding="utf-8") as fh:
            return _parse_corrections(json.load(fh))
    except _UNUSABLE_SOURCE_ERRORS:
        return None


def _load_corrections() -> Optional[Dict[str, Any]]:
    """Resolve the active correction table (thread-safe, TTL-cached).

    Source order:
      1. MARKET_ANCHOR_CORRECTIONS_PATH env file (tests/dev — file-only mode,
         malformed → static fallback, DB and default file are NOT consulted)
      2. app_settings DB row 'market_anchor_corrections' (production path —
         refreshed by scripts/fit_anchor_corrections.py)
      3. bundled config/market_anchor_corrections.json
      4. None → built-in static fallback bands

    Returns:
        The parsed table, or ``None`` when no source yields a usable one. The
        outcome (including ``None``) is cached for ``_CORRECTIONS_TTL_S``
        seconds, so a failing source is retried only after the TTL expires.
    """
    global _corrections_cache, _corrections_ts
    now = time.time()
    # Fast path without the lock while the cached result is still fresh.
    if now - _corrections_ts < _CORRECTIONS_TTL_S:
        return _corrections_cache
    with _corrections_lock:
        # Re-check under the lock: another thread may have refreshed the
        # table while this one was waiting.
        if now - _corrections_ts < _CORRECTIONS_TTL_S:
            return _corrections_cache
        env_path = os.environ.get("MARKET_ANCHOR_CORRECTIONS_PATH")
        if env_path:
            # File-only mode: one read of the override file, nothing else.
            table = _read_corrections_file(env_path)
        else:
            table = None
            raw = _db_corrections_raw()
            if raw is not None:
                try:
                    table = _parse_corrections(raw)
                except _UNUSABLE_SOURCE_ERRORS:
                    table = None
            if table is None:
                table = _read_corrections_file(_corrections_path())
        # Publish the table before the timestamp so the lock-free fast path
        # never pairs a fresh timestamp with the previous table.
        _corrections_cache = table
        _corrections_ts = time.time()
        return _corrections_cache
def reload_corrections() -> None:
    """Drop the cached correction table.

    Resets the timestamp and the cached value under the lock so that the next
    lookup re-reads the sources (used after a refresh and in tests).
    """
    global _corrections_cache, _corrections_ts
    with _corrections_lock:
        _corrections_ts, _corrections_cache = 0.0, None
def home_favorite_delta(p_home: float) -> float:
    """Return the additive correction for a de-vigged home-win probability.

    Lookup order: bands from the fitted artifact first, then the built-in
    static bands. A fitted band therefore OVERRIDES the static prior wherever
    it covers ``p_home`` (an explicit delta of 0 counts — it is evidence of
    "no bias"). Where the artifact is SILENT (a range that never passed the
    min-sample gate, e.g. big favourites 0.75+), the static prior still
    applies — missing evidence must not silently erase proven knowledge.

    Returns 0.0 when no band in either table contains ``p_home``.
    """
    active = _load_corrections()
    fitted = active["ms_home"] if active and "ms_home" in active else ()
    for band_table in (fitted, _HOME_FAV_BANDS):
        for lower, upper, shift in band_table:
            if lower <= p_home < upper:
                return shift
    return 0.0
def away_favorite_delta(p_away: float) -> float:
    """Return the additive correction for a de-vigged away-win probability.

    Scoreboard measurement (2026-06): away favourites also win a few points
    MORE than the de-vigged price implies (+2.6..+4.2pt). Unlike the home
    side there is NO built-in fallback — away corrections must be EARNED via
    the fitted artifact (scripts/fit_anchor_corrections.py passing its
    out-of-sample acceptance gate). No artifact, no ``ms_away`` bands, or no
    band containing ``p_away`` → 0.0 → prior behaviour.
    """
    active = _load_corrections()
    if not active:
        return 0.0
    matches = (
        shift
        for lower, upper, shift in active.get("ms_away", ())
        if lower <= p_away < upper
    )
    # First matching band wins; fall back to "no correction".
    return next(matches, 0.0)
def apply_corrections(
    p1: float, px: float, p2: float
) -> Tuple[float, float, float]:
    """Apply the favourite correction to a 3-way (1, X, 2) vector.

    At most one side is corrected. In practice only one side can be a
    favourite (both ≥0.45 would leave no room for the draw); if both bands
    somehow fire, the larger delta wins, with the home side taking ties.
    Non-positive deltas are never applied. The corrected side is capped at
    0.98 and the other two outcomes are rescaled proportionally so the vector
    still sums to 1.
    """
    home_shift = home_favorite_delta(p1)
    away_shift = away_favorite_delta(p2)

    # Nothing to boost on either side: hand the vector back untouched.
    if home_shift <= 0.0 and away_shift <= 0.0:
        return p1, px, p2

    # Home delta at least as large: delegate to the home-side routine.
    if home_shift >= away_shift:
        return apply_home_correction(p1, px, p2)

    # Away side: bump it (capped), then spread what is left over home/draw
    # in their original proportion.
    away_adj = min(0.98, p2 + away_shift)
    leftover = 1.0 - away_adj
    others = p1 + px
    if others <= 0.0:
        # No mass to redistribute; home/draw are returned as they came in.
        return p1, px, away_adj
    return p1 / others * leftover, px / others * leftover, away_adj
def apply_home_correction(
    p1: float, px: float, p2: float
) -> Tuple[float, float, float]:
    """Boost the home leg of a 3-way (1, X, 2) vector by its favourite delta.

    The corrected home probability is capped at 0.98; draw and away are
    rescaled proportionally so the three still sum to 1.0. A non-positive
    delta leaves the vector unchanged.
    """
    shift = home_favorite_delta(p1)
    if shift <= 0.0:
        return p1, px, p2

    home_adj = min(0.98, p1 + shift)
    leftover = 1.0 - home_adj
    others = px + p2
    if others <= 0.0:
        # No mass to redistribute; draw/away are returned as they came in.
        return home_adj, px, p2
    return home_adj, px / others * leftover, p2 / others * leftover