@@ -0,0 +1,184 @@
|
||||
"""Live-conditioned score projection (V38) — pure functions, no I/O.
|
||||
|
||||
Answers, DURING a match, questions like "1-0 at 80' — what is the REAL
|
||||
probability the away team still scores?" by conditioning the same calibrated
|
||||
market-anchored lambdas (V35/V36) on the current score and minute.
|
||||
|
||||
Mechanics — a minute-stepped Markov chain over remaining goals:
|
||||
|
||||
1. Pre-match lambdas come from the SAME source the score card uses
|
||||
(de-vigged 1X2 + over2.5, models/score_matrix solvers) — one consistent
|
||||
probability spine pre-match and in-play.
|
||||
2. Each remaining minute contributes lambda_side x minute_share(t) goals,
|
||||
where minute_share is the EMPIRICAL goal-time intensity curve measured
|
||||
on 38,779 clean-timeline real-odds matches (1H share 44.4%, late-game
|
||||
intensity rises, stoppage spikes at 45' and 90+').
|
||||
3. Each minute's intensity is scaled by the MEASURED score-state
|
||||
multiplier: trailing teams push (+9%, +17% after 70'), leading teams
|
||||
shut up shop (-5%/-7%), 2+ ahead opens up. The chain updates the state
|
||||
as virtual goals happen, so multipliers switch mid-projection exactly
|
||||
like they do on the pitch.
|
||||
|
||||
All constants are fitted on the train window (matches older than the last 90
|
||||
days); the held-out window validates calibration out-of-sample before any of
|
||||
this reaches the screen.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
from models.score_matrix import split_lambdas, total_lambda_from_over25
|
||||
|
||||
# Last minute the projection simulates: the 90 regular minutes plus stoppage
# time folded into the minutes after 90.
MAX_MINUTE = 94

# From this minute on, the "late" score-state multiplier table is used.
LATE_PHASE_FROM = 70

# Per-side limit on additional goals tracked by the chain; once a side reaches
# it, further scoring probability is absorbed into the same state.
MAX_EXTRA_GOALS = 7
|
||||
|
||||
# Empirical goal-time intensity: the share of a match's goals that falls into
# each 5-minute bucket (0-5, ..., 90-94+). Measured on 105k goals. The 45' and
# 90+' buckets carry the folded stoppage-time spikes, which is why the last
# entry of each row is visibly larger than its neighbours.
INTENSITY_SHARES: Tuple[float, ...] = (
    # first half: buckets 0-5 ... 40-45
    0.036,
    0.045,
    0.047,
    0.047,
    0.045,
    0.046,
    0.048,
    0.049,
    0.081,
    # second half: buckets 45-50 ... 90-94+
    0.048,
    0.057,
    0.055,
    0.054,
    0.053,
    0.052,
    0.053,
    0.052,
    0.056,
    0.076,
)
|
||||
|
||||
# Score-state goal-intensity multipliers, measured (actual/expected) by the
|
||||
# scoring side's goal difference, split early (<70') / late (>=70').
|
||||
_STATE_MULT_EARLY: Dict[int, float] = {-2: 1.095, -1: 1.045, 0: 0.966, 1: 0.952, 2: 1.011}
|
||||
_STATE_MULT_LATE: Dict[int, float] = {-2: 1.123, -1: 1.174, 0: 1.015, 1: 0.930, 2: 1.011}
|
||||
|
||||
|
||||
def _minute_share(minute: int) -> float:
    """Return the fraction of match-total goal intensity for one minute.

    `minute` is 1-based. It is mapped onto its 5-minute bucket of
    INTENSITY_SHARES (minutes 1-5 -> bucket 0, 6-10 -> bucket 1, ...);
    minutes outside the table are clamped to the first or last bucket. The
    bucket's share is then spread evenly over five minutes.
    """
    last_bucket = len(INTENSITY_SHARES) - 1
    bucket = (minute - 1) // 5
    if bucket < 0:
        bucket = 0
    elif bucket > last_bucket:
        bucket = last_bucket
    # NOTE(review): the final bucket is also divided by 5, although the chain
    # only steps minutes 91..MAX_MINUTE through it - confirm that per-minute
    # shares summing to slightly under 1 over a full match is intended.
    return INTENSITY_SHARES[bucket] / 5.0
|
||||
|
||||
|
||||
def state_multiplier(diff: int, minute: int) -> float:
    """Return the goal-intensity multiplier for a side in a given score state.

    `diff` is that side's current goal difference (own - opponent); values
    beyond +/-2 are treated as +/-2. Minutes from LATE_PHASE_FROM onwards use
    the late table, earlier minutes the early table.
    """
    if minute >= LATE_PHASE_FROM:
        multipliers = _STATE_MULT_LATE
    else:
        multipliers = _STATE_MULT_EARLY
    clamped_diff = max(-2, min(2, diff))
    return multipliers[clamped_diff]
|
||||
|
||||
|
||||
def estimate_minute(match_date_ms: Optional[int], now_ms: int) -> Optional[int]:
    """Estimate the current match minute from the kickoff timestamp.

    Both arguments are epoch milliseconds. Used because, per the original
    note, the live feed carries no minute (live_matches.substate has none).
    The half-time break is folded out as a fixed 15 minutes; the original
    author quotes an accuracy of about +/-2-3 minutes.

    Returns None when the kickoff time is missing/zero or lies in the future,
    otherwise an integer clamped to 1..MAX_MINUTE.
    """
    if not match_date_ms:
        return None

    since_kickoff = (now_ms - int(match_date_ms)) / 60000.0
    if since_kickoff < 0:
        return None

    if since_kickoff <= 48:
        # First half including its stoppage time: wall clock == match clock.
        estimate = since_kickoff
    elif since_kickoff <= 63:
        # Inside the half-time break window: pin to the second-half start.
        estimate = 46
    else:
        # Second half: remove the 15-minute break from the wall clock.
        estimate = since_kickoff - 15.0

    capped = min(MAX_MINUTE, estimate)
    return int(max(1, capped))
|
||||
|
||||
|
||||
def _chain(
    lam_h: float,
    lam_a: float,
    cur_h: int,
    cur_a: int,
    minute: int,
) -> Dict[Tuple[int, int], float]:
    """Project the distribution of additional goals from `minute` to full time.

    The chain advances one minute at a time from `minute` through MAX_MINUTE
    (both inclusive). In each step a state (extra home goals, extra away
    goals) either stays put, gains one home goal or gains one away goal. The
    per-minute scoring probabilities are lambda x minute share x score-state
    multiplier, each capped at 0.30, and are recomputed from the score the
    state implies, so the multipliers follow virtual goals.

    Returns a dict mapping (extra_home, extra_away) to probability.
    """
    state_probs: Dict[Tuple[int, int], float] = {(0, 0): 1.0}

    for step in range(minute, MAX_MINUTE + 1):
        per_minute = _minute_share(step)
        advanced: Dict[Tuple[int, int], float] = {}

        for (extra_h, extra_a), mass in state_probs.items():
            home_lead = (cur_h + extra_h) - (cur_a + extra_a)
            p_home_goal = min(
                lam_h * per_minute * state_multiplier(home_lead, step), 0.30
            )
            p_away_goal = min(
                lam_a * per_minute * state_multiplier(-home_lead, step), 0.30
            )
            p_no_goal = max(0.0, 1.0 - p_home_goal - p_away_goal)

            here = (extra_h, extra_a)
            # At the per-side cap a goal is absorbed: its probability is added
            # back onto the current state instead of creating a new one.
            after_home_goal = (
                (extra_h + 1, extra_a) if extra_h < MAX_EXTRA_GOALS else here
            )
            after_away_goal = (
                (extra_h, extra_a + 1) if extra_a < MAX_EXTRA_GOALS else here
            )

            transitions = (
                (here, p_no_goal),
                (after_home_goal, p_home_goal),
                (after_away_goal, p_away_goal),
            )
            for target, weight in transitions:
                advanced[target] = advanced.get(target, 0.0) + mass * weight

        state_probs = advanced

    return state_probs
|
||||
|
||||
|
||||
def build_live_projection(
    p1: float,
    px: float,
    p2: float,
    p_over25: float,
    cur_h: int,
    cur_a: int,
    minute: int,
) -> Dict[str, object]:
    """Build the in-play projection for a match at a given score and minute.

    (p1, px, p2) and p_over25 are the calibrated pre-match probabilities. The
    total goal rate is derived from p_over25 and split between the sides using
    p1 and p2; `px` is accepted but not read by this function. `minute` is
    clamped to 1..MAX_MINUTE and negative scores are treated as 0.

    Returns a dict with the clamped minute, the current score string, final
    1/X/2 probabilities, the probability that each side scores again, the
    "comeback" probability, the live over-2.5 probability, expected remaining
    goals, the five most likely final scores and a calibration tag.
    """
    minute = int(max(1, min(MAX_MINUTE, minute)))
    cur_h = max(0, int(cur_h))
    cur_a = max(0, int(cur_a))

    goal_rate = total_lambda_from_over25(p_over25)
    lam_h, lam_a = split_lambdas(goal_rate, p1, p2)
    remaining = _chain(lam_h, lam_a, cur_h, cur_a, minute)

    home_wins = 0.0
    draws = 0.0
    away_wins = 0.0
    home_scores_again = 0.0
    away_scores_again = 0.0
    expected_extra = 0.0
    final_scores: Dict[str, float] = {}

    for (extra_h, extra_a), mass in remaining.items():
        final_h = cur_h + extra_h
        final_a = cur_a + extra_a

        if final_h > final_a:
            home_wins += mass
        elif final_h == final_a:
            draws += mass
        else:
            away_wins += mass

        if extra_h > 0:
            home_scores_again += mass
        if extra_a > 0:
            away_scores_again += mass

        expected_extra += mass * (extra_h + extra_a)

        # Final scores are bucketed with each side capped at 9 for display.
        label = f"{min(final_h,9)}-{min(final_a,9)}"
        final_scores[label] = final_scores.get(label, 0.0) + mass

    ranked = sorted(final_scores.items(), key=lambda kv: kv[1], reverse=True)
    top = ranked[:5]

    goals_so_far = cur_h + cur_a
    over25_now = sum(
        mass
        for (extra_h, extra_a), mass in remaining.items()
        if goals_so_far + extra_h + extra_a >= 3
    )

    # "Comeback": the side currently behind at least draws; for a level match
    # it is the probability that the match does NOT finish level.
    if cur_h > cur_a:
        comeback = draws + away_wins
    elif cur_a > cur_h:
        comeback = draws + home_wins
    else:
        comeback = home_wins + away_wins

    return {
        "minute": minute,
        "current_score": f"{cur_h}-{cur_a}",
        "probs": {
            "1": round(home_wins, 4),
            "X": round(draws, 4),
            "2": round(away_wins, 4),
        },
        "p_home_scores_again": round(home_scores_again, 4),
        "p_away_scores_again": round(away_scores_again, 4),
        "p_comeback": round(comeback, 4),
        "p_over25": round(over25_now, 4),
        "expected_remaining_goals": round(expected_extra, 2),
        "scenario_top5": [
            {"score": score, "prob": round(prob, 4)} for score, prob in top
        ],
        "calibration_source": "live_matrix_v38",
    }
|
||||
@@ -25,7 +25,11 @@ without the DB or the heavy model stack.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import List, Optional, Tuple
|
||||
import json
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
|
||||
def devig(odds: List[Optional[float]]) -> Optional[List[float]]:
|
||||
@@ -53,6 +57,13 @@ def devig(odds: List[Optional[float]]) -> Optional[List[float]]:
|
||||
# unbiased. Values are deliberately conservative — universal and shrunk toward 0
|
||||
# vs the raw tier-0 (soft-league) edge, because the bias is weaker in efficient
|
||||
# top leagues. Applying these took MS-home OOS ECE 1.56% -> 0.64%.
|
||||
#
|
||||
# These static bands are the BUILT-IN FALLBACK. The live values come from the
|
||||
# versioned artifact `config/market_anchor_corrections.json`, refreshed by
|
||||
# `scripts/fit_anchor_corrections.py` (the guarded self-correction loop:
|
||||
# measure on settled matches -> shrink/clip/min-sample gates -> out-of-sample
|
||||
# acceptance -> write table). The engine only ever consumes the TABLE — the
|
||||
# loop never modifies code.
|
||||
_HOME_FAV_BANDS: Tuple[Tuple[float, float, float], ...] = (
|
||||
(0.45, 0.55, 0.010),
|
||||
(0.55, 0.65, 0.018),
|
||||
@@ -60,17 +71,177 @@ _HOME_FAV_BANDS: Tuple[Tuple[float, float, float], ...] = (
|
||||
(0.75, 1.01, 0.034),
|
||||
)
|
||||
|
||||
_DEFAULT_CORRECTIONS_PATH = os.path.join(
|
||||
os.path.dirname(os.path.abspath(__file__)), "..", "config",
|
||||
"market_anchor_corrections.json",
|
||||
)
|
||||
|
||||
|
||||
def _corrections_path() -> str:
    """Return the correction-artifact path to read from disk.

    The MARKET_ANCHOR_CORRECTIONS_PATH environment variable wins when it is
    set (even to an empty string); otherwise the bundled default is used.
    """
    default_path = _DEFAULT_CORRECTIONS_PATH
    override = os.environ.get("MARKET_ANCHOR_CORRECTIONS_PATH")
    return default_path if override is None else override
|
||||
_corrections_lock = threading.Lock()
|
||||
_corrections_cache: Optional[Dict[str, Any]] = None
|
||||
_corrections_ts: float = 0.0
|
||||
# Re-check sources at most every 10 minutes: the self-correction cron writes a
|
||||
# new table to app_settings; running engines pick it up WITHOUT a restart.
|
||||
_CORRECTIONS_TTL_S = 600.0
|
||||
|
||||
|
||||
def _parse_corrections(raw: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
parsed_table: Dict[str, Any] = {}
|
||||
for key in ("ms_home", "ms_away"):
|
||||
bands = raw.get("corrections", {}).get(key)
|
||||
if not (isinstance(bands, list) and bands):
|
||||
continue
|
||||
parsed = []
|
||||
for b in bands:
|
||||
lo = float(b["lo"]); hi = float(b["hi"]); delta = float(b["delta"])
|
||||
if not (0.0 <= lo < hi <= 1.01) or abs(delta) > 0.10:
|
||||
raise ValueError(f"correction band out of range: {b}")
|
||||
parsed.append((lo, hi, delta))
|
||||
parsed_table[key] = tuple(parsed)
|
||||
if not parsed_table:
|
||||
return None
|
||||
parsed_table["version"] = str(raw.get("version", "?"))
|
||||
return parsed_table
|
||||
|
||||
|
||||
def _db_corrections_raw() -> Optional[Dict[str, Any]]:
|
||||
"""Fetch the correction artifact from app_settings (the deployment's shared
|
||||
medium — the ai-engine container has no volume mounts, so a host-side cron
|
||||
can only reach the running engine through the database). Guarded: any
|
||||
failure → None, never breaks a prediction. Disable with MARKET_ANCHOR_DB=0."""
|
||||
if os.environ.get("MARKET_ANCHOR_DB", "1") == "0":
|
||||
return None
|
||||
try:
|
||||
import psycopg2 # local import: keeps module usable without DB deps
|
||||
from data.db import get_clean_dsn
|
||||
|
||||
with psycopg2.connect(get_clean_dsn(), connect_timeout=3) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"SELECT value FROM app_settings"
|
||||
" WHERE key = 'market_anchor_corrections'"
|
||||
)
|
||||
row = cur.fetchone()
|
||||
if row and row[0]:
|
||||
return json.loads(row[0])
|
||||
except Exception:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def _read_corrections_file(path: str) -> Optional[Dict[str, Any]]:
    """Parse the correction artifact at `path`; None if unreadable/malformed."""
    try:
        with open(path, "r", encoding="utf-8") as fh:
            return _parse_corrections(json.load(fh))
    except (OSError, ValueError, KeyError, TypeError, AttributeError):
        # json.JSONDecodeError is a ValueError; AttributeError covers an
        # artifact whose top-level shape is not the expected object.
        return None


def _load_corrections() -> Optional[Dict[str, Any]]:
    """Resolve the active correction table (thread-safe, TTL-cached).

    Source order:
      1. MARKET_ANCHOR_CORRECTIONS_PATH env file (tests/dev - file-only mode,
         malformed -> static fallback, DB and default file are NOT consulted)
      2. app_settings DB row 'market_anchor_corrections' (production path -
         refreshed by scripts/fit_anchor_corrections.py)
      3. bundled config/market_anchor_corrections.json
      4. None -> built-in static fallback bands

    A malformed source never raises: it is skipped and the next source (or
    None) is used. The result, including None, is cached for
    _CORRECTIONS_TTL_S seconds.
    """
    global _corrections_cache, _corrections_ts
    now = time.time()
    if now - _corrections_ts < _CORRECTIONS_TTL_S:
        return _corrections_cache
    with _corrections_lock:
        # Re-check under the lock: another thread may have refreshed the cache
        # while this one was waiting.
        if now - _corrections_ts < _CORRECTIONS_TTL_S:
            return _corrections_cache

        table: Optional[Dict[str, Any]] = None
        env_path = os.environ.get("MARKET_ANCHOR_CORRECTIONS_PATH")
        if env_path:
            # File-only mode: the env file is the single source of truth.
            table = _read_corrections_file(env_path)
        else:
            raw = _db_corrections_raw()
            if raw is not None:
                try:
                    table = _parse_corrections(raw)
                except (ValueError, KeyError, TypeError, AttributeError):
                    table = None
            if table is None:
                table = _read_corrections_file(_corrections_path())

        _corrections_cache = table
        _corrections_ts = time.time()
        return _corrections_cache
|
||||
|
||||
|
||||
def reload_corrections() -> None:
    """Drop the cached correction table so the next lookup re-reads the sources.

    Intended for use after a refresh and in tests. Both the cached table and
    its timestamp are reset while holding the cache lock.
    """
    global _corrections_cache, _corrections_ts
    with _corrections_lock:
        _corrections_cache, _corrections_ts = None, 0.0
|
||||
|
||||
|
||||
def home_favorite_delta(p_home: float) -> float:
    """Additive correction to the de-vigged home-win probability.

    With the built-in bands alone the correction is zero below 0.45 (no
    measured bias for non-favourites).

    Band semantics: a fitted-artifact band OVERRIDES the static prior where it
    exists (including an explicit delta of 0 - evidence of "no bias"). Where
    the artifact is SILENT (a range that never passed the min-sample gate,
    e.g. big favourites 0.75+), the static prior still applies - missing
    evidence must not silently erase proven knowledge.

    Returns:
        The delta of the first matching band (lo <= p_home < hi), looking in
        the artifact's "ms_home" bands first and the static bands second;
        0.0 when no band matches.
    """
    table = _load_corrections()
    if table and "ms_home" in table:
        for lo, hi, delta in table["ms_home"]:
            if lo <= p_home < hi:
                return delta
    # Artifact absent or silent for this range: fall back to the static prior.
    for lo, hi, delta in _HOME_FAV_BANDS:
        if lo <= p_home < hi:
            return delta
    return 0.0
|
||||
|
||||
|
||||
def away_favorite_delta(p_away: float) -> float:
    """Additive correction to the de-vigged away-win probability.

    Scoreboard measurement (2026-06): away favourites also win a few points
    MORE than the de-vigged price implies (+2.6..+4.2pt). Unlike the home side
    there is NO built-in fallback here - away corrections must be EARNED via
    the fitted artifact (scripts/fit_anchor_corrections.py passing its
    out-of-sample acceptance gate).

    Returns:
        The delta of the first "ms_away" band with lo <= p_away < hi, or 0.0
        when there is no artifact, no away bands, or no matching band.
    """
    table = _load_corrections()
    if not table:
        return 0.0
    for lo, hi, delta in table.get("ms_away", ()):
        if lo <= p_away < hi:
            return delta
    return 0.0
|
||||
|
||||
|
||||
def apply_corrections(
    p1: float, px: float, p2: float
) -> Tuple[float, float, float]:
    """Apply the favourite corrections to a 3-way (1, X, 2) vector.

    In practice only one side can be a favourite (both >= 0.45 would leave no
    room for the draw); if both bands somehow fire, the larger delta wins and
    ties go to the home side. When neither delta is positive the vector is
    returned unchanged.

    For an away correction the away probability is raised by its delta
    (capped at 0.98) and the other two outcomes are rescaled proportionally so
    the vector still sums to 1. Home corrections are delegated to
    apply_home_correction.
    """
    home_delta = home_favorite_delta(p1)
    away_delta = away_favorite_delta(p2)

    if home_delta <= 0.0 and away_delta <= 0.0:
        return p1, px, p2
    if home_delta >= away_delta:
        return apply_home_correction(p1, px, p2)

    boosted_away = min(0.98, p2 + away_delta)
    mass_for_others = 1.0 - boosted_away
    others = p1 + px
    if others <= 0.0:
        # Nothing to rescale: keep the other two outcomes as they are.
        return p1, px, boosted_away
    return (
        p1 / others * mass_for_others,
        px / others * mass_for_others,
        boosted_away,
    )
|
||||
|
||||
|
||||
def apply_home_correction(
|
||||
p1: float, px: float, p2: float
|
||||
) -> Tuple[float, float, float]:
|
||||
|
||||
Reference in New Issue
Block a user