"""Live-conditioned score projection (V38) — pure functions, no I/O. Answers, DURING a match, questions like "1-0 at 80' — what is the REAL probability the away team still scores?" by conditioning the same calibrated market-anchored lambdas (V35/V36) on the current score and minute. Mechanics — a minute-stepped Markov chain over remaining goals: 1. Pre-match lambdas come from the SAME source the score card uses (de-vigged 1X2 + over2.5, models/score_matrix solvers) — one consistent probability spine pre-match and in-play. 2. Each remaining minute contributes lambda_side x minute_share(t) goals, where minute_share is the EMPIRICAL goal-time intensity curve measured on 38,779 clean-timeline real-odds matches (1H share 44.4%, late-game intensity rises, stoppage spikes at 45' and 90+'). 3. Each minute's intensity is scaled by the MEASURED score-state multiplier: trailing teams push (+9%, +17% after 70'), leading teams shut up shop (-5%/-7%), 2+ ahead opens up. The chain updates the state as virtual goals happen, so multipliers switch mid-projection exactly like they do on the pitch. All constants are fitted on the train window (matches older than the last 90 days); the held-out window validates calibration out-of-sample before any of this reaches the screen. """ from __future__ import annotations from typing import Dict, List, Optional, Tuple from models.score_matrix import split_lambdas, total_lambda_from_over25 MAX_MINUTE = 94 # 90 + folded stoppage LATE_PHASE_FROM = 70 # measured multipliers switch here MAX_EXTRA_GOALS = 7 # per side, absorbing cap for the chain # Empirical goal-time intensity: share of a match's goals per 5-min bucket # (0-5, ..., 90-94+). Measured on 105k goals; 45' and 90+' buckets carry the # folded stoppage-time spikes. INTENSITY_SHARES: Tuple[float, ...] = ( 0.036, 0.045, 0.047, 0.047, 0.045, 0.046, 0.048, 0.049, 0.081, 0.048, 0.057, 0.055, 0.054, 0.053, 0.052, 0.053, 0.052, 0.056, 0.076, ) # Score-state goal-intensity multipliers, measured (actual/expected) by the # scoring side's goal difference, split early (<70') / late (>=70'). _STATE_MULT_EARLY: Dict[int, float] = {-2: 1.095, -1: 1.045, 0: 0.966, 1: 0.952, 2: 1.011} _STATE_MULT_LATE: Dict[int, float] = {-2: 1.123, -1: 1.174, 0: 1.015, 1: 0.930, 2: 1.011} def _minute_share(minute: int) -> float: """Per-minute share of match-total goal intensity at `minute` (1-based).""" b = min(len(INTENSITY_SHARES) - 1, max(0, (minute - 1) // 5)) return INTENSITY_SHARES[b] / 5.0 def state_multiplier(diff: int, minute: int) -> float: """Intensity multiplier for a side whose current goal difference is `diff` (own − opponent), at `minute`.""" d = max(-2, min(2, diff)) table = _STATE_MULT_LATE if minute >= LATE_PHASE_FROM else _STATE_MULT_EARLY return table[d] def estimate_minute(match_date_ms: Optional[int], now_ms: int) -> Optional[int]: """Approximate current match minute from kickoff time (no feed minute is available: live_matches.substate carries none). Folds the ~15' half-time break; accuracy is ±2-3 minutes which barely moves the projection.""" if not match_date_ms: return None elapsed = (now_ms - int(match_date_ms)) / 60000.0 if elapsed < 0: return None if elapsed <= 48: # first half (+stoppage) minute = elapsed elif elapsed <= 63: # half-time break window minute = 46 else: minute = elapsed - 15.0 # second half, break folded out return int(max(1, min(MAX_MINUTE, minute))) def _chain( lam_h: float, lam_a: float, cur_h: int, cur_a: int, minute: int, ) -> Dict[Tuple[int, int], float]: """Distribution over (extra home goals, extra away goals) from `minute` to full time, with state-dependent intensities.""" dist: Dict[Tuple[int, int], float] = {(0, 0): 1.0} for t in range(minute, MAX_MINUTE + 1): share = _minute_share(t) nxt: Dict[Tuple[int, int], float] = {} for (eh, ea), p in dist.items(): diff = (cur_h + eh) - (cur_a + ea) ph = lam_h * share * state_multiplier(diff, t) pa = lam_a * share * state_multiplier(-diff, t) ph = min(ph, 0.30); pa = min(pa, 0.30) stay = max(0.0, 1.0 - ph - pa) nxt[(eh, ea)] = nxt.get((eh, ea), 0.0) + p * stay if eh < MAX_EXTRA_GOALS: nxt[(eh + 1, ea)] = nxt.get((eh + 1, ea), 0.0) + p * ph else: nxt[(eh, ea)] = nxt.get((eh, ea), 0.0) + p * ph if ea < MAX_EXTRA_GOALS: nxt[(eh, ea + 1)] = nxt.get((eh, ea + 1), 0.0) + p * pa else: nxt[(eh, ea)] = nxt.get((eh, ea), 0.0) + p * pa dist = nxt return dist def build_live_projection( p1: float, px: float, p2: float, p_over25: float, cur_h: int, cur_a: int, minute: int, ) -> Dict[str, object]: """Live projection from the anchored pre-match probabilities + the pitch state. Returns honest, score/minute-aware probabilities. (p1, px, p2) and p_over25 are the CALIBRATED (V35-anchored) numbers; the same spine the pre-match cards display. """ minute = int(max(1, min(MAX_MINUTE, minute))) cur_h = max(0, int(cur_h)); cur_a = max(0, int(cur_a)) total = total_lambda_from_over25(p_over25) lam_h, lam_a = split_lambdas(total, p1, p2) dist = _chain(lam_h, lam_a, cur_h, cur_a, minute) p_home_win = p_draw = p_away_win = 0.0 p_home_scores = p_away_scores = 0.0 exp_goals = 0.0 scores: Dict[str, float] = {} for (eh, ea), p in dist.items(): fh, fa = cur_h + eh, cur_a + ea if fh > fa: p_home_win += p elif fh == fa: p_draw += p else: p_away_win += p if eh > 0: p_home_scores += p if ea > 0: p_away_scores += p exp_goals += p * (eh + ea) key = f"{min(fh,9)}-{min(fa,9)}" scores[key] = scores.get(key, 0.0) + p top = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:5] total_now = cur_h + cur_a p_over25_live = sum( p for (eh, ea), p in dist.items() if total_now + eh + ea >= 3 ) # "comeback": the side currently behind at least draws / currently level # match does NOT stay level if cur_h > cur_a: p_comeback = p_draw + p_away_win elif cur_a > cur_h: p_comeback = p_draw + p_home_win else: p_comeback = p_home_win + p_away_win # deadlock breaks return { "minute": minute, "current_score": f"{cur_h}-{cur_a}", "probs": { "1": round(p_home_win, 4), "X": round(p_draw, 4), "2": round(p_away_win, 4), }, "p_home_scores_again": round(p_home_scores, 4), "p_away_scores_again": round(p_away_scores, 4), "p_comeback": round(p_comeback, 4), "p_over25": round(p_over25_live, 4), "expected_remaining_goals": round(exp_goals, 2), "scenario_top5": [ {"score": s, "prob": round(p, 4)} for s, p in top ], "calibration_source": "live_matrix_v38", }