wow

2026-06-11 00:25:45 +03:00
parent bb911176df
commit 4c137fbab6
9 changed files with 1246 additions and 6 deletions
@@ -0,0 +1,184 @@
+"""Live-conditioned score projection (V38) — pure functions, no I/O.
+
+Answers, DURING a match, questions like "1-0 at 80' — what is the REAL
+probability the away team still scores?" by conditioning the same calibrated
+market-anchored lambdas (V35/V36) on the current score and minute.
+
+Mechanics — a minute-stepped Markov chain over remaining goals:
+
+  1. Pre-match lambdas come from the SAME source the score card uses
+     (de-vigged 1X2 + over2.5, models/score_matrix solvers) — one consistent
+     probability spine pre-match and in-play.
+  2. Each remaining minute contributes lambda_side x minute_share(t) goals,
+     where minute_share is the EMPIRICAL goal-time intensity curve measured
+     on 38,779 clean-timeline real-odds matches (1H share 44.4%, late-game
+     intensity rises, stoppage spikes at 45' and 90+').
+  3. Each minute's intensity is scaled by the MEASURED score-state
+     multiplier: trailing teams push (+4.5% one down / +9.5% two or more
+     down before 70'; +17.4% / +12.3% from 70' on), leading teams shut up
+     shop (-4.8% before 70', -7.0% from 70' on), 2+ ahead opens up slightly
+     (+1.1%). The chain updates the state as virtual goals happen, so
+     multipliers switch mid-projection exactly like they do on the pitch.
+
+All constants are fitted on the train window (matches older than the last 90
+days); the held-out window validates calibration out-of-sample before any of
+this reaches the screen.
+"""
+
+from __future__ import annotations
+
+from typing import Dict, List, Optional, Tuple
+
+from models.score_matrix import split_lambdas, total_lambda_from_over25
+
+# Last minute simulated by the chain: 90 regulation minutes plus 4 minutes
+# that stand in for folded second-half stoppage time.
+MAX_MINUTE = 94          # 90 + folded stoppage
+# First minute at which state_multiplier() reads the late table (the
+# comparison there is `minute >= LATE_PHASE_FROM`).
+LATE_PHASE_FROM = 70     # measured multipliers switch here
+# Upper bound on the additional goals tracked per side; probability mass
+# that would exceed it stays in the capped state.
+MAX_EXTRA_GOALS = 7      # per side, absorbing cap for the chain
+
+# Empirical goal-time intensity: share of a match's goals per 5-min bucket
+# (minutes 1-5, 6-10, ..., 91-94+). Measured on 105k goals; the 45' and 90+'
+# buckets carry the folded stoppage-time spikes.
+# There are 19 entries: the first nine (the first half) sum to 0.444 and all
+# of them sum to 1.000. _minute_share() maps a 1-based minute to index
+# (minute - 1) // 5, clamped to the valid index range.
+INTENSITY_SHARES: Tuple[float, ...] = (
+    0.036, 0.045, 0.047, 0.047, 0.045, 0.046, 0.048, 0.049, 0.081,
+    0.048, 0.057, 0.055, 0.054, 0.053, 0.052, 0.053, 0.052, 0.056, 0.076,
+)
+
+# Score-state goal-intensity multipliers, measured (actual/expected) by the
+# scoring side's goal difference, split early (<70') / late (>=70').
+# Keys are the goal difference (own - opponent) after state_multiplier()
+# clamps it to [-2, 2]: negative keys are a trailing side, positive keys a
+# leading side, and the -2 / 2 entries also serve every larger margin.
+_STATE_MULT_EARLY: Dict[int, float] = {-2: 1.095, -1: 1.045, 0: 0.966, 1: 0.952, 2: 1.011}
+_STATE_MULT_LATE: Dict[int, float] = {-2: 1.123, -1: 1.174, 0: 1.015, 1: 0.930, 2: 1.011}
+
+
+def _minute_share(minute: int) -> float:
+    """Per-minute share of match-total goal intensity at `minute` (1-based).
+
+    The minute is mapped to its INTENSITY_SHARES bucket with
+    ``(minute - 1) // 5`` (clamped to the table), and the bucket's share is
+    spread evenly over the minutes the chain actually simulates inside it.
+
+    Args:
+        minute: Match minute, 1-based. Values outside the table are clamped
+            to the first / last bucket.
+
+    Returns:
+        The fraction of a match's total goal intensity assigned to that
+        single minute.
+    """
+    last = len(INTENSITY_SHARES) - 1
+    bucket = min(last, max(0, (minute - 1) // 5))
+    # Every bucket spans 5 simulated minutes except the final one: the chain
+    # stops at MAX_MINUTE, so only minutes 91..MAX_MINUTE fall into it.
+    # Dividing that bucket by 5 as well would drop part of the stoppage-time
+    # mass and make the per-minute shares over 1..MAX_MINUTE sum to less
+    # than the table total, so use the bucket's real width instead.
+    width = max(1, MAX_MINUTE - 5 * last) if bucket == last else 5
+    return INTENSITY_SHARES[bucket] / width
+
+
+def state_multiplier(diff: int, minute: int) -> float:
+    """Look up the measured goal-intensity multiplier for one side.
+
+    Args:
+        diff: That side's current goal difference (own − opponent). Margins
+            beyond two goals use the ±2 entry.
+        minute: Match minute; from LATE_PHASE_FROM onwards the late table is
+            used, before that the early one.
+
+    Returns:
+        The multiplier stored for the clamped difference in the selected
+        table.
+    """
+    if minute >= LATE_PHASE_FROM:
+        multipliers = _STATE_MULT_LATE
+    else:
+        multipliers = _STATE_MULT_EARLY
+
+    # Clamp the margin into the range the tables cover.
+    if diff > 2:
+        margin = 2
+    elif diff < -2:
+        margin = -2
+    else:
+        margin = diff
+    return multipliers[margin]
+
+
+def estimate_minute(match_date_ms: Optional[int], now_ms: int) -> Optional[int]:
+    """Approximate the current match minute from the kickoff time.
+
+    No feed minute is available (live_matches.substate carries none), so the
+    minute is derived from the time elapsed since kickoff, folding out the
+    ~15' half-time break. Accuracy is ±2-3 minutes, which barely moves the
+    projection.
+
+    Args:
+        match_date_ms: Kickoff time in milliseconds, on the same clock as
+            `now_ms`. Falsy values (None, 0) are treated as unknown.
+        now_ms: Current time in milliseconds.
+
+    Returns:
+        The estimated minute, clamped to [1, MAX_MINUTE], or None when the
+        kickoff time is unknown or still in the future.
+    """
+    if not match_date_ms:
+        return None
+    elapsed = (now_ms - int(match_date_ms)) / 60000.0
+    if elapsed < 0:
+        return None
+    if elapsed <= 48:                      # first half (+stoppage)
+        # First-half stoppage is folded into minute 45, matching the
+        # intensity table whose 45' bucket carries that spike. Without the
+        # cap the estimate would reach 48 here and then fall back to 46
+        # during the break, i.e. run backwards.
+        minute = min(elapsed, 45.0)
+    elif elapsed <= 63:                    # half-time break window
+        minute = 46
+    else:
+        minute = elapsed - 15.0            # second half, break folded out
+    return int(max(1, min(MAX_MINUTE, minute)))
+
+
+def _chain(
+    lam_h: float,
+    lam_a: float,
+    cur_h: int,
+    cur_a: int,
+    minute: int,
+) -> Dict[Tuple[int, int], float]:
+    """Project the goals still to come, one simulated minute at a time.
+
+    Starting with all mass on "no further goals", every minute from `minute`
+    through MAX_MINUTE (inclusive) moves mass to "home scores", "away scores"
+    or "nothing happens". Each side's per-minute scoring probability is its
+    pre-match lambda times that minute's intensity share times the
+    score-state multiplier for the scoreline reached so far, capped at 0.30.
+
+    Args:
+        lam_h: Pre-match expected goals of the home side.
+        lam_a: Pre-match expected goals of the away side.
+        cur_h: Goals the home side has already scored.
+        cur_a: Goals the away side has already scored.
+        minute: First minute to simulate.
+
+    Returns:
+        Mapping ``(extra home goals, extra away goals) -> probability``.
+        Extra goals per side are capped at MAX_EXTRA_GOALS; mass that would
+        go beyond the cap stays in the capped state.
+    """
+    current: Dict[Tuple[int, int], float] = {(0, 0): 1.0}
+    for tick in range(minute, MAX_MINUTE + 1):
+        base = _minute_share(tick)
+        following: Dict[Tuple[int, int], float] = {}
+        for (extra_h, extra_a), mass in current.items():
+            lead = (cur_h + extra_h) - (cur_a + extra_a)
+            p_home = min(lam_h * base * state_multiplier(lead, tick), 0.30)
+            p_away = min(lam_a * base * state_multiplier(-lead, tick), 0.30)
+            p_none = max(0.0, 1.0 - p_home - p_away)
+
+            here = (extra_h, extra_a)
+            # A side already at the cap keeps its mass in the same state.
+            if extra_h < MAX_EXTRA_GOALS:
+                after_home = (extra_h + 1, extra_a)
+            else:
+                after_home = here
+            if extra_a < MAX_EXTRA_GOALS:
+                after_away = (extra_h, extra_a + 1)
+            else:
+                after_away = here
+
+            # Order matters for float accumulation: stay, home goal, away goal.
+            transitions = ((here, p_none), (after_home, p_home), (after_away, p_away))
+            for target, prob in transitions:
+                following[target] = following.get(target, 0.0) + mass * prob
+        current = following
+    return current
+
+
+def build_live_projection(
+    p1: float,
+    px: float,
+    p2: float,
+    p_over25: float,
+    cur_h: int,
+    cur_a: int,
+    minute: int,
+) -> Dict[str, object]:
+    """Build the in-play projection for a match from its pre-match numbers.
+
+    (p1, px, p2) and p_over25 are the CALIBRATED (V35-anchored) numbers; the
+    same spine the pre-match cards display. They are turned into per-side
+    lambdas, pushed through the minute chain from the current pitch state,
+    and the resulting distribution is summarised.
+
+    Args:
+        p1: Pre-match home-win probability.
+        px: Pre-match draw probability (accepted for interface symmetry; not
+            read by this function).
+        p2: Pre-match away-win probability.
+        p_over25: Pre-match probability of more than 2.5 goals.
+        cur_h: Current home goals (negative values are treated as 0).
+        cur_a: Current away goals (negative values are treated as 0).
+        minute: Current match minute, clamped to [1, MAX_MINUTE].
+
+    Returns:
+        A dict with the clamped minute, the current score, final 1/X/2
+        probabilities, each side's probability of scoring again, the
+        comeback / deadlock-break probability, the live over-2.5
+        probability, expected remaining goals, the five most likely final
+        scores, and the calibration source tag.
+    """
+    minute = int(max(1, min(MAX_MINUTE, minute)))
+    cur_h = max(0, int(cur_h))
+    cur_a = max(0, int(cur_a))
+
+    lam_h, lam_a = split_lambdas(total_lambda_from_over25(p_over25), p1, p2)
+    remaining = _chain(lam_h, lam_a, cur_h, cur_a, minute)
+
+    outcome = {"1": 0.0, "X": 0.0, "2": 0.0}
+    home_again = 0.0
+    away_again = 0.0
+    goals_to_come = 0.0
+    by_score: Dict[str, float] = {}
+    for (extra_h, extra_a), mass in remaining.items():
+        final_h = cur_h + extra_h
+        final_a = cur_a + extra_a
+        if final_h > final_a:
+            outcome["1"] += mass
+        elif final_h == final_a:
+            outcome["X"] += mass
+        else:
+            outcome["2"] += mass
+        if extra_h > 0:
+            home_again += mass
+        if extra_a > 0:
+            away_again += mass
+        goals_to_come += mass * (extra_h + extra_a)
+        # Scorelines are displayed with each side capped at 9.
+        label = f"{min(final_h,9)}-{min(final_a,9)}"
+        by_score[label] = by_score.get(label, 0.0) + mass
+
+    ranked = sorted(by_score.items(), key=lambda item: item[1], reverse=True)
+    likeliest = ranked[:5]
+
+    goals_so_far = cur_h + cur_a
+    over25_now = sum(
+        mass
+        for (extra_h, extra_a), mass in remaining.items()
+        if goals_so_far + extra_h + extra_a >= 3
+    )
+
+    # "comeback": the side currently behind at least draws / currently level
+    # match does NOT stay level
+    if cur_h > cur_a:
+        turnaround = outcome["X"] + outcome["2"]
+    elif cur_a > cur_h:
+        turnaround = outcome["X"] + outcome["1"]
+    else:
+        turnaround = outcome["1"] + outcome["2"]  # deadlock breaks
+
+    final_probs = {
+        "1": round(outcome["1"], 4),
+        "X": round(outcome["X"], 4),
+        "2": round(outcome["2"], 4),
+    }
+    scenarios = [
+        {"score": label, "prob": round(prob, 4)} for label, prob in likeliest
+    ]
+    return {
+        "minute": minute,
+        "current_score": f"{cur_h}-{cur_a}",
+        "probs": final_probs,
+        "p_home_scores_again": round(home_again, 4),
+        "p_away_scores_again": round(away_again, 4),
+        "p_comeback": round(turnaround, 4),
+        "p_over25": round(over25_now, 4),
+        "expected_remaining_goals": round(goals_to_come, 2),
+        "scenario_top5": scenarios,
+        "calibration_source": "live_matrix_v38",
+    }