# 185 lines · 7.1 KiB · Python
"""Live-conditioned score projection (V38) — pure functions, no I/O.

Answers, DURING a match, questions like "1-0 at 80' — what is the REAL
probability the away team still scores?" by conditioning the same calibrated
market-anchored lambdas (V35/V36) on the current score and minute.

Mechanics — a minute-stepped Markov chain over remaining goals:

1. Pre-match lambdas come from the SAME source the score card uses
   (de-vigged 1X2 + over2.5, models/score_matrix solvers) — one consistent
   probability spine pre-match and in-play.
2. Each remaining minute contributes lambda_side x minute_share(t) goals,
   where minute_share is the EMPIRICAL goal-time intensity curve measured
   on 38,779 clean-timeline real-odds matches (1H share 44.4%, late-game
   intensity rises, stoppage spikes at 45' and 90+').
3. Each minute's intensity is scaled by the MEASURED score-state
   multiplier: trailing teams push (+9%, +17% after 70'), leading teams
   shut up shop (-5%/-7%), 2+ ahead opens up. The chain updates the state
   as virtual goals happen, so multipliers switch mid-projection exactly
   like they do on the pitch.

All constants are fitted on the train window (matches older than the last 90
days); the held-out window validates calibration out-of-sample before any of
this reaches the screen.
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from typing import Dict, List, Optional, Tuple
|
||
|
||
from models.score_matrix import split_lambdas, total_lambda_from_over25
|
||
|
||
# Horizon and state-space limits of the projection chain.
MAX_MINUTE = 94  # last simulated minute: 90 plus folded stoppage
LATE_PHASE_FROM = 70  # minute at which the late multiplier table takes over
MAX_EXTRA_GOALS = 7  # per-side cap on extra goals; the cap state absorbs

# Empirical goal-time intensity: the share of a match's goals falling into
# each consecutive 5-minute bucket (0-5, ..., 90-94+), measured on 105k goals.
# The 45' and 90+' buckets carry the folded stoppage-time spikes.
INTENSITY_SHARES: Tuple[float, ...] = (
    # first half — nine buckets, the last one holding the 45' spike
    0.036, 0.045, 0.047,
    0.047, 0.045, 0.046,
    0.048, 0.049, 0.081,
    # second half — ten buckets, the last one holding the 90+' spike
    0.048, 0.057, 0.055, 0.054, 0.053,
    0.052, 0.053, 0.052, 0.056, 0.076,
)

# Score-state goal-intensity multipliers (measured actual / expected), keyed
# by the scoring side's goal difference. One table for the early phase
# (before minute 70), one for the late phase (minute 70 onwards).
_STATE_MULT_EARLY: Dict[int, float] = {
    -2: 1.095,
    -1: 1.045,
    0: 0.966,
    1: 0.952,
    2: 1.011,
}
_STATE_MULT_LATE: Dict[int, float] = {
    -2: 1.123,
    -1: 1.174,
    0: 1.015,
    1: 0.930,
    2: 1.011,
}
|
||
|
||
|
||
def _minute_share(minute: int) -> float:
    """Return the per-minute share of match-total goal intensity at `minute`.

    `minute` is 1-based: minutes 1-5 read bucket 0 of INTENSITY_SHARES,
    minutes 6-10 bucket 1, and so on. Minutes that fall outside the table are
    clamped to the first / last bucket.

    A bucket's share is spread evenly over the minutes that read it. That is
    five minutes for every bucket except the last one, which is read by all
    minutes from its first minute up to MAX_MINUTE. Dividing the last bucket
    by five as well would drop part of the stoppage-time share, so the
    per-minute shares over 1..MAX_MINUTE would not add up to
    sum(INTENSITY_SHARES).
    """
    last = len(INTENSITY_SHARES) - 1
    bucket = min(last, max(0, (minute - 1) // 5))
    width = 5
    if bucket == last:
        # Minutes 5 * last + 1 .. MAX_MINUTE all map onto the last bucket;
        # never let the divisor fall below one minute.
        width = max(1, MAX_MINUTE - 5 * last)
    return INTENSITY_SHARES[bucket] / width
|
||
|
||
|
||
def state_multiplier(diff: int, minute: int) -> float:
    """Look up the score-state intensity multiplier for one side.

    `diff` is that side's current goal difference (own − opponent). It is
    clamped to the -2..2 range the tables cover. The late table is used once
    `minute` has reached LATE_PHASE_FROM, the early table before that.
    """
    capped = min(2, diff)
    capped = max(-2, capped)
    if minute >= LATE_PHASE_FROM:
        lookup = _STATE_MULT_LATE
    else:
        lookup = _STATE_MULT_EARLY
    return lookup[capped]
|
||
|
||
|
||
def estimate_minute(match_date_ms: Optional[int], now_ms: int) -> Optional[int]:
    """Approximate the current match minute from the kickoff timestamp.

    No feed minute is available (live_matches.substate carries none), so the
    minute is derived from the time elapsed since kickoff, with the ~15'
    half-time break folded out. Accuracy is ±2-3 minutes, which barely moves
    the projection.

    Args:
        match_date_ms: Kickoff timestamp in milliseconds. A falsy value
            (None or 0) is treated as "unknown".
        now_ms: Current timestamp in milliseconds, on the same clock.

    Returns:
        The estimated minute, clamped to 1..MAX_MINUTE, or None when the
        kickoff is unknown or still lies in the future.
    """
    if not match_date_ms:
        return None
    elapsed = (now_ms - int(match_date_ms)) / 60000.0
    if elapsed < 0:
        return None
    if elapsed <= 48:  # first half (+stoppage)
        # First-half stoppage is folded into the 45' bucket of the intensity
        # table, and minutes 46+ are read as second-half minutes. Cap at 45
        # so the estimate never runs ahead into the second half and then
        # falls back to 46 once the half-time window starts.
        minute = min(elapsed, 45.0)
    elif elapsed <= 63:  # half-time break window
        minute = 46
    else:
        minute = elapsed - 15.0  # second half, break folded out
    return int(max(1, min(MAX_MINUTE, minute)))
|
||
|
||
|
||
def _chain(
    lam_h: float,
    lam_a: float,
    cur_h: int,
    cur_a: int,
    minute: int,
) -> Dict[Tuple[int, int], float]:
    """Project the joint distribution of the goals still to come.

    Steps one minute at a time from `minute` through MAX_MINUTE and returns a
    mapping (extra home goals, extra away goals) -> probability. In every
    minute each side scores with probability lambda x minute share x
    score-state multiplier. The multiplier is read from the running score
    of the state being advanced (current score plus that state's extra goals).
    """

    def credit(bucket, state, mass):
        # Add probability mass to `state` in the next-minute layer.
        bucket[state] = bucket.get(state, 0.0) + mass

    layer = {(0, 0): 1.0}
    for tick in range(minute, MAX_MINUTE + 1):
        weight = _minute_share(tick)
        upcoming = {}
        for (more_h, more_a), mass in layer.items():
            here = (more_h, more_a)
            margin = (cur_h + more_h) - (cur_a + more_a)
            # Per-minute goal probabilities, each capped at 0.30.
            goal_h = min(lam_h * weight * state_multiplier(margin, tick), 0.30)
            goal_a = min(lam_a * weight * state_multiplier(-margin, tick), 0.30)
            quiet = max(0.0, 1.0 - goal_h - goal_a)

            credit(upcoming, here, mass * quiet)
            # A side already at MAX_EXTRA_GOALS absorbs: its goal mass stays
            # on the same state instead of opening a new one.
            home_target = (more_h + 1, more_a) if more_h < MAX_EXTRA_GOALS else here
            credit(upcoming, home_target, mass * goal_h)
            away_target = (more_h, more_a + 1) if more_a < MAX_EXTRA_GOALS else here
            credit(upcoming, away_target, mass * goal_a)
        layer = upcoming
    return layer
|
||
|
||
|
||
def build_live_projection(
    p1: float,
    px: float,
    p2: float,
    p_over25: float,
    cur_h: int,
    cur_a: int,
    minute: int,
) -> Dict[str, object]:
    """Build the in-play projection for one match.

    (p1, px, p2) and p_over25 are the CALIBRATED (V35-anchored) pre-match
    numbers — the same spine the pre-match cards display. p_over25, p1 and
    p2 are turned into per-side lambdas with total_lambda_from_over25 and
    split_lambdas; `px` is not used by this function. The chain is then run
    from `minute` with the current score (cur_h, cur_a).

    `minute` is clamped to 1..MAX_MINUTE and negative scores are raised to 0.

    Returns a dict with the clamped minute, the current score string, final
    1/X/2 probabilities, the probability that each side scores again, the
    comeback probability, the live over-2.5 probability, the expected
    remaining goals, the five most likely final scores and the calibration
    source tag.
    """
    minute = int(max(1, min(MAX_MINUTE, minute)))
    cur_h = max(0, int(cur_h))
    cur_a = max(0, int(cur_a))

    goal_total = total_lambda_from_over25(p_over25)
    lam_h, lam_a = split_lambdas(goal_total, p1, p2)
    remaining = _chain(lam_h, lam_a, cur_h, cur_a, minute)

    outcome = {"1": 0.0, "X": 0.0, "2": 0.0}
    home_again = 0.0
    away_again = 0.0
    goals_to_come = 0.0
    final_scores = {}
    for (more_h, more_a), mass in remaining.items():
        end_h = cur_h + more_h
        end_a = cur_a + more_a
        if end_h > end_a:
            sign = "1"
        elif end_h == end_a:
            sign = "X"
        else:
            sign = "2"
        outcome[sign] += mass
        if more_h > 0:
            home_again += mass
        if more_a > 0:
            away_again += mass
        goals_to_come += mass * (more_h + more_a)
        # Final scores are labelled with each side capped at 9.
        label = f"{min(end_h,9)}-{min(end_a,9)}"
        final_scores[label] = final_scores.get(label, 0.0) + mass

    ranked = list(final_scores.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    likeliest = ranked[:5]

    goals_now = cur_h + cur_a
    over25_live = sum(
        mass
        for (more_h, more_a), mass in remaining.items()
        if goals_now + more_h + more_a >= 3
    )

    # "comeback": the side currently behind at least draws; for a match that
    # is currently level it means the deadlock breaks.
    home_win, draw, away_win = outcome["1"], outcome["X"], outcome["2"]
    if cur_h == cur_a:
        comeback = home_win + away_win
    elif cur_h > cur_a:
        comeback = draw + away_win
    else:
        comeback = draw + home_win

    projection: Dict[str, object] = {}
    projection["minute"] = minute
    projection["current_score"] = f"{cur_h}-{cur_a}"
    projection["probs"] = {sign: round(value, 4) for sign, value in outcome.items()}
    projection["p_home_scores_again"] = round(home_again, 4)
    projection["p_away_scores_again"] = round(away_again, 4)
    projection["p_comeback"] = round(comeback, 4)
    projection["p_over25"] = round(over25_live, 4)
    projection["expected_remaining_goals"] = round(goals_to_come, 2)
    projection["scenario_top5"] = [
        {"score": label, "prob": round(mass, 4)} for label, mass in likeliest
    ]
    projection["calibration_source"] = "live_matrix_v38"
    return projection
|