Files
iddaai-be/ai-engine/models/live_matrix.py
T
fahricansecer 4c137fbab6
Deploy Iddaai Backend / build-and-deploy (push) Successful in 1m7s
wow
2026-06-11 00:25:45 +03:00

185 lines
7.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Live-conditioned score projection (V38) — pure functions, no I/O.
Answers, DURING a match, questions like "1-0 at 80' — what is the REAL
probability the away team still scores?" by conditioning the same calibrated
market-anchored lambdas (V35/V36) on the current score and minute.
Mechanics — a minute-stepped Markov chain over remaining goals:
1. Pre-match lambdas come from the SAME source the score card uses
(de-vigged 1X2 + over2.5, models/score_matrix solvers) — one consistent
probability spine pre-match and in-play.
2. Each remaining minute contributes lambda_side x minute_share(t) goals,
where minute_share is the EMPIRICAL goal-time intensity curve measured
on 38,779 clean-timeline real-odds matches (1H share 44.4%, late-game
intensity rises, stoppage spikes at 45' and 90+').
3. Each minute's intensity is scaled by the MEASURED score-state
multiplier: trailing teams push (+9%, +17% after 70'), leading teams
shut up shop (-5%/-7%), 2+ ahead opens up. The chain updates the state
as virtual goals happen, so multipliers switch mid-projection exactly
like they do on the pitch.
All constants are fitted on the train window (matches older than the last 90
days); the held-out window validates calibration out-of-sample before any of
this reaches the screen.
"""
from __future__ import annotations
from typing import Dict, List, Optional, Tuple
from models.score_matrix import split_lambdas, total_lambda_from_over25
# Last minute stepped by the projection chain: 90 regulation minutes plus
# 4 minutes standing in for folded stoppage time.
MAX_MINUTE: int = 90 + 4

# First minute at which the late-phase score-state multipliers apply.
LATE_PHASE_FROM: int = 70

# Per-side cap on additional goals tracked by the chain; once a side reaches
# it, further goal probability stays in the capped state.
MAX_EXTRA_GOALS: int = 7
# Empirical goal-time intensity: the share of a match's goals that falls in
# each consecutive 5-minute bucket (measured on 105k goals). Bucket b is the
# one `_minute_share` selects for minutes 5*b + 1 .. 5*b + 5; the buckets
# ending the first half (index 8) and the match (index 18) carry the folded
# stoppage-time spikes.
INTENSITY_SHARES: Tuple[float, ...] = (
    # first half: buckets 0-8
    0.036,
    0.045,
    0.047,
    0.047,
    0.045,
    0.046,
    0.048,
    0.049,
    0.081,
    # second half: buckets 9-18
    0.048,
    0.057,
    0.055,
    0.054,
    0.053,
    0.052,
    0.053,
    0.052,
    0.056,
    0.076,
)
# Score-state goal-intensity multipliers, measured (actual/expected) by the
# scoring side's goal difference, split early (<70') / late (>=70').
_STATE_MULT_EARLY: Dict[int, float] = {-2: 1.095, -1: 1.045, 0: 0.966, 1: 0.952, 2: 1.011}
_STATE_MULT_LATE: Dict[int, float] = {-2: 1.123, -1: 1.174, 0: 1.015, 1: 0.930, 2: 1.011}
def _minute_share(minute: int) -> float:
    """Return the share of match-total goal intensity for one minute.

    `minute` is 1-based. It is mapped to its 5-minute bucket in
    INTENSITY_SHARES (out-of-range minutes fall into the first or last
    bucket) and the bucket's share is spread evenly over five minutes.
    """
    # NOTE(review): the last bucket is also divided by 5, while the chain only
    # steps through minute MAX_MINUTE; per-minute shares over the stepped
    # minutes therefore sum to slightly under 1 -- confirm this is intended.
    last_bucket = len(INTENSITY_SHARES) - 1
    bucket = (minute - 1) // 5
    if bucket < 0:
        bucket = 0
    elif bucket > last_bucket:
        bucket = last_bucket
    return INTENSITY_SHARES[bucket] / 5.0
def state_multiplier(diff: int, minute: int) -> float:
    """Return the goal-intensity multiplier for a side at `minute`.

    `diff` is that side's current goal difference (own minus opponent); it
    is clamped to [-2, 2] before the lookup. From LATE_PHASE_FROM onwards
    the late-phase table is used, otherwise the early-phase table.
    """
    if minute >= LATE_PHASE_FROM:
        multipliers = _STATE_MULT_LATE
    else:
        multipliers = _STATE_MULT_EARLY
    clamped_diff = min(2, max(-2, diff))
    return multipliers[clamped_diff]
def estimate_minute(match_date_ms: Optional[int], now_ms: int) -> Optional[int]:
    """Estimate the current match minute from the kickoff timestamp.

    Both arguments are millisecond timestamps. No feed minute is available
    (live_matches.substate carries none), so the minute is derived from
    wall-clock time with the ~15' half-time break folded out; accuracy is
    roughly +/- 2-3 minutes, which barely moves the projection.

    Returns None when the kickoff time is missing (falsy) or lies after
    `now_ms`; otherwise an int clamped to [1, MAX_MINUTE].
    """
    if not match_date_ms:
        return None

    elapsed_min = (now_ms - int(match_date_ms)) / 60000.0
    if elapsed_min < 0:
        return None

    if elapsed_min > 63:
        # Second half: subtract the folded-out break.
        played = elapsed_min - 15.0
    elif elapsed_min > 48:
        # Half-time break window: pin to the start of the second half.
        played = 46
    else:
        # First half, including its stoppage time.
        played = elapsed_min

    return int(max(1, min(MAX_MINUTE, played)))
def _chain(
    lam_h: float,
    lam_a: float,
    cur_h: int,
    cur_a: int,
    minute: int,
) -> Dict[Tuple[int, int], float]:
    """Project the remaining goals from `minute` through MAX_MINUTE.

    Steps one minute at a time. In each minute, every reachable state either
    stays put, gains a home goal or gains an away goal; each side's goal
    probability is its lambda times that minute's intensity share times the
    score-state multiplier for the side's current goal difference, capped
    at 0.30.

    Returns a mapping (extra home goals, extra away goals) -> probability.
    A side that already has MAX_EXTRA_GOALS extra goals keeps its goal
    probability in the same state instead of advancing.
    """
    current: Dict[Tuple[int, int], float] = {(0, 0): 1.0}
    for t in range(minute, MAX_MINUTE + 1):
        share = _minute_share(t)
        following: Dict[Tuple[int, int], float] = {}
        for state, mass in current.items():
            extra_h, extra_a = state
            # Home-side goal difference including the virtual goals so far.
            lead = (cur_h + extra_h) - (cur_a + extra_a)
            p_home = min(lam_h * share * state_multiplier(lead, t), 0.30)
            p_away = min(lam_a * share * state_multiplier(-lead, t), 0.30)
            p_none = max(0.0, 1.0 - p_home - p_away)

            if extra_h < MAX_EXTRA_GOALS:
                home_target = (extra_h + 1, extra_a)
            else:
                home_target = state
            if extra_a < MAX_EXTRA_GOALS:
                away_target = (extra_h, extra_a + 1)
            else:
                away_target = state

            # Deposit order (stay, home goal, away goal) is kept fixed so the
            # accumulation order and key insertion order are deterministic.
            transitions = (
                (state, p_none),
                (home_target, p_home),
                (away_target, p_away),
            )
            for target, prob in transitions:
                following[target] = following.get(target, 0.0) + mass * prob
        current = following
    return current
def build_live_projection(
    p1: float,
    px: float,
    p2: float,
    p_over25: float,
    cur_h: int,
    cur_a: int,
    minute: int,
) -> Dict[str, object]:
    """Build the in-play projection for the current score and minute.

    (p1, px, p2) and p_over25 are the CALIBRATED (V35-anchored) pre-match
    numbers, the same spine the pre-match cards display. The total lambda is
    derived from p_over25 and split between the sides using p1 and p2; `px`
    is accepted for signature symmetry but is not read here.

    `minute` is clamped to [1, MAX_MINUTE] and negative scores are floored at
    zero before the chain is run.

    Returns a dict with the clamped minute, the current score string, the
    1/X/2 probabilities, the probability each side scores again, the
    "comeback" probability, the live over-2.5 probability, the expected
    number of remaining goals, the five most likely final scores and a
    calibration-source tag.
    """
    minute = int(max(1, min(MAX_MINUTE, minute)))
    cur_h = max(0, int(cur_h))
    cur_a = max(0, int(cur_a))

    lam_h, lam_a = split_lambdas(total_lambda_from_over25(p_over25), p1, p2)
    remaining = _chain(lam_h, lam_a, cur_h, cur_a, minute)

    outcome = {"1": 0.0, "X": 0.0, "2": 0.0}
    home_again = 0.0
    away_again = 0.0
    expected_extra = 0.0
    final_scores: Dict[str, float] = {}
    for (extra_h, extra_a), prob in remaining.items():
        final_h = cur_h + extra_h
        final_a = cur_a + extra_a
        if final_h > final_a:
            outcome["1"] += prob
        elif final_h == final_a:
            outcome["X"] += prob
        else:
            outcome["2"] += prob
        if extra_h > 0:
            home_again += prob
        if extra_a > 0:
            away_again += prob
        expected_extra += prob * (extra_h + extra_a)
        # Score labels are capped at 9 goals per side.
        label = f"{min(final_h,9)}-{min(final_a,9)}"
        final_scores[label] = final_scores.get(label, 0.0) + prob

    # Stable descending sort: ties keep first-seen order.
    top_labels = sorted(final_scores, key=final_scores.get, reverse=True)[:5]

    goals_so_far = cur_h + cur_a
    over25_live = sum(
        prob
        for (extra_h, extra_a), prob in remaining.items()
        if goals_so_far + extra_h + extra_a >= 3
    )

    # "comeback": the side currently behind at least draws; for a currently
    # level match it is the probability that the match does NOT stay level.
    if cur_h > cur_a:
        comeback = outcome["X"] + outcome["2"]
    elif cur_a > cur_h:
        comeback = outcome["X"] + outcome["1"]
    else:
        comeback = outcome["1"] + outcome["2"]  # deadlock breaks

    return {
        "minute": minute,
        "current_score": f"{cur_h}-{cur_a}",
        "probs": {
            "1": round(outcome["1"], 4),
            "X": round(outcome["X"], 4),
            "2": round(outcome["2"], 4),
        },
        "p_home_scores_again": round(home_again, 4),
        "p_away_scores_again": round(away_again, 4),
        "p_comeback": round(comeback, 4),
        "p_over25": round(over25_live, 4),
        "expected_remaining_goals": round(expected_extra, 2),
        "scenario_top5": [
            {"score": label, "prob": round(final_scores[label], 4)}
            for label in top_labels
        ],
        "calibration_source": "live_matrix_v38",
    }