# iddaai-be/ai-engine/models/live_matrix.py

"""Live-conditioned score projection (V38) — pure functions, no I/O.

Answers, DURING a match, questions like "1-0 at 80' — what is the REAL
probability the away team still scores?" by conditioning the same calibrated
market-anchored lambdas (V35/V36) on the current score and minute.

Mechanics — a minute-stepped Markov chain over remaining goals:

  1. Pre-match lambdas come from the SAME source the score card uses
     (de-vigged 1X2 + over2.5, models/score_matrix solvers) — one consistent
     probability spine pre-match and in-play.
  2. Each remaining minute contributes lambda_side x minute_share(t) goals,
     where minute_share is the EMPIRICAL goal-time intensity curve measured
     on 38,779 clean-timeline real-odds matches (1H share 44.4%, late-game
     intensity rises, stoppage spikes at 45' and 90+').
  3. Each minute's intensity is scaled by the MEASURED score-state
     multiplier: trailing teams push (+9%, +17% after 70'), leading teams
     shut up shop (-5%/-7%), 2+ ahead opens up. The chain updates the state
     as virtual goals happen, so multipliers switch mid-projection exactly
     like they do on the pitch.

All constants are fitted on the train window (matches older than the last 90
days); the held-out window validates calibration out-of-sample before any of
this reaches the screen.
"""

from __future__ import annotations

from typing import Dict, List, Optional, Tuple

from models.score_matrix import split_lambdas, total_lambda_from_over25

# Last minute index the chain steps through; estimate_minute() and
# build_live_projection() also clamp their minute to this value.
MAX_MINUTE = 94          # 90 + folded stoppage
# state_multiplier() reads the late table when minute >= this value.
LATE_PHASE_FROM = 70     # measured multipliers switch here
# Once a side has this many extra goals, _chain() keeps further scoring mass
# in the same state instead of opening a new one.
MAX_EXTRA_GOALS = 7      # per side, absorbing cap for the chain

# Empirical goal-time intensity: share of a match's goals per 5-min bucket
# (0-5, ..., 90-94+). Measured on 105k goals; 45' and 90+' buckets carry the
# folded stoppage-time spikes.
# 19 entries: the first row (9 buckets) sums to 0.444, the second row
# (10 buckets) to 0.556, 1.000 overall. _minute_share() maps a 1-based
# minute m to index (m - 1) // 5 and divides the bucket share by 5.
INTENSITY_SHARES: Tuple[float, ...] = (
    0.036, 0.045, 0.047, 0.047, 0.045, 0.046, 0.048, 0.049, 0.081,
    0.048, 0.057, 0.055, 0.054, 0.053, 0.052, 0.053, 0.052, 0.056, 0.076,
)

# Score-state goal-intensity multipliers, measured (actual/expected) by the
# scoring side's goal difference, split early (<70') / late (>=70').
# Keys are the goal difference (own - opponent) after state_multiplier()
# clamps it to [-2, 2], so -2 / 2 stand for "two or more behind / ahead".
_STATE_MULT_EARLY: Dict[int, float] = {-2: 1.095, -1: 1.045, 0: 0.966, 1: 0.952, 2: 1.011}
_STATE_MULT_LATE: Dict[int, float] = {-2: 1.123, -1: 1.174, 0: 1.015, 1: 0.930, 2: 1.011}


def _minute_share(minute: int) -> float:
    """Return the fraction of match-total goal intensity for one minute.

    `minute` is 1-based. It is mapped to a 5-minute bucket of
    INTENSITY_SHARES (minutes 1-5 -> bucket 0, 6-10 -> bucket 1, ...);
    minutes before the first or past the last bucket are clamped to the
    nearest one. The bucket's share is spread evenly over 5 minutes.
    """
    # NOTE(review): every bucket is divided by 5, including the last one,
    # while the chain stops at MAX_MINUTE; confirm the last bucket is meant
    # to contribute fewer than 5 minutes' worth of intensity.
    last_bucket = len(INTENSITY_SHARES) - 1
    raw_bucket = (minute - 1) // 5
    bucket = min(last_bucket, max(0, raw_bucket))
    bucket_share = INTENSITY_SHARES[bucket]
    return bucket_share / 5.0


def state_multiplier(diff: int, minute: int) -> float:
    """Look up the score-state intensity multiplier for one side.

    `diff` is that side's current goal difference (own − opponent); it is
    clamped to [-2, 2] before the lookup. The late-phase table is used from
    LATE_PHASE_FROM onwards, the early-phase table before it.
    """
    if minute >= LATE_PHASE_FROM:
        multipliers = _STATE_MULT_LATE
    else:
        multipliers = _STATE_MULT_EARLY
    clamped_diff = max(-2, min(2, diff))
    return multipliers[clamped_diff]


def estimate_minute(match_date_ms: Optional[int], now_ms: int) -> Optional[int]:
    """Approximate the current match minute from the kickoff time.

    No feed minute is available (live_matches.substate carries none), so the
    minute is derived from wall-clock time elapsed since kickoff, folding out
    the ~15' half-time break. Accuracy is ±2-3 minutes, which barely moves
    the projection.

    The estimate never decreases as time passes: first-half stoppage is
    reported as minute 45 (not 46-48), the break as 46, and the second half
    as elapsed − 15.

    Args:
        match_date_ms: Kickoff timestamp in epoch milliseconds. A missing or
            zero value is treated as unknown.
        now_ms: Current timestamp in epoch milliseconds.

    Returns:
        The estimated minute, clamped to [1, MAX_MINUTE], or None when the
        kickoff time is unknown or still in the future.
    """
    if not match_date_ms:
        return None
    elapsed = (now_ms - int(match_date_ms)) / 60000.0
    if elapsed < 0:
        return None

    first_half_end = 48.0    # 45' of play plus an assumed ~3' of stoppage
    break_end = 63.0         # first half window + ~15' break
    if elapsed <= first_half_end:
        # Clamp at 45: stoppage minutes still belong to the first half.
        # Letting them run on to 46-48 would report second-half minutes and
        # then jump BACK to 46 once the half-time window starts.
        minute = min(elapsed, 45.0)
    elif elapsed <= break_end:
        minute = 46              # half-time: the next playable minute
    else:
        minute = elapsed - 15.0  # second half, break folded out
    return int(max(1, min(MAX_MINUTE, minute)))


def _chain(
    lam_h: float,
    lam_a: float,
    cur_h: int,
    cur_a: int,
    minute: int,
) -> Dict[Tuple[int, int], float]:
    """Project the remaining goals from `minute` through MAX_MINUTE.

    Steps one minute at a time. In each minute a state (extra home goals,
    extra away goals) either stays put, gains one home goal, or gains one
    away goal. The per-minute scoring probabilities are the side's lambda
    times the minute's intensity share times the score-state multiplier for
    the score reached so far, each capped at 0.30.

    Returns a dict mapping (extra home goals, extra away goals) to its
    probability. If `minute` is past MAX_MINUTE no step is taken and all
    mass stays on (0, 0).
    """
    current: Dict[Tuple[int, int], float] = {(0, 0): 1.0}
    for t in range(minute, MAX_MINUTE + 1):
        share = _minute_share(t)
        following: Dict[Tuple[int, int], float] = {}
        for here, mass in current.items():
            extra_h, extra_a = here
            lead = (cur_h + extra_h) - (cur_a + extra_a)
            p_home = min(lam_h * share * state_multiplier(lead, t), 0.30)
            p_away = min(lam_a * share * state_multiplier(-lead, t), 0.30)
            p_none = max(0.0, 1.0 - p_home - p_away)

            # A side already at the cap absorbs its scoring mass in place.
            if extra_h < MAX_EXTRA_GOALS:
                after_home_goal = (extra_h + 1, extra_a)
            else:
                after_home_goal = here
            if extra_a < MAX_EXTRA_GOALS:
                after_away_goal = (extra_h, extra_a + 1)
            else:
                after_away_goal = here

            # Order matters: it fixes both the dict insertion order and the
            # floating-point accumulation order.
            transitions = (
                (here, p_none),
                (after_home_goal, p_home),
                (after_away_goal, p_away),
            )
            for target, prob in transitions:
                following[target] = following.get(target, 0.0) + mass * prob
        current = following
    return current


def build_live_projection(
    p1: float,
    px: float,
    p2: float,
    p_over25: float,
    cur_h: int,
    cur_a: int,
    minute: int,
) -> Dict[str, object]:
    """Build the in-play projection for the current score and minute.

    Derives the pre-match lambdas from `p_over25` (total) and `p1` / `p2`
    (home/away split), runs the minute-stepped chain from `minute` to full
    time, and aggregates the resulting final-score distribution.

    (p1, px, p2) and p_over25 are expected to be the CALIBRATED
    (V35-anchored) numbers, the same spine the pre-match cards display.
    `px` is accepted for interface symmetry; the computation does not read it.

    `minute` is clamped to [1, MAX_MINUTE]; `cur_h` / `cur_a` are coerced to
    non-negative ints.

    Returns a dict with:
        minute, current_score: the clamped inputs echoed back.
        probs: final 1 / X / 2 probabilities.
        p_home_scores_again, p_away_scores_again: chance of at least one
            more goal for that side.
        p_comeback: the trailing side at least draws; for a level score,
            the chance the match does not end level.
        p_over25: chance the final total is 3 goals or more.
        expected_remaining_goals: mean number of goals still to come.
        scenario_top5: the five most likely final scores (each side's goals
            capped at 9 in the label).
        calibration_source: fixed tag "live_matrix_v38".
    """
    minute = int(max(1, min(MAX_MINUTE, minute)))
    cur_h = max(0, int(cur_h))
    cur_a = max(0, int(cur_a))

    lam_total = total_lambda_from_over25(p_over25)
    lam_h, lam_a = split_lambdas(lam_total, p1, p2)
    dist = _chain(lam_h, lam_a, cur_h, cur_a, minute)

    home_win = 0.0
    draw = 0.0
    away_win = 0.0
    home_scores_again = 0.0
    away_scores_again = 0.0
    remaining_goals = 0.0
    final_scores: Dict[str, float] = {}
    for (extra_h, extra_a), mass in dist.items():
        final_h = cur_h + extra_h
        final_a = cur_a + extra_a
        if final_h == final_a:
            draw += mass
        elif final_h > final_a:
            home_win += mass
        else:
            away_win += mass
        if extra_h > 0:
            home_scores_again += mass
        if extra_a > 0:
            away_scores_again += mass
        remaining_goals += mass * (extra_h + extra_a)
        label = f"{min(final_h, 9)}-{min(final_a, 9)}"
        final_scores[label] = final_scores.get(label, 0.0) + mass

    ranked = sorted(final_scores.items(), key=lambda item: item[1], reverse=True)
    top_five = ranked[:5]

    goals_so_far = cur_h + cur_a
    p_over25_live = sum(
        mass
        for (extra_h, extra_a), mass in dist.items()
        if goals_so_far + extra_h + extra_a >= 3
    )

    # "comeback": the side currently behind at least draws; for a level
    # match, the deadlock breaks either way.
    if cur_h == cur_a:
        p_comeback = home_win + away_win
    elif cur_h > cur_a:
        p_comeback = draw + away_win
    else:
        p_comeback = draw + home_win

    outcome_probs = {
        "1": round(home_win, 4),
        "X": round(draw, 4),
        "2": round(away_win, 4),
    }
    scenarios = [
        {"score": score, "prob": round(prob, 4)} for score, prob in top_five
    ]
    return {
        "minute": minute,
        "current_score": f"{cur_h}-{cur_a}",
        "probs": outcome_probs,
        "p_home_scores_again": round(home_scores_again, 4),
        "p_away_scores_again": round(away_scores_again, 4),
        "p_comeback": round(p_comeback, 4),
        "p_over25": round(p_over25_live, 4),
        "expected_remaining_goals": round(remaining_goals, 2),
        "scenario_top5": scenarios,
        "calibration_source": "live_matrix_v38",
    }