This commit is contained in:
@@ -0,0 +1,166 @@
|
||||
"""Market-anchored score matrix (V36) — pure functions, no I/O.
|
||||
|
||||
WHY THIS EXISTS
|
||||
---------------
|
||||
The engine's displayed score predictions (`score_prediction`, `scenario_top5`)
|
||||
come from the model's invented xG, so they can contradict the calibrated
|
||||
market-anchored probabilities shown right next to them (V35). Example seen in
|
||||
production: MS card says home 78% while the score card's distribution implies
|
||||
something else entirely.
|
||||
|
||||
This module derives the FULL scoreline distribution from the SAME calibrated
|
||||
(de-vigged) market probabilities that the V35 market anchor displays:
|
||||
|
||||
1. Solve total-goals lambda T from the calibrated P(over 2.5)
|
||||
(total goals ~ Poisson(T): P(N>=3) = 1 - e^-T (1 + T + T^2/2)).
|
||||
2. Split T into (lambda_home, lambda_away) so the independent-Poisson
|
||||
matrix's home/away win gap matches the calibrated 1X2.
|
||||
3. Build the score matrix, then IPF-scale the three outcome regions
|
||||
(home-win cells, draw cells, away-win cells) so they sum EXACTLY to the
|
||||
calibrated (p1, px, p2) — guaranteeing the score card and the MS card
|
||||
can never disagree again.
|
||||
4. Half-time matrix: same machinery with lambdas scaled by the measured
|
||||
first-half goal share, optionally IPF'd to the anchored HT 1X2.
|
||||
|
||||
All stdlib (math only) → unit-testable in isolation, no model/DB deps.
|
||||
|
||||
Validated on 63,681 real-odds matches (2025-26, out-of-sample constants):
|
||||
see tests + the calibration session notes. Honest ceiling reminder: even a
|
||||
perfect correct-score predictor only hits the modal score ~12-15% of the time;
|
||||
the value here is honest, consistent probabilities — not certainty.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
# Fraction of each side's full-time goals that is scored in the first half.
# Measured on 63,681 real-odds matches (2025-26) as 0.4440 (home) and
# 0.4428 (away); both are stored rounded to two decimals.
HT_GOAL_SHARE_HOME: float = 0.44
HT_GOAL_SHARE_AWAY: float = 0.44

# Largest per-side goal count kept in the score matrix, which therefore spans
# (0..10) x (0..10); the tail mass beyond that is negligible.
MAX_GOALS: int = 10
|
||||
|
||||
|
||||
def _pois_pmf(lam: float, k: int) -> float:
    """Return P(X == k) for X ~ Poisson(lam).

    Evaluated directly as e^-lam * lam^k / k!. ``math.factorial`` raises
    ``ValueError`` when ``k`` is negative.
    """
    weight = math.exp(-lam) * lam**k
    return weight / math.factorial(k)
|
||||
|
||||
|
||||
def total_lambda_from_over25(p_over25: float) -> float:
    """Solve T such that P(Poisson(T) >= 3) == p_over25, by bisection.

    Args:
        p_over25: Calibrated probability of three or more total goals.
            It is clamped to [0.01, 0.99] before solving.

    Returns:
        The total-goals rate T whose Poisson tail P(N >= 3) matches the
        clamped target.
    """
    p = min(max(p_over25, 0.01), 0.99)

    def p_over(t: float) -> float:
        # P(N >= 3) = 1 - P(N <= 2) for N ~ Poisson(t).
        return 1.0 - math.exp(-t) * (1.0 + t + t * t / 2.0)

    lo, hi = 0.05, 8.0
    # P(N >= 3) at T = 8.0 is only ~0.986, which is below the 0.99 clamp
    # ceiling. Without widening, targets in (0.986, 0.99] would silently
    # saturate at 8.0 instead of being solved. The bracket is only widened
    # when needed, so every other input follows the exact same bisection
    # path as before. A NaN target never satisfies the comparison, so this
    # loop cannot spin on it.
    while p_over(hi) < p:
        hi *= 2.0

    # p_over is increasing in t, so plain bisection converges; 60 halvings
    # shrink the bracket far below float resolution.
    for _ in range(60):
        mid = (lo + hi) / 2.0
        if p_over(mid) < p:
            lo = mid
        else:
            hi = mid
    return (lo + hi) / 2.0
|
||||
|
||||
|
||||
def _raw_matrix(lh: float, la: float) -> List[List[float]]:
    """Build the independent-Poisson scoreline grid.

    Entry ``[i][j]`` is P(home scores i) * P(away scores j) for i and j in
    0..MAX_GOALS, with home goals ~ Poisson(lh) and away goals ~ Poisson(la).
    Scorelines above MAX_GOALS are not represented, so the grid is not
    renormalised to sum to 1.
    """
    goals = range(MAX_GOALS + 1)
    home_pmf = [_pois_pmf(lh, g) for g in goals]
    away_pmf = [_pois_pmf(la, g) for g in goals]

    grid: List[List[float]] = []
    for p_home in home_pmf:
        grid.append([p_home * p_away for p_away in away_pmf])
    return grid
|
||||
|
||||
|
||||
def _outcome_sums(mat: List[List[float]]) -> Tuple[float, float, float]:
    """Return the (home-win, draw, away-win) mass of a score matrix.

    ``mat[i][j]`` is read as the probability of home scoring i and away
    scoring j. Cells with i > j count as home wins, the diagonal as draws,
    and cells with i < j as away wins. Only indices 0..MAX_GOALS are read.
    """
    # Buckets: 0 = home win, 1 = draw, 2 = away win.
    totals = [0.0, 0.0, 0.0]
    size = MAX_GOALS + 1
    for home in range(size):
        row = mat[home]
        for away in range(size):
            if home > away:
                bucket = 0
            elif home == away:
                bucket = 1
            else:
                bucket = 2
            totals[bucket] += row[away]
    return totals[0], totals[1], totals[2]
|
||||
|
||||
|
||||
def split_lambdas(total: float, p1: float, p2: float) -> Tuple[float, float]:
    """Divide the total goal rate between the home and away sides.

    Bisects on the home share of ``total`` until the independent-Poisson
    matrix's (home-win - away-win) gap matches the calibrated ``p1 - p2``.
    The share is searched within [0.10, 0.90], so a gap that cannot be
    reached inside that range ends at the nearest bound.

    Returns:
        ``(lambda_home, lambda_away)``, which sum to ``total``.
    """
    wanted_gap = p1 - p2
    share_lo, share_hi = 0.10, 0.90

    for _step in range(40):
        share = (share_lo + share_hi) / 2.0
        home_win, _draw, away_win = _outcome_sums(
            _raw_matrix(total * share, total * (1.0 - share))
        )
        # A larger home share widens the home-minus-away win gap, so move
        # the lower bound up while the gap is still too small.
        if (home_win - away_win) < wanted_gap:
            share_lo = share
        else:
            share_hi = share

    home_share = (share_lo + share_hi) / 2.0
    return total * home_share, total * (1.0 - home_share)
|
||||
|
||||
|
||||
def ipf_to_outcomes(
    mat: List[List[float]], p1: float, px: float, p2: float
) -> List[List[float]]:
    """Rescale the three outcome regions to the calibrated 1X2 probabilities.

    Home-win cells (i > j), draw cells (i == j) and away-win cells (i < j)
    are each multiplied by one factor so that the regions sum to ``p1``,
    ``px`` and ``p2`` respectively. This keeps the score card consistent
    with the displayed MS probabilities.

    Returns:
        A new (MAX_GOALS + 1) x (MAX_GOALS + 1) matrix. If any region of
        ``mat`` has non-positive mass, no scaling is possible and ``mat``
        itself is returned unchanged.
    """
    home_mass, draw_mass, away_mass = _outcome_sums(mat)
    if min((home_mass, draw_mass, away_mass)) <= 0.0:
        return mat

    scale_home = p1 / home_mass
    scale_draw = px / draw_mass
    scale_away = p2 / away_mass

    size = MAX_GOALS + 1
    scaled: List[List[float]] = []
    for i in range(size):
        row = []
        for j in range(size):
            if i > j:
                factor = scale_home
            elif i == j:
                factor = scale_draw
            else:
                factor = scale_away
            row.append(mat[i][j] * factor)
        scaled.append(row)
    return scaled
|
||||
|
||||
|
||||
def top_scores(mat: List[List[float]], n: int = 5) -> List[Dict[str, object]]:
    """Return the ``n`` most probable scorelines of a score matrix.

    Each entry is ``{"score": "<home>-<away>", "prob": <rounded to 4 dp>}``,
    ordered from most to least likely. Cells with equal probability are
    ordered by higher home goals first, then higher away goals.
    """
    size = MAX_GOALS + 1
    ranked = sorted(
        ((mat[home][away], home, away) for home in range(size) for away in range(size)),
        reverse=True,
    )

    best: List[Dict[str, object]] = []
    for prob, home, away in ranked[:n]:
        best.append({"score": f"{home}-{away}", "prob": round(prob, 4)})
    return best
|
||||
|
||||
|
||||
def build_calibrated_score_package(
    p1: float,
    px: float,
    p2: float,
    p_over25: float,
    ht_probs: Optional[Tuple[float, float, float]] = None,
) -> Dict[str, object]:
    """Assemble the calibrated score card from the anchored probabilities.

    Args:
        p1, px, p2: Calibrated full-time home / draw / away probabilities.
        p_over25: Calibrated probability of over 2.5 total goals.
        ht_probs: Optional half-time (home, draw, away) probabilities. When
            given, the half-time matrix is rescaled to match them.

    Returns:
        A dict with keys ``ft``, ``ht``, ``xg_home``, ``xg_away``,
        ``xg_total``, ``scenario_top5``, ``ht_top`` and
        ``calibration_source``. The ``xg_*`` values are the market-implied
        goal rates (the lambdas), so every number on the card comes from
        one consistent source.
    """
    # Full time: solve the total rate, split it by side, then pin the
    # outcome regions to the calibrated 1X2.
    goal_rate = total_lambda_from_over25(p_over25)
    home_rate, away_rate = split_lambdas(goal_rate, p1, p2)
    full_time = ipf_to_outcomes(_raw_matrix(home_rate, away_rate), p1, px, p2)
    full_time_top = top_scores(full_time, 5)

    # Half time: same machinery on rates scaled by the first-half goal share.
    half_time = _raw_matrix(
        home_rate * HT_GOAL_SHARE_HOME, away_rate * HT_GOAL_SHARE_AWAY
    )
    if ht_probs is not None:
        half_time = ipf_to_outcomes(half_time, *ht_probs)
    half_time_top = top_scores(half_time, 3)

    modal_ft = str(full_time_top[0]["score"]) if full_time_top else None
    modal_ht = str(half_time_top[0]["score"]) if half_time_top else None

    return {
        "ft": modal_ft,
        "ht": modal_ht,
        "xg_home": round(home_rate, 2),
        "xg_away": round(away_rate, 2),
        "xg_total": round(home_rate + away_rate, 2),
        "scenario_top5": full_time_top,
        "ht_top": half_time_top,
        "calibration_source": "market_anchor_v36_score",
    }
|
||||
Reference in New Issue
Block a user