feat(ai): expand training to 68K+ matches, add score model, backfill implied odds
Deploy Iddaai Backend / build-and-deploy (push) Successful in 6s

- extract_training_data.py: switch from top_leagues.json (23) to qualified_leagues.json (265)
- update_implied_odds.py: new script to backfill implied odds from real market data
- train_score_model.py: rewrite with v25 102-feature set + temporal split
- single_match_orchestrator.py: integrate ML score model with heuristic fallback
This commit is contained in:
2026-05-05 16:04:00 +03:00
parent 9bb8f39bca
commit 244d8f5366
4 changed files with 626 additions and 173 deletions
+79 -21
View File
@@ -16,6 +16,7 @@ import re
import time
import math
import os
import pickle
import pandas as pd
import numpy as np
from collections import defaultdict
@@ -258,6 +259,51 @@ class SingleMatchOrchestrator:
self._v27 = None
return None
def _get_score_model(self, model_path: Optional[str] = None) -> Optional[Dict]:
    """Load and cache the pickled score prediction model bundle (non-fatal).

    The outcome of the first call -- the loaded bundle or ``None`` -- is
    stored on ``self._score_model_cache`` and returned by every later call
    without touching the filesystem again.

    Args:
        model_path: Optional path to the pickle file. When omitted, the
            file ``models/xgb_score.pkl`` under the parent of this file's
            directory is used. Only consulted on the first (uncached) call.

    Returns:
        The unpickled dict when it contains all of ``home_model``,
        ``away_model``, ``ht_home_model``, ``ht_away_model`` and
        ``features``; otherwise ``None`` (file absent, load error, or
        incomplete bundle). Never raises for load problems.
    """
    if hasattr(self, "_score_model_cache"):
        return self._score_model_cache

    required_keys = ("home_model", "away_model", "ht_home_model", "ht_away_model", "features")
    if model_path is None:
        model_path = os.path.join(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
            "models", "xgb_score.pkl",
        )

    loaded = None
    try:
        if os.path.exists(model_path):
            # NOTE(security): pickle.load can execute arbitrary code embedded
            # in the file. This path must only ever point at a trusted,
            # locally produced artifact.
            with open(model_path, "rb") as f:
                model_data = pickle.load(f)
            missing = [k for k in required_keys if k not in model_data]
            if missing:
                # Previously this case fell through silently, which made a
                # stale or partial model file indistinguishable from "no model".
                print(f"[SCORE] ⚠ Score model ignored (missing keys: {', '.join(missing)}), using heuristic")
            else:
                print(f"[SCORE] ✅ Score model loaded ({len(model_data['features'])} features)")
                loaded = model_data
    except Exception as e:
        # Best-effort by design: any failure here means "use the heuristic".
        print(f"[SCORE] ⚠ Load failed (non-fatal, using heuristic): {e}")

    # Cache negative results too, so a broken file is not re-read per match.
    self._score_model_cache = loaded
    return loaded
def _predict_score_with_model(self, features: Dict[str, float]) -> Optional[Dict[str, float]]:
    """Predict full-time and half-time goal values with the score model.

    Args:
        features: Feature-name -> value mapping. Features listed by the model
            but absent from this mapping are filled with ``0``.

    Returns:
        A dict with keys ``ft_home``, ``ft_away``, ``ht_home`` and
        ``ht_away`` (each clamped to ``>= 0`` and rounded to 2 decimals), or
        ``None`` when no model is available, prediction fails, or any model
        returns a non-finite value. ``None`` tells the caller to use its
        heuristic fallback.
    """
    score_model = self._get_score_model()
    if score_model is None:
        return None

    # (output key, key of the regressor inside the model bundle)
    targets = (
        ("ft_home", "home_model"),
        ("ft_away", "away_model"),
        ("ht_home", "ht_home_model"),
        ("ht_away", "ht_away_model"),
    )

    try:
        import pandas as _pd

        model_features = score_model["features"]
        # Dict insertion order keeps the columns in the model's feature order.
        row = {name: float(features.get(name, 0)) for name in model_features}
        frame = _pd.DataFrame([row])

        scores: Dict[str, float] = {}
        for out_key, model_key in targets:
            raw = float(score_model[model_key].predict(frame)[0])
            # max(0.0, nan) evaluates to 0.0, so a NaN would silently become a
            # "0 goals" prediction, and inf would reach the caller where
            # int(round(inf)) raises. Treat both as a failed prediction.
            if not math.isfinite(raw):
                raise ValueError(f"{model_key} returned non-finite value {raw!r}")
            scores[out_key] = round(max(0.0, raw), 2)
        return scores
    except Exception as e:
        # Best-effort by design: the caller falls back to the heuristic.
        print(f"[SCORE] ⚠ Prediction error (fallback to heuristic): {e}")
        return None
def _build_v25_features(self, data: MatchData) -> Dict[str, float]:
"""
Build the single authoritative V25 pre-match feature vector.
@@ -869,27 +915,39 @@ class SingleMatchOrchestrator:
prediction.handicap_pick, hcap_top = self._best_prob_pick(hcap_probs)
prediction.handicap_confidence = hcap_top * 100.0
base_home_xg = max(0.25, (float(data.home_goals_avg) + float(features.get("away_xga", data.away_conceded_avg))) / 2.0)
base_away_xg = max(0.25, (float(data.away_goals_avg) + float(features.get("home_xga", data.home_conceded_avg))) / 2.0)
ms_edge = prediction.ms_home_prob - prediction.ms_away_prob
total_target = max(
1.4,
min(
4.8,
(float(features.get("league_avg_goals", 2.7)) * 0.55)
+ ((float(data.home_goals_avg) + float(data.away_goals_avg)) * 0.45)
+ ((prediction.over_25_prob - prediction.under_25_prob) * 1.15),
),
)
home_xg = max(0.2, base_home_xg + (ms_edge * 0.55) + ((prediction.btts_yes_prob - 0.5) * 0.18))
away_xg = max(0.2, base_away_xg - (ms_edge * 0.55) + ((prediction.btts_yes_prob - 0.5) * 0.18))
scale = total_target / max(home_xg + away_xg, 0.1)
prediction.home_xg = round(home_xg * scale, 2)
prediction.away_xg = round(away_xg * scale, 2)
prediction.total_xg = round(prediction.home_xg + prediction.away_xg, 2)
prediction.predicted_ft_score = f"{int(round(prediction.home_xg))}-{int(round(prediction.away_xg))}"
prediction.predicted_ht_score = f"{int(round(prediction.home_xg * 0.45))}-{int(round(prediction.away_xg * 0.45))}"
# ── Score Prediction: Model-first, heuristic fallback ──────────
score_result = self._predict_score_with_model(features)
if score_result is not None:
# ML model predicted scores
prediction.home_xg = score_result["ft_home"]
prediction.away_xg = score_result["ft_away"]
prediction.total_xg = round(prediction.home_xg + prediction.away_xg, 2)
ht_home_xg = score_result["ht_home"]
ht_away_xg = score_result["ht_away"]
prediction.predicted_ft_score = f"{int(round(prediction.home_xg))}-{int(round(prediction.away_xg))}"
prediction.predicted_ht_score = f"{int(round(ht_home_xg))}-{int(round(ht_away_xg))}"
else:
# Heuristic fallback (original formula)
base_home_xg = max(0.25, (float(data.home_goals_avg) + float(features.get("away_xga", data.away_conceded_avg))) / 2.0)
base_away_xg = max(0.25, (float(data.away_goals_avg) + float(features.get("home_xga", data.home_conceded_avg))) / 2.0)
ms_edge = prediction.ms_home_prob - prediction.ms_away_prob
total_target = max(
1.4,
min(
4.8,
(float(features.get("league_avg_goals", 2.7)) * 0.55)
+ ((float(data.home_goals_avg) + float(data.away_goals_avg)) * 0.45)
+ ((prediction.over_25_prob - prediction.under_25_prob) * 1.15),
),
)
home_xg = max(0.2, base_home_xg + (ms_edge * 0.55) + ((prediction.btts_yes_prob - 0.5) * 0.18))
away_xg = max(0.2, base_away_xg - (ms_edge * 0.55) + ((prediction.btts_yes_prob - 0.5) * 0.18))
scale = total_target / max(home_xg + away_xg, 0.1)
prediction.home_xg = round(home_xg * scale, 2)
prediction.away_xg = round(away_xg * scale, 2)
prediction.total_xg = round(prediction.home_xg + prediction.away_xg, 2)
prediction.predicted_ft_score = f"{int(round(prediction.home_xg))}-{int(round(prediction.away_xg))}"
prediction.predicted_ht_score = f"{int(round(prediction.home_xg * 0.45))}-{int(round(prediction.away_xg * 0.45))}"
prediction.ft_scores_top5 = self._poisson_score_top5(prediction.home_xg, prediction.away_xg)
max_market_conf = max(