feat(ai): expand training to 68K+ matches, add score model, backfill implied odds
Deploy Iddaai Backend / build-and-deploy (push) Successful in 6s
Deploy Iddaai Backend / build-and-deploy (push) Successful in 6s
- extract_training_data.py: switch from top_leagues.json (23) to qualified_leagues.json (265)
- update_implied_odds.py: new script to backfill implied odds from real market data
- train_score_model.py: rewrite with v25 102-feature set + temporal split
- single_match_orchestrator.py: integrate ML score model with heuristic fallback
This commit is contained in:
@@ -16,6 +16,7 @@ import re
|
||||
import time
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from collections import defaultdict
|
||||
@@ -258,6 +259,51 @@ class SingleMatchOrchestrator:
|
||||
self._v27 = None
|
||||
return None
|
||||
|
||||
def _get_score_model(self) -> Optional[Dict]:
    """Return the XGBoost score-model bundle, loading it on first use (non-fatal).

    The bundle is read from ``models/xgb_score.pkl`` located under the parent
    of this file's directory.  It is accepted only when it contains all of
    the keys ``home_model``, ``away_model``, ``ht_home_model``,
    ``ht_away_model`` and ``features``.

    The outcome (bundle or ``None``) is stored on ``self._score_model_cache``
    so the file is read at most once per instance.

    Returns:
        The loaded bundle, or ``None`` when the file is missing, cannot be
        loaded, or is incomplete.  Failures never raise; they are printed.
    """
    # hasattr (not a truthiness test) so that a cached None is honoured too.
    if hasattr(self, "_score_model_cache"):
        return self._score_model_cache

    required_keys = ("home_model", "away_model", "ht_home_model", "ht_away_model", "features")
    score_model_path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        "models", "xgb_score.pkl",
    )
    try:
        if os.path.exists(score_model_path):
            # SECURITY: unpickling executes arbitrary code.  This file must
            # only ever be a trusted, locally produced training artifact.
            with open(score_model_path, "rb") as f:
                model_data = pickle.load(f)
            missing = [k for k in required_keys if k not in model_data]
            if missing:
                # Previously an incomplete bundle was dropped silently; say why.
                print(f"[SCORE] ⚠ Score model ignored (missing keys: {', '.join(missing)})")
            else:
                self._score_model_cache = model_data
                print(f"[SCORE] ✅ Score model loaded ({len(model_data['features'])} features)")
                return self._score_model_cache
    except Exception as e:
        # Deliberately broad: any load problem must degrade to the heuristic.
        print(f"[SCORE] ⚠ Load failed (non-fatal, using heuristic): {e}")
    self._score_model_cache = None
    return None
|
||||
|
||||
def _predict_score_with_model(self, features: Dict[str, float]) -> Optional[Dict[str, float]]:
    """Predict FT/HT scores using the XGBoost score model.

    Args:
        features: Mapping of feature name to value.  Only the names listed
            in the bundle's ``features`` entry are used; a name that is
            absent, or present with value ``None``, is fed to the models
            as ``0.0``.

    Returns:
        A dict with keys ``ft_home``, ``ft_away``, ``ht_home`` and
        ``ht_away``, each clamped to be non-negative and rounded to two
        decimals.  ``None`` when no score model is available or when
        prediction fails, so the caller can fall back to the heuristic.
    """
    score_model = self._get_score_model()
    if score_model is None:
        return None
    try:
        import pandas as _pd

        row = {}
        for name in score_model["features"]:
            value = features.get(name)
            # A None value used to raise in float() and discard the whole
            # ML prediction; treat it like an absent feature instead.
            row[name] = 0.0 if value is None else float(value)
        df = _pd.DataFrame([row])

        # Output key -> bundle key of the regressor that produces it.
        targets = (
            ("ft_home", "home_model"),
            ("ft_away", "away_model"),
            ("ht_home", "ht_home_model"),
            ("ht_away", "ht_away_model"),
        )
        return {
            out_key: round(max(0.0, float(score_model[model_key].predict(df)[0])), 2)
            for out_key, model_key in targets
        }
    except Exception as e:
        # Deliberately broad: any model failure must degrade to the heuristic.
        print(f"[SCORE] ⚠ Prediction error (fallback to heuristic): {e}")
        return None
|
||||
|
||||
def _build_v25_features(self, data: MatchData) -> Dict[str, float]:
|
||||
"""
|
||||
Build the single authoritative V25 pre-match feature vector.
|
||||
@@ -869,27 +915,39 @@ class SingleMatchOrchestrator:
|
||||
prediction.handicap_pick, hcap_top = self._best_prob_pick(hcap_probs)
|
||||
prediction.handicap_confidence = hcap_top * 100.0
|
||||
|
||||
base_home_xg = max(0.25, (float(data.home_goals_avg) + float(features.get("away_xga", data.away_conceded_avg))) / 2.0)
|
||||
base_away_xg = max(0.25, (float(data.away_goals_avg) + float(features.get("home_xga", data.home_conceded_avg))) / 2.0)
|
||||
ms_edge = prediction.ms_home_prob - prediction.ms_away_prob
|
||||
total_target = max(
|
||||
1.4,
|
||||
min(
|
||||
4.8,
|
||||
(float(features.get("league_avg_goals", 2.7)) * 0.55)
|
||||
+ ((float(data.home_goals_avg) + float(data.away_goals_avg)) * 0.45)
|
||||
+ ((prediction.over_25_prob - prediction.under_25_prob) * 1.15),
|
||||
),
|
||||
)
|
||||
home_xg = max(0.2, base_home_xg + (ms_edge * 0.55) + ((prediction.btts_yes_prob - 0.5) * 0.18))
|
||||
away_xg = max(0.2, base_away_xg - (ms_edge * 0.55) + ((prediction.btts_yes_prob - 0.5) * 0.18))
|
||||
scale = total_target / max(home_xg + away_xg, 0.1)
|
||||
prediction.home_xg = round(home_xg * scale, 2)
|
||||
prediction.away_xg = round(away_xg * scale, 2)
|
||||
prediction.total_xg = round(prediction.home_xg + prediction.away_xg, 2)
|
||||
|
||||
prediction.predicted_ft_score = f"{int(round(prediction.home_xg))}-{int(round(prediction.away_xg))}"
|
||||
prediction.predicted_ht_score = f"{int(round(prediction.home_xg * 0.45))}-{int(round(prediction.away_xg * 0.45))}"
|
||||
# ── Score Prediction: Model-first, heuristic fallback ──────────
|
||||
score_result = self._predict_score_with_model(features)
|
||||
if score_result is not None:
|
||||
# ML model predicted scores
|
||||
prediction.home_xg = score_result["ft_home"]
|
||||
prediction.away_xg = score_result["ft_away"]
|
||||
prediction.total_xg = round(prediction.home_xg + prediction.away_xg, 2)
|
||||
ht_home_xg = score_result["ht_home"]
|
||||
ht_away_xg = score_result["ht_away"]
|
||||
prediction.predicted_ft_score = f"{int(round(prediction.home_xg))}-{int(round(prediction.away_xg))}"
|
||||
prediction.predicted_ht_score = f"{int(round(ht_home_xg))}-{int(round(ht_away_xg))}"
|
||||
else:
|
||||
# Heuristic fallback (original formula)
|
||||
base_home_xg = max(0.25, (float(data.home_goals_avg) + float(features.get("away_xga", data.away_conceded_avg))) / 2.0)
|
||||
base_away_xg = max(0.25, (float(data.away_goals_avg) + float(features.get("home_xga", data.home_conceded_avg))) / 2.0)
|
||||
ms_edge = prediction.ms_home_prob - prediction.ms_away_prob
|
||||
total_target = max(
|
||||
1.4,
|
||||
min(
|
||||
4.8,
|
||||
(float(features.get("league_avg_goals", 2.7)) * 0.55)
|
||||
+ ((float(data.home_goals_avg) + float(data.away_goals_avg)) * 0.45)
|
||||
+ ((prediction.over_25_prob - prediction.under_25_prob) * 1.15),
|
||||
),
|
||||
)
|
||||
home_xg = max(0.2, base_home_xg + (ms_edge * 0.55) + ((prediction.btts_yes_prob - 0.5) * 0.18))
|
||||
away_xg = max(0.2, base_away_xg - (ms_edge * 0.55) + ((prediction.btts_yes_prob - 0.5) * 0.18))
|
||||
scale = total_target / max(home_xg + away_xg, 0.1)
|
||||
prediction.home_xg = round(home_xg * scale, 2)
|
||||
prediction.away_xg = round(away_xg * scale, 2)
|
||||
prediction.total_xg = round(prediction.home_xg + prediction.away_xg, 2)
|
||||
prediction.predicted_ft_score = f"{int(round(prediction.home_xg))}-{int(round(prediction.away_xg))}"
|
||||
prediction.predicted_ht_score = f"{int(round(prediction.home_xg * 0.45))}-{int(round(prediction.away_xg * 0.45))}"
|
||||
prediction.ft_scores_top5 = self._poisson_score_top5(prediction.home_xg, prediction.away_xg)
|
||||
|
||||
max_market_conf = max(
|
||||
|
||||
Reference in New Issue
Block a user