Files
fahricansecer 920ae7ce38
Deploy Iddaai Backend / build-and-deploy (push) Successful in 59s
gg
2026-05-24 02:58:53 +03:00

357 lines
14 KiB
Python

"""
Basketball V25 Predictor Package
=================================
Loads trained XGBoost + LightGBM models for basketball market predictions:
- ML (Money Line — home / away win)
- Total (Over/Under total points)
- Spread (ATS home cover / away cover)
Model files live in this directory:
xgb_basketball_v25_{market}.json — XGBoost (primary)
lgb_basketball_v25_{market}.txt — LightGBM (ensemble)
feature_cols.json — ordered feature list
"""
from __future__ import annotations
import json
import os
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
# ── Constants ─────────────────────────────────────────────────────────────────
# Absolute path of the directory that contains this file; feature_cols.json
# and the per-market model files are looked up inside it.
_DIR = os.path.dirname(os.path.abspath(__file__))
# Market keys; each one is substituted into the XGBoost / LightGBM model
# file names when the predictor loads its models.
_MARKETS = ("ml", "total", "spread")
# ── Output dataclass ──────────────────────────────────────────────────────────
@dataclass
class BasketballMatchPrediction:
    """Prediction result for a single basketball match.

    Holds one group of fields per market (money line, total, spread) plus
    some metadata. Probabilities default to an even 0.5 / 0.5 split and are
    stored as fractions; ``to_dict`` reports them as percentages.
    """

    # Identification
    match_id: str
    home_team_name: str
    away_team_name: str
    league_name: str = ""

    # Money line market
    ml_home_prob: float = 0.5
    ml_away_prob: float = 0.5
    ml_pick: str = ""
    ml_confidence: float = 0.0

    # Total points (over / under) market
    total_line: float = 0.0
    total_over_prob: float = 0.5
    total_under_prob: float = 0.5
    total_pick: str = ""
    total_confidence: float = 0.0

    # Spread (against the spread) market
    spread_home_line: float = 0.0
    spread_home_prob: float = 0.5
    spread_away_prob: float = 0.5
    spread_pick: str = ""
    spread_confidence: float = 0.0

    # Metadata
    model_version: str = "basketball_v25"
    risk_level: str = "MEDIUM"
    analysis_details: Dict[str, Any] = field(default_factory=dict)
    market_board: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the prediction as a nested dictionary.

        Probabilities are multiplied by 100 and rounded to one decimal;
        confidences are only rounded to one decimal. ``market_board`` and
        ``analysis_details`` are passed through as-is.
        """

        def as_percent(probability: float) -> float:
            return round(probability * 100, 1)

        money_line = {
            "home_prob": as_percent(self.ml_home_prob),
            "away_prob": as_percent(self.ml_away_prob),
            "pick": self.ml_pick,
            "confidence": round(self.ml_confidence, 1),
        }
        totals = {
            "line": self.total_line,
            "over_prob": as_percent(self.total_over_prob),
            "under_prob": as_percent(self.total_under_prob),
            "pick": self.total_pick,
            "confidence": round(self.total_confidence, 1),
        }
        spread = {
            "home_line": self.spread_home_line,
            "home_prob": as_percent(self.spread_home_prob),
            "away_prob": as_percent(self.spread_away_prob),
            "pick": self.spread_pick,
            "confidence": round(self.spread_confidence, 1),
        }

        payload: Dict[str, Any] = {
            "match_id": self.match_id,
            "home_team": self.home_team_name,
            "away_team": self.away_team_name,
            "league": self.league_name,
            "model": self.model_version,
            "risk_level": self.risk_level,
        }
        payload["ml"] = money_line
        payload["total"] = totals
        payload["spread"] = spread
        payload["market_board"] = self.market_board
        payload["analysis_details"] = self.analysis_details
        return payload
# ── Predictor ─────────────────────────────────────────────────────────────────
class BasketballV25Predictor:
    """
    Ensemble basketball predictor using XGBoost + LightGBM models.

    Markets:
    - ml     → home/away win probability
    - total  → over/under total points
    - spread → home/away ATS cover

    Construction loads ``feature_cols.json`` and whichever per-market model
    files exist next to this module. Load and inference problems are reported
    with ``print`` and fall back to neutral 50/50 outputs instead of raising.
    """

    # (feature column, key in ``odds_data``) pairs copied into every feature row.
    _ODDS_FEATURE_KEYS = (
        ("ml_home_odds", "ml_h"),
        ("ml_away_odds", "ml_a"),
        ("total_line", "tot_line"),
        ("total_over_odds", "tot_o"),
        ("total_under_odds", "tot_u"),
        ("spread_home_line", "spread_home_line"),
        ("spread_home_odds", "spread_h"),
        ("spread_away_odds", "spread_a"),
    )

    def __init__(self) -> None:
        """Load the feature list and every available market model."""
        self.feature_cols: List[str] = self._load_feature_cols()
        self.models: Dict[str, Any] = {}
        self._load_models()
        print(f"✅ BasketballV25Predictor ready ({len(self.models)} models loaded)")

    # ── Setup ──────────────────────────────────────────────────────────────
    def _load_feature_cols(self) -> List[str]:
        """Read the ordered feature names from ``feature_cols.json``.

        Returns:
            The parsed JSON content, or an empty list when the file cannot
            be read or parsed (the failure is printed, not raised).
        """
        path = os.path.join(_DIR, "feature_cols.json")
        try:
            # Explicit UTF-8 so decoding does not depend on the platform locale.
            with open(path, "r", encoding="utf-8") as f:
                return json.load(f)
        except Exception as e:
            print(f"⚠️ [Basketball] Could not load feature_cols.json: {e}")
            return []

    def _load_models(self) -> None:
        """Populate ``self.models`` with the models found for each market.

        A market is registered when at least one of its two model files
        loads; the missing member of the pair is stored as ``None``.
        """
        for market in _MARKETS:
            xgb_path = os.path.join(_DIR, f"xgb_basketball_v25_{market}.json")
            lgb_path = os.path.join(_DIR, f"lgb_basketball_v25_{market}.txt")
            xgb_model = self._try_load_xgb(xgb_path, market)
            lgb_model = self._try_load_lgb(lgb_path, market)
            if xgb_model is not None or lgb_model is not None:
                self.models[market] = {"xgb": xgb_model, "lgb": lgb_model}

    def _try_load_xgb(self, path: str, market: str) -> Optional[Any]:
        """Load an ``XGBClassifier`` from ``path``.

        Returns ``None`` when the file does not exist or loading fails
        (including when xgboost cannot be imported); failures are printed.
        """
        if not os.path.exists(path):
            return None
        try:
            import xgboost as xgb  # type: ignore[import-not-found]

            m = xgb.XGBClassifier()
            m.load_model(path)
            return m
        except Exception as e:
            print(f"⚠️ [Basketball] XGB {market} load failed: {e}")
            return None

    def _try_load_lgb(self, path: str, market: str) -> Optional[Any]:
        """Load a LightGBM ``Booster`` from the model text stored at ``path``.

        Returns ``None`` when the file does not exist or loading fails
        (including when lightgbm cannot be imported); failures are printed.
        """
        if not os.path.exists(path):
            return None
        try:
            import lightgbm as lgb  # type: ignore[import-not-found]

            with open(path, "r", encoding="utf-8") as f:
                model_str = f.read()
            return lgb.Booster(model_str=model_str)
        except Exception as e:
            print(f"⚠️ [Basketball] LGB {market} load failed: {e}")
            return None

    # ── Inference ──────────────────────────────────────────────────────────
    def _build_feature_row(self, odds_data: Dict[str, Any], **kwargs: Any) -> "Any":
        """Build a single-row DataFrame aligned to training feature columns.

        Args:
            odds_data: Odds keyed by short names (``ml_h``, ``tot_line``, ...);
                it may also carry values keyed directly by feature name.
            **kwargs: Extra feature values keyed by feature name. A truthy
                value here takes precedence over the same key in ``odds_data``.

        Returns:
            A one-row ``pandas.DataFrame``. When ``self.feature_cols`` is
            non-empty the frame contains exactly those columns in that order;
            otherwise it contains the odds and implied-probability columns
            computed here.
        """
        import pandas as pd  # type: ignore[import-not-found]

        row: Dict[str, float] = {}
        for col in self.feature_cols:
            # Missing / falsy values fall back to 0.0.
            row[col] = float(kwargs.get(col) or odds_data.get(col) or 0.0)

        # Map common odds keys
        for feature, odds_key in self._ODDS_FEATURE_KEYS:
            row[feature] = float(odds_data.get(odds_key) or 0.0)

        def _fair_pair(first: float, second: float) -> Optional[tuple]:
            """Return (p_first, p_second, raw_sum) or None if either odd is <= 1.01."""
            if first > 1.01 and second > 1.01:
                inv_first = 1.0 / first
                inv_second = 1.0 / second
                raw = inv_first + inv_second
                return inv_first / raw, inv_second / raw, raw
            return None

        # Implied probabilities, normalised so each pair sums to 1.
        ml = _fair_pair(row["ml_home_odds"], row["ml_away_odds"])
        if ml is not None:
            row["implied_home"], row["implied_away"], raw_ml = ml
            row["odds_overround"] = raw_ml - 1.0

        total = _fair_pair(row["total_over_odds"], row["total_under_odds"])
        if total is not None:
            row["implied_total_over"], row["implied_total_under"], _ = total

        spread = _fair_pair(row["spread_home_odds"], row["spread_away_odds"])
        if spread is not None:
            row["implied_spread_home"], row["implied_spread_away"], _ = spread

        if self.feature_cols:
            # Keep only the training columns, in training order: the derived
            # keys above may not all be training features, and extra or
            # reordered columns make the models reject the frame.
            return pd.DataFrame(
                [[row[col] for col in self.feature_cols]],
                columns=self.feature_cols,
            )
        return pd.DataFrame([row])

    def _ensemble_predict(self, market: str, df: "Any") -> List[float]:
        """Return [p_class0, p_class1] from XGB+LGB ensemble.

        Each available model contributes one probability pair; the pairs are
        averaged and renormalised. A model whose inference raises is skipped
        (the error is printed). With no usable model the result is
        ``[0.5, 0.5]``.
        """
        models = self.models.get(market, {})
        xgb_model = models.get("xgb")
        lgb_model = models.get("lgb")
        probs_list: List[List[float]] = []

        if xgb_model is not None:
            try:
                p = xgb_model.predict_proba(df)
                probs_list.append([float(p[0][0]), float(p[0][1])])
            except Exception as e:
                print(f"⚠️ [Basketball] XGB {market} inference failed: {e}")

        if lgb_model is not None:
            try:
                p_raw = lgb_model.predict(df)
                # 1-D output is read as P(class 1); 2-D output as per-class columns.
                p1 = float(p_raw[0]) if len(p_raw.shape) == 1 else float(p_raw[0][1])
                probs_list.append([1.0 - p1, p1])
            except Exception as e:
                print(f"⚠️ [Basketball] LGB {market} inference failed: {e}")

        if not probs_list:
            return [0.5, 0.5]

        p0 = sum(p[0] for p in probs_list) / len(probs_list)
        p1 = sum(p[1] for p in probs_list) / len(probs_list)
        total = p0 + p1 or 1.0  # guard against dividing by zero
        return [p0 / total, p1 / total]

    @staticmethod
    def _sidelined_count(sidelined_data: Optional[Dict[str, Any]], team_key: str) -> int:
        """Return ``sidelined_data[team_key]["totalSidelined"]`` as an int.

        Yields 0 when the payload, the team entry or the count is missing,
        ``None`` or not convertible, so malformed sidelined data cannot make
        ``predict`` raise.
        """
        try:
            team = (sidelined_data or {}).get(team_key) or {}
            return int(team.get("totalSidelined") or 0)
        except (AttributeError, TypeError, ValueError):
            return 0

    def _ensemble_sizes(self) -> Dict[str, int]:
        """Return, per loaded market, how many of its models are not ``None``."""
        return {
            market: sum(1 for model in members.values() if model is not None)
            for market, members in self.models.items()
        }

    # ── Public API ─────────────────────────────────────────────────────────
    def predict(
        self,
        match_id: str,
        home_team_id: str,
        away_team_id: str,
        home_team_name: str = "",
        away_team_name: str = "",
        league_id: str = "",
        league_name: str = "",
        odds_data: Optional[Dict[str, Any]] = None,
        sidelined_data: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> "BasketballMatchPrediction":
        """Predict the ML, total and spread markets for one match.

        Args:
            match_id: Identifier copied into the result and used in log lines.
            home_team_id, away_team_id, league_id: Accepted but not used by
                this method.
            home_team_name, away_team_name, league_name: Copied into the result.
            odds_data: Odds dictionary (see ``_build_feature_row``); ``None``
                is treated as empty.
            sidelined_data: Optional dict with ``homeTeam`` / ``awayTeam``
                entries holding ``totalSidelined``; used to derive default
                sidelined features.
            **kwargs: Extra feature values keyed by feature name; they take
                precedence over the derived sidelined defaults.

        Returns:
            A ``BasketballMatchPrediction``. When no models are loaded the
            neutral defaults are returned. If inference raises, the error is
            printed and the prediction is returned with whatever was set up
            to that point.
        """
        odds = odds_data or {}
        prediction = BasketballMatchPrediction(
            match_id=match_id,
            home_team_name=home_team_name,
            away_team_name=away_team_name,
            league_name=league_name,
        )

        # Sidelined impact
        home_sl = self._sidelined_count(sidelined_data, "homeTeam")
        away_sl = self._sidelined_count(sidelined_data, "awayTeam")
        kwargs.setdefault("home_sidelined_count", float(home_sl))
        kwargs.setdefault("away_sidelined_count", float(away_sl))
        kwargs.setdefault("sidelined_diff", float(home_sl - away_sl))
        kwargs.setdefault("missing_players_impact", float(home_sl + away_sl) / 10.0)

        if not self.models:
            print("⚠️ [Basketball] No models loaded — returning neutral defaults")
            return prediction

        try:
            df = self._build_feature_row(odds, **kwargs)

            # ── ML ── (index 0 is reported as home, index 1 as away)
            ml_probs = self._ensemble_predict("ml", df)
            prediction.ml_home_prob = ml_probs[0]
            prediction.ml_away_prob = ml_probs[1]
            prediction.ml_pick = "1" if ml_probs[0] >= ml_probs[1] else "2"
            prediction.ml_confidence = max(ml_probs) * 100.0

            # ── Total ── (index 1 is reported as over, index 0 as under)
            prediction.total_line = float(odds.get("tot_line") or 0.0)
            tot_probs = self._ensemble_predict("total", df)
            prediction.total_over_prob = tot_probs[1]
            prediction.total_under_prob = tot_probs[0]
            total_line = prediction.total_line
            prediction.total_pick = (
                f"Over {total_line}" if tot_probs[1] >= tot_probs[0] else f"Under {total_line}"
            )
            prediction.total_confidence = max(tot_probs) * 100.0

            # ── Spread ── (index 0 is reported as home cover, index 1 as away)
            prediction.spread_home_line = float(odds.get("spread_home_line") or 0.0)
            sp_probs = self._ensemble_predict("spread", df)
            prediction.spread_home_prob = sp_probs[0]
            prediction.spread_away_prob = sp_probs[1]
            home_line = prediction.spread_home_line
            away_line = -home_line
            prediction.spread_pick = (
                f"Home {home_line:+.1f}" if sp_probs[0] >= sp_probs[1] else f"Away {away_line:+.1f}"
            )
            prediction.spread_confidence = max(sp_probs) * 100.0

            # Market board summary
            prediction.market_board = {
                "ML": {
                    "1": f"{prediction.ml_home_prob * 100:.0f}%",
                    "2": f"{prediction.ml_away_prob * 100:.0f}%",
                },
                "Totals": {
                    f"Over {total_line}": f"{prediction.total_over_prob * 100:.0f}%",
                    f"Under {total_line}": f"{prediction.total_under_prob * 100:.0f}%",
                },
                "Spread": {
                    f"Home {home_line:+.1f}": f"{prediction.spread_home_prob * 100:.0f}%",
                    f"Away {away_line:+.1f}": f"{prediction.spread_away_prob * 100:.0f}%",
                },
            }

            # Risk: driven by the most confident of the three markets.
            top_conf = max(
                prediction.ml_confidence,
                prediction.total_confidence,
                prediction.spread_confidence,
            )
            prediction.risk_level = "LOW" if top_conf >= 65 else "MEDIUM" if top_conf >= 55 else "HIGH"

            prediction.analysis_details = {
                "model_version": "basketball_v25",
                "markets_predicted": list(self.models.keys()),
                # Count model objects directly; indexing the dict by its own
                # values raised and aborted every prediction at this point.
                "ensemble_size": self._ensemble_sizes(),
            }
        except Exception as e:
            print(f"⚠️ [Basketball] Prediction failed for {match_id}: {e}")

        return prediction
# ── Singleton factory ──────────────────────────────────────────────────────────
# Module-level cache for the shared predictor; stays None until first requested.
_predictor: Optional["BasketballV25Predictor"] = None


def get_basketball_v25_predictor() -> "BasketballV25Predictor":
    """Return the shared BasketballV25Predictor, creating it on first use.

    The instance is built on the first call and stored in the module-level
    ``_predictor``; later calls return that same object.
    """
    global _predictor
    if _predictor is not None:
        return _predictor
    _predictor = BasketballV25Predictor()
    return _predictor
# Names exported by `from <module> import *`: the prediction dataclass, the
# predictor class, and the accessor for the shared predictor instance.
__all__ = [
"BasketballMatchPrediction",
"BasketballV25Predictor",
"get_basketball_v25_predictor",
]