# Extraction metadata (not part of the program): Python source, 357 lines, 14 KiB.
"""
Basketball V25 Predictor Package
=================================
Loads trained XGBoost + LightGBM models for basketball market predictions:
  - ML     (Money Line — home / away win)
  - Total  (Over/Under total points)
  - Spread (ATS home cover / away cover)

Model files live in this directory:
  xgb_basketball_v25_{market}.json  — XGBoost (primary)
  lgb_basketball_v25_{market}.txt   — LightGBM (ensemble)
  feature_cols.json                 — ordered feature list
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
from dataclasses import dataclass, field
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
# ── Constants ─────────────────────────────────────────────────────────────────
|
|
# Directory containing this module; model artefacts and feature_cols.json are
# looked up relative to it.
_DIR = os.path.dirname(os.path.abspath(__file__))
# Market identifiers; each one is substituted into the model file names.
_MARKETS = ("ml", "total", "spread")
|
|
|
|
|
|
# ── Output dataclass ──────────────────────────────────────────────────────────
|
|
|
|
@dataclass
class BasketballMatchPrediction:
    """Complete basketball match prediction output.

    Probabilities are stored as fractions in [0, 1]; confidences are stored
    as percentages (0-100). Defaults describe a neutral 50/50 prediction so
    an instance is usable even when no model could be evaluated.
    """

    match_id: str
    home_team_name: str
    away_team_name: str
    league_name: str = ""

    # Money Line
    ml_home_prob: float = 0.50
    ml_away_prob: float = 0.50
    ml_pick: str = ""
    ml_confidence: float = 0.0

    # Total (Over/Under)
    total_line: float = 0.0
    total_over_prob: float = 0.50
    total_under_prob: float = 0.50
    total_pick: str = ""
    total_confidence: float = 0.0

    # Spread (ATS)
    spread_home_line: float = 0.0
    spread_home_prob: float = 0.50
    spread_away_prob: float = 0.50
    spread_pick: str = ""
    spread_confidence: float = 0.0

    # Meta
    model_version: str = "basketball_v25"
    risk_level: str = "MEDIUM"
    # default_factory gives every instance its own dict (no shared mutable default).
    analysis_details: Dict[str, Any] = field(default_factory=dict)
    market_board: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a nested, JSON-friendly view of the prediction.

        Probabilities are converted to percentages and, like the
        confidences, rounded to one decimal place. ``market_board`` and
        ``analysis_details`` are returned by reference, not copied.
        """
        return {
            "match_id": self.match_id,
            "home_team": self.home_team_name,
            "away_team": self.away_team_name,
            "league": self.league_name,
            "model": self.model_version,
            "risk_level": self.risk_level,
            "ml": {
                "home_prob": round(self.ml_home_prob * 100, 1),
                "away_prob": round(self.ml_away_prob * 100, 1),
                "pick": self.ml_pick,
                "confidence": round(self.ml_confidence, 1),
            },
            "total": {
                "line": self.total_line,
                "over_prob": round(self.total_over_prob * 100, 1),
                "under_prob": round(self.total_under_prob * 100, 1),
                "pick": self.total_pick,
                "confidence": round(self.total_confidence, 1),
            },
            "spread": {
                "home_line": self.spread_home_line,
                "home_prob": round(self.spread_home_prob * 100, 1),
                "away_prob": round(self.spread_away_prob * 100, 1),
                "pick": self.spread_pick,
                "confidence": round(self.spread_confidence, 1),
            },
            "market_board": self.market_board,
            "analysis_details": self.analysis_details,
        }
|
|
|
|
|
|
# ── Predictor ─────────────────────────────────────────────────────────────────
|
|
|
|
class BasketballV25Predictor:
    """
    Ensemble basketball predictor using XGBoost + LightGBM models.

    Markets:
      - ml      → home/away win probability
      - total   → over/under total points
      - spread  → home/away ATS cover

    Loading and inference are best-effort: failures are reported on stdout
    and the affected model/market falls back to a neutral 50/50 output
    instead of raising.
    """

    # (short key in ``odds_data``, feature column) pairs copied into the row.
    _ODDS_KEY_MAP = (
        ("ml_h", "ml_home_odds"),
        ("ml_a", "ml_away_odds"),
        ("tot_line", "total_line"),
        ("tot_o", "total_over_odds"),
        ("tot_u", "total_under_odds"),
        ("spread_home_line", "spread_home_line"),
        ("spread_h", "spread_home_odds"),
        ("spread_a", "spread_away_odds"),
    )

    def __init__(self) -> None:
        """Load the feature list and every model file that can be found."""
        self.feature_cols: List[str] = self._load_feature_cols()
        self.models: Dict[str, Any] = {}
        self._load_models()
        print(f"✅ BasketballV25Predictor ready ({len(self.models)} models loaded)")

    # ── Setup ──────────────────────────────────────────────────────────────

    def _load_feature_cols(self) -> List[str]:
        """Read ``feature_cols.json`` from the module directory.

        Returns the parsed JSON content, or an empty list (after printing a
        warning) when the file is missing, unreadable or not valid JSON.
        """
        path = os.path.join(_DIR, "feature_cols.json")
        try:
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(path, "r", encoding="utf-8") as f:
                return json.load(f)
        except Exception as e:
            print(f"⚠️ [Basketball] Could not load feature_cols.json: {e}")
            return []

    def _load_models(self) -> None:
        """Populate ``self.models`` with ``{"xgb": ..., "lgb": ...}`` per market.

        A market is registered as soon as at least one of its two models
        loaded; the missing one is stored as ``None``.
        """
        for market in _MARKETS:
            xgb_path = os.path.join(_DIR, f"xgb_basketball_v25_{market}.json")
            lgb_path = os.path.join(_DIR, f"lgb_basketball_v25_{market}.txt")

            xgb_model = self._try_load_xgb(xgb_path, market)
            lgb_model = self._try_load_lgb(lgb_path, market)

            if xgb_model is not None or lgb_model is not None:
                self.models[market] = {"xgb": xgb_model, "lgb": lgb_model}

    def _try_load_xgb(self, path: str, market: str) -> Optional[Any]:
        """Return an ``XGBClassifier`` loaded from *path*, or ``None``.

        ``None`` is returned silently when the file does not exist, and with
        a printed warning when importing xgboost or loading the model fails.
        """
        if not os.path.exists(path):
            return None
        try:
            # Imported lazily so the module still imports without xgboost.
            import xgboost as xgb  # type: ignore[import-not-found]

            m = xgb.XGBClassifier()
            m.load_model(path)
            return m
        except Exception as e:
            print(f"⚠️ [Basketball] XGB {market} load failed: {e}")
            return None

    def _try_load_lgb(self, path: str, market: str) -> Optional[Any]:
        """Return a LightGBM ``Booster`` built from the text in *path*, or ``None``.

        ``None`` is returned silently when the file does not exist, and with
        a printed warning when importing lightgbm or parsing the model fails.
        """
        if not os.path.exists(path):
            return None
        try:
            # Imported lazily so the module still imports without lightgbm.
            import lightgbm as lgb  # type: ignore[import-not-found]

            with open(path, "r", encoding="utf-8") as f:
                model_str = f.read()
            return lgb.Booster(model_str=model_str)
        except Exception as e:
            print(f"⚠️ [Basketball] LGB {market} load failed: {e}")
            return None

    # ── Inference ──────────────────────────────────────────────────────────

    @staticmethod
    def _devig(odd_a: float, odd_b: float) -> Optional[tuple]:
        """Normalise a two-way price into implied probabilities.

        Returns ``(p_a, p_b, overround)`` where the probabilities sum to 1
        and ``overround`` is the sum of the raw inverse odds minus 1, or
        ``None`` unless both odds are greater than 1.01.
        """
        if not (odd_a > 1.01 and odd_b > 1.01):
            return None
        inv_a = 1.0 / odd_a
        inv_b = 1.0 / odd_b
        book = inv_a + inv_b
        return inv_a / book, inv_b / book, book - 1.0

    def _build_feature_row(self, odds_data: Dict[str, Any], **kwargs: Any) -> "Any":
        """Build a single-row DataFrame aligned to training feature columns.

        Each feature is taken from ``kwargs`` first, then ``odds_data``, then
        defaults to 0.0 (falsy values such as ``None`` or 0 fall through).
        Short odds keys (``ml_h``, ``tot_o`` …) are then mapped onto their
        feature columns and implied probabilities are derived from them.

        When ``self.feature_cols`` is non-empty the result contains exactly
        those columns in that order, so derived columns the models were not
        trained on cannot cause a feature mismatch. With an empty feature
        list all mapped/derived columns are returned.
        """
        import pandas as pd  # type: ignore[import-not-found]

        row: Dict[str, float] = {
            col: float(kwargs.get(col) or odds_data.get(col) or 0.0)
            for col in self.feature_cols
        }

        # Map common odds keys. If the short key is absent, keep a value that
        # was already supplied under the feature name instead of zeroing it.
        for src_key, feature in self._ODDS_KEY_MAP:
            row[feature] = float(odds_data.get(src_key) or row.get(feature) or 0.0)

        # Implied probabilities (only when both sides of a market are priced).
        ml = self._devig(row["ml_home_odds"], row["ml_away_odds"])
        if ml is not None:
            row["implied_home"], row["implied_away"], row["odds_overround"] = ml

        totals = self._devig(row["total_over_odds"], row["total_under_odds"])
        if totals is not None:
            row["implied_total_over"], row["implied_total_under"] = totals[:2]

        spread = self._devig(row["spread_home_odds"], row["spread_away_odds"])
        if spread is not None:
            row["implied_spread_home"], row["implied_spread_away"] = spread[:2]

        frame = pd.DataFrame([row])
        if self.feature_cols:
            # Every feature column was seeded above, so plain selection is safe.
            frame = frame[self.feature_cols]
        return frame

    def _ensemble_predict(self, market: str, df: "Any") -> List[float]:
        """Return [p_class0, p_class1] from XGB+LGB ensemble.

        The available models' probabilities are averaged with equal weight
        and re-normalised. A model whose inference raises is skipped with a
        printed warning; if nothing produced an output the neutral
        ``[0.5, 0.5]`` is returned.
        """
        models = self.models.get(market, {})
        xgb_model = models.get("xgb")
        lgb_model = models.get("lgb")

        probs_list: List[List[float]] = []

        if xgb_model is not None:
            try:
                p = xgb_model.predict_proba(df)
                probs_list.append([float(p[0][0]), float(p[0][1])])
            except Exception as e:
                print(f"⚠️ [Basketball] XGB {market} inference failed: {e}")

        if lgb_model is not None:
            try:
                p_raw = lgb_model.predict(df)
                # NOTE(review): a 1-D output is taken to be P(class 1) —
                # presumably a binary objective; confirm against training.
                p1 = float(p_raw[0]) if len(p_raw.shape) == 1 else float(p_raw[0][1])
                probs_list.append([1.0 - p1, p1])
            except Exception as e:
                print(f"⚠️ [Basketball] LGB {market} inference failed: {e}")

        if not probs_list:
            return [0.5, 0.5]

        p0 = sum(p[0] for p in probs_list) / len(probs_list)
        p1 = sum(p[1] for p in probs_list) / len(probs_list)
        total = p0 + p1 or 1.0  # guard against a zero denominator
        return [p0 / total, p1 / total]

    @staticmethod
    def _sidelined_count(sidelined_data: Optional[Dict[str, Any]], side: str) -> int:
        """Return ``sidelined_data[side]["totalSidelined"]`` as an int.

        Missing, ``None`` or non-numeric values at any level yield 0 so that
        malformed sidelined payloads never abort a prediction.
        """
        team = (sidelined_data or {}).get(side) or {}
        try:
            return int(team.get("totalSidelined", 0) or 0)
        except (AttributeError, TypeError, ValueError):
            return 0

    def _ensemble_sizes(self) -> Dict[str, int]:
        """Return, per loaded market, how many of its models are not ``None``."""
        return {
            market: sum(model is not None for model in members.values())
            for market, members in self.models.items()
        }

    # ── Public API ─────────────────────────────────────────────────────────

    def predict(
        self,
        match_id: str,
        home_team_id: str,
        away_team_id: str,
        home_team_name: str = "",
        away_team_name: str = "",
        league_id: str = "",
        league_name: str = "",
        odds_data: Optional[Dict[str, Any]] = None,
        sidelined_data: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> BasketballMatchPrediction:
        """Predict the ML, total and spread markets for one match.

        Args:
            match_id: Identifier copied into the result and used in log output.
            home_team_id, away_team_id, league_id: Accepted for interface
                compatibility; not used by this method.
            home_team_name, away_team_name, league_name: Copied into the result.
            odds_data: Odds keyed by ``ml_h``, ``ml_a``, ``tot_line``, ``tot_o``,
                ``tot_u``, ``spread_home_line``, ``spread_h``, ``spread_a``
                (and/or by feature column name).
            sidelined_data: Optional dict with ``homeTeam`` / ``awayTeam``
                entries carrying ``totalSidelined``.
            **kwargs: Extra feature values by column name; they take
                precedence over ``odds_data`` and the sidelined defaults.

        Returns:
            A ``BasketballMatchPrediction``. If no models are loaded the
            neutral defaults are returned; if inference raises, the error is
            printed and whatever was filled in so far is returned.
        """
        odds = odds_data or {}
        prediction = BasketballMatchPrediction(
            match_id=match_id,
            home_team_name=home_team_name,
            away_team_name=away_team_name,
            league_name=league_name,
        )

        # Sidelined impact (setdefault: explicit kwargs win over derived values)
        home_sl = self._sidelined_count(sidelined_data, "homeTeam")
        away_sl = self._sidelined_count(sidelined_data, "awayTeam")
        kwargs.setdefault("home_sidelined_count", float(home_sl))
        kwargs.setdefault("away_sidelined_count", float(away_sl))
        kwargs.setdefault("sidelined_diff", float(home_sl - away_sl))
        kwargs.setdefault("missing_players_impact", float(home_sl + away_sl) / 10.0)

        if not self.models:
            print("⚠️ [Basketball] No models loaded — returning neutral defaults")
            return prediction

        try:
            df = self._build_feature_row(odds, **kwargs)

            # NOTE(review): class index 0 is treated as the home side for ML
            # and spread, while index 1 is "over" for totals — confirm this
            # matches the label encoding used when the models were trained.

            # ── ML ──
            ml_probs = self._ensemble_predict("ml", df)
            prediction.ml_home_prob = ml_probs[0]
            prediction.ml_away_prob = ml_probs[1]
            prediction.ml_pick = "1" if ml_probs[0] >= ml_probs[1] else "2"
            prediction.ml_confidence = max(ml_probs) * 100.0

            # ── Total ──
            prediction.total_line = float(odds.get("tot_line") or 0.0)
            tot_probs = self._ensemble_predict("total", df)
            prediction.total_over_prob = tot_probs[1]
            prediction.total_under_prob = tot_probs[0]
            total_line = prediction.total_line
            prediction.total_pick = (
                f"Over {total_line}" if tot_probs[1] >= tot_probs[0] else f"Under {total_line}"
            )
            prediction.total_confidence = max(tot_probs) * 100.0

            # ── Spread ──
            prediction.spread_home_line = float(odds.get("spread_home_line") or 0.0)
            sp_probs = self._ensemble_predict("spread", df)
            prediction.spread_home_prob = sp_probs[0]
            prediction.spread_away_prob = sp_probs[1]
            home_line = prediction.spread_home_line
            away_line = -home_line
            prediction.spread_pick = (
                f"Home {home_line:+.1f}" if sp_probs[0] >= sp_probs[1] else f"Away {away_line:+.1f}"
            )
            prediction.spread_confidence = max(sp_probs) * 100.0

            # Market board summary
            prediction.market_board = {
                "ML": {
                    "1": f"{prediction.ml_home_prob * 100:.0f}%",
                    "2": f"{prediction.ml_away_prob * 100:.0f}%",
                },
                "Totals": {
                    f"Over {total_line}": f"{prediction.total_over_prob * 100:.0f}%",
                    f"Under {total_line}": f"{prediction.total_under_prob * 100:.0f}%",
                },
                "Spread": {
                    f"Home {home_line:+.1f}": f"{prediction.spread_home_prob * 100:.0f}%",
                    f"Away {away_line:+.1f}": f"{prediction.spread_away_prob * 100:.0f}%",
                },
            }

            # Risk: driven by the most confident of the three markets.
            top_conf = max(prediction.ml_confidence, prediction.total_confidence, prediction.spread_confidence)
            prediction.risk_level = "LOW" if top_conf >= 65 else "MEDIUM" if top_conf >= 55 else "HIGH"
            prediction.analysis_details = {
                "model_version": "basketball_v25",
                "markets_predicted": list(self.models.keys()),
                "ensemble_size": self._ensemble_sizes(),
            }

        except Exception as e:
            print(f"⚠️ [Basketball] Prediction failed for {match_id}: {e}")

        return prediction
|
|
|
|
|
|
# ── Singleton factory ──────────────────────────────────────────────────────────
|
|
|
|
# Module-level cache for the lazily created predictor instance.
_predictor: Optional[BasketballV25Predictor] = None


def get_basketball_v25_predictor() -> BasketballV25Predictor:
    """Return the singleton BasketballV25Predictor (lazy-loaded).

    The predictor is constructed on the first call and the same instance is
    returned on every later call.
    """
    global _predictor
    if _predictor is None:
        _predictor = BasketballV25Predictor()
    return _predictor
|
|
|
|
|
|
# Public API of this module.
__all__ = [
    "BasketballMatchPrediction",
    "BasketballV25Predictor",
    "get_basketball_v25_predictor",
]
|