@@ -0,0 +1,356 @@
|
||||
"""
|
||||
Basketball V25 Predictor Package
|
||||
=================================
|
||||
Loads trained XGBoost + LightGBM models for basketball market predictions:
|
||||
- ML (Money Line — home / away win)
|
||||
- Total (Over/Under total points)
|
||||
- Spread (ATS home cover / away cover)
|
||||
|
||||
Model files live in this directory:
|
||||
xgb_basketball_v25_{market}.json — XGBoost (primary)
|
||||
lgb_basketball_v25_{market}.txt — LightGBM (ensemble)
|
||||
feature_cols.json — ordered feature list
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
# ── Constants ─────────────────────────────────────────────────────────────────
|
||||
# Absolute directory of this module; feature_cols.json and the model files are
# looked up relative to it.
_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
# Market keys; each one is substituted into the XGBoost / LightGBM model file
# names when the predictor loads its models.
_MARKETS = ("ml", "total", "spread")
|
||||
|
||||
|
||||
# ── Output dataclass ──────────────────────────────────────────────────────────
|
||||
|
||||
@dataclass
class BasketballMatchPrediction:
    """Prediction result for one basketball match across three markets.

    Probabilities are stored as fractions (neutral default 0.50). Confidence
    values are stored as given and only rounded on export. ``to_dict`` turns
    the instance into a plain nested dictionary.
    """

    match_id: str
    home_team_name: str
    away_team_name: str
    league_name: str = ""

    # Money Line
    ml_home_prob: float = 0.50
    ml_away_prob: float = 0.50
    ml_pick: str = ""
    ml_confidence: float = 0.0

    # Total (Over/Under)
    total_line: float = 0.0
    total_over_prob: float = 0.50
    total_under_prob: float = 0.50
    total_pick: str = ""
    total_confidence: float = 0.0

    # Spread (ATS)
    spread_home_line: float = 0.0
    spread_home_prob: float = 0.50
    spread_away_prob: float = 0.50
    spread_pick: str = ""
    spread_confidence: float = 0.0

    # Meta
    model_version: str = "basketball_v25"
    risk_level: str = "MEDIUM"
    analysis_details: Dict[str, Any] = field(default_factory=dict)
    market_board: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the prediction as a nested dictionary.

        Probabilities are reported as percentages rounded to one decimal;
        confidences are rounded to one decimal without rescaling. The
        ``market_board`` and ``analysis_details`` dictionaries are passed
        through by reference, not copied.
        """

        def as_percent(fraction: float) -> float:
            return round(fraction * 100, 1)

        money_line = {
            "home_prob": as_percent(self.ml_home_prob),
            "away_prob": as_percent(self.ml_away_prob),
            "pick": self.ml_pick,
            "confidence": round(self.ml_confidence, 1),
        }
        totals = {
            "line": self.total_line,
            "over_prob": as_percent(self.total_over_prob),
            "under_prob": as_percent(self.total_under_prob),
            "pick": self.total_pick,
            "confidence": round(self.total_confidence, 1),
        }
        spread = {
            "home_line": self.spread_home_line,
            "home_prob": as_percent(self.spread_home_prob),
            "away_prob": as_percent(self.spread_away_prob),
            "pick": self.spread_pick,
            "confidence": round(self.spread_confidence, 1),
        }

        payload: Dict[str, Any] = {
            "match_id": self.match_id,
            "home_team": self.home_team_name,
            "away_team": self.away_team_name,
            "league": self.league_name,
            "model": self.model_version,
            "risk_level": self.risk_level,
        }
        payload["ml"] = money_line
        payload["total"] = totals
        payload["spread"] = spread
        payload["market_board"] = self.market_board
        payload["analysis_details"] = self.analysis_details
        return payload
|
||||
|
||||
|
||||
# ── Predictor ─────────────────────────────────────────────────────────────────
|
||||
|
||||
class BasketballV25Predictor:
    """
    Ensemble basketball predictor using XGBoost + LightGBM models.

    Markets:
      - ml     → home/away win probability
      - total  → over/under total points
      - spread → home/away ATS cover

    On construction the ordered feature list and every available model file
    are loaded from the module directory. Missing or unreadable files are
    logged and skipped, so an instance may end up with fewer (or no) models.
    """

    # (feature column, short key used in ``odds_data``) pairs.
    _ODDS_ALIASES = (
        ("ml_home_odds", "ml_h"),
        ("ml_away_odds", "ml_a"),
        ("total_line", "tot_line"),
        ("total_over_odds", "tot_o"),
        ("total_under_odds", "tot_u"),
        ("spread_home_line", "spread_home_line"),
        ("spread_home_odds", "spread_h"),
        ("spread_away_odds", "spread_a"),
    )

    # (odds column A, odds column B, implied column A, implied column B).
    _IMPLIED_PAIRS = (
        ("ml_home_odds", "ml_away_odds", "implied_home", "implied_away"),
        ("total_over_odds", "total_under_odds", "implied_total_over", "implied_total_under"),
        ("spread_home_odds", "spread_away_odds", "implied_spread_home", "implied_spread_away"),
    )

    # Odds at or below this value are treated as missing when deriving
    # implied probabilities (presumably decimal odds — confirm with the feed).
    _MIN_VALID_ODDS = 1.01

    def __init__(self) -> None:
        self.feature_cols: List[str] = self._load_feature_cols()
        self.models: Dict[str, Any] = {}
        self._load_models()
        print(f"✅ BasketballV25Predictor ready ({len(self.models)} models loaded)")

    # ── Setup ──────────────────────────────────────────────────────────────

    def _load_feature_cols(self) -> List[str]:
        """Read ``feature_cols.json`` from the module directory.

        Returns the parsed JSON content, or an empty list (after logging) when
        the file cannot be read or parsed.
        """
        path = os.path.join(_DIR, "feature_cols.json")
        try:
            # Explicit encoding so the result does not depend on the locale.
            with open(path, "r", encoding="utf-8") as f:
                return json.load(f)
        except Exception as e:
            print(f"⚠️  [Basketball] Could not load feature_cols.json: {e}")
            return []

    def _load_models(self) -> None:
        """Populate ``self.models`` with the models found for each market.

        A market is registered when at least one of its two model files
        loaded; the missing one is stored as ``None``.
        """
        for market in _MARKETS:
            xgb_path = os.path.join(_DIR, f"xgb_basketball_v25_{market}.json")
            lgb_path = os.path.join(_DIR, f"lgb_basketball_v25_{market}.txt")

            xgb_model = self._try_load_xgb(xgb_path, market)
            lgb_model = self._try_load_lgb(lgb_path, market)

            if xgb_model is not None or lgb_model is not None:
                self.models[market] = {"xgb": xgb_model, "lgb": lgb_model}

    def _try_load_xgb(self, path: str, market: str) -> Optional[Any]:
        """Load an XGBoost classifier from *path*; return ``None`` on any failure."""
        if not os.path.exists(path):
            return None
        try:
            import xgboost as xgb  # type: ignore[import-not-found]

            m = xgb.XGBClassifier()
            m.load_model(path)
            return m
        except Exception as e:
            print(f"⚠️  [Basketball] XGB {market} load failed: {e}")
            return None

    def _try_load_lgb(self, path: str, market: str) -> Optional[Any]:
        """Load a LightGBM booster from *path*; return ``None`` on any failure."""
        if not os.path.exists(path):
            return None
        try:
            import lightgbm as lgb  # type: ignore[import-not-found]

            with open(path, "r", encoding="utf-8") as f:
                model_str = f.read()
            return lgb.Booster(model_str=model_str)
        except Exception as e:
            print(f"⚠️  [Basketball] LGB {market} load failed: {e}")
            return None

    # ── Inference ──────────────────────────────────────────────────────────

    def _build_feature_row(self, odds_data: Dict[str, Any], **kwargs: Any) -> "Any":
        """Build a single-row DataFrame aligned to training feature columns.

        Each feature is looked up by name in *kwargs*, then *odds_data*, and
        defaults to 0.0. The short odds keys (``ml_h``, ``tot_o``, …) are then
        mapped onto their feature names, and normalised implied probabilities
        are derived for each market whose two odds are both above the validity
        threshold.

        When ``self.feature_cols`` is non-empty the returned frame contains
        exactly those columns in that order. Otherwise every computed column
        is returned.

        Raises:
            ValueError / TypeError: if a supplied value cannot be converted to
                ``float``.
        """
        import pandas as pd  # type: ignore[import-not-found]

        row: Dict[str, float] = {}
        for col in self.feature_cols:
            row[col] = float(kwargs.get(col) or odds_data.get(col) or 0.0)

        # Map common odds keys. Fall back to a value already supplied under
        # the feature's own name instead of clobbering it with 0.0.
        for feature, alias in self._ODDS_ALIASES:
            row[feature] = float(odds_data.get(alias) or row.get(feature) or 0.0)

        # Implied probabilities, normalised so each pair sums to 1.
        threshold = self._MIN_VALID_ODDS
        for odds_a, odds_b, implied_a, implied_b in self._IMPLIED_PAIRS:
            a, b = row[odds_a], row[odds_b]
            if a > threshold and b > threshold:
                inv_a, inv_b = 1.0 / a, 1.0 / b
                raw = inv_a + inv_b
                row[implied_a] = inv_a / raw
                row[implied_b] = inv_b / raw
                if implied_a == "implied_home":
                    row["odds_overround"] = raw - 1.0

        frame = pd.DataFrame([row])
        if self.feature_cols:
            # Drop helper columns the models were not trained on and enforce
            # the training column order.
            return frame[list(self.feature_cols)]
        return frame

    def _ensemble_predict(self, market: str, df: "Any") -> List[float]:
        """Return [p_class0, p_class1] from XGB+LGB ensemble.

        Each available model contributes one probability pair; the pairs are
        averaged and renormalised. A model that raises during inference is
        logged and skipped. With no usable model the result is ``[0.5, 0.5]``.
        """
        models = self.models.get(market, {})
        xgb_model = models.get("xgb")
        lgb_model = models.get("lgb")

        probs_list: List[List[float]] = []

        if xgb_model is not None:
            try:
                p = xgb_model.predict_proba(df)
                probs_list.append([float(p[0][0]), float(p[0][1])])
            except Exception as e:
                print(f"⚠️  [Basketball] XGB {market} inference failed: {e}")

        if lgb_model is not None:
            try:
                p_raw = lgb_model.predict(df)
                # 1-D output is read as P(class 1); 2-D output as per-class columns.
                p1 = float(p_raw[0]) if len(p_raw.shape) == 1 else float(p_raw[0][1])
                probs_list.append([1.0 - p1, p1])
            except Exception as e:
                print(f"⚠️  [Basketball] LGB {market} inference failed: {e}")

        if not probs_list:
            return [0.5, 0.5]

        p0 = sum(p[0] for p in probs_list) / len(probs_list)
        p1 = sum(p[1] for p in probs_list) / len(probs_list)
        total = p0 + p1 or 1.0  # guard against division by zero
        return [p0 / total, p1 / total]

    def _ensemble_sizes(self) -> Dict[str, int]:
        """Return, per loaded market, how many of its models are available."""
        return {
            market: sum(1 for model in bundle.values() if model is not None)
            for market, bundle in self.models.items()
        }

    @staticmethod
    def _sidelined_total(sidelined_data: Optional[Dict[str, Any]], side: str) -> int:
        """Return ``sidelined_data[side]["totalSidelined"]`` as an int, or 0.

        Missing, ``None``, non-dict or non-numeric values all yield 0 so that
        malformed sidelined data cannot abort a prediction.
        """
        if not isinstance(sidelined_data, dict):
            return 0
        team = sidelined_data.get(side)
        if not isinstance(team, dict):
            return 0
        try:
            return int(team.get("totalSidelined") or 0)
        except (TypeError, ValueError):
            return 0

    # ── Public API ─────────────────────────────────────────────────────────

    def predict(
        self,
        match_id: str,
        home_team_id: str,
        away_team_id: str,
        home_team_name: str = "",
        away_team_name: str = "",
        league_id: str = "",
        league_name: str = "",
        odds_data: Optional[Dict[str, Any]] = None,
        sidelined_data: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> "BasketballMatchPrediction":
        """Predict the ML, total and spread markets for one match.

        Args:
            match_id: Identifier copied into the result and used in log lines.
            home_team_id, away_team_id, league_id: Accepted for interface
                compatibility; not read by this method.
            home_team_name, away_team_name, league_name: Copied into the result.
            odds_data: Odds and lines keyed by short names (``ml_h``, ``ml_a``,
                ``tot_line``, ``tot_o``, ``tot_u``, ``spread_home_line``,
                ``spread_h``, ``spread_a``) and/or by feature name.
            sidelined_data: Optional mapping with ``homeTeam`` / ``awayTeam``
                entries carrying a ``totalSidelined`` count.
            **kwargs: Extra feature values by feature name; they take
                precedence over ``odds_data`` for the same name.

        Returns:
            A ``BasketballMatchPrediction``. When no models are loaded, or
            when inference fails, fields not yet computed keep their neutral
            defaults.
        """
        odds = odds_data or {}
        prediction = BasketballMatchPrediction(
            match_id=match_id,
            home_team_name=home_team_name,
            away_team_name=away_team_name,
            league_name=league_name,
        )

        # Sidelined impact
        home_sl = self._sidelined_total(sidelined_data, "homeTeam")
        away_sl = self._sidelined_total(sidelined_data, "awayTeam")
        kwargs.setdefault("home_sidelined_count", float(home_sl))
        kwargs.setdefault("away_sidelined_count", float(away_sl))
        kwargs.setdefault("sidelined_diff", float(home_sl - away_sl))
        kwargs.setdefault("missing_players_impact", float(home_sl + away_sl) / 10.0)

        if not self.models:
            print("⚠️  [Basketball] No models loaded — returning neutral defaults")
            return prediction

        try:
            df = self._build_feature_row(odds, **kwargs)

            # ── ML ──
            # NOTE(review): class 0 is read as "home wins" — confirm against
            # the label encoding used in training.
            ml_probs = self._ensemble_predict("ml", df)
            prediction.ml_home_prob = ml_probs[0]
            prediction.ml_away_prob = ml_probs[1]
            prediction.ml_pick = "1" if ml_probs[0] >= ml_probs[1] else "2"
            prediction.ml_confidence = max(ml_probs) * 100.0

            # ── Total ──
            # NOTE(review): class 1 is read as "over" — confirm against training.
            prediction.total_line = float(odds.get("tot_line") or 0.0)
            tot_probs = self._ensemble_predict("total", df)
            prediction.total_over_prob = tot_probs[1]
            prediction.total_under_prob = tot_probs[0]
            total_line = prediction.total_line
            prediction.total_pick = (
                f"Over {total_line}" if tot_probs[1] >= tot_probs[0] else f"Under {total_line}"
            )
            prediction.total_confidence = max(tot_probs) * 100.0

            # ── Spread ──
            # NOTE(review): class 0 is read as "home covers" — confirm against training.
            prediction.spread_home_line = float(odds.get("spread_home_line") or 0.0)
            sp_probs = self._ensemble_predict("spread", df)
            prediction.spread_home_prob = sp_probs[0]
            prediction.spread_away_prob = sp_probs[1]
            home_line = prediction.spread_home_line
            # Subtract from 0.0 rather than negate so a zero line stays +0.0
            # and is not rendered as "Away -0.0".
            away_line = 0.0 - home_line
            prediction.spread_pick = (
                f"Home {home_line:+.1f}" if sp_probs[0] >= sp_probs[1] else f"Away {away_line:+.1f}"
            )
            prediction.spread_confidence = max(sp_probs) * 100.0

            # Market board summary
            prediction.market_board = {
                "ML": {
                    "1": f"{prediction.ml_home_prob * 100:.0f}%",
                    "2": f"{prediction.ml_away_prob * 100:.0f}%",
                },
                "Totals": {
                    f"Over {total_line}": f"{prediction.total_over_prob * 100:.0f}%",
                    f"Under {total_line}": f"{prediction.total_under_prob * 100:.0f}%",
                },
                "Spread": {
                    f"Home {home_line:+.1f}": f"{prediction.spread_home_prob * 100:.0f}%",
                    f"Away {away_line:+.1f}": f"{prediction.spread_away_prob * 100:.0f}%",
                },
            }

            # Risk: driven by the most confident of the three markets.
            top_conf = max(
                prediction.ml_confidence,
                prediction.total_confidence,
                prediction.spread_confidence,
            )
            prediction.risk_level = "LOW" if top_conf >= 65 else "MEDIUM" if top_conf >= 55 else "HIGH"
            prediction.analysis_details = {
                "model_version": "basketball_v25",
                "markets_predicted": list(self.models.keys()),
                "ensemble_size": self._ensemble_sizes(),
            }

        except Exception as e:
            # Best effort: keep whatever was computed before the failure.
            print(f"⚠️  [Basketball] Prediction failed for {match_id}: {e}")

        return prediction
|
||||
|
||||
|
||||
# ── Singleton factory ──────────────────────────────────────────────────────────
|
||||
|
||||
# Process-wide predictor instance; created on first request.
_predictor: Optional[BasketballV25Predictor] = None


def get_basketball_v25_predictor() -> BasketballV25Predictor:
    """Return the shared BasketballV25Predictor, building it on first use."""
    global _predictor
    if _predictor is not None:
        return _predictor
    _predictor = BasketballV25Predictor()
    return _predictor
|
||||
|
||||
|
||||
# Names exported by a star-import of this module.
__all__ = ["BasketballMatchPrediction", "BasketballV25Predictor", "get_basketball_v25_predictor"]
|
||||
Reference in New Issue
Block a user