""" Basketball V25 Predictor Package ================================= Loads trained XGBoost + LightGBM models for basketball market predictions: - ML (Money Line — home / away win) - Total (Over/Under total points) - Spread (ATS home cover / away cover) Model files live in this directory: xgb_basketball_v25_{market}.json — XGBoost (primary) lgb_basketball_v25_{market}.txt — LightGBM (ensemble) feature_cols.json — ordered feature list """ from __future__ import annotations import json import os from dataclasses import dataclass, field from typing import Any, Dict, List, Optional # ── Constants ───────────────────────────────────────────────────────────────── _DIR = os.path.dirname(os.path.abspath(__file__)) _MARKETS = ("ml", "total", "spread") # ── Output dataclass ────────────────────────────────────────────────────────── @dataclass class BasketballMatchPrediction: """Complete basketball match prediction output.""" match_id: str home_team_name: str away_team_name: str league_name: str = "" # Money Line ml_home_prob: float = 0.50 ml_away_prob: float = 0.50 ml_pick: str = "" ml_confidence: float = 0.0 # Total (Over/Under) total_line: float = 0.0 total_over_prob: float = 0.50 total_under_prob: float = 0.50 total_pick: str = "" total_confidence: float = 0.0 # Spread (ATS) spread_home_line: float = 0.0 spread_home_prob: float = 0.50 spread_away_prob: float = 0.50 spread_pick: str = "" spread_confidence: float = 0.0 # Meta model_version: str = "basketball_v25" risk_level: str = "MEDIUM" analysis_details: Dict[str, Any] = field(default_factory=dict) market_board: Dict[str, Any] = field(default_factory=dict) def to_dict(self) -> Dict[str, Any]: return { "match_id": self.match_id, "home_team": self.home_team_name, "away_team": self.away_team_name, "league": self.league_name, "model": self.model_version, "risk_level": self.risk_level, "ml": { "home_prob": round(self.ml_home_prob * 100, 1), "away_prob": round(self.ml_away_prob * 100, 1), "pick": self.ml_pick, "confidence": round(self.ml_confidence, 1), }, "total": { "line": self.total_line, "over_prob": round(self.total_over_prob * 100, 1), "under_prob": round(self.total_under_prob * 100, 1), "pick": self.total_pick, "confidence": round(self.total_confidence, 1), }, "spread": { "home_line": self.spread_home_line, "home_prob": round(self.spread_home_prob * 100, 1), "away_prob": round(self.spread_away_prob * 100, 1), "pick": self.spread_pick, "confidence": round(self.spread_confidence, 1), }, "market_board": self.market_board, "analysis_details": self.analysis_details, } # ── Predictor ───────────────────────────────────────────────────────────────── class BasketballV25Predictor: """ Ensemble basketball predictor using XGBoost + LightGBM models. Markets: - ml → home/away win probability - total → over/under total points - spread → home/away ATS cover """ def __init__(self) -> None: self.feature_cols: List[str] = self._load_feature_cols() self.models: Dict[str, Any] = {} self._load_models() print(f"✅ BasketballV25Predictor ready ({len(self.models)} models loaded)") # ── Setup ────────────────────────────────────────────────────────────── def _load_feature_cols(self) -> List[str]: path = os.path.join(_DIR, "feature_cols.json") try: with open(path, "r") as f: return json.load(f) except Exception as e: print(f"⚠️ [Basketball] Could not load feature_cols.json: {e}") return [] def _load_models(self) -> None: for market in _MARKETS: xgb_path = os.path.join(_DIR, f"xgb_basketball_v25_{market}.json") lgb_path = os.path.join(_DIR, f"lgb_basketball_v25_{market}.txt") xgb_model = self._try_load_xgb(xgb_path, market) lgb_model = self._try_load_lgb(lgb_path, market) if xgb_model is not None or lgb_model is not None: self.models[market] = {"xgb": xgb_model, "lgb": lgb_model} def _try_load_xgb(self, path: str, market: str) -> Optional[Any]: if not os.path.exists(path): return None try: import xgboost as xgb # type: ignore[import-not-found] m = xgb.XGBClassifier() m.load_model(path) return m except Exception as e: print(f"⚠️ [Basketball] XGB {market} load failed: {e}") return None def _try_load_lgb(self, path: str, market: str) -> Optional[Any]: if not os.path.exists(path): return None try: import lightgbm as lgb # type: ignore[import-not-found] with open(path, "r", encoding="utf-8") as f: model_str = f.read() return lgb.Booster(model_str=model_str) except Exception as e: print(f"⚠️ [Basketball] LGB {market} load failed: {e}") return None # ── Inference ────────────────────────────────────────────────────────── def _build_feature_row(self, odds_data: Dict[str, Any], **kwargs: Any) -> "Any": """Build a single-row DataFrame aligned to training feature columns.""" import pandas as pd # type: ignore[import-not-found] row: Dict[str, float] = {} for col in self.feature_cols: row[col] = float(kwargs.get(col) or odds_data.get(col) or 0.0) # Map common odds keys row["ml_home_odds"] = float(odds_data.get("ml_h") or 0.0) row["ml_away_odds"] = float(odds_data.get("ml_a") or 0.0) row["total_line"] = float(odds_data.get("tot_line") or 0.0) row["total_over_odds"] = float(odds_data.get("tot_o") or 0.0) row["total_under_odds"] = float(odds_data.get("tot_u") or 0.0) row["spread_home_line"] = float(odds_data.get("spread_home_line") or 0.0) row["spread_home_odds"] = float(odds_data.get("spread_h") or 0.0) row["spread_away_odds"] = float(odds_data.get("spread_a") or 0.0) # Implied probabilities def _imp(odd: float) -> float: return (1.0 / odd) if odd > 1.01 else 0.5 ml_h = row["ml_home_odds"] ml_a = row["ml_away_odds"] if ml_h > 1.01 and ml_a > 1.01: raw = _imp(ml_h) + _imp(ml_a) row["implied_home"] = _imp(ml_h) / raw row["implied_away"] = _imp(ml_a) / raw row["odds_overround"] = raw - 1.0 tot_o = row["total_over_odds"] tot_u = row["total_under_odds"] if tot_o > 1.01 and tot_u > 1.01: raw = _imp(tot_o) + _imp(tot_u) row["implied_total_over"] = _imp(tot_o) / raw row["implied_total_under"] = _imp(tot_u) / raw sp_h = row["spread_home_odds"] sp_a = row["spread_away_odds"] if sp_h > 1.01 and sp_a > 1.01: raw = _imp(sp_h) + _imp(sp_a) row["implied_spread_home"] = _imp(sp_h) / raw row["implied_spread_away"] = _imp(sp_a) / raw return pd.DataFrame([row]) def _ensemble_predict(self, market: str, df: "Any") -> List[float]: """Return [p_class0, p_class1] from XGB+LGB ensemble.""" models = self.models.get(market, {}) xgb_model = models.get("xgb") lgb_model = models.get("lgb") probs_list: List[List[float]] = [] if xgb_model is not None: try: p = xgb_model.predict_proba(df) probs_list.append([float(p[0][0]), float(p[0][1])]) except Exception as e: print(f"⚠️ [Basketball] XGB {market} inference failed: {e}") if lgb_model is not None: try: p_raw = lgb_model.predict(df) p1 = float(p_raw[0]) if len(p_raw.shape) == 1 else float(p_raw[0][1]) probs_list.append([1.0 - p1, p1]) except Exception as e: print(f"⚠️ [Basketball] LGB {market} inference failed: {e}") if not probs_list: return [0.5, 0.5] p0 = sum(p[0] for p in probs_list) / len(probs_list) p1 = sum(p[1] for p in probs_list) / len(probs_list) total = p0 + p1 or 1.0 return [p0 / total, p1 / total] # ── Public API ───────────────────────────────────────────────────────── def predict( self, match_id: str, home_team_id: str, away_team_id: str, home_team_name: str = "", away_team_name: str = "", league_id: str = "", league_name: str = "", odds_data: Optional[Dict[str, Any]] = None, sidelined_data: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> BasketballMatchPrediction: odds = odds_data or {} prediction = BasketballMatchPrediction( match_id=match_id, home_team_name=home_team_name, away_team_name=away_team_name, league_name=league_name, ) # Sidelined impact home_sl = int((sidelined_data or {}).get("homeTeam", {}).get("totalSidelined", 0) or 0) away_sl = int((sidelined_data or {}).get("awayTeam", {}).get("totalSidelined", 0) or 0) kwargs.setdefault("home_sidelined_count", float(home_sl)) kwargs.setdefault("away_sidelined_count", float(away_sl)) kwargs.setdefault("sidelined_diff", float(home_sl - away_sl)) kwargs.setdefault("missing_players_impact", float(home_sl + away_sl) / 10.0) if not self.models: print("⚠️ [Basketball] No models loaded — returning neutral defaults") return prediction try: df = self._build_feature_row(odds, **kwargs) # ── ML ── ml_probs = self._ensemble_predict("ml", df) prediction.ml_home_prob = ml_probs[0] prediction.ml_away_prob = ml_probs[1] prediction.ml_pick = "1" if ml_probs[0] >= ml_probs[1] else "2" prediction.ml_confidence = max(ml_probs) * 100.0 # ── Total ── prediction.total_line = float(odds.get("tot_line") or 0.0) tot_probs = self._ensemble_predict("total", df) prediction.total_over_prob = tot_probs[1] prediction.total_under_prob = tot_probs[0] total_line = prediction.total_line prediction.total_pick = ( f"Over {total_line}" if tot_probs[1] >= tot_probs[0] else f"Under {total_line}" ) prediction.total_confidence = max(tot_probs) * 100.0 # ── Spread ── prediction.spread_home_line = float(odds.get("spread_home_line") or 0.0) sp_probs = self._ensemble_predict("spread", df) prediction.spread_home_prob = sp_probs[0] prediction.spread_away_prob = sp_probs[1] home_line = prediction.spread_home_line away_line = -home_line prediction.spread_pick = ( f"Home {home_line:+.1f}" if sp_probs[0] >= sp_probs[1] else f"Away {away_line:+.1f}" ) prediction.spread_confidence = max(sp_probs) * 100.0 # Market board summary prediction.market_board = { "ML": { "1": f"{prediction.ml_home_prob * 100:.0f}%", "2": f"{prediction.ml_away_prob * 100:.0f}%", }, "Totals": { f"Over {total_line}": f"{prediction.total_over_prob * 100:.0f}%", f"Under {total_line}": f"{prediction.total_under_prob * 100:.0f}%", }, "Spread": { f"Home {home_line:+.1f}": f"{prediction.spread_home_prob * 100:.0f}%", f"Away {away_line:+.1f}": f"{prediction.spread_away_prob * 100:.0f}%", }, } # Risk top_conf = max(prediction.ml_confidence, prediction.total_confidence, prediction.spread_confidence) prediction.risk_level = "LOW" if top_conf >= 65 else "MEDIUM" if top_conf >= 55 else "HIGH" prediction.analysis_details = { "model_version": "basketball_v25", "markets_predicted": list(self.models.keys()), "ensemble_size": {m: sum(1 for k in v.values() if v[k] is not None) for m, v in self.models.items()}, } except Exception as e: print(f"⚠️ [Basketball] Prediction failed for {match_id}: {e}") return prediction # ── Singleton factory ────────────────────────────────────────────────────────── _predictor: Optional[BasketballV25Predictor] = None def get_basketball_v25_predictor() -> BasketballV25Predictor: """Return the singleton BasketballV25Predictor (lazy-loaded).""" global _predictor if _predictor is None: _predictor = BasketballV25Predictor() return _predictor __all__ = [ "BasketballMatchPrediction", "BasketballV25Predictor", "get_basketball_v25_predictor", ]