""" V25 Ensemble Predictor - NO TARGET LEAKAGE =========================================== Multi-model ensemble for match prediction using XGBoost and LightGBM. Features: - 73 engineered features (NO target leakage) - Market-specific models (MS, OU25, BTTS) - Weighted ensemble predictions - Value bet detection """ import os import json import numpy as np import pandas as pd from typing import Dict, List, Optional, Any from dataclasses import dataclass, field import xgboost as xgb import lightgbm as lgb # CatBoost is optional try: from catboost import CatBoostClassifier CATBOOST_AVAILABLE = True except ImportError: CatBoostClassifier = None CATBOOST_AVAILABLE = False # Paths MODELS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'v25') @dataclass class MarketPrediction: """Prediction for a single betting market.""" market_type: str pick: str probability: float confidence: float odds: float = 0.0 is_value_bet: bool = False edge: float = 0.0 def to_dict(self) -> dict: return { 'market_type': self.market_type, 'pick': self.pick, 'probability': round(self.probability * 100, 1), 'confidence': round(self.confidence, 1), 'odds': self.odds, 'is_value_bet': self.is_value_bet, 'edge': round(self.edge * 100, 1), } @dataclass class ValueBet: """Detected value bet opportunity.""" market_type: str pick: str probability: float odds: float edge: float confidence: float def to_dict(self) -> dict: return { 'market_type': self.market_type, 'pick': self.pick, 'probability': round(self.probability * 100, 1), 'odds': self.odds, 'edge': round(self.edge * 100, 1), 'confidence': round(self.confidence, 1), } @dataclass class MatchPrediction: """Complete match prediction with all markets.""" match_id: str home_team: str away_team: str # MS predictions home_prob: float = 0.0 draw_prob: float = 0.0 away_prob: float = 0.0 ms_pick: str = '' ms_confidence: float = 0.0 # OU25 predictions over_prob: float = 0.0 under_prob: float = 0.0 ou25_pick: str = '' ou25_confidence: float = 0.0 # BTTS predictions btts_yes_prob: float = 0.0 btts_no_prob: float = 0.0 btts_pick: str = '' btts_confidence: float = 0.0 # Value bets value_bets: List[ValueBet] = field(default_factory=list) def to_dict(self) -> dict: return { 'match_id': self.match_id, 'home_team': self.home_team, 'away_team': self.away_team, 'ms': { 'home_prob': round(self.home_prob * 100, 1), 'draw_prob': round(self.draw_prob * 100, 1), 'away_prob': round(self.away_prob * 100, 1), 'pick': self.ms_pick, 'confidence': round(self.ms_confidence, 1), }, 'ou25': { 'over_prob': round(self.over_prob * 100, 1), 'under_prob': round(self.under_prob * 100, 1), 'pick': self.ou25_pick, 'confidence': round(self.ou25_confidence, 1), }, 'btts': { 'yes_prob': round(self.btts_yes_prob * 100, 1), 'no_prob': round(self.btts_no_prob * 100, 1), 'pick': self.btts_pick, 'confidence': round(self.btts_confidence, 1), }, 'value_bets': [vb.to_dict() for vb in self.value_bets], } class V25Predictor: """ V25 Ensemble Predictor - NO TARGET LEAKAGE Uses market-specific XGBoost and LightGBM models. Each market (MS, OU25, BTTS) has its own trained models. """ # Feature columns — loaded dynamically from feature_cols.json to stay # in sync with the trained models. The hardcoded list below is only a # fallback in case the JSON file is missing. _FALLBACK_FEATURE_COLS = [ # ELO Features (8) 'home_overall_elo', 'away_overall_elo', 'elo_diff', 'home_home_elo', 'away_away_elo', 'home_form_elo', 'away_form_elo', 'form_elo_diff', # Form Features (12) 'home_goals_avg', 'home_conceded_avg', 'away_goals_avg', 'away_conceded_avg', 'home_clean_sheet_rate', 'away_clean_sheet_rate', 'home_scoring_rate', 'away_scoring_rate', 'home_winning_streak', 'away_winning_streak', 'home_unbeaten_streak', 'away_unbeaten_streak', # H2H Features (6) 'h2h_total_matches', 'h2h_home_win_rate', 'h2h_draw_rate', 'h2h_avg_goals', 'h2h_btts_rate', 'h2h_over25_rate', # Team Stats Features (8) 'home_avg_possession', 'away_avg_possession', 'home_avg_shots_on_target', 'away_avg_shots_on_target', 'home_shot_conversion', 'away_shot_conversion', 'home_avg_corners', 'away_avg_corners', # Odds Features (24) 'odds_ms_h', 'odds_ms_d', 'odds_ms_a', 'implied_home', 'implied_draw', 'implied_away', 'odds_ht_ms_h', 'odds_ht_ms_d', 'odds_ht_ms_a', 'odds_ou05_o', 'odds_ou05_u', 'odds_ou15_o', 'odds_ou15_u', 'odds_ou25_o', 'odds_ou25_u', 'odds_ou35_o', 'odds_ou35_u', 'odds_ht_ou05_o', 'odds_ht_ou05_u', 'odds_ht_ou15_o', 'odds_ht_ou15_u', 'odds_btts_y', 'odds_btts_n', # Odds Presence Flags (20) 'odds_ms_h_present', 'odds_ms_d_present', 'odds_ms_a_present', 'odds_ht_ms_h_present', 'odds_ht_ms_d_present', 'odds_ht_ms_a_present', 'odds_ou05_o_present', 'odds_ou05_u_present', 'odds_ou15_o_present', 'odds_ou15_u_present', 'odds_ou25_o_present', 'odds_ou25_u_present', 'odds_ou35_o_present', 'odds_ou35_u_present', 'odds_ht_ou05_o_present', 'odds_ht_ou05_u_present', 'odds_ht_ou15_o_present', 'odds_ht_ou15_u_present', 'odds_btts_y_present', 'odds_btts_n_present', # League Features (4) 'home_xga', 'away_xga', 'league_avg_goals', 'league_zero_goal_rate', # Upset Engine (4) 'upset_atmosphere', 'upset_motivation', 'upset_fatigue', 'upset_potential', # Referee Engine (5) 'referee_home_bias', 'referee_avg_goals', 'referee_cards_total', 'referee_avg_yellow', 'referee_experience', # Momentum Engine (3) 'home_momentum_score', 'away_momentum_score', 'momentum_diff', # Squad Features (9) 'home_squad_quality', 'away_squad_quality', 'squad_diff', 'home_key_players', 'away_key_players', 'home_missing_impact', 'away_missing_impact', 'home_goals_form', 'away_goals_form', ] @staticmethod def _load_feature_cols() -> list: """Load feature columns from feature_cols.json, falling back to hardcoded list.""" feature_json = os.path.join(MODELS_DIR, 'feature_cols.json') try: if os.path.exists(feature_json): with open(feature_json, 'r', encoding='utf-8') as f: cols = json.load(f) if isinstance(cols, list) and len(cols) > 0: print(f"[V25] Loaded {len(cols)} feature columns from feature_cols.json") return cols except Exception as e: print(f"[V25] Warning: could not load feature_cols.json: {e}") print(f"[V25] Using fallback feature columns ({len(V25Predictor._FALLBACK_FEATURE_COLS)} features)") return V25Predictor._FALLBACK_FEATURE_COLS # Model weights for ensemble DEFAULT_WEIGHTS = { 'xgb': 0.50, 'lgb': 0.50, } def __init__(self, models_dir: Optional[str] = None): """ Initialize V25 Predictor. Args: models_dir: Directory containing model files. Defaults to v25/ directory. """ self.models_dir = models_dir or MODELS_DIR self.models = {} # market -> {'xgb': model, 'lgb': model} self._loaded = False self.FEATURE_COLS = self._load_feature_cols() # All trained market models available in V25 ALL_MARKETS = [ 'ms', 'ou25', 'btts', # Core markets 'ou15', 'ou35', # Additional OU lines 'ht_result', 'ht_ou05', 'ht_ou15', # HT markets 'htft', # HT/FT combo 'cards_ou45', # Cards market 'handicap_ms', # Handicap 'odd_even', # Odd/Even goals ] # Multi-class markets (output > 2 classes) MULTICLASS_MARKETS = {'ms', 'ht_result', 'htft', 'handicap_ms'} def load_models(self) -> bool: """Load all market-specific models from disk.""" try: loaded_count = 0 for market in self.ALL_MARKETS: self.models[market] = {} # Load XGBoost (read content in Python to avoid non-ASCII path issues) xgb_path = os.path.join(self.models_dir, f'xgb_v25_{market}.json') if os.path.exists(xgb_path) and os.path.getsize(xgb_path) > 0: with open(xgb_path, 'r', encoding='utf-8') as f: xgb_content = f.read() booster = xgb.Booster() booster.load_model(bytearray(xgb_content, 'utf-8')) self.models[market]['xgb'] = booster loaded_count += 1 # Load LightGBM (read content in Python to avoid non-ASCII path issues) lgb_path = os.path.join(self.models_dir, f'lgb_v25_{market}.txt') if os.path.exists(lgb_path) and os.path.getsize(lgb_path) > 0: with open(lgb_path, 'r', encoding='utf-8') as f: model_str = f.read() self.models[market]['lgb'] = lgb.Booster(model_str=model_str) loaded_count += 1 # Remove empty entries if not self.models[market]: del self.models[market] print(f"[V25] Loaded {loaded_count} model files across {len(self.models)} markets: {list(self.models.keys())}") self._loaded = loaded_count > 0 return self._loaded except Exception as e: print(f"[ERROR] Error loading models: {e}") import traceback traceback.print_exc() return False def _ensure_loaded(self): """Ensure models are loaded before prediction.""" if not self._loaded: if not self.load_models(): raise RuntimeError("Failed to load V25 models") def _prepare_features(self, features: Dict[str, float]) -> pd.DataFrame: """Prepare feature vector for prediction.""" X = pd.DataFrame([{col: features.get(col, 0.0) for col in self.FEATURE_COLS}]) return X def predict_ms(self, features: Dict[str, float]) -> tuple: """ Predict match result (1X2). Returns: (home_prob, draw_prob, away_prob) """ self._ensure_loaded() X = self._prepare_features(features) probs = [] # XGBoost if 'xgb' in self.models.get('ms', {}): dmat = xgb.DMatrix(X) xgb_proba = self.models['ms']['xgb'].predict(dmat) if len(xgb_proba.shape) == 1: xgb_proba = np.array([xgb_proba]) probs.append(xgb_proba[0] * self.DEFAULT_WEIGHTS['xgb']) # LightGBM if 'lgb' in self.models.get('ms', {}): lgb_proba = self.models['ms']['lgb'].predict(X) if len(lgb_proba.shape) == 2: probs.append(lgb_proba[0] * self.DEFAULT_WEIGHTS['lgb']) if not probs: return 0.33, 0.33, 0.33 ensemble_proba = np.sum(probs, axis=0) ensemble_proba = ensemble_proba / ensemble_proba.sum() return float(ensemble_proba[0]), float(ensemble_proba[1]), float(ensemble_proba[2]) def predict_ou25(self, features: Dict[str, float]) -> tuple: """ Predict Over/Under 2.5 goals. Returns: (over_prob, under_prob) """ self._ensure_loaded() X = self._prepare_features(features) probs = [] # XGBoost if 'xgb' in self.models.get('ou25', {}): dmat = xgb.DMatrix(X) xgb_proba = self.models['ou25']['xgb'].predict(dmat) if isinstance(xgb_proba, np.ndarray) and len(xgb_proba.shape) == 1: probs.append(xgb_proba[0]) # LightGBM if 'lgb' in self.models.get('ou25', {}): lgb_proba = self.models['ou25']['lgb'].predict(X) if isinstance(lgb_proba, np.ndarray): probs.append(lgb_proba[0]) if not probs: return 0.5, 0.5 # Average probability avg_prob = np.mean(probs) return float(avg_prob), float(1 - avg_prob) def predict_btts(self, features: Dict[str, float]) -> tuple: """ Predict Both Teams To Score. Returns: (yes_prob, no_prob) """ self._ensure_loaded() X = self._prepare_features(features) probs = [] # XGBoost if 'xgb' in self.models.get('btts', {}): dmat = xgb.DMatrix(X) xgb_proba = self.models['btts']['xgb'].predict(dmat) if isinstance(xgb_proba, np.ndarray) and len(xgb_proba.shape) == 1: probs.append(xgb_proba[0]) # LightGBM if 'lgb' in self.models.get('btts', {}): lgb_proba = self.models['btts']['lgb'].predict(X) if isinstance(lgb_proba, np.ndarray): probs.append(lgb_proba[0]) if not probs: return 0.5, 0.5 # Average probability avg_prob = np.mean(probs) return float(avg_prob), float(1 - avg_prob) def predict_market(self, market: str, features: Dict[str, float]) -> Optional[np.ndarray]: """ Generic prediction for any loaded market. Args: market: Market key (e.g. 'ht_result', 'htft', 'cards_ou45') features: Feature dictionary. Returns: numpy array of probabilities. For binary markets: [positive_prob] For multi-class markets: [class0_prob, class1_prob, ...] """ self._ensure_loaded() if market not in self.models: return None X = self._prepare_features(features) probs = [] weights = [] is_multiclass = market in self.MULTICLASS_MARKETS # XGBoost if 'xgb' in self.models[market]: dmat = xgb.DMatrix(X) xgb_proba = self.models[market]['xgb'].predict(dmat) if isinstance(xgb_proba, np.ndarray): if is_multiclass and len(xgb_proba.shape) == 2: probs.append(xgb_proba[0]) elif is_multiclass and len(xgb_proba.shape) == 1: probs.append(xgb_proba) else: probs.append(np.array([xgb_proba[0]])) weights.append(self.DEFAULT_WEIGHTS['xgb']) # LightGBM if 'lgb' in self.models[market]: lgb_proba = self.models[market]['lgb'].predict(X) if isinstance(lgb_proba, np.ndarray): if is_multiclass and len(lgb_proba.shape) == 2: probs.append(lgb_proba[0]) elif is_multiclass and len(lgb_proba.shape) == 1: probs.append(lgb_proba) else: probs.append(np.array([lgb_proba[0]])) weights.append(self.DEFAULT_WEIGHTS['lgb']) if not probs: return None # Weighted average if len(probs) == 1: return probs[0] total_w = sum(weights[:len(probs)]) result = np.zeros_like(probs[0]) for p, w in zip(probs, weights): result += p * (w / total_w) # Normalize multi-class if is_multiclass and result.sum() > 0: result = result / result.sum() return result def has_market(self, market: str) -> bool: """Check if a specific market model is loaded.""" return market in self.models def predict_match( self, match_id: str, home_team: str, away_team: str, features: Dict[str, float], odds: Optional[Dict[str, float]] = None, ) -> MatchPrediction: """ Predict all markets for a match. Args: match_id: Match identifier. home_team: Home team name. away_team: Away team name. features: Feature dictionary. odds: Optional odds dictionary for value bet detection. Returns: MatchPrediction object. """ # Get predictions for each market home_prob, draw_prob, away_prob = self.predict_ms(features) over_prob, under_prob = self.predict_ou25(features) btts_yes_prob, btts_no_prob = self.predict_btts(features) # Determine picks ms_probs = {'1': home_prob, 'X': draw_prob, '2': away_prob} ms_pick = max(ms_probs, key=ms_probs.__getitem__) ms_confidence = ms_probs[ms_pick] * 100 ou25_probs = {'Over': over_prob, 'Under': under_prob} ou25_pick = max(ou25_probs, key=ou25_probs.__getitem__) ou25_confidence = ou25_probs[ou25_pick] * 100 btts_probs = {'Yes': btts_yes_prob, 'No': btts_no_prob} btts_pick = max(btts_probs, key=btts_probs.__getitem__) btts_confidence = btts_probs[btts_pick] * 100 # Create prediction prediction = MatchPrediction( match_id=match_id, home_team=home_team, away_team=away_team, home_prob=home_prob, draw_prob=draw_prob, away_prob=away_prob, ms_pick=ms_pick, ms_confidence=ms_confidence, over_prob=over_prob, under_prob=under_prob, ou25_pick=ou25_pick, ou25_confidence=ou25_confidence, btts_yes_prob=btts_yes_prob, btts_no_prob=btts_no_prob, btts_pick=btts_pick, btts_confidence=btts_confidence, ) # Detect value bets if odds: prediction.value_bets = self._detect_value_bets( prediction, odds, home_prob, draw_prob, away_prob, over_prob, under_prob, btts_yes_prob, btts_no_prob ) return prediction def _detect_value_bets( self, prediction: MatchPrediction, odds: Dict[str, float], home_prob: float, draw_prob: float, away_prob: float, over_prob: float, under_prob: float, btts_yes_prob: float, btts_no_prob: float, ) -> List[ValueBet]: """Detect value bets based on model vs market odds.""" value_bets = [] min_edge = 0.05 # 5% minimum edge # MS value bets if 'ms_h' in odds and odds['ms_h'] > 0: implied = 1 / odds['ms_h'] edge = home_prob - implied if edge > min_edge: value_bets.append(ValueBet( market_type='MS', pick='1', probability=home_prob, odds=odds['ms_h'], edge=edge, confidence=home_prob * 100, )) if 'ms_d' in odds and odds['ms_d'] > 0: implied = 1 / odds['ms_d'] edge = draw_prob - implied if edge > min_edge: value_bets.append(ValueBet( market_type='MS', pick='X', probability=draw_prob, odds=odds['ms_d'], edge=edge, confidence=draw_prob * 100, )) if 'ms_a' in odds and odds['ms_a'] > 0: implied = 1 / odds['ms_a'] edge = away_prob - implied if edge > min_edge: value_bets.append(ValueBet( market_type='MS', pick='2', probability=away_prob, odds=odds['ms_a'], edge=edge, confidence=away_prob * 100, )) # OU25 value bets if 'ou25_o' in odds and odds['ou25_o'] > 0: implied = 1 / odds['ou25_o'] edge = over_prob - implied if edge > min_edge: value_bets.append(ValueBet( market_type='OU25', pick='Over', probability=over_prob, odds=odds['ou25_o'], edge=edge, confidence=over_prob * 100, )) if 'ou25_u' in odds and odds['ou25_u'] > 0: implied = 1 / odds['ou25_u'] edge = under_prob - implied if edge > min_edge: value_bets.append(ValueBet( market_type='OU25', pick='Under', probability=under_prob, odds=odds['ou25_u'], edge=edge, confidence=under_prob * 100, )) # BTTS value bets if 'btts_y' in odds and odds['btts_y'] > 0: implied = 1 / odds['btts_y'] edge = btts_yes_prob - implied if edge > min_edge: value_bets.append(ValueBet( market_type='BTTS', pick='Yes', probability=btts_yes_prob, odds=odds['btts_y'], edge=edge, confidence=btts_yes_prob * 100, )) if 'btts_n' in odds and odds['btts_n'] > 0: implied = 1 / odds['btts_n'] edge = btts_no_prob - implied if edge > min_edge: value_bets.append(ValueBet( market_type='BTTS', pick='No', probability=btts_no_prob, odds=odds['btts_n'], edge=edge, confidence=btts_no_prob * 100, )) return value_bets # Singleton instance _v25_predictor: Optional[V25Predictor] = None def get_v25_predictor() -> V25Predictor: """Get or create V25 predictor instance.""" global _v25_predictor if _v25_predictor is None: _v25_predictor = V25Predictor() _v25_predictor.load_models() return _v25_predictor