"""
Team Predictor Engine - V20 Ensemble Component
Combines ELO ratings, form stats, H2H records and team statistics.
Weight: 30% in ensemble
"""

import os
import sys
from typing import Dict, Optional, Tuple, Any
from dataclasses import dataclass, field

# Add parent to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

from features.elo_system import get_elo_system
from features.h2h_engine import get_h2h_engine
from features.momentum_engine import get_momentum_engine, MomentumData
from features.team_stats_engine import get_team_stats_engine


@dataclass
class TeamPrediction:
    """Team engine prediction output.

    Probabilities are stored as fractions (0-1); ``to_dict`` converts them
    to percentages for presentation.
    """
    home_win_prob: float = 0.33
    draw_prob: float = 0.33
    away_win_prob: float = 0.33
    home_xg: float = 1.3
    away_xg: float = 1.1
    form_advantage: float = 0.0  # -1 to +1, positive = home advantage
    h2h_advantage: float = 0.0  # -1 to +1
    elo_diff: float = 0.0
    confidence: float = 0.0
    # Flat feature mapping built by the engine for downstream models.
    # Intentionally NOT included in to_dict().
    raw_features: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return a rounded, display-oriented summary of the prediction.

        The three outcome probabilities are scaled to percentages and
        rounded to one decimal; xG and the advantages are rounded to two
        decimals; ``elo_diff`` to a whole number; ``confidence`` to one
        decimal. ``raw_features`` is omitted.
        """
        return {
            "home_win_prob": round(self.home_win_prob * 100, 1),
            "draw_prob": round(self.draw_prob * 100, 1),
            "away_win_prob": round(self.away_win_prob * 100, 1),
            "home_xg": round(self.home_xg, 2),
            "away_xg": round(self.away_xg, 2),
            "form_advantage": round(self.form_advantage, 2),
            "h2h_advantage": round(self.h2h_advantage, 2),
            "elo_diff": round(self.elo_diff, 0),
            "confidence": round(self.confidence, 1)
        }


class TeamPredictorEngine:
    """
    Team-based prediction engine.

    Uses:
    - ELO Rating System (venue-adjusted, league-weighted)
    - H2H Engine (head-to-head history)
    - Momentum Engine (recent form)
    - Team Stats Engine (possession, shots, corners)
    """

    def __init__(self):
        """Fetch the four feature engines via their module-level getters."""
        self.elo_system = get_elo_system()
        self.h2h_engine = get_h2h_engine()
        self.momentum_engine = get_momentum_engine()
        self.team_stats_engine = get_team_stats_engine()
        print("✅ TeamPredictorEngine initialized")

    @staticmethod
    def _blend_probabilities(elo_home: float,
                             elo_away: float,
                             h2h_home_rate: float,
                             h2h_away_rate: float,
                             form_diff: float) -> Tuple[float, float, float]:
        """Blend ELO (60%), H2H (20%) and form (20%) into 1X2 probabilities.

        The draw probability is the remainder after the home and away
        blends. It is clamped at zero so that inconsistent inputs (e.g.
        rates that sum to more than 1) cannot yield a negative probability;
        the triple is then renormalised to sum to 1.

        Returns:
            (home, draw, away) probabilities.
        """
        final_home = elo_home * 0.6 + h2h_home_rate * 0.2 + (0.5 + form_diff * 0.3) * 0.2
        final_away = elo_away * 0.6 + h2h_away_rate * 0.2 + (0.5 - form_diff * 0.3) * 0.2
        final_draw = max(0.0, 1.0 - final_home - final_away)

        total = final_home + final_draw + final_away
        if total > 0:
            final_home /= total
            final_draw /= total
            final_away /= total
        return final_home, final_draw, final_away

    @staticmethod
    def _underperformance_penalty(mom_data: Any) -> float:
        """Return the xG deduction for a team that underperforms its xG.

        A penalty of half the ``xg_underperformance`` value, capped at 0.5,
        applies only when that value exceeds 0.2. Objects without the
        attribute incur no penalty.
        """
        underperformance = getattr(mom_data, "xg_underperformance", 0.0)
        if underperformance > 0.2:
            return min(0.5, underperformance * 0.5)
        return 0.0

    @staticmethod
    def _agreement_confidence(elo_home: float,
                              elo_draw: float,
                              elo_away: float,
                              h2h_home_rate: float,
                              h2h_away_rate: float,
                              form_diff: float,
                              max_prob: float) -> float:
        """Score confidence from the top probability and signal agreement.

        Each of ELO, H2H and form votes "H", "A" or "D". The number of
        pairwise agreements (0-3) scales ``max_prob * 100`` by 0.7-1.0.
        Equal H2H rates (including the no-data fallback) vote "D" rather
        than being counted as an away signal.
        """
        if elo_home > max(elo_draw, elo_away):
            elo_winner = "H"
        elif elo_away > elo_draw:
            elo_winner = "A"
        else:
            elo_winner = "D"

        if h2h_home_rate > h2h_away_rate:
            h2h_winner = "H"
        elif h2h_away_rate > h2h_home_rate:
            h2h_winner = "A"
        else:
            h2h_winner = "D"

        # Form needs a margin of more than 0.1 either way to pick a side.
        if form_diff > 0.1:
            form_winner = "H"
        elif form_diff < -0.1:
            form_winner = "A"
        else:
            form_winner = "D"

        agreement = sum([
            elo_winner == h2h_winner,
            elo_winner == form_winner,
            h2h_winner == form_winner
        ])
        return max_prob * 100 * (0.7 + agreement * 0.1)

    def predict(self,
                home_team_id: str,
                away_team_id: str,
                match_date_ms: int,
                home_team_name: str = "",
                away_team_name: str = "") -> TeamPrediction:
        """
        Generate team-based prediction.

        Args:
            home_team_id: Home team ID
            away_team_id: Away team ID
            match_date_ms: Match date in milliseconds
            home_team_name: Home team name (accepted for API compatibility;
                not read by this method)
            away_team_name: Away team name (accepted for API compatibility;
                not read by this method)

        Returns:
            TeamPrediction with 1X2 probabilities, xG, confidence and the
            raw feature mapping.

        Note:
            Failures in the H2H and momentum engines are caught and replaced
            with neutral defaults; errors from the ELO and team-stats
            engines propagate to the caller.
        """
        # 1. Get ELO predictions
        elo_pred = self.elo_system.predict_match(home_team_id, away_team_id)
        elo_features = self.elo_system.get_match_features(home_team_id, away_team_id)

        # 2. Get H2H features (best effort: fall back to neutral values)
        try:
            h2h_features = self.h2h_engine.get_features(
                home_team_id, away_team_id, match_date_ms
            )
        except Exception as e:
            print(f"⚠️ H2HEngine error: {e}")
            h2h_features = {
                "h2h_home_win_rate": 0.5,
                "h2h_away_win_rate": 0.5,
                "h2h_avg_goals": 2.5,
                "h2h_btts_rate": 0.5
            }

        # 3. Get Momentum/Form features
        try:
            # form_score is mapped to 0-1 from momentum_score (-1 to 1)
            home_mom_data = self.momentum_engine.calculate_momentum(home_team_id, match_date_ms)
            away_mom_data = self.momentum_engine.calculate_momentum(away_team_id, match_date_ms)
            home_form_score = (home_mom_data.momentum_score + 1) / 2
            away_form_score = (away_mom_data.momentum_score + 1) / 2
        except Exception as e:
            print(f"⚠️ MomentumEngine error: {e}")
            home_mom_data = MomentumData()
            away_mom_data = MomentumData()
            home_form_score = 0.5
            away_form_score = 0.5

        # 4. Get Team Stats
        home_stats = self.team_stats_engine.get_features(home_team_id, match_date_ms)
        away_stats = self.team_stats_engine.get_features(away_team_id, match_date_ms)

        # 5. Combine predictions
        # ELO-based 1X2 (60% weight)
        elo_home = elo_pred.get("home_win_prob", 0.33)
        elo_draw = elo_pred.get("draw_prob", 0.33)
        elo_away = elo_pred.get("away_win_prob", 0.33)

        # H2H rates (20% weight)
        h2h_home_rate = h2h_features.get("h2h_home_win_rate", 0.5)
        h2h_away_rate = h2h_features.get("h2h_away_win_rate", 0.5)

        # Form difference (20% weight): -1 to +1, positive favours home
        form_diff = home_form_score - away_form_score

        final_home, final_draw, final_away = self._blend_probabilities(
            elo_home, elo_away, h2h_home_rate, h2h_away_rate, form_diff
        )

        # Calculate xG based on stats and form (conservative base)
        home_conversion = home_stats.get("shot_conversion_rate", 0.1)
        away_conversion = away_stats.get("shot_conversion_rate", 0.1)

        base_home_xg = 1.35 + (home_conversion * 3.0)
        base_away_xg = 1.10 + (away_conversion * 2.5)

        # Defense weakness factor: the opponent's own "shot_accuracy" stat
        # is used as the proxy for how weak its defense is.
        # NOTE(review): confirm this proxy is intended; it reads the
        # opponent's attacking accuracy, not shots conceded.
        away_def_weakness = away_stats.get("shot_accuracy", 0.35)
        home_def_weakness = home_stats.get("shot_accuracy", 0.35)

        # Adjust xG: stronger opponent defense -> lower xG
        home_xg = base_home_xg * (1 + form_diff * 0.15) * (0.8 + away_def_weakness * 0.6)
        away_xg = base_away_xg * (1 - form_diff * 0.15) * (0.8 + home_def_weakness * 0.6)

        # Apply xG underperformance penalty directly to the calculated xG:
        # a team that chronically underperforms its xG has part of that
        # historical difference subtracted here.
        home_xg -= self._underperformance_penalty(home_mom_data)
        away_xg -= self._underperformance_penalty(away_mom_data)

        # H2H adjustment (more conservative)
        h2h_avg_goals = h2h_features.get("h2h_avg_goals", 2.5)
        if h2h_avg_goals > 3.0:
            home_xg *= 1.05
            away_xg *= 1.05
        elif h2h_avg_goals < 2.0:
            home_xg *= 0.95
            away_xg *= 0.95

        # Clamp xG to reasonable range
        home_xg = max(0.5, min(3.5, home_xg))
        away_xg = max(0.3, min(3.0, away_xg))

        # Calculate confidence: higher when ELO, H2H, and Form all agree
        max_prob = max(final_home, final_draw, final_away)
        confidence = self._agreement_confidence(
            elo_home, elo_draw, elo_away,
            h2h_home_rate, h2h_away_rate,
            form_diff, max_prob
        )

        # Collect raw features for XGBoost.
        # Key order is preserved; later keys override earlier duplicates.
        raw_features = {
            **elo_features,  # 8 features
            # Form Features (need key mapping to match extract_training_data.py)
            "home_goals_avg": 1.5 + home_mom_data.goals_trend,  # Proxy
            "home_conceded_avg": 1.5 - home_mom_data.conceded_trend,  # Proxy
            "away_goals_avg": 1.5 + away_mom_data.goals_trend,
            "away_conceded_avg": 1.5 - away_mom_data.conceded_trend,
            "home_clean_sheet_rate": 0.2,  # Not in new MomentumData
            "away_clean_sheet_rate": 0.2,
            "home_scoring_rate": 0.8,
            "away_scoring_rate": 0.8,
            "home_winning_streak": home_mom_data.winning_streak,
            "away_winning_streak": away_mom_data.winning_streak,
            "home_unbeaten_streak": home_mom_data.unbeaten_streak,
            "away_unbeaten_streak": away_mom_data.unbeaten_streak,
            # H2H Features
            **h2h_features,
            # Team Stats
            "home_avg_possession": home_stats.get("avg_possession", 0.5),
            "away_avg_possession": away_stats.get("avg_possession", 0.5),
            "home_avg_shots_on_target": home_stats.get("avg_shots_on_target", 3.5),
            "away_avg_shots_on_target": away_stats.get("avg_shots_on_target", 3.5),
            "home_shot_conversion": home_stats.get("shot_conversion_rate", 0.1),
            "away_shot_conversion": away_stats.get("shot_conversion_rate", 0.1),
            "home_avg_corners": home_stats.get("avg_corners", 4.5),
            "away_avg_corners": away_stats.get("avg_corners", 4.5),
            # Derived
            "home_xga": 1.5 - home_mom_data.conceded_trend,  # reusing as proxy
            "away_xga": 1.5 - away_mom_data.conceded_trend
        }

        return TeamPrediction(
            home_win_prob=final_home,
            draw_prob=final_draw,
            away_win_prob=final_away,
            home_xg=home_xg,
            away_xg=away_xg,
            form_advantage=form_diff,
            h2h_advantage=h2h_home_rate - h2h_away_rate,
            elo_diff=elo_features.get("elo_diff", 0),
            confidence=confidence,
            raw_features=raw_features
        )


# Singleton
_engine: Optional[TeamPredictorEngine] = None


def get_team_predictor() -> TeamPredictorEngine:
    """Return the shared TeamPredictorEngine, creating it on first call."""
    global _engine
    if _engine is None:
        _engine = TeamPredictorEngine()
    return _engine


if __name__ == "__main__":
    engine = get_team_predictor()

    print("\n🧪 Team Predictor Engine Test")
    print("=" * 50)

    # Test with sample IDs
    pred = engine.predict(
        home_team_id="test_home",
        away_team_id="test_away",
        match_date_ms=1707393600000
    )

    print("\n📊 Prediction:")
    for k, v in pred.to_dict().items():
        print(f"  {k}: {v}")