""" Player Predictor Engine - V20 Ensemble Component Analyzes squad quality, key players, and missing player impact. Weight: 25% in ensemble """ import os import sys from typing import Dict, Optional, List from dataclasses import dataclass sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) from features.squad_analysis_engine import get_squad_analysis_engine from features.sidelined_analyzer import get_sidelined_analyzer @dataclass class PlayerPrediction: """Player engine prediction output. IMPORTANT: squad_quality uses the SAME composite formula as extract_training_data.py so that inference values match the distribution the model was trained on (~3-36 range). """ home_squad_quality: float = 12.0 away_squad_quality: float = 12.0 squad_diff: float = 0.0 home_key_players: int = 0 away_key_players: int = 0 home_missing_impact: float = 0.0 away_missing_impact: float = 0.0 home_goals_form: int = 0 away_goals_form: int = 0 home_lineup_goals_per90: float = 0.0 away_lineup_goals_per90: float = 0.0 home_lineup_assists_per90: float = 0.0 away_lineup_assists_per90: float = 0.0 home_squad_continuity: float = 0.5 away_squad_continuity: float = 0.5 home_top_scorer_form: int = 0 away_top_scorer_form: int = 0 home_avg_player_exp: float = 0.0 away_avg_player_exp: float = 0.0 home_goals_diversity: float = 0.0 away_goals_diversity: float = 0.0 lineup_available: bool = False confidence: float = 0.0 class PlayerPredictorEngine: """ Player/Squad-based prediction engine. Analyzes: - Starting 11 quality - Key player availability (top scorers) - Missing player impact - Recent goalscoring form per player """ def __init__(self): self.squad_engine = get_squad_analysis_engine() self.sidelined_analyzer = get_sidelined_analyzer() print("✅ PlayerPredictorEngine initialized") def predict(self, match_id: str, home_team_id: str, away_team_id: str, home_lineup: Optional[List[str]] = None, away_lineup: Optional[List[str]] = None, sidelined_data: Optional[Dict] = None) -> PlayerPrediction: """ Generate player-based prediction. Args: match_id: Match ID for lineup lookup home_team_id: Home team ID away_team_id: Away team ID home_lineup: Optional list of home player IDs away_lineup: Optional list of away player IDs Returns: PlayerPrediction with squad analysis """ # Get squad features home_analysis = None away_analysis = None if home_lineup and away_lineup: home_analysis = self.squad_engine.analyze_squad_from_list( home_lineup, home_team_id ) away_analysis = self.squad_engine.analyze_squad_from_list( away_lineup, away_team_id ) lineup_available = True features = { "home_starting_11": home_analysis.starting_count or 11, "home_goals_last_5": home_analysis.total_goals_last_5, "home_assists_last_5": home_analysis.total_assists_last_5, "home_key_players": home_analysis.key_players_count, "home_forwards": home_analysis.forward_count or 2, "away_starting_11": away_analysis.starting_count or 11, "away_goals_last_5": away_analysis.total_goals_last_5, "away_assists_last_5": away_analysis.total_assists_last_5, "away_key_players": away_analysis.key_players_count, "away_forwards": away_analysis.forward_count or 2, } elif match_id: try: features = self.squad_engine.get_features( match_id, home_team_id, away_team_id ) lineup_available = ( features.get("home_starting_11", 0) >= 11 and features.get("away_starting_11", 0) >= 11 ) except Exception: features = self.squad_engine.get_features_without_match( home_team_id, away_team_id ) lineup_available = False else: features = self.squad_engine.get_features_without_match( home_team_id, away_team_id ) lineup_available = False home_goals = int(features.get("home_goals_last_5", 0)) away_goals = int(features.get("away_goals_last_5", 0)) home_key = int(features.get("home_key_players", 0)) away_key = int(features.get("away_key_players", 0)) home_starting = features.get("home_starting_11", 11) away_starting = features.get("away_starting_11", 11) home_fwd = features.get("home_forwards", 2) away_fwd = features.get("away_forwards", 2) # Squad quality — matches V25 extract_training_data.py:579 home_quality = home_starting * 0.3 + home_key * 3.0 + home_fwd * 1.5 away_quality = away_starting * 0.3 + away_key * 3.0 + away_fwd * 1.5 squad_diff = home_quality - away_quality # Missing player impact if sidelined_data: home_impact, away_impact = self.sidelined_analyzer.analyze_match(sidelined_data) home_missing = min(1.0, max(0.0, home_impact.impact_score)) away_missing = min(1.0, max(0.0, away_impact.impact_score)) sidelined_available = True else: expected_xi = 11 actual_home_xi = features.get("home_starting_11", 11) actual_away_xi = features.get("away_starting_11", 11) home_missing = (expected_xi - actual_home_xi) / expected_xi if actual_home_xi < expected_xi else 0 away_missing = (expected_xi - actual_away_xi) / expected_xi if actual_away_xi < expected_xi else 0 sidelined_available = False # Player-level features (matches extract_training_data.py:594-650) player_feats = self._compute_player_level_features( home_lineup or [], away_lineup or [], home_team_id, away_team_id, home_analysis, away_analysis, ) confidence = 70.0 if lineup_available else 35.0 if home_goals + away_goals > 10: confidence += 15 if sidelined_available: confidence += self.sidelined_analyzer.config.get("sidelined.confidence_boost", 10) if not lineup_available: confidence -= 5.0 return PlayerPrediction( home_squad_quality=home_quality, away_squad_quality=away_quality, squad_diff=squad_diff, home_key_players=home_key, away_key_players=away_key, home_missing_impact=home_missing, away_missing_impact=away_missing, home_goals_form=home_goals, away_goals_form=away_goals, home_lineup_goals_per90=player_feats['home_lineup_goals_per90'], away_lineup_goals_per90=player_feats['away_lineup_goals_per90'], home_lineup_assists_per90=player_feats['home_lineup_assists_per90'], away_lineup_assists_per90=player_feats['away_lineup_assists_per90'], home_squad_continuity=player_feats['home_squad_continuity'], away_squad_continuity=player_feats['away_squad_continuity'], home_top_scorer_form=player_feats['home_top_scorer_form'], away_top_scorer_form=player_feats['away_top_scorer_form'], home_avg_player_exp=player_feats['home_avg_player_exp'], away_avg_player_exp=player_feats['away_avg_player_exp'], home_goals_diversity=player_feats['home_goals_diversity'], away_goals_diversity=player_feats['away_goals_diversity'], lineup_available=lineup_available, confidence=max(5.0, confidence) ) def _compute_player_level_features( self, home_lineup: List[str], away_lineup: List[str], home_team_id: str, away_team_id: str, home_analysis, away_analysis, ) -> Dict[str, float]: defaults = { 'home_lineup_goals_per90': 0.0, 'away_lineup_goals_per90': 0.0, 'home_lineup_assists_per90': 0.0, 'away_lineup_assists_per90': 0.0, 'home_squad_continuity': 0.5, 'away_squad_continuity': 0.5, 'home_top_scorer_form': 0, 'away_top_scorer_form': 0, 'home_avg_player_exp': 0.0, 'away_avg_player_exp': 0.0, 'home_goals_diversity': 0.0, 'away_goals_diversity': 0.0, } conn = self.squad_engine.get_conn() if conn is None: return defaults try: from psycopg2.extras import RealDictCursor result = {} for prefix, lineup, team_id in [ ('home', home_lineup, home_team_id), ('away', away_lineup, away_team_id), ]: if not lineup: for k in ('lineup_goals_per90', 'lineup_assists_per90', 'squad_continuity', 'top_scorer_form', 'avg_player_exp', 'goals_diversity'): result[f'{prefix}_{k}'] = defaults[f'{prefix}_{k}'] continue g90, a90, total_exp = 0.0, 0.0, 0 best_scorer_total, best_scorer_id = 0, None scorers_in_lineup = 0 with conn.cursor(cursor_factory=RealDictCursor) as cur: for pid in lineup: cur.execute(""" SELECT COUNT(*) as starts, COALESCE(SUM(CASE WHEN e.event_type = 'goal' AND (e.event_subtype IS NULL OR e.event_subtype NOT ILIKE '%%penaltı kaçırma%%') THEN 1 ELSE 0 END), 0) as goals, COALESCE((SELECT COUNT(*) FROM match_player_events WHERE assist_player_id = %s), 0) as assists FROM match_player_participation mpp LEFT JOIN match_player_events e ON e.match_id = mpp.match_id AND e.player_id = mpp.player_id WHERE mpp.player_id = %s AND mpp.is_starting = true """, (pid, pid)) row = cur.fetchone() if not row or not row['starts']: continue starts = row['starts'] goals = row['goals'] or 0 assists = row['assists'] or 0 g90 += goals / starts a90 += assists / starts total_exp += starts if goals > 0: scorers_in_lineup += 1 if goals > best_scorer_total: best_scorer_total = goals best_scorer_id = pid n_st = len(lineup) or 1 # Top scorer recent form (goals in last 5 starts) top_scorer_form = 0 if best_scorer_id: cur.execute(""" SELECT COUNT(*) as goals FROM match_player_events mpe WHERE mpe.player_id = %s AND mpe.event_type = 'goal' AND mpe.match_id IN ( SELECT match_id FROM match_player_participation WHERE player_id = %s AND is_starting = true ORDER BY match_id DESC LIMIT 5 ) """, (best_scorer_id, best_scorer_id)) tsf_row = cur.fetchone() if tsf_row: top_scorer_form = tsf_row['goals'] or 0 # Squad continuity (overlap with previous match lineup) squad_continuity = 0.5 cur.execute(""" SELECT mpp.player_id FROM match_player_participation mpp JOIN matches m ON mpp.match_id = m.id WHERE mpp.team_id = %s AND mpp.is_starting = true AND m.status = 'FT' ORDER BY m.mst_utc DESC LIMIT 11 """, (team_id,)) prev_starters = {r['player_id'] for r in cur.fetchall()} if prev_starters: overlap = len(set(lineup) & prev_starters) squad_continuity = overlap / n_st result[f'{prefix}_lineup_goals_per90'] = round(g90, 3) result[f'{prefix}_lineup_assists_per90'] = round(a90, 3) result[f'{prefix}_squad_continuity'] = round(squad_continuity, 3) result[f'{prefix}_top_scorer_form'] = top_scorer_form result[f'{prefix}_avg_player_exp'] = round(total_exp / n_st, 1) result[f'{prefix}_goals_diversity'] = round(scorers_in_lineup / n_st, 3) return result except Exception as e: print(f"[PlayerPredictor] Player-level features failed: {e}") return defaults def get_1x2_modifier(self, prediction: PlayerPrediction) -> Dict[str, float]: """ Calculate 1X2 probability modifiers based on squad analysis. Returns modifiers to apply to base probabilities. squad_diff is in training scale (~-33 to +33), normalize to -1..+1. """ diff = prediction.squad_diff / 33.0 # training-scale normalisation diff = max(-1.0, min(1.0, diff)) # clamp return { "home_modifier": 1.0 + (diff * 0.3), # Up to +/-30% "away_modifier": 1.0 - (diff * 0.3), "draw_modifier": 1.0 - abs(diff) * 0.2 # Less draw if big diff } # Singleton _engine: Optional[PlayerPredictorEngine] = None def get_player_predictor() -> PlayerPredictorEngine: global _engine if _engine is None: _engine = PlayerPredictorEngine() return _engine if __name__ == "__main__": engine = get_player_predictor() print("\n🧪 Player Predictor Engine Test") print("=" * 50) pred = engine.predict( match_id="test_match", home_team_id="test_home", away_team_id="test_away" ) print(f"\n📊 Prediction:") for k, v in pred.to_dict().items(): print(f" {k}: {v}")