""" Sidelined Analyzer — Injury & Suspension Impact Calculator ========================================================== Parses sidelined JSON from live_matches and calculates position-weighted missing player impact using ACTUAL player statistics from the database (goals, assists, starting frequency). Senior ML Engineer Principle: No magic numbers — all weights from config. Data Quality: Cross-reference sidelined IDs with DB for real impact. """ from dataclasses import dataclass, field from typing import Dict, List, Optional, Any, Tuple import os import sys sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) try: import psycopg2 from psycopg2.extras import RealDictCursor except ImportError: psycopg2 = None from config.config_loader import get_config @dataclass class PlayerImpactDetail: """Impact detail for a single sidelined player.""" player_id: str player_name: str position: str impact_score: float db_goals: int = 0 db_assists: int = 0 db_starts: int = 0 db_rating: float = 0.0 # Calculated from DB stats is_key_player: bool = False adaptation_applied: bool = False @dataclass class SidelinedImpact: """Impact analysis of sidelined players for one team.""" total_sidelined: int = 0 impact_score: float = 0.0 # 0.0 - 1.0 (normalized) key_position_missing: bool = False # GK or 2+ same position missing key_players_missing: int = 0 # How many key players are missing position_breakdown: Dict[str, int] = field(default_factory=dict) player_details: List[PlayerImpactDetail] = field(default_factory=list) details: List[str] = field(default_factory=list) class SidelinedAnalyzer: """ Analyzes sidelined player data with DB-backed statistics. Impact formula per player: player_impact = position_weight × db_rating_factor × adaptation_factor Where: - position_weight: from config (GK most critical) - db_rating_factor: calculated from actual goals + assists + starts (not mackolik average!) - adaptation_factor: 1.0 if recent injury, discounted if team adapted (many matches missed) DB Query: Cross-references sidelined player IDs with match_player_events to get real goals/assists from recent matches. """ def __init__(self): self.config = get_config() self.conn = None self._load_config() self._connect_db() def _load_config(self): """Load all config values once at init.""" cfg = self.config self.position_weights = cfg.get("sidelined.position_weights", { "K": 0.35, "D": 0.20, "O": 0.25, "F": 0.30 }) self.max_rating = cfg.get("sidelined.max_rating", 10) self.adaptation_threshold = cfg.get("sidelined.adaptation_threshold", 10) self.adaptation_discount = cfg.get("sidelined.adaptation_discount", 0.5) self.goalkeeper_penalty = cfg.get("sidelined.goalkeeper_penalty", 0.15) self.confidence_boost = cfg.get("sidelined.confidence_boost", 10) self.max_impact = cfg.get("sidelined.max_impact", 0.85) self.key_player_threshold = cfg.get("sidelined.key_player_threshold", 3) self.recent_matches_lookback = cfg.get("sidelined.recent_matches_lookback", 15) @staticmethod def _safe_int(value: Any, default: int = 0) -> int: try: if value is None or value == "": return default return int(float(value)) except (TypeError, ValueError): return default @staticmethod def _safe_float(value: Any, default: float = 0.0) -> float: try: if value is None or value == "": return default return float(value) except (TypeError, ValueError): return default def _connect_db(self): """Lazy DB connection following existing engine patterns.""" if psycopg2 is None: return try: from data.db import get_clean_dsn self.conn = psycopg2.connect(get_clean_dsn()) except Exception as e: print(f"[SidelinedAnalyzer] DB connection failed: {e}") self.conn = None def _get_conn(self): """Get or reconnect DB connection.""" if self.conn is None or self.conn.closed: self._connect_db() return self.conn def _fetch_player_stats(self, player_ids: List[str]) -> Dict[str, Dict]: """ Fetch real player statistics from DB for given player IDs. Returns dict keyed by player_id with: goals: int, assists: int, starts: int, matches: int """ conn = self._get_conn() if not conn or not player_ids: return {} stats = {} try: cur = conn.cursor(cursor_factory=RealDictCursor) # 1. Goals from match_player_events + Assists via assist_player_id cur.execute(""" SELECT sub.player_id, SUM(sub.goals) AS goals, SUM(sub.assists) AS assists FROM ( -- Goals: player scored SELECT mpe.player_id, COUNT(*) AS goals, 0 AS assists FROM match_player_events mpe JOIN matches m ON mpe.match_id = m.id WHERE mpe.player_id = ANY(%s) AND mpe.event_type = 'goal' AND m.status = 'FT' GROUP BY mpe.player_id UNION ALL -- Assists: player assisted SELECT mpe.assist_player_id AS player_id, 0 AS goals, COUNT(*) AS assists FROM match_player_events mpe JOIN matches m ON mpe.match_id = m.id WHERE mpe.assist_player_id = ANY(%s) AND mpe.event_type = 'goal' AND m.status = 'FT' GROUP BY mpe.assist_player_id ) sub GROUP BY sub.player_id """, (player_ids, player_ids)) for row in cur.fetchall(): pid = row["player_id"] stats[pid] = { "goals": row["goals"] or 0, "assists": row["assists"] or 0, "starts": 0, "matches": 0 } # 2. Starting frequency from match_player_participation cur.execute(""" SELECT mpp.player_id, COUNT(*) AS total_matches, COUNT(*) FILTER (WHERE mpp.is_starting = true) AS starts FROM match_player_participation mpp JOIN matches m ON mpp.match_id = m.id WHERE mpp.player_id = ANY(%s) AND m.status = 'FT' GROUP BY mpp.player_id """, (player_ids,)) for row in cur.fetchall(): pid = row["player_id"] if pid not in stats: stats[pid] = {"goals": 0, "assists": 0, "starts": 0, "matches": 0} stats[pid]["starts"] = row["starts"] or 0 stats[pid]["matches"] = row["total_matches"] or 0 cur.close() except Exception as e: print(f"[SidelinedAnalyzer] DB query error: {e}") try: conn.rollback() except Exception: pass return stats def _calculate_db_rating(self, db_stats: Dict, position: str) -> float: """ Calculate player rating from DB statistics. Rating is 0.0 - 1.0, where 1.0 = absolute key player. Factors: - Goals (weighted by position: Forwards value more, Defenders less) - Assists - Starting frequency (regulars > squad players) """ def _to_float(value: Any, default: float = 0.0) -> float: try: return float(value) except (TypeError, ValueError): return default goals = _to_float(db_stats.get("goals", 0)) assists = _to_float(db_stats.get("assists", 0)) starts = _to_float(db_stats.get("starts", 0)) matches = _to_float(db_stats.get("matches", 0)) # Goal contribution weight by position # Forwards: goals matter most # Midfielders: balanced # Defenders: starts matter more than goals # Goalkeeper: starts are everything goal_weight = {"F": 0.5, "O": 0.35, "D": 0.15, "K": 0.05}.get(position, 0.25) assist_weight = {"F": 0.2, "O": 0.3, "D": 0.15, "K": 0.0}.get(position, 0.15) start_weight = {"F": 0.3, "O": 0.35, "D": 0.7, "K": 0.95}.get(position, 0.5) # Normalize each component to 0-1 # Goals: 5+ goals in recent matches = max goal_factor = min(goals / 5.0, 1.0) if goals > 0 else 0.0 # Assists: 4+ assists = max assist_factor = min(assists / 4.0, 1.0) if assists > 0 else 0.0 # Starts: 80%+ start rate = max regular start_rate = starts / max(matches, 1) start_factor = min(start_rate / 0.8, 1.0) rating = (goal_factor * goal_weight + assist_factor * assist_weight + start_factor * start_weight) return round(min(rating, 1.0), 4) def analyze(self, team_data: Optional[Dict[str, Any]]) -> SidelinedImpact: """ Analyze sidelined data for a single team using DB-backed stats. Args: team_data: dict with 'players' list and 'totalSidelined' count. Returns: SidelinedImpact with calculated impact score and breakdown. """ if not team_data or not isinstance(team_data, dict): return SidelinedImpact() players = team_data.get("players", []) if not players: return SidelinedImpact( total_sidelined=team_data.get("totalSidelined", 0) ) # Collect player IDs for batch DB query player_ids = [p.get("playerId", "") for p in players if p.get("playerId")] # Batch fetch DB stats (single query, not N+1) db_stats = self._fetch_player_stats(player_ids) if player_ids else {} total_impact = 0.0 position_counts: Dict[str, int] = {} player_details: List[PlayerImpactDetail] = [] details: List[str] = [] has_gk_missing = False key_players_count = 0 for player in players: if not isinstance(player, dict): continue pos = player.get("positionShort", "O") name = player.get("playerName", "Unknown") pid = player.get("playerId", "") matches_missed = self._safe_int(player.get("matchesMissed", 0), 0) player_type = player.get("type", "other") mackolik_avg = self._safe_float(player.get("average", 0), 0.0) position_counts[pos] = position_counts.get(pos, 0) + 1 if pos == "K": has_gk_missing = True # === Rating: DB first, mackolik fallback === p_db_stats = db_stats.get(pid, {}) if p_db_stats: # Use real DB stats db_rating = self._calculate_db_rating(p_db_stats, pos) else: # Fallback to mackolik average (normalized) db_rating = min(mackolik_avg / self.max_rating, 1.0) if self.max_rating > 0 else 0.3 db_rating = max(db_rating, 0.15) # Minimum floor # Key player check is_key = db_rating >= 0.5 or ( self._safe_int(p_db_stats.get("goals", 0), 0) >= self.key_player_threshold ) if is_key: key_players_count += 1 # === Impact Calculation === pos_weight = self.position_weights.get(pos, 0.20) # Rating factor: higher rated = bigger loss rating_factor = max(db_rating, 0.15) # Even unknown players have minimum impact # Adaptation: team has coped if player missed many matches adapted = matches_missed >= self.adaptation_threshold adapt_factor = self.adaptation_discount if adapted else 1.0 # Type factor type_factor = 1.0 if player_type == "injury" else 0.8 player_impact = pos_weight * rating_factor * adapt_factor * type_factor total_impact += player_impact detail = PlayerImpactDetail( player_id=pid, player_name=name, position=pos, impact_score=round(player_impact, 4), db_goals=p_db_stats.get("goals", 0), db_assists=p_db_stats.get("assists", 0), db_starts=p_db_stats.get("starts", 0), db_rating=db_rating, is_key_player=is_key, adaptation_applied=adapted ) player_details.append(detail) db_info = f"G:{detail.db_goals} A:{detail.db_assists} S:{detail.db_starts}" if p_db_stats else "no DB data" details.append( f"{name} ({pos}, db_rating:{db_rating:.2f}, {db_info}) → impact:{player_impact:.3f}" + (" ⭐ KEY" if is_key else "") + (f" [adapted, {matches_missed} missed]" if adapted else "") ) # GK penalty bonus if has_gk_missing: total_impact += self.goalkeeper_penalty key_position_missing = has_gk_missing or any(v >= 2 for v in position_counts.values()) # Normalize to 0-1 range normalization_cap = 1.5 normalized_impact = min(total_impact / normalization_cap, self.max_impact) return SidelinedImpact( total_sidelined=len(players), impact_score=round(normalized_impact, 4), key_position_missing=key_position_missing, key_players_missing=key_players_count, position_breakdown=position_counts, player_details=player_details, details=details ) def analyze_match(self, sidelined_json: Optional[Dict[str, Any]]) -> Tuple[SidelinedImpact, SidelinedImpact]: """ Analyze sidelined data for both teams. Returns: (home_impact, away_impact) """ if not sidelined_json or not isinstance(sidelined_json, dict): return SidelinedImpact(), SidelinedImpact() home_impact = self.analyze(sidelined_json.get("homeTeam")) away_impact = self.analyze(sidelined_json.get("awayTeam")) return home_impact, away_impact # Singleton _analyzer: Optional[SidelinedAnalyzer] = None def get_sidelined_analyzer() -> SidelinedAnalyzer: global _analyzer if _analyzer is None: _analyzer = SidelinedAnalyzer() return _analyzer