first (part 2: other directories)

2026-04-16 15:11:25 +03:00
parent 7814e0bc6b
commit 2f0b85a0c7
203 changed files with 59989 additions and 0 deletions
@@ -0,0 +1,408 @@
+"""
+Sidelined Analyzer — Injury & Suspension Impact Calculator
+==========================================================
+Parses sidelined JSON from live_matches and calculates
+position-weighted missing player impact using ACTUAL player
+statistics from the database (goals, assists, starting frequency).
+
+Senior ML Engineer Principle: No magic numbers — all weights from config.
+Data Quality: Cross-reference sidelined IDs with DB for real impact.
+"""
+
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional, Any, Tuple
+
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+try:
+    import psycopg2
+    from psycopg2.extras import RealDictCursor
+except ImportError:
+    psycopg2 = None
+
+from config.config_loader import get_config
+
+
@dataclass
class PlayerImpactDetail:
    """Per-player record describing how much one sidelined player costs a team.

    Attributes:
        player_id: Identifier of the player as supplied in the sidelined feed.
        player_name: Display name of the player.
        position: Short position code (the analyzer uses "K", "D", "O", "F").
        impact_score: Impact contribution computed for this single player.
        db_goals: Goals found for the player in the database (0 if none).
        db_assists: Assists found for the player in the database (0 if none).
        db_starts: Matches started according to the database (0 if none).
        db_rating: Rating derived for the player by the analyzer.
        is_key_player: True when the analyzer classified the player as key.
        adaptation_applied: True when the adaptation discount was applied.
    """

    # Required identity / result fields (no defaults, positional order matters).
    player_id: str
    player_name: str
    position: str
    impact_score: float

    # Database-backed statistics; they stay at zero when no DB row exists.
    db_goals: int = 0
    db_assists: int = 0
    db_starts: int = 0
    db_rating: float = 0.0

    # Classification flags filled in by the analyzer.
    is_key_player: bool = False
    adaptation_applied: bool = False
+
+
@dataclass
class SidelinedImpact:
    """Aggregated effect of all sidelined players on a single team.

    Attributes:
        total_sidelined: Number of sidelined players reported for the team.
        impact_score: Normalized team-level impact (0.0 - 1.0).
        key_position_missing: True when a goalkeeper is out or two or more
            players of the same position are missing.
        key_players_missing: Count of missing players flagged as key players.
        position_breakdown: Missing-player count per position code.
        player_details: One PlayerImpactDetail per analyzed player.
        details: Human-readable summary line per analyzed player.
    """

    # Scalar summary values.
    total_sidelined: int = 0
    impact_score: float = 0.0
    key_position_missing: bool = False
    key_players_missing: int = 0

    # Containers use default_factory so instances never share mutable state.
    position_breakdown: Dict[str, int] = field(default_factory=dict)
    player_details: List[PlayerImpactDetail] = field(default_factory=list)
    details: List[str] = field(default_factory=list)
+
+
class SidelinedAnalyzer:
    """
    Analyzes sidelined player data with DB-backed statistics.

    Impact formula per player:
        player_impact = position_weight × rating_factor × adaptation_factor × type_factor

    Where:
        - position_weight: from config (GK most critical)
        - rating_factor: rating calculated from actual goals + assists + starts
          in the DB; falls back to the feed's normalized "average" value when
          the player has no DB rows. Never lower than the minimum floor.
        - adaptation_factor: 1.0 normally, discounted once the player has
          missed at least `adaptation_threshold` matches (team has adapted)
        - type_factor: 1.0 for type "injury", reduced for every other type

    The team score is the sum of player impacts, plus a flat goalkeeper
    penalty when a goalkeeper is missing, divided by a normalization cap and
    clamped to `max_impact`.

    DB Query: cross-references sidelined player IDs with match_player_events
    and match_player_participation for matches whose status is 'FT'.

    NOTE(review): `recent_matches_lookback` is loaded from config but the
    queries below are not limited to recent matches — confirm whether a
    lookback window was intended.
    """

    # Lowest rating any sidelined player is given (unknown players still matter).
    _MIN_RATING = 0.15
    # Rating used for the feed fallback when `max_rating` is not positive.
    _FALLBACK_RATING = 0.3
    # Rating at or above which a player counts as a key player.
    _KEY_PLAYER_RATING = 0.5
    # Position weight used for position codes missing from the config.
    _DEFAULT_POSITION_WEIGHT = 0.20
    # Multiplier applied to absences whose type is not "injury".
    _NON_INJURY_FACTOR = 0.8
    # Divisor that maps the summed raw impact onto the 0-1 range.
    _NORMALIZATION_CAP = 1.5

    # Goals scored and assists given per player, finished ('FT') matches only.
    _GOALS_ASSISTS_SQL = """
                SELECT 
                    sub.player_id,
                    SUM(sub.goals) AS goals,
                    SUM(sub.assists) AS assists
                FROM (
                    -- Goals: player scored
                    SELECT mpe.player_id, 
                           COUNT(*) AS goals, 
                           0 AS assists
                    FROM match_player_events mpe
                    JOIN matches m ON mpe.match_id = m.id
                    WHERE mpe.player_id = ANY(%s)
                      AND mpe.event_type = 'goal'
                      AND m.status = 'FT'
                    GROUP BY mpe.player_id
                    
                    UNION ALL
                    
                    -- Assists: player assisted
                    SELECT mpe.assist_player_id AS player_id,
                           0 AS goals,
                           COUNT(*) AS assists
                    FROM match_player_events mpe
                    JOIN matches m ON mpe.match_id = m.id
                    WHERE mpe.assist_player_id = ANY(%s)
                      AND mpe.event_type = 'goal'
                      AND m.status = 'FT'
                    GROUP BY mpe.assist_player_id
                ) sub
                GROUP BY sub.player_id
            """

    # Appearances and starts per player, finished ('FT') matches only.
    _PARTICIPATION_SQL = """
                SELECT 
                    mpp.player_id,
                    COUNT(*) AS total_matches,
                    COUNT(*) FILTER (WHERE mpp.is_starting = true) AS starts
                FROM match_player_participation mpp
                JOIN matches m ON mpp.match_id = m.id
                WHERE mpp.player_id = ANY(%s)
                  AND m.status = 'FT'
                GROUP BY mpp.player_id
            """

    def __init__(self):
        """Load configuration and open the initial DB connection (best effort)."""
        self.config = get_config()
        self.conn = None
        self._load_config()
        self._connect_db()

    def _load_config(self):
        """Load all config values once at init."""
        cfg = self.config
        self.position_weights = cfg.get("sidelined.position_weights", {
            "K": 0.35, "D": 0.20, "O": 0.25, "F": 0.30
        })
        self.max_rating = cfg.get("sidelined.max_rating", 10)
        self.adaptation_threshold = cfg.get("sidelined.adaptation_threshold", 10)
        self.adaptation_discount = cfg.get("sidelined.adaptation_discount", 0.5)
        self.goalkeeper_penalty = cfg.get("sidelined.goalkeeper_penalty", 0.15)
        # confidence_boost and recent_matches_lookback are not read inside this
        # class; they are kept as attributes for any external readers.
        self.confidence_boost = cfg.get("sidelined.confidence_boost", 10)
        self.max_impact = cfg.get("sidelined.max_impact", 0.85)
        self.key_player_threshold = cfg.get("sidelined.key_player_threshold", 3)
        self.recent_matches_lookback = cfg.get("sidelined.recent_matches_lookback", 15)

    @staticmethod
    def _safe_int(value: Any, default: int = 0) -> int:
        """Convert `value` to int (via float), returning `default` on failure.

        None, the empty string, unparsable input, NaN and infinities all
        yield `default` instead of raising.
        """
        try:
            if value is None or value == "":
                return default
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int(float("inf")) — treat like any other bad value.
            return default

    @staticmethod
    def _safe_float(value: Any, default: float = 0.0) -> float:
        """Convert `value` to float, returning `default` for None, "" or bad input."""
        try:
            if value is None or value == "":
                return default
            return float(value)
        except (TypeError, ValueError):
            return default

    def _connect_db(self):
        """Open a DB connection; leaves `self.conn` as None when unavailable.

        Does nothing when psycopg2 could not be imported. Any connection
        failure is reported on stdout and swallowed so the analyzer can keep
        working from feed data alone.
        """
        if psycopg2 is None:
            return
        try:
            from data.db import get_clean_dsn
            self.conn = psycopg2.connect(get_clean_dsn())
        except Exception as e:
            print(f"[SidelinedAnalyzer] DB connection failed: {e}")
            self.conn = None

    def _get_conn(self):
        """Return the current connection, reconnecting if it is missing or closed."""
        if self.conn is None or self.conn.closed:
            self._connect_db()
        return self.conn

    def _fetch_player_stats(self, player_ids: List[str]) -> Dict[str, Dict]:
        """
        Fetch real player statistics from DB for given player IDs.

        Args:
            player_ids: IDs to look up; an empty list short-circuits without
                touching the database.

        Returns:
            Dict keyed by the player_id value returned by the DB, each with
            int values for: goals, assists, starts, matches. Empty when there
            is no connection; possibly partial when a query fails midway.
        """
        if not player_ids:
            return {}
        conn = self._get_conn()
        if not conn:
            return {}

        stats: Dict[str, Dict] = {}
        try:
            # The context manager closes the cursor even if a query raises.
            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                # 1. Goals from match_player_events + assists via assist_player_id
                cur.execute(self._GOALS_ASSISTS_SQL, (player_ids, player_ids))
                for row in cur.fetchall():
                    # SUM() yields NUMERIC (Decimal in psycopg2); store plain ints.
                    stats[row["player_id"]] = {
                        "goals": self._safe_int(row["goals"]),
                        "assists": self._safe_int(row["assists"]),
                        "starts": 0,
                        "matches": 0,
                    }

                # 2. Starting frequency from match_player_participation
                cur.execute(self._PARTICIPATION_SQL, (player_ids,))
                for row in cur.fetchall():
                    entry = stats.setdefault(
                        row["player_id"],
                        {"goals": 0, "assists": 0, "starts": 0, "matches": 0},
                    )
                    entry["starts"] = self._safe_int(row["starts"])
                    entry["matches"] = self._safe_int(row["total_matches"])
        except Exception as e:
            print(f"[SidelinedAnalyzer] DB query error: {e}")
        finally:
            # Read-only work: always end the implicit transaction so the
            # long-lived connection is not left idle in a transaction (success)
            # or stuck in an aborted one (failure).
            try:
                conn.rollback()
            except Exception:
                # Best effort; a dead connection is replaced by _get_conn later.
                pass

        return stats

    def _calculate_db_rating(self, db_stats: Dict, position: str) -> float:
        """
        Calculate player rating from DB statistics.

        Rating is 0.0 - 1.0, where 1.0 = absolute key player.

        Factors:
            - Goals (weighted by position: Forwards value more, Defenders less)
            - Assists
            - Starting frequency (regulars > squad players)

        Args:
            db_stats: dict with optional "goals", "assists", "starts", "matches";
                missing or non-numeric values count as 0.
            position: short position code; unknown codes use neutral weights.

        Returns:
            Weighted rating rounded to 4 decimals and capped at 1.0.
        """
        goals = self._safe_float(db_stats.get("goals", 0))
        assists = self._safe_float(db_stats.get("assists", 0))
        starts = self._safe_float(db_stats.get("starts", 0))
        matches = self._safe_float(db_stats.get("matches", 0))

        # Goal contribution weight by position
        # Forwards: goals matter most
        # Midfielders: balanced
        # Defenders: starts matter more than goals
        # Goalkeeper: starts are everything
        goal_weight = {"F": 0.5, "O": 0.35, "D": 0.15, "K": 0.05}.get(position, 0.25)
        assist_weight = {"F": 0.2, "O": 0.3, "D": 0.15, "K": 0.0}.get(position, 0.15)
        start_weight = {"F": 0.3, "O": 0.35, "D": 0.7, "K": 0.95}.get(position, 0.5)

        # Normalize each component to 0-1
        # Goals: 5+ goals = max
        goal_factor = min(goals / 5.0, 1.0) if goals > 0 else 0.0
        # Assists: 4+ assists = max
        assist_factor = min(assists / 4.0, 1.0) if assists > 0 else 0.0
        # Starts: 80%+ start rate = max regular; max(..., 1) avoids division by zero
        start_rate = starts / max(matches, 1)
        start_factor = min(start_rate / 0.8, 1.0)

        rating = (goal_factor * goal_weight +
                  assist_factor * assist_weight +
                  start_factor * start_weight)

        return round(min(rating, 1.0), 4)

    @staticmethod
    def _extract_players(team_data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Return the well-formed (dict) entries of team_data['players'].

        A missing, null or non-list 'players' value yields an empty list, and
        non-dict entries inside the list are dropped, so malformed feed data
        cannot crash the analysis.
        """
        raw_players = team_data.get("players", [])
        if not isinstance(raw_players, (list, tuple)):
            return []
        return [entry for entry in raw_players if isinstance(entry, dict)]

    def _resolve_rating(self, p_db_stats: Dict, position: str, mackolik_avg: float) -> float:
        """Pick the player's rating: DB stats first, feed average as fallback.

        With DB stats the rating comes from `_calculate_db_rating`. Without
        them the feed average is normalized by `max_rating` (or a fixed
        fallback when `max_rating` is not positive) and floored at the
        minimum rating.
        """
        if p_db_stats:
            return self._calculate_db_rating(p_db_stats, position)
        if self.max_rating > 0:
            rating = min(mackolik_avg / self.max_rating, 1.0)
        else:
            rating = self._FALLBACK_RATING
        return max(rating, self._MIN_RATING)

    def _player_impact(self, position: str, rating: float,
                       matches_missed: int, player_type: str) -> Tuple[float, bool]:
        """Compute one player's raw impact.

        Returns:
            (impact, adapted) where `adapted` is True when the adaptation
            discount was applied because enough matches were already missed.
        """
        pos_weight = self.position_weights.get(position, self._DEFAULT_POSITION_WEIGHT)
        # Even unknown players have minimum impact.
        rating_factor = max(rating, self._MIN_RATING)
        # Team has coped if the player has already missed many matches.
        adapted = matches_missed >= self.adaptation_threshold
        adapt_factor = self.adaptation_discount if adapted else 1.0
        type_factor = 1.0 if player_type == "injury" else self._NON_INJURY_FACTOR
        return pos_weight * rating_factor * adapt_factor * type_factor, adapted

    def analyze(self, team_data: Optional[Dict[str, Any]]) -> "SidelinedImpact":
        """
        Analyze sidelined data for a single team using DB-backed stats.

        Args:
            team_data: dict with 'players' list and 'totalSidelined' count.
                Entries of 'players' that are not dicts are ignored.

        Returns:
            SidelinedImpact with calculated impact score and breakdown. When
            there is no usable player entry, only `total_sidelined` is filled
            (from 'totalSidelined'); otherwise `total_sidelined` is the number
            of analyzed entries.
        """
        if not team_data or not isinstance(team_data, dict):
            return SidelinedImpact()

        players = self._extract_players(team_data)
        if not players:
            return SidelinedImpact(
                total_sidelined=team_data.get("totalSidelined", 0)
            )

        # Batch fetch DB stats for every player that carries an ID (no N+1).
        player_ids = [p["playerId"] for p in players if p.get("playerId")]
        db_stats = self._fetch_player_stats(player_ids) if player_ids else {}

        total_impact = 0.0
        position_counts: Dict[str, int] = {}
        player_details: List[PlayerImpactDetail] = []
        details: List[str] = []
        has_gk_missing = False
        key_players_count = 0

        for player in players:
            pos = player.get("positionShort", "O")
            name = player.get("playerName", "Unknown")
            pid = player.get("playerId", "")
            matches_missed = self._safe_int(player.get("matchesMissed", 0), 0)
            player_type = player.get("type", "other")
            mackolik_avg = self._safe_float(player.get("average", 0), 0.0)

            position_counts[pos] = position_counts.get(pos, 0) + 1
            if pos == "K":
                has_gk_missing = True

            # === Rating: DB first, mackolik fallback ===
            p_db_stats = db_stats.get(pid, {})
            db_rating = self._resolve_rating(p_db_stats, pos, mackolik_avg)

            # Key player: high rating, or enough goals on record.
            is_key = db_rating >= self._KEY_PLAYER_RATING or (
                self._safe_int(p_db_stats.get("goals", 0), 0) >= self.key_player_threshold
            )
            if is_key:
                key_players_count += 1

            # === Impact Calculation ===
            player_impact, adapted = self._player_impact(
                pos, db_rating, matches_missed, player_type
            )
            total_impact += player_impact

            detail = PlayerImpactDetail(
                player_id=pid,
                player_name=name,
                position=pos,
                impact_score=round(player_impact, 4),
                db_goals=p_db_stats.get("goals", 0),
                db_assists=p_db_stats.get("assists", 0),
                db_starts=p_db_stats.get("starts", 0),
                db_rating=db_rating,
                is_key_player=is_key,
                adaptation_applied=adapted
            )
            player_details.append(detail)

            db_info = f"G:{detail.db_goals} A:{detail.db_assists} S:{detail.db_starts}" if p_db_stats else "no DB data"
            details.append(
                f"{name} ({pos}, db_rating:{db_rating:.2f}, {db_info}) → impact:{player_impact:.3f}"
                + (" ⭐ KEY" if is_key else "")
                + (f" [adapted, {matches_missed} missed]" if adapted else "")
            )

        # Flat extra penalty when a goalkeeper is among the missing players.
        if has_gk_missing:
            total_impact += self.goalkeeper_penalty

        key_position_missing = has_gk_missing or any(v >= 2 for v in position_counts.values())

        # Normalize to 0-1 range and clamp to the configured ceiling.
        normalized_impact = min(total_impact / self._NORMALIZATION_CAP, self.max_impact)

        return SidelinedImpact(
            total_sidelined=len(players),
            impact_score=round(normalized_impact, 4),
            key_position_missing=key_position_missing,
            key_players_missing=key_players_count,
            position_breakdown=position_counts,
            player_details=player_details,
            details=details
        )

    def analyze_match(self, sidelined_json: Optional[Dict[str, Any]]) -> Tuple["SidelinedImpact", "SidelinedImpact"]:
        """
        Analyze sidelined data for both teams.

        Args:
            sidelined_json: dict with optional 'homeTeam' and 'awayTeam'
                entries, each in the shape accepted by `analyze`.

        Returns:
            (home_impact, away_impact); two empty impacts when the input is
            missing or not a dict.
        """
        if not sidelined_json or not isinstance(sidelined_json, dict):
            return SidelinedImpact(), SidelinedImpact()

        home_impact = self.analyze(sidelined_json.get("homeTeam"))
        away_impact = self.analyze(sidelined_json.get("awayTeam"))
        return home_impact, away_impact
+
+
# Process-wide shared instance, created on first request.
_analyzer: Optional[SidelinedAnalyzer] = None


def get_sidelined_analyzer() -> SidelinedAnalyzer:
    """Return the shared SidelinedAnalyzer, building it on the first call."""
    global _analyzer
    if _analyzer is not None:
        return _analyzer
    _analyzer = SidelinedAnalyzer()
    return _analyzer