main

2026-05-17 02:17:22 +03:00
parent 17ace9bd12
commit 94c7a4481a
53 changed files with 29602 additions and 7832 deletions
@@ -24,32 +24,29 @@ class PlayerPrediction:
    extract_training_data.py so that inference values match the
    distribution the model was trained on (~3-36 range).
    """
-    home_squad_quality: float = 12.0   # training-scale composite (~3-36)
+    home_squad_quality: float = 12.0
    away_squad_quality: float = 12.0
-    squad_diff: float = 0.0            # home - away (training scale)
+    squad_diff: float = 0.0
    home_key_players: int = 0
    away_key_players: int = 0
-    home_missing_impact: float = 0.0   # 0-1, how much weaker due to missing players
+    home_missing_impact: float = 0.0
    away_missing_impact: float = 0.0
-    home_goals_form: int = 0           # Goals in last 5 matches
+    home_goals_form: int = 0
    away_goals_form: int = 0
+    home_lineup_goals_per90: float = 0.0
+    away_lineup_goals_per90: float = 0.0
+    home_lineup_assists_per90: float = 0.0
+    away_lineup_assists_per90: float = 0.0
+    home_squad_continuity: float = 0.5
+    away_squad_continuity: float = 0.5
+    home_top_scorer_form: int = 0
+    away_top_scorer_form: int = 0
+    home_avg_player_exp: float = 0.0
+    away_avg_player_exp: float = 0.0
+    home_goals_diversity: float = 0.0
+    away_goals_diversity: float = 0.0
    lineup_available: bool = False
    confidence: float = 0.0
-    
-    def to_dict(self) -> dict:
-        return {
-            "home_squad_quality": round(self.home_squad_quality, 1),
-            "away_squad_quality": round(self.away_squad_quality, 1),
-            "squad_diff": round(self.squad_diff, 1),
-            "home_key_players": self.home_key_players,
-            "away_key_players": self.away_key_players,
-            "home_missing_impact": round(self.home_missing_impact, 2),
-            "away_missing_impact": round(self.away_missing_impact, 2),
-            "home_goals_form": self.home_goals_form,
-            "away_goals_form": self.away_goals_form,
-            "lineup_available": self.lineup_available,
-            "confidence": round(self.confidence, 1)
-        }


 class PlayerPredictorEngine:
@@ -90,8 +87,9 @@ class PlayerPredictorEngine:
        """
        
        # Get squad features
+        home_analysis = None
+        away_analysis = None
        if home_lineup and away_lineup:
-            # Use provided lineups (for live matches)
            home_analysis = self.squad_engine.analyze_squad_from_list(
                home_lineup, home_team_id
            )
@@ -99,7 +97,6 @@ class PlayerPredictorEngine:
                away_lineup, away_team_id
            )
            lineup_available = True
-            # Build features dict from analysis objects
            features = {
                "home_starting_11": home_analysis.starting_count or 11,
                "home_goals_last_5": home_analysis.total_goals_last_5,
@@ -113,7 +110,6 @@ class PlayerPredictorEngine:
                "away_forwards": away_analysis.forward_count or 2,
            }
        elif match_id:
-            # Try to get from database
            try:
                features = self.squad_engine.get_features(
                    match_id, home_team_id, away_team_id
@@ -132,58 +128,42 @@ class PlayerPredictorEngine:
                home_team_id, away_team_id
            )
            lineup_available = False
-        
-        # Extract features
+
        home_goals = int(features.get("home_goals_last_5", 0))
        away_goals = int(features.get("away_goals_last_5", 0))
        home_key = int(features.get("home_key_players", 0))
        away_key = int(features.get("away_key_players", 0))
-        home_assists = features.get("home_assists_last_5", 0)
-        away_assists = features.get("away_assists_last_5", 0)
        home_starting = features.get("home_starting_11", 11)
        away_starting = features.get("away_starting_11", 11)
        home_fwd = features.get("home_forwards", 2)
        away_fwd = features.get("away_forwards", 2)
-        
-        # Calculate squad quality — MUST match extract_training_data.py formula
-        # Formula: starting_count * 0.3 + goals * 2.0 + assists * 1.0
-        #          + key_players * 3.0 + fwd_count * 1.5
-        # Typical range: ~3 – 36 (model trained on this distribution)
-        home_quality = (
-            home_starting * 0.3 +
-            home_goals * 2.0 +
-            home_assists * 1.0 +
-            home_key * 3.0 +
-            home_fwd * 1.5
-        )
-        away_quality = (
-            away_starting * 0.3 +
-            away_goals * 2.0 +
-            away_assists * 1.0 +
-            away_key * 3.0 +
-            away_fwd * 1.5
-        )
-        
-        # Squad difference
+
+        # Squad quality — matches V25 extract_training_data.py:579
+        home_quality = home_starting * 0.3 + home_key * 3.0 + home_fwd * 1.5
+        away_quality = away_starting * 0.3 + away_key * 3.0 + away_fwd * 1.5
        squad_diff = home_quality - away_quality
-        
+
        # Missing player impact
-        # Priority: sidelined data (position-weighted) > lineup count (basic)
        if sidelined_data:
            home_impact, away_impact = self.sidelined_analyzer.analyze_match(sidelined_data)
            home_missing = min(1.0, max(0.0, home_impact.impact_score))
            away_missing = min(1.0, max(0.0, away_impact.impact_score))
            sidelined_available = True
        else:
-            # Fallback: basic lineup count method
            expected_xi = 11
            actual_home_xi = features.get("home_starting_11", 11)
            actual_away_xi = features.get("away_starting_11", 11)
            home_missing = (expected_xi - actual_home_xi) / expected_xi if actual_home_xi < expected_xi else 0
            away_missing = (expected_xi - actual_away_xi) / expected_xi if actual_away_xi < expected_xi else 0
            sidelined_available = False
-        
-        # Confidence: more data sources = higher confidence
+
+        # Player-level features (matches extract_training_data.py:594-650)
+        player_feats = self._compute_player_level_features(
+            home_lineup or [], away_lineup or [],
+            home_team_id, away_team_id,
+            home_analysis, away_analysis,
+        )
+
        confidence = 70.0 if lineup_available else 35.0
        if home_goals + away_goals > 10:
            confidence += 15
@@ -191,7 +171,7 @@ class PlayerPredictorEngine:
            confidence += self.sidelined_analyzer.config.get("sidelined.confidence_boost", 10)
        if not lineup_available:
            confidence -= 5.0
-        
+
        return PlayerPrediction(
            home_squad_quality=home_quality,
            away_squad_quality=away_quality,
@@ -202,9 +182,137 @@ class PlayerPredictorEngine:
            away_missing_impact=away_missing,
            home_goals_form=home_goals,
            away_goals_form=away_goals,
+            home_lineup_goals_per90=player_feats['home_lineup_goals_per90'],
+            away_lineup_goals_per90=player_feats['away_lineup_goals_per90'],
+            home_lineup_assists_per90=player_feats['home_lineup_assists_per90'],
+            away_lineup_assists_per90=player_feats['away_lineup_assists_per90'],
+            home_squad_continuity=player_feats['home_squad_continuity'],
+            away_squad_continuity=player_feats['away_squad_continuity'],
+            home_top_scorer_form=player_feats['home_top_scorer_form'],
+            away_top_scorer_form=player_feats['away_top_scorer_form'],
+            home_avg_player_exp=player_feats['home_avg_player_exp'],
+            away_avg_player_exp=player_feats['away_avg_player_exp'],
+            home_goals_diversity=player_feats['home_goals_diversity'],
+            away_goals_diversity=player_feats['away_goals_diversity'],
            lineup_available=lineup_available,
            confidence=max(5.0, confidence)
        )
+
+    def _compute_player_level_features(  # Query per-side player-level lineup features; any failure yields `defaults`.
+        self,
+        home_lineup: List[str],  # home starters' player ids; an empty list makes that side use its defaults
+        away_lineup: List[str],  # away starters' player ids; an empty list makes that side use its defaults
+        home_team_id: str,  # only used by the squad-continuity query
+        away_team_id: str,  # only used by the squad-continuity query
+        home_analysis,  # NOTE(review): never read in this method
+        away_analysis,  # NOTE(review): never read in this method
+    ) -> Dict[str, float]:  # twelve keys, six per side; the *_top_scorer_form values are ints
+        defaults = {  # returned when there is no connection or on any error; also the per-side fallback
+            'home_lineup_goals_per90': 0.0, 'away_lineup_goals_per90': 0.0,
+            'home_lineup_assists_per90': 0.0, 'away_lineup_assists_per90': 0.0,
+            'home_squad_continuity': 0.5, 'away_squad_continuity': 0.5,
+            'home_top_scorer_form': 0, 'away_top_scorer_form': 0,
+            'home_avg_player_exp': 0.0, 'away_avg_player_exp': 0.0,
+            'home_goals_diversity': 0.0, 'away_goals_diversity': 0.0,
+        }
+        conn = self.squad_engine.get_conn()  # NOTE(review): never closed or released here; confirm squad_engine owns it
+        if conn is None:
+            return defaults  # no database connection: skip every query
+
+        try:  # any exception raised below ends in the handler that returns `defaults`
+            from psycopg2.extras import RealDictCursor  # rows are read by column name below (row['starts'], ...)
+            result = {}
+            for prefix, lineup, team_id in [  # same computation for both sides; `prefix` becomes the key prefix
+                ('home', home_lineup, home_team_id),
+                ('away', away_lineup, away_team_id),
+            ]:
+                if not lineup:  # no lineup for this side: copy its six default values and move on
+                    for k in ('lineup_goals_per90', 'lineup_assists_per90',
+                              'squad_continuity', 'top_scorer_form',
+                              'avg_player_exp', 'goals_diversity'):
+                        result[f'{prefix}_{k}'] = defaults[f'{prefix}_{k}']
+                    continue
+
+                g90, a90, total_exp = 0.0, 0.0, 0  # running sums over the lineup: goals/starts, assists/starts, starts
+                best_scorer_total, best_scorer_id = 0, None  # lineup player with the highest `goals` value so far
+                scorers_in_lineup = 0  # lineup players with at least one counted goal
+
+                with conn.cursor(cursor_factory=RealDictCursor) as cur:  # one cursor per side, reused for all queries below
+                    for pid in lineup:  # one aggregate query per player
+                        cur.execute("""
+                            SELECT
+                                COUNT(*) as starts,
+                                COALESCE(SUM(CASE WHEN e.event_type = 'goal'
+                                    AND (e.event_subtype IS NULL OR e.event_subtype NOT ILIKE '%%penaltı kaçırma%%')
+                                    THEN 1 ELSE 0 END), 0) as goals,
+                                COALESCE((SELECT COUNT(*) FROM match_player_events
+                                    WHERE assist_player_id = %s), 0) as assists
+                            FROM match_player_participation mpp
+                            LEFT JOIN match_player_events e
+                                ON e.match_id = mpp.match_id AND e.player_id = mpp.player_id
+                            WHERE mpp.player_id = %s AND mpp.is_starting = true
+                        """, (pid, pid))  # 1st pid -> assist subquery, 2nd -> mpp.player_id; '%%' is a literal '%' for ILIKE
+                        row = cur.fetchone()  # NOTE(review): COUNT(*) counts joined rows, so extra event rows in a match raise `starts`; confirm intended
+                        if not row or not row['starts']:  # no recorded starts: player adds nothing to the sums
+                            continue
+                        starts = row['starts']
+                        goals = row['goals'] or 0
+                        assists = row['assists'] or 0  # subquery counts every event row naming pid as assist_player_id, not only starts
+                        g90 += goals / starts  # goals per start; "per90" presumably equates one start with 90 minutes (TODO confirm)
+                        a90 += assists / starts
+                        total_exp += starts
+                        if goals > 0:
+                            scorers_in_lineup += 1
+                        if goals > best_scorer_total:  # strict '>' keeps the earlier lineup entry on ties
+                            best_scorer_total = goals
+                            best_scorer_id = pid
+
+                    n_st = len(lineup) or 1  # divisor for the per-side ratios; counts players skipped above too
+
+                    # Top scorer recent form (goals in last 5 starts)
+                    top_scorer_form = 0
+                    if best_scorer_id:  # stays None when no lineup player has a counted goal
+                        cur.execute("""
+                            SELECT COUNT(*) as goals
+                            FROM match_player_events mpe
+                            WHERE mpe.player_id = %s AND mpe.event_type = 'goal'
+                              AND mpe.match_id IN (
+                                  SELECT match_id FROM match_player_participation
+                                  WHERE player_id = %s AND is_starting = true
+                                  ORDER BY match_id DESC LIMIT 5
+                              )
+                        """, (best_scorer_id, best_scorer_id))  # "last 5" = the five starts with the highest match_id
+                        tsf_row = cur.fetchone()
+                        if tsf_row:
+                            top_scorer_form = tsf_row['goals'] or 0
+
+                    # Squad continuity (overlap with previous match lineup)
+                    squad_continuity = 0.5  # kept when the query below returns no rows
+                    cur.execute("""
+                        SELECT mpp.player_id
+                        FROM match_player_participation mpp
+                        JOIN matches m ON mpp.match_id = m.id
+                        WHERE mpp.team_id = %s AND mpp.is_starting = true
+                          AND m.status = 'FT'
+                        ORDER BY m.mst_utc DESC
+                        LIMIT 11
+                    """, (team_id,))  # the 11 most recent starter rows in 'FT' matches; not necessarily all from one match
+                    prev_starters = {r['player_id'] for r in cur.fetchall()}
+                    if prev_starters:
+                        overlap = len(set(lineup) & prev_starters)  # NOTE(review): assumes lineup ids and player_id values share a type; confirm
+                        squad_continuity = overlap / n_st
+
+                result[f'{prefix}_lineup_goals_per90'] = round(g90, 3)  # sum of per-player goals/starts, not an average
+                result[f'{prefix}_lineup_assists_per90'] = round(a90, 3)  # sum of per-player assists/starts
+                result[f'{prefix}_squad_continuity'] = round(squad_continuity, 3)
+                result[f'{prefix}_top_scorer_form'] = top_scorer_form
+                result[f'{prefix}_avg_player_exp'] = round(total_exp / n_st, 1)  # mean `starts` per lineup entry
+                result[f'{prefix}_goals_diversity'] = round(scorers_in_lineup / n_st, 3)  # share of the lineup with a counted goal
+
+            return result
+        except Exception as e:  # NOTE(review): no conn.rollback(); psycopg2 leaves the transaction aborted after a failed statement; confirm conn is not reused
+            print(f"[PlayerPredictor] Player-level features failed: {e}")
+            return defaults  # partial results for either side are discarded
    
    def get_1x2_modifier(self, prediction: PlayerPrediction) -> Dict[str, float]:
        """