# Repository viewer metadata: 409 lines, 15 KiB, Python, executable file.
"""
Sidelined Analyzer — Injury & Suspension Impact Calculator
==========================================================
Parses sidelined JSON from live_matches and calculates
position-weighted missing player impact using ACTUAL player
statistics from the database (goals, assists, starting frequency).

Senior ML Engineer Principle: No magic numbers — all weights from config.
Data Quality: Cross-reference sidelined IDs with DB for real impact.
"""

from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any, Tuple

import os
import sys

# Put the parent of this file's directory on sys.path so the absolute
# imports below (``config.…``, ``data.…``) resolve when run as a script.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

try:
    import psycopg2
    from psycopg2.extras import RealDictCursor
except ImportError:
    # psycopg2 is optional: without it no DB connection is opened and the
    # analyzer falls back to the feed-provided player averages.
    psycopg2 = None
    # Bind the name as well so later references never hit a NameError.
    RealDictCursor = None

from config.config_loader import get_config


@dataclass
class PlayerImpactDetail:
    """Impact detail for a single sidelined player."""

    # Identity and position as taken from the sidelined feed entry.
    player_id: str
    player_name: str
    position: str
    # Contribution of this player to the team's raw (un-normalized) impact.
    impact_score: float
    # Statistics looked up in the database; all zero when no row was found.
    db_goals: int = 0
    db_assists: int = 0
    db_starts: int = 0
    # Rating in 0.0 - 1.0 used for the impact calculation.
    db_rating: float = 0.0
    # True when the player was classified as a key player.
    is_key_player: bool = False
    # True when the adaptation discount was applied to this player.
    adaptation_applied: bool = False


@dataclass
class SidelinedImpact:
    """Impact analysis of sidelined players for one team."""

    # Number of sidelined players reported for the team.
    total_sidelined: int = 0
    # Normalized impact; SidelinedAnalyzer.analyze() caps it at the
    # configured ``max_impact`` (so it stays within 0.0 - 1.0).
    impact_score: float = 0.0
    # GK missing, or 2+ players missing from the same position.
    key_position_missing: bool = False
    # How many of the missing players were classified as key players.
    key_players_missing: int = 0
    # Position code -> number of sidelined players at that position.
    position_breakdown: Dict[str, int] = field(default_factory=dict)
    # Per-player breakdown (quoted so the annotation is a forward reference).
    player_details: List["PlayerImpactDetail"] = field(default_factory=list)
    # Human-readable one-line summary per player.
    details: List[str] = field(default_factory=list)


class SidelinedAnalyzer:
    """
    Analyzes sidelined player data with DB-backed statistics.

    Impact formula per player (see ``analyze``):
        player_impact = position_weight × rating_factor × adaptation_factor × type_factor

    Where:
        - position_weight: from config (GK most critical in the defaults)
        - rating_factor: rating calculated from actual goals + assists +
          starts (the feed's "average" is only a fallback when the DB has
          no row for the player), floored at 0.15
        - adaptation_factor: 1.0 normally, ``adaptation_discount`` once the
          player has missed ``adaptation_threshold`` or more matches
        - type_factor: 1.0 for type "injury", 0.8 for any other type

    DB Query: Cross-references sidelined player IDs with match_player_events
    (goals/assists) and match_player_participation (starts) over matches
    whose status is 'FT'.

    NOTE(review): ``recent_matches_lookback`` and ``confidence_boost`` are
    loaded from config but not used anywhere in this class; the queries are
    not limited to recent matches. Confirm whether callers rely on them.
    """

    # Per-position weight of each rating component. Kept at class level so
    # the tables are built once instead of on every rating calculation.
    # Forwards: goals matter most
    # Midfielders: balanced
    # Defenders: starts matter more than goals
    # Goalkeeper: starts are everything
    _GOAL_WEIGHTS = {"F": 0.5, "O": 0.35, "D": 0.15, "K": 0.05}
    _ASSIST_WEIGHTS = {"F": 0.2, "O": 0.3, "D": 0.15, "K": 0.0}
    _START_WEIGHTS = {"F": 0.3, "O": 0.35, "D": 0.7, "K": 0.95}

    def __init__(self):
        """Load configuration and try to open the DB connection."""
        self.config = get_config()
        self.conn = None
        self._load_config()
        self._connect_db()

    def _load_config(self):
        """Load all config values once at init."""
        cfg = self.config
        self.position_weights = cfg.get("sidelined.position_weights", {
            "K": 0.35, "D": 0.20, "O": 0.25, "F": 0.30
        })
        self.max_rating = cfg.get("sidelined.max_rating", 10)
        self.adaptation_threshold = cfg.get("sidelined.adaptation_threshold", 10)
        self.adaptation_discount = cfg.get("sidelined.adaptation_discount", 0.5)
        self.goalkeeper_penalty = cfg.get("sidelined.goalkeeper_penalty", 0.15)
        self.confidence_boost = cfg.get("sidelined.confidence_boost", 10)
        self.max_impact = cfg.get("sidelined.max_impact", 0.85)
        self.key_player_threshold = cfg.get("sidelined.key_player_threshold", 3)
        self.recent_matches_lookback = cfg.get("sidelined.recent_matches_lookback", 15)

    @staticmethod
    def _safe_int(value: Any, default: int = 0) -> int:
        """
        Convert ``value`` to int (via float, so "3.9" -> 3).

        Returns ``default`` for None, "", unparseable input, NaN and ±inf.
        """
        try:
            if value is None or value == "":
                return default
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int(float("inf")); ValueError also covers NaN.
            return default

    @staticmethod
    def _safe_float(value: Any, default: float = 0.0) -> float:
        """
        Convert ``value`` to a finite float.

        Returns ``default`` for None, "", unparseable input, NaN and ±inf,
        so a bad feed value cannot propagate NaN into the impact score.
        """
        try:
            if value is None or value == "":
                return default
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            return default
        # NaN is the only float that is not equal to itself.
        if number != number or number in (float("inf"), float("-inf")):
            return default
        return number

    def _connect_db(self):
        """Open the DB connection; leaves ``self.conn`` as None on failure."""
        if psycopg2 is None:
            return
        try:
            from data.db import get_clean_dsn
            self.conn = psycopg2.connect(get_clean_dsn())
        except Exception as e:
            # Best effort: the analyzer still works without DB statistics.
            print(f"[SidelinedAnalyzer] DB connection failed: {e}")
            self.conn = None

    def _get_conn(self):
        """Get or reconnect DB connection."""
        if self.conn is None or self.conn.closed:
            self._connect_db()
        return self.conn

    def _fetch_player_stats(self, player_ids: List[str]) -> Dict[str, Dict]:
        """
        Fetch real player statistics from DB for given player IDs.

        Returns dict keyed by player_id with:
            goals: int, assists: int, starts: int, matches: int

        Returns an empty dict when there is no connection or no IDs. On a
        query error the error is printed, the transaction is rolled back
        and whatever was collected before the failure is returned.
        """
        conn = self._get_conn()
        if not conn or not player_ids:
            return {}

        # psycopg2 adapts a list to a SQL ARRAY (needed by ``= ANY(%s)``);
        # a tuple would be adapted differently, so normalize to a list.
        ids = list(player_ids)
        stats: Dict[str, Dict] = {}
        try:
            # The ``with`` block closes the cursor even when a query fails.
            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                # 1. Goals from match_player_events + Assists via assist_player_id
                cur.execute("""
                    SELECT
                        sub.player_id,
                        SUM(sub.goals) AS goals,
                        SUM(sub.assists) AS assists
                    FROM (
                        -- Goals: player scored
                        SELECT mpe.player_id,
                               COUNT(*) AS goals,
                               0 AS assists
                        FROM match_player_events mpe
                        JOIN matches m ON mpe.match_id = m.id
                        WHERE mpe.player_id = ANY(%s)
                          AND mpe.event_type = 'goal'
                          AND m.status = 'FT'
                        GROUP BY mpe.player_id

                        UNION ALL

                        -- Assists: player assisted
                        SELECT mpe.assist_player_id AS player_id,
                               0 AS goals,
                               COUNT(*) AS assists
                        FROM match_player_events mpe
                        JOIN matches m ON mpe.match_id = m.id
                        WHERE mpe.assist_player_id = ANY(%s)
                          AND mpe.event_type = 'goal'
                          AND m.status = 'FT'
                        GROUP BY mpe.assist_player_id
                    ) sub
                    GROUP BY sub.player_id
                """, (ids, ids))

                for row in cur.fetchall():
                    # SUM() comes back as Decimal; store plain ints so the
                    # values match the int fields they end up in.
                    stats[row["player_id"]] = {
                        "goals": self._safe_int(row["goals"]),
                        "assists": self._safe_int(row["assists"]),
                        "starts": 0,
                        "matches": 0,
                    }

                # 2. Starting frequency from match_player_participation
                cur.execute("""
                    SELECT
                        mpp.player_id,
                        COUNT(*) AS total_matches,
                        COUNT(*) FILTER (WHERE mpp.is_starting = true) AS starts
                    FROM match_player_participation mpp
                    JOIN matches m ON mpp.match_id = m.id
                    WHERE mpp.player_id = ANY(%s)
                      AND m.status = 'FT'
                    GROUP BY mpp.player_id
                """, (ids,))

                for row in cur.fetchall():
                    entry = stats.setdefault(
                        row["player_id"],
                        {"goals": 0, "assists": 0, "starts": 0, "matches": 0},
                    )
                    entry["starts"] = self._safe_int(row["starts"])
                    entry["matches"] = self._safe_int(row["total_matches"])

            # End the read-only transaction so the long-lived connection is
            # not left "idle in transaction" between calls.
            conn.commit()
        except Exception as e:
            print(f"[SidelinedAnalyzer] DB query error: {e}")
            try:
                conn.rollback()
            except Exception as rollback_error:
                # The connection is unusable; drop it so the next call
                # reconnects instead of reusing a broken handle.
                print(f"[SidelinedAnalyzer] DB rollback failed: {rollback_error}")
                self.conn = None

        return stats

    def _calculate_db_rating(self, db_stats: Dict, position: str) -> float:
        """
        Calculate player rating from DB statistics.

        Rating is 0.0 - 1.0, where 1.0 = absolute key player.

        Factors:
            - Goals (weighted by position: Forwards value more, Defenders less)
            - Assists
            - Starting frequency (regulars > squad players)

        Args:
            db_stats: dict with "goals", "assists", "starts", "matches";
                missing or unparseable values count as 0.
            position: position code; unknown codes use neutral weights.
        """
        goals = self._safe_float(db_stats.get("goals", 0))
        assists = self._safe_float(db_stats.get("assists", 0))
        starts = self._safe_float(db_stats.get("starts", 0))
        matches = self._safe_float(db_stats.get("matches", 0))

        goal_weight = self._GOAL_WEIGHTS.get(position, 0.25)
        assist_weight = self._ASSIST_WEIGHTS.get(position, 0.15)
        start_weight = self._START_WEIGHTS.get(position, 0.5)

        # Normalize each component to 0-1
        # Goals: 5+ goals = max
        goal_factor = min(goals / 5.0, 1.0) if goals > 0 else 0.0
        # Assists: 4+ assists = max
        assist_factor = min(assists / 4.0, 1.0) if assists > 0 else 0.0
        # Starts: 80%+ start rate = max regular (max() avoids dividing by 0)
        start_rate = starts / max(matches, 1)
        start_factor = min(start_rate / 0.8, 1.0)

        rating = (goal_factor * goal_weight +
                  assist_factor * assist_weight +
                  start_factor * start_weight)

        return round(min(rating, 1.0), 4)

    def analyze(self, team_data: Optional[Dict[str, Any]]) -> "SidelinedImpact":
        """
        Analyze sidelined data for a single team using DB-backed stats.

        Args:
            team_data: dict with 'players' list and 'totalSidelined' count.
                Entries of 'players' that are not dicts are ignored.

        Returns:
            SidelinedImpact with calculated impact score and breakdown.
        """
        if not team_data or not isinstance(team_data, dict):
            return SidelinedImpact()

        # Validate the feed before touching it: only dict entries of a real
        # list are usable. Filtering up front keeps the ID collection below
        # from calling .get() on malformed entries.
        raw_players = team_data.get("players")
        if isinstance(raw_players, (list, tuple)):
            players = [p for p in raw_players if isinstance(p, dict)]
        else:
            players = []

        if not players:
            return SidelinedImpact(
                total_sidelined=self._safe_int(team_data.get("totalSidelined", 0), 0)
            )

        # Collect player IDs for batch DB query
        player_ids = [p.get("playerId", "") for p in players if p.get("playerId")]

        # Batch fetch DB stats (one round of queries, not N+1)
        db_stats = self._fetch_player_stats(player_ids) if player_ids else {}

        total_impact = 0.0
        position_counts: Dict[str, int] = {}
        player_details: List["PlayerImpactDetail"] = []
        details: List[str] = []
        has_gk_missing = False
        key_players_count = 0

        for player in players:
            pos = player.get("positionShort", "O")
            name = player.get("playerName", "Unknown")
            pid = player.get("playerId", "")
            matches_missed = self._safe_int(player.get("matchesMissed", 0), 0)
            player_type = player.get("type", "other")
            mackolik_avg = self._safe_float(player.get("average", 0), 0.0)

            position_counts[pos] = position_counts.get(pos, 0) + 1

            if pos == "K":
                has_gk_missing = True

            # === Rating: DB first, mackolik fallback ===
            p_db_stats = db_stats.get(pid, {})
            goals = self._safe_int(p_db_stats.get("goals", 0), 0)
            assists = self._safe_int(p_db_stats.get("assists", 0), 0)
            starts = self._safe_int(p_db_stats.get("starts", 0), 0)

            if p_db_stats:
                # Use real DB stats
                db_rating = self._calculate_db_rating(p_db_stats, pos)
            else:
                # Fallback to mackolik average (normalized)
                db_rating = min(mackolik_avg / self.max_rating, 1.0) if self.max_rating > 0 else 0.3
                db_rating = max(db_rating, 0.15)  # Minimum floor

            # Key player check: high rating, or enough goals in the DB
            is_key = db_rating >= 0.5 or goals >= self.key_player_threshold
            if is_key:
                key_players_count += 1

            # === Impact Calculation ===
            pos_weight = self.position_weights.get(pos, 0.20)

            # Rating factor: higher rated = bigger loss
            rating_factor = max(db_rating, 0.15)  # Even unknown players have minimum impact

            # Adaptation: team has coped if player missed many matches
            adapted = matches_missed >= self.adaptation_threshold
            adapt_factor = self.adaptation_discount if adapted else 1.0

            # Type factor: anything other than an injury weighs slightly less
            type_factor = 1.0 if player_type == "injury" else 0.8

            player_impact = pos_weight * rating_factor * adapt_factor * type_factor
            total_impact += player_impact

            detail = PlayerImpactDetail(
                player_id=pid,
                player_name=name,
                position=pos,
                impact_score=round(player_impact, 4),
                db_goals=goals,
                db_assists=assists,
                db_starts=starts,
                db_rating=db_rating,
                is_key_player=is_key,
                adaptation_applied=adapted
            )
            player_details.append(detail)

            db_info = f"G:{detail.db_goals} A:{detail.db_assists} S:{detail.db_starts}" if p_db_stats else "no DB data"
            details.append(
                f"{name} ({pos}, db_rating:{db_rating:.2f}, {db_info}) → impact:{player_impact:.3f}"
                + (" ⭐ KEY" if is_key else "")
                + (f" [adapted, {matches_missed} missed]" if adapted else "")
            )

        # GK penalty bonus (added once, however many keepers are missing)
        if has_gk_missing:
            total_impact += self.goalkeeper_penalty

        key_position_missing = has_gk_missing or any(v >= 2 for v in position_counts.values())

        # Normalize to 0-1 range, then cap at the configured maximum
        normalization_cap = 1.5
        normalized_impact = min(total_impact / normalization_cap, self.max_impact)

        return SidelinedImpact(
            total_sidelined=len(players),
            impact_score=round(normalized_impact, 4),
            key_position_missing=key_position_missing,
            key_players_missing=key_players_count,
            position_breakdown=position_counts,
            player_details=player_details,
            details=details
        )

    def analyze_match(
        self, sidelined_json: Optional[Dict[str, Any]]
    ) -> Tuple["SidelinedImpact", "SidelinedImpact"]:
        """
        Analyze sidelined data for both teams.

        Args:
            sidelined_json: dict with 'homeTeam' and 'awayTeam' entries, each
                in the shape accepted by ``analyze``.

        Returns:
            (home_impact, away_impact); two empty impacts when the input is
            missing or not a dict.
        """
        if not sidelined_json or not isinstance(sidelined_json, dict):
            return SidelinedImpact(), SidelinedImpact()

        home_impact = self.analyze(sidelined_json.get("homeTeam"))
        away_impact = self.analyze(sidelined_json.get("awayTeam"))
        return home_impact, away_impact


# Singleton
_analyzer: Optional[SidelinedAnalyzer] = None


def get_sidelined_analyzer() -> SidelinedAnalyzer:
    """Return the module-level SidelinedAnalyzer, creating it on first call."""
    global _analyzer
    if _analyzer is not None:
        return _analyzer
    # First call: build the shared instance and remember it for later calls.
    _analyzer = SidelinedAnalyzer()
    return _analyzer