This commit is contained in:
Executable
+408
@@ -0,0 +1,408 @@
|
||||
"""
|
||||
Sidelined Analyzer — Injury & Suspension Impact Calculator
|
||||
==========================================================
|
||||
Parses sidelined JSON from live_matches and calculates
|
||||
position-weighted missing player impact using ACTUAL player
|
||||
statistics from the database (goals, assists, starting frequency).
|
||||
|
||||
Senior ML Engineer Principle: No magic numbers — all weights from config.
|
||||
Data Quality: Cross-reference sidelined IDs with DB for real impact.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Optional, Any, Tuple
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
try:
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
except ImportError:
|
||||
psycopg2 = None
|
||||
|
||||
from config.config_loader import get_config
|
||||
|
||||
|
||||
@dataclass
class PlayerImpactDetail:
    """Result of the impact calculation for one sidelined player.

    The ``db_*`` counters default to zero, which is also what they hold when
    no database statistics were found for the player.
    """

    # Identity and position as reported by the sidelined feed.
    player_id: str
    player_name: str
    position: str

    # Contribution of this player to the team's total (un-normalized) impact.
    impact_score: float

    # Raw statistics looked up in the database.
    db_goals: int = 0
    db_assists: int = 0
    db_starts: int = 0

    # 0-1 rating used in the impact formula. Derived from the DB statistics
    # when available, otherwise from the feed's normalized average.
    db_rating: float = 0.0

    # True when the player was classified as a key player.
    is_key_player: bool = False

    # True when the long-absence (adaptation) discount was applied.
    adaptation_applied: bool = False
|
||||
|
||||
|
||||
@dataclass
class SidelinedImpact:
    """Aggregated sidelined-player impact for a single team.

    A default-constructed instance represents "nothing sidelined / no data".
    """

    # Number of sidelined players reported for the team.
    total_sidelined: int = 0

    # Normalized team-level impact, 0.0 - 1.0.
    impact_score: float = 0.0

    # True when a goalkeeper is missing or 2+ players share a position.
    key_position_missing: bool = False

    # How many of the sidelined players were classified as key players.
    key_players_missing: int = 0

    # Position code -> number of sidelined players in that position.
    position_breakdown: Dict[str, int] = field(default_factory=dict)

    # One entry per analysed player.
    player_details: List[PlayerImpactDetail] = field(default_factory=list)

    # Human-readable summary line per analysed player.
    details: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
class SidelinedAnalyzer:
    """
    Analyze sidelined (injured / suspended) players with DB-backed statistics.

    Impact formula per player:
        player_impact = position_weight × rating_factor × adaptation_factor × type_factor

    Where:
        - position_weight: from config (goalkeeper weighted highest by default)
        - rating_factor: 0-1 rating computed from the player's goals, assists
          and start rate in the database; when the player has no DB rows the
          feed's ``average`` (normalized by ``max_rating``) is used instead.
          Never lower than ``_MIN_RATING``.
        - adaptation_factor: 1.0 normally, ``adaptation_discount`` once the
          player has missed ``adaptation_threshold`` or more matches
        - type_factor: 1.0 when ``type == "injury"``, 0.8 otherwise

    DB queries cross-reference the sidelined player IDs with
    ``match_player_events`` (goals / assists) and
    ``match_player_participation`` (appearances / starts), restricted to
    matches whose status is ``'FT'``.
    """

    # Per-position blend of the three rating components. Position codes as
    # used by the feed: K = goalkeeper, D = defender, O = midfielder,
    # F = forward.
    _GOAL_WEIGHTS = {"F": 0.5, "O": 0.35, "D": 0.15, "K": 0.05}
    _ASSIST_WEIGHTS = {"F": 0.2, "O": 0.3, "D": 0.15, "K": 0.0}
    _START_WEIGHTS = {"F": 0.3, "O": 0.35, "D": 0.7, "K": 0.95}

    # Values at which each rating component saturates at 1.0.
    _GOALS_FOR_MAX = 5.0
    _ASSISTS_FOR_MAX = 4.0
    _START_RATE_FOR_MAX = 0.8

    _MIN_RATING = 0.15              # even unknown players carry some impact
    _FALLBACK_RATING = 0.3          # used only when max_rating is not positive
    _KEY_PLAYER_RATING = 0.5        # rating at/above which a player is "key"
    _NON_INJURY_TYPE_FACTOR = 0.8   # multiplier for any type other than "injury"
    _DEFAULT_POSITION_WEIGHT = 0.20
    _NORMALIZATION_CAP = 1.5        # raw team impact that maps to 1.0

    def __init__(self):
        """Load configuration and try to open the database connection."""
        self.config = get_config()
        self.conn = None
        self._load_config()
        self._connect_db()

    def _load_config(self):
        """Read every ``sidelined.*`` config value once and cache it on self."""
        cfg = self.config
        self.position_weights = cfg.get("sidelined.position_weights", {
            "K": 0.35, "D": 0.20, "O": 0.25, "F": 0.30
        })
        self.max_rating = cfg.get("sidelined.max_rating", 10)
        self.adaptation_threshold = cfg.get("sidelined.adaptation_threshold", 10)
        self.adaptation_discount = cfg.get("sidelined.adaptation_discount", 0.5)
        self.goalkeeper_penalty = cfg.get("sidelined.goalkeeper_penalty", 0.15)
        self.max_impact = cfg.get("sidelined.max_impact", 0.85)
        self.key_player_threshold = cfg.get("sidelined.key_player_threshold", 3)
        # NOTE(review): the two values below are not read by any method of
        # this class; they are kept because other modules may read them.
        self.confidence_boost = cfg.get("sidelined.confidence_boost", 10)
        self.recent_matches_lookback = cfg.get("sidelined.recent_matches_lookback", 15)

    @staticmethod
    def _safe_int(value: Any, default: int = 0) -> int:
        """Convert ``value`` to int, returning ``default`` when it cannot be.

        Numeric strings such as ``"3.7"`` are accepted and truncated (3).
        ``None``, ``""``, non-numeric text, NaN and infinities yield ``default``.
        """
        if value is None or value == "":
            return default
        try:
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int(float("inf")) -- must not escape a "safe" helper.
            return default

    @staticmethod
    def _safe_float(value: Any, default: float = 0.0) -> float:
        """Convert ``value`` to float, returning ``default`` when it cannot be."""
        if value is None or value == "":
            return default
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    def _connect_db(self):
        """Open the DB connection; on any failure leave ``self.conn`` as None.

        Called from ``__init__`` and again from ``_get_conn`` whenever the
        connection is missing or closed. Does nothing when psycopg2 is not
        installed.
        """
        if psycopg2 is None:
            return
        try:
            from data.db import get_clean_dsn
            self.conn = psycopg2.connect(get_clean_dsn())
        except Exception as e:
            # Best effort: the analyzer still works without DB statistics.
            print(f"[SidelinedAnalyzer] DB connection failed: {e}")
            self.conn = None

    def _get_conn(self):
        """Return a usable connection, reconnecting if needed (may be None)."""
        if self.conn is None or self.conn.closed:
            self._connect_db()
        return self.conn

    def _fetch_player_stats(self, player_ids: List[str]) -> Dict[str, Dict]:
        """
        Fetch player statistics from the DB for the given player IDs.

        Two batch queries are issued (no per-player round trips): one for
        goals and assists, one for appearances and starts.

        Returns a dict keyed by player_id whose values are dicts with integer
        ``goals``, ``assists``, ``starts`` and ``matches``. Players without any
        rows are absent from the result. On a query error the error is printed
        and whatever was collected so far is returned.

        NOTE(review): both queries count every match with status 'FT';
        ``recent_matches_lookback`` is not applied. Confirm whether a recency
        filter is intended.
        """
        conn = self._get_conn()
        if not conn or not player_ids:
            return {}

        stats: Dict[str, Dict] = {}
        try:
            # The context manager closes the cursor even if a query raises.
            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                # 1. Goals from match_player_events + assists via assist_player_id
                cur.execute("""
                    SELECT
                        sub.player_id,
                        SUM(sub.goals) AS goals,
                        SUM(sub.assists) AS assists
                    FROM (
                        -- Goals: player scored
                        SELECT mpe.player_id,
                               COUNT(*) AS goals,
                               0 AS assists
                        FROM match_player_events mpe
                        JOIN matches m ON mpe.match_id = m.id
                        WHERE mpe.player_id = ANY(%s)
                          AND mpe.event_type = 'goal'
                          AND m.status = 'FT'
                        GROUP BY mpe.player_id

                        UNION ALL

                        -- Assists: player assisted
                        SELECT mpe.assist_player_id AS player_id,
                               0 AS goals,
                               COUNT(*) AS assists
                        FROM match_player_events mpe
                        JOIN matches m ON mpe.match_id = m.id
                        WHERE mpe.assist_player_id = ANY(%s)
                          AND mpe.event_type = 'goal'
                          AND m.status = 'FT'
                        GROUP BY mpe.assist_player_id
                    ) sub
                    GROUP BY sub.player_id
                """, (player_ids, player_ids))

                for row in cur.fetchall():
                    # SUM() comes back as Decimal; store plain ints.
                    stats[row["player_id"]] = {
                        "goals": self._safe_int(row["goals"]),
                        "assists": self._safe_int(row["assists"]),
                        "starts": 0,
                        "matches": 0,
                    }

                # 2. Starting frequency from match_player_participation
                cur.execute("""
                    SELECT
                        mpp.player_id,
                        COUNT(*) AS total_matches,
                        COUNT(*) FILTER (WHERE mpp.is_starting = true) AS starts
                    FROM match_player_participation mpp
                    JOIN matches m ON mpp.match_id = m.id
                    WHERE mpp.player_id = ANY(%s)
                      AND m.status = 'FT'
                    GROUP BY mpp.player_id
                """, (player_ids,))

                for row in cur.fetchall():
                    entry = stats.setdefault(
                        row["player_id"],
                        {"goals": 0, "assists": 0, "starts": 0, "matches": 0},
                    )
                    entry["starts"] = self._safe_int(row["starts"])
                    entry["matches"] = self._safe_int(row["total_matches"])
        except Exception as e:
            print(f"[SidelinedAnalyzer] DB query error: {e}")
        finally:
            # psycopg2 opens a transaction on the first execute(). End it on
            # success as well as on failure so this long-lived connection is
            # not left idle inside a transaction. Only SELECTs ran, so a
            # rollback discards nothing.
            try:
                conn.rollback()
            except Exception:
                # The connection is unusable; drop it so the next call
                # reconnects instead of failing again.
                self.conn = None

        return stats

    def _calculate_db_rating(self, db_stats: Dict, position: str) -> float:
        """
        Calculate a player rating from DB statistics.

        The rating is in 0.0 - 1.0, where 1.0 means an absolute key player.
        It is a position-weighted blend of three components, each normalized
        to 0-1:
            - goals   (saturates at 5)
            - assists (saturates at 4)
            - start rate = starts / matches (saturates at 80%)

        Forwards are weighted toward goals, goalkeepers and defenders toward
        start rate; unknown position codes use a neutral blend.

        Args:
            db_stats: dict with ``goals``, ``assists``, ``starts``, ``matches``;
                missing or non-numeric values count as 0.
            position: position code ("K", "D", "O", "F").

        Returns:
            The rating rounded to 4 decimals.
        """
        goals = self._safe_float(db_stats.get("goals", 0))
        assists = self._safe_float(db_stats.get("assists", 0))
        starts = self._safe_float(db_stats.get("starts", 0))
        matches = self._safe_float(db_stats.get("matches", 0))

        goal_weight = self._GOAL_WEIGHTS.get(position, 0.25)
        assist_weight = self._ASSIST_WEIGHTS.get(position, 0.15)
        start_weight = self._START_WEIGHTS.get(position, 0.5)

        goal_factor = min(goals / self._GOALS_FOR_MAX, 1.0) if goals > 0 else 0.0
        assist_factor = min(assists / self._ASSISTS_FOR_MAX, 1.0) if assists > 0 else 0.0
        # max(..., 1) avoids dividing by zero for players with no appearances.
        start_rate = starts / max(matches, 1)
        start_factor = min(start_rate / self._START_RATE_FOR_MAX, 1.0)

        rating = (goal_factor * goal_weight +
                  assist_factor * assist_weight +
                  start_factor * start_weight)

        return round(min(rating, 1.0), 4)

    def analyze(self, team_data: Optional[Dict[str, Any]]) -> "SidelinedImpact":
        """
        Analyze sidelined data for a single team using DB-backed stats.

        Args:
            team_data: dict with a 'players' list and a 'totalSidelined'
                count. Entries of 'players' that are not dicts are ignored.

        Returns:
            SidelinedImpact with the normalized impact score and breakdown.
            An empty SidelinedImpact when ``team_data`` is missing or not a
            dict; one carrying only ``total_sidelined`` when there is no
            usable 'players' list.
        """
        if not team_data or not isinstance(team_data, dict):
            return SidelinedImpact()

        raw_players = team_data.get("players") or []
        if not raw_players or not isinstance(raw_players, (list, tuple)):
            return SidelinedImpact(
                total_sidelined=self._safe_int(team_data.get("totalSidelined", 0))
            )

        # Filter malformed entries once, so the ID collection and the main
        # loop agree on which players exist.
        players = [p for p in raw_players if isinstance(p, dict)]

        # Batch fetch DB stats (two queries total, not N+1).
        player_ids = [p["playerId"] for p in players if p.get("playerId")]
        db_stats = self._fetch_player_stats(player_ids) if player_ids else {}

        total_impact = 0.0
        position_counts: Dict[str, int] = {}
        player_details: List[PlayerImpactDetail] = []
        details: List[str] = []
        has_gk_missing = False
        key_players_count = 0

        for player in players:
            # `or` also covers keys that are present but null in the feed.
            pos = player.get("positionShort") or "O"
            name = player.get("playerName") or "Unknown"
            pid = player.get("playerId") or ""
            matches_missed = self._safe_int(player.get("matchesMissed", 0), 0)
            player_type = player.get("type", "other")
            mackolik_avg = self._safe_float(player.get("average", 0), 0.0)

            position_counts[pos] = position_counts.get(pos, 0) + 1
            if pos == "K":
                has_gk_missing = True

            # === Rating: DB first, feed average as fallback ===
            p_db_stats = db_stats.get(pid, {})
            if p_db_stats:
                db_rating = self._calculate_db_rating(p_db_stats, pos)
            else:
                if self.max_rating > 0:
                    db_rating = min(mackolik_avg / self.max_rating, 1.0)
                else:
                    db_rating = self._FALLBACK_RATING
                db_rating = max(db_rating, self._MIN_RATING)

            db_goals = self._safe_int(p_db_stats.get("goals", 0), 0)
            db_assists = self._safe_int(p_db_stats.get("assists", 0), 0)
            db_starts = self._safe_int(p_db_stats.get("starts", 0), 0)

            # Key player: high rating, or enough goals regardless of rating.
            is_key = (db_rating >= self._KEY_PLAYER_RATING
                      or db_goals >= self.key_player_threshold)
            if is_key:
                key_players_count += 1

            # === Impact calculation ===
            pos_weight = self.position_weights.get(pos, self._DEFAULT_POSITION_WEIGHT)
            rating_factor = max(db_rating, self._MIN_RATING)

            # Adaptation: the team has coped if the player missed many matches.
            adapted = matches_missed >= self.adaptation_threshold
            adapt_factor = self.adaptation_discount if adapted else 1.0

            type_factor = 1.0 if player_type == "injury" else self._NON_INJURY_TYPE_FACTOR

            player_impact = pos_weight * rating_factor * adapt_factor * type_factor
            total_impact += player_impact

            detail = PlayerImpactDetail(
                player_id=pid,
                player_name=name,
                position=pos,
                impact_score=round(player_impact, 4),
                db_goals=db_goals,
                db_assists=db_assists,
                db_starts=db_starts,
                db_rating=db_rating,
                is_key_player=is_key,
                adaptation_applied=adapted
            )
            player_details.append(detail)

            db_info = f"G:{detail.db_goals} A:{detail.db_assists} S:{detail.db_starts}" if p_db_stats else "no DB data"
            details.append(
                f"{name} ({pos}, db_rating:{db_rating:.2f}, {db_info}) → impact:{player_impact:.3f}"
                + (" ⭐ KEY" if is_key else "")
                + (f" [adapted, {matches_missed} missed]" if adapted else "")
            )

        # A missing goalkeeper adds a flat penalty on top of the per-player sum.
        if has_gk_missing:
            total_impact += self.goalkeeper_penalty

        key_position_missing = has_gk_missing or any(v >= 2 for v in position_counts.values())

        # Scale to 0-1 and clamp to the configured ceiling.
        normalized_impact = min(total_impact / self._NORMALIZATION_CAP, self.max_impact)

        return SidelinedImpact(
            total_sidelined=len(raw_players),
            impact_score=round(normalized_impact, 4),
            key_position_missing=key_position_missing,
            key_players_missing=key_players_count,
            position_breakdown=position_counts,
            player_details=player_details,
            details=details
        )

    def analyze_match(self, sidelined_json: Optional[Dict[str, Any]]) -> Tuple["SidelinedImpact", "SidelinedImpact"]:
        """
        Analyze sidelined data for both teams of a match.

        Args:
            sidelined_json: dict with 'homeTeam' and 'awayTeam' entries, each
                in the shape accepted by ``analyze``.

        Returns:
            (home_impact, away_impact); two empty impacts when the input is
            missing or not a dict.
        """
        if not sidelined_json or not isinstance(sidelined_json, dict):
            return SidelinedImpact(), SidelinedImpact()

        home_impact = self.analyze(sidelined_json.get("homeTeam"))
        away_impact = self.analyze(sidelined_json.get("awayTeam"))
        return home_impact, away_impact
|
||||
|
||||
|
||||
# Module-level analyzer instance, created on first request.
_analyzer: Optional[SidelinedAnalyzer] = None


def get_sidelined_analyzer() -> SidelinedAnalyzer:
    """Return the shared SidelinedAnalyzer, constructing it on the first call."""
    global _analyzer
    if _analyzer is not None:
        return _analyzer
    _analyzer = SidelinedAnalyzer()
    return _analyzer
|
||||
Reference in New Issue
Block a user