first (part 2: other directories)
Deploy Iddaai Backend / build-and-deploy (push) Failing after 18s

This commit is contained in:
2026-04-16 15:11:25 +03:00
parent 7814e0bc6b
commit 2f0b85a0c7
203 changed files with 59989 additions and 0 deletions
+408
View File
@@ -0,0 +1,408 @@
"""
Sidelined Analyzer — Injury & Suspension Impact Calculator
==========================================================
Parses sidelined JSON from live_matches and calculates
position-weighted missing player impact using ACTUAL player
statistics from the database (goals, assists, starting frequency).
Senior ML Engineer Principle: No magic numbers — all weights from config.
Data Quality: Cross-reference sidelined IDs with DB for real impact.
"""
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any, Tuple
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
try:
import psycopg2
from psycopg2.extras import RealDictCursor
except ImportError:
psycopg2 = None
from config.config_loader import get_config
@dataclass
class PlayerImpactDetail:
    """Per-player record describing how much one sidelined player costs the team.

    The four identifying/score fields are required; every ``db_*`` field and
    both flags default to "no data / not applicable".
    """

    # --- identity -----------------------------------------------------------
    player_id: str
    player_name: str
    position: str

    # --- computed impact ----------------------------------------------------
    impact_score: float

    # --- statistics looked up in the database (zero when nothing was found) --
    db_goals: int = 0
    db_assists: int = 0
    db_starts: int = 0
    db_rating: float = 0.0  # Calculated from DB stats

    # --- flags --------------------------------------------------------------
    is_key_player: bool = False
    adaptation_applied: bool = False
@dataclass
class SidelinedImpact:
    """Team-level summary of the players a single team is missing.

    All fields carry defaults, so ``SidelinedImpact()`` is a valid
    "nothing missing / nothing known" result.
    """

    # How many players are sidelined.
    total_sidelined: int = 0

    # Normalized impact, 0.0 - 1.0.
    impact_score: float = 0.0

    # GK missing, or 2+ players missing at the same position.
    key_position_missing: bool = False

    # How many of the missing players are key players.
    key_players_missing: int = 0

    # Count of missing players keyed by position.
    position_breakdown: Dict[str, int] = field(default_factory=dict)

    # Structured per-player results.
    player_details: List[PlayerImpactDetail] = field(default_factory=list)

    # Free-text lines, one per entry.
    details: List[str] = field(default_factory=list)
class SidelinedAnalyzer:
    """
    Analyzes sidelined player data with DB-backed statistics.

    Impact formula per player:
        player_impact = position_weight × rating_factor × adaptation_factor × type_factor

    Where:
        - position_weight: from config (GK most critical)
        - rating_factor: rating derived from actual goals + assists + starts in
          the DB; the feed's ``average`` value is only a fallback when the DB
          has nothing for the player
        - adaptation_factor: 1.0 normally, discounted once the player has
          missed at least ``adaptation_threshold`` matches (team has adapted)
        - type_factor: 1.0 for ``type == "injury"``, reduced otherwise

    Position codes used throughout: "K" goalkeeper, "D" defender,
    "O" midfielder, "F" forward.

    DB Query: cross-references sidelined player IDs with match_player_events
    (goals/assists) and match_player_participation (starts/appearances).
    """

    # Per-position weight of each rating component. Forwards are judged mostly
    # on goals, midfielders are balanced, defenders and goalkeepers mostly on
    # how regularly they start.
    _GOAL_WEIGHTS = {"F": 0.5, "O": 0.35, "D": 0.15, "K": 0.05}
    _ASSIST_WEIGHTS = {"F": 0.2, "O": 0.3, "D": 0.15, "K": 0.0}
    _START_WEIGHTS = {"F": 0.3, "O": 0.35, "D": 0.7, "K": 0.95}
    # (goal, assist, start) weights for a position code not listed above.
    _DEFAULT_COMPONENT_WEIGHTS = (0.25, 0.15, 0.5)

    # Saturation points: at or above these a rating component counts as 1.0.
    _GOALS_FOR_MAX = 5.0
    _ASSISTS_FOR_MAX = 4.0
    _START_RATE_FOR_MAX = 0.8

    _MIN_RATING = 0.15              # floor: even unknown players have some impact
    _FALLBACK_RATING = 0.3          # used only when max_rating is not positive
    _DEFAULT_POSITION_WEIGHT = 0.20
    _NON_INJURY_FACTOR = 0.8        # applied when type is anything but "injury"
    _NORMALIZATION_CAP = 1.5        # raw team impact that maps to 1.0

    # Goals scored and assists given, one aggregated row per player.
    # NOTE(review): this aggregates over every match with status 'FT';
    # `recent_matches_lookback` is loaded from config but not applied here —
    # confirm whether a recency window was intended.
    _EVENTS_SQL = """
        SELECT
            sub.player_id,
            SUM(sub.goals) AS goals,
            SUM(sub.assists) AS assists
        FROM (
            -- Goals: player scored
            SELECT mpe.player_id,
                   COUNT(*) AS goals,
                   0 AS assists
            FROM match_player_events mpe
            JOIN matches m ON mpe.match_id = m.id
            WHERE mpe.player_id = ANY(%s)
              AND mpe.event_type = 'goal'
              AND m.status = 'FT'
            GROUP BY mpe.player_id
            UNION ALL
            -- Assists: player assisted
            SELECT mpe.assist_player_id AS player_id,
                   0 AS goals,
                   COUNT(*) AS assists
            FROM match_player_events mpe
            JOIN matches m ON mpe.match_id = m.id
            WHERE mpe.assist_player_id = ANY(%s)
              AND mpe.event_type = 'goal'
              AND m.status = 'FT'
            GROUP BY mpe.assist_player_id
        ) sub
        GROUP BY sub.player_id
    """

    # Appearances and starts, one row per player.
    _PARTICIPATION_SQL = """
        SELECT
            mpp.player_id,
            COUNT(*) AS total_matches,
            COUNT(*) FILTER (WHERE mpp.is_starting = true) AS starts
        FROM match_player_participation mpp
        JOIN matches m ON mpp.match_id = m.id
        WHERE mpp.player_id = ANY(%s)
          AND m.status = 'FT'
        GROUP BY mpp.player_id
    """

    def __init__(self):
        """Load configuration and attempt an initial DB connection."""
        self.config = get_config()
        self.conn = None
        self._load_config()
        self._connect_db()

    def _load_config(self):
        """Load all config values once at init."""
        cfg = self.config
        self.position_weights = cfg.get("sidelined.position_weights", {
            "K": 0.35, "D": 0.20, "O": 0.25, "F": 0.30
        })
        self.max_rating = cfg.get("sidelined.max_rating", 10)
        self.adaptation_threshold = cfg.get("sidelined.adaptation_threshold", 10)
        self.adaptation_discount = cfg.get("sidelined.adaptation_discount", 0.5)
        self.goalkeeper_penalty = cfg.get("sidelined.goalkeeper_penalty", 0.15)
        # confidence_boost and recent_matches_lookback are loaded here but are
        # not read by any method of this class.
        self.confidence_boost = cfg.get("sidelined.confidence_boost", 10)
        self.max_impact = cfg.get("sidelined.max_impact", 0.85)
        self.key_player_threshold = cfg.get("sidelined.key_player_threshold", 3)
        self.recent_matches_lookback = cfg.get("sidelined.recent_matches_lookback", 15)

    @staticmethod
    def _safe_int(value: Any, default: int = 0) -> int:
        """Coerce *value* to int (via float, so "3.0" works); *default* on failure."""
        try:
            if value is None or value == "":
                return default
            return int(float(value))
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _safe_float(value: Any, default: float = 0.0) -> float:
        """Coerce *value* to float; *default* for None, "" or unparsable input."""
        try:
            if value is None or value == "":
                return default
            return float(value)
        except (TypeError, ValueError):
            return default

    def _connect_db(self):
        """Open the DB connection; leave ``self.conn`` as None when unavailable."""
        if psycopg2 is None:
            return
        try:
            from data.db import get_clean_dsn
            self.conn = psycopg2.connect(get_clean_dsn())
        except Exception as e:
            # Best effort: the analyzer still works without a DB (fallback rating).
            print(f"[SidelinedAnalyzer] DB connection failed: {e}")
            self.conn = None

    def _get_conn(self):
        """Get or reconnect DB connection."""
        if self.conn is None or self.conn.closed:
            self._connect_db()
        return self.conn

    @staticmethod
    def _collect_player_ids(players: Any) -> List[str]:
        """Return the distinct, non-empty ``playerId`` values of dict entries, in order."""
        seen: Dict[Any, None] = {}
        for entry in players or []:
            if not isinstance(entry, dict):
                continue  # malformed feed entry — nothing to look up
            pid = entry.get("playerId")
            if pid:
                seen.setdefault(pid, None)
        return list(seen)

    @classmethod
    def _merge_rows(cls, stats: Dict[str, Dict], rows: Any, columns: Dict[str, str]) -> None:
        """
        Fold DB rows into *stats* in place.

        Args:
            stats: accumulator keyed by player_id.
            rows: mappings that each contain ``player_id`` plus the columns named
                in *columns*.
            columns: ``stat key -> row column`` for the values to copy.

        Values are coerced to plain ``int``: ``SUM()`` over counts reaches Python
        as ``Decimal`` (and NULL as ``None``), which must not leak to callers.
        """
        for row in rows:
            entry = stats.setdefault(
                row["player_id"],
                {"goals": 0, "assists": 0, "starts": 0, "matches": 0},
            )
            for key, column in columns.items():
                entry[key] = cls._safe_int(row[column])

    def _fetch_player_stats(self, player_ids: List[str]) -> Dict[str, Dict]:
        """
        Fetch real player statistics from DB for given player IDs.

        Returns dict keyed by player_id with:
            goals: int, assists: int, starts: int, matches: int

        Returns ``{}`` when there are no IDs or no connection. On a query error
        the error is printed and whatever was gathered so far is returned.
        """
        # Check the cheap condition first so no connection attempt is made for nothing.
        if not player_ids:
            return {}
        conn = self._get_conn()
        if conn is None:
            return {}

        stats: Dict[str, Dict] = {}
        try:
            # The context manager closes the cursor on the error path as well.
            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                # 1. Goals from match_player_events + assists via assist_player_id
                cur.execute(self._EVENTS_SQL, (player_ids, player_ids))
                self._merge_rows(
                    stats, cur.fetchall(),
                    {"goals": "goals", "assists": "assists"},
                )
                # 2. Starting frequency from match_player_participation
                cur.execute(self._PARTICIPATION_SQL, (player_ids,))
                self._merge_rows(
                    stats, cur.fetchall(),
                    {"starts": "starts", "matches": "total_matches"},
                )
        except Exception as e:
            print(f"[SidelinedAnalyzer] DB query error: {e}")
        finally:
            # Read-only work: always end the transaction so the long-lived
            # connection is not left idle inside one (success or failure).
            try:
                conn.rollback()
            except Exception as e:
                print(f"[SidelinedAnalyzer] DB rollback failed: {e}")
                self.conn = None  # force a fresh connection on the next call
        return stats

    def _calculate_db_rating(self, db_stats: Dict, position: str) -> float:
        """
        Calculate player rating from DB statistics.

        Rating is 0.0 - 1.0, where 1.0 = absolute key player.

        Factors:
            - Goals (weighted by position: Forwards value more, Defenders less)
            - Assists
            - Starting frequency (regulars > squad players)

        Args:
            db_stats: mapping with optional ``goals``, ``assists``, ``starts``,
                ``matches``; missing or unparsable values count as 0.
            position: position code; unknown codes get the default weights.

        Returns:
            The weighted rating, capped at 1.0 and rounded to 4 decimals.
        """
        goals = self._safe_float(db_stats.get("goals", 0))
        assists = self._safe_float(db_stats.get("assists", 0))
        starts = self._safe_float(db_stats.get("starts", 0))
        matches = self._safe_float(db_stats.get("matches", 0))

        default_goal, default_assist, default_start = self._DEFAULT_COMPONENT_WEIGHTS
        goal_weight = self._GOAL_WEIGHTS.get(position, default_goal)
        assist_weight = self._ASSIST_WEIGHTS.get(position, default_assist)
        start_weight = self._START_WEIGHTS.get(position, default_start)

        # Normalize each component to 0-1.
        goal_factor = min(goals / self._GOALS_FOR_MAX, 1.0) if goals > 0 else 0.0
        assist_factor = min(assists / self._ASSISTS_FOR_MAX, 1.0) if assists > 0 else 0.0
        # max(..., 1) avoids dividing by zero for players with no appearances.
        start_rate = starts / max(matches, 1)
        start_factor = min(start_rate / self._START_RATE_FOR_MAX, 1.0)

        rating = (goal_factor * goal_weight +
                  assist_factor * assist_weight +
                  start_factor * start_weight)
        return round(min(rating, 1.0), 4)

    def analyze(self, team_data: Optional[Dict[str, Any]]) -> "SidelinedImpact":
        """
        Analyze sidelined data for a single team using DB-backed stats.

        Args:
            team_data: dict with 'players' list and 'totalSidelined' count.

        Returns:
            SidelinedImpact with calculated impact score and breakdown. When no
            usable player entries are present, only ``total_sidelined`` is set
            (from 'totalSidelined').
        """
        if not team_data or not isinstance(team_data, dict):
            return SidelinedImpact()

        raw_players = team_data.get("players")
        # Keep only well-formed entries; a stray non-dict must not crash the analysis.
        players = (
            [p for p in raw_players if isinstance(p, dict)]
            if isinstance(raw_players, (list, tuple))
            else []
        )
        if not players:
            return SidelinedImpact(
                total_sidelined=self._safe_int(team_data.get("totalSidelined", 0))
            )

        # Batch fetch DB stats (single round of queries, not N+1)
        player_ids = self._collect_player_ids(players)
        db_stats = self._fetch_player_stats(player_ids) if player_ids else {}

        total_impact = 0.0
        position_counts: Dict[str, int] = {}
        player_details: List[PlayerImpactDetail] = []
        details: List[str] = []
        has_gk_missing = False
        key_players_count = 0

        for player in players:
            # `or` also covers an explicit JSON null, which .get(key, default) would not.
            pos = player.get("positionShort") or "O"
            name = player.get("playerName") or "Unknown"
            pid = player.get("playerId") or ""
            matches_missed = self._safe_int(player.get("matchesMissed", 0), 0)
            player_type = player.get("type", "other")
            mackolik_avg = self._safe_float(player.get("average", 0), 0.0)

            position_counts[pos] = position_counts.get(pos, 0) + 1
            if pos == "K":
                has_gk_missing = True

            # === Rating: DB first, mackolik fallback ===
            p_db_stats = db_stats.get(pid, {})
            if p_db_stats:
                db_rating = self._calculate_db_rating(p_db_stats, pos)
            else:
                # Fallback to the feed average, normalized by max_rating
                if self.max_rating > 0:
                    db_rating = min(mackolik_avg / self.max_rating, 1.0)
                else:
                    db_rating = self._FALLBACK_RATING
                db_rating = max(db_rating, self._MIN_RATING)

            db_goals = self._safe_int(p_db_stats.get("goals", 0), 0)
            db_assists = self._safe_int(p_db_stats.get("assists", 0), 0)
            db_starts = self._safe_int(p_db_stats.get("starts", 0), 0)

            # Key player: high rating, or enough goals regardless of rating
            is_key = db_rating >= 0.5 or db_goals >= self.key_player_threshold
            if is_key:
                key_players_count += 1

            # === Impact Calculation ===
            pos_weight = self.position_weights.get(pos, self._DEFAULT_POSITION_WEIGHT)
            # Higher rated = bigger loss; the floor also applies to DB-rated players
            rating_factor = max(db_rating, self._MIN_RATING)
            # Adaptation: team has coped if player missed many matches
            adapted = matches_missed >= self.adaptation_threshold
            adapt_factor = self.adaptation_discount if adapted else 1.0
            type_factor = 1.0 if player_type == "injury" else self._NON_INJURY_FACTOR

            player_impact = pos_weight * rating_factor * adapt_factor * type_factor
            total_impact += player_impact

            detail = PlayerImpactDetail(
                player_id=pid,
                player_name=name,
                position=pos,
                impact_score=round(player_impact, 4),
                db_goals=db_goals,
                db_assists=db_assists,
                db_starts=db_starts,
                db_rating=db_rating,
                is_key_player=is_key,
                adaptation_applied=adapted
            )
            player_details.append(detail)

            db_info = (
                f"G:{detail.db_goals} A:{detail.db_assists} S:{detail.db_starts}"
                if p_db_stats else "no DB data"
            )
            details.append(
                f"{name} ({pos}, db_rating:{db_rating:.2f}, {db_info}) → impact:{player_impact:.3f}"
                + (" ⭐ KEY" if is_key else "")
                + (f" [adapted, {matches_missed} missed]" if adapted else "")
            )

        # Flat extra penalty whenever a goalkeeper is among the missing
        if has_gk_missing:
            total_impact += self.goalkeeper_penalty

        key_position_missing = has_gk_missing or any(
            count >= 2 for count in position_counts.values()
        )

        # Scale the raw sum, then cap at the configured maximum
        normalized_impact = min(total_impact / self._NORMALIZATION_CAP, self.max_impact)

        return SidelinedImpact(
            total_sidelined=len(players),
            impact_score=round(normalized_impact, 4),
            key_position_missing=key_position_missing,
            key_players_missing=key_players_count,
            position_breakdown=position_counts,
            player_details=player_details,
            details=details
        )

    def analyze_match(
        self, sidelined_json: Optional[Dict[str, Any]]
    ) -> "Tuple[SidelinedImpact, SidelinedImpact]":
        """
        Analyze sidelined data for both teams.

        Args:
            sidelined_json: dict with optional 'homeTeam' and 'awayTeam' entries,
                each in the shape accepted by ``analyze``.

        Returns:
            (home_impact, away_impact); two empty impacts for missing/invalid input.
        """
        if not sidelined_json or not isinstance(sidelined_json, dict):
            return SidelinedImpact(), SidelinedImpact()
        home_impact = self.analyze(sidelined_json.get("homeTeam"))
        away_impact = self.analyze(sidelined_json.get("awayTeam"))
        return home_impact, away_impact
# Singleton: module-wide analyzer instance, created on first request.
_analyzer: Optional[SidelinedAnalyzer] = None


def get_sidelined_analyzer() -> SidelinedAnalyzer:
    """Return the shared SidelinedAnalyzer, constructing it on the first call."""
    global _analyzer
    if _analyzer is not None:
        return _analyzer
    _analyzer = SidelinedAnalyzer()
    return _analyzer