1112 lines
46 KiB
Python
1112 lines
46 KiB
Python
"""Data Loader Mixin — DB fetching, lineup/odds parsing.
|
||
|
||
Auto-extracted mixin module — split from services/single_match_orchestrator.py.
|
||
All methods here are composed into SingleMatchOrchestrator via inheritance.
|
||
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
|
||
initialised in the main __init__.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
import re
|
||
import time
|
||
import math
|
||
import os
|
||
import pickle
|
||
from collections import defaultdict
|
||
from typing import Any, Dict, List, Optional, Set, Tuple, overload
|
||
|
||
import pandas as pd
|
||
import numpy as np
|
||
|
||
import psycopg2
|
||
from psycopg2.extras import RealDictCursor
|
||
|
||
from data.db import get_clean_dsn
|
||
from schemas.prediction import FullMatchPrediction
|
||
from schemas.match_data import MatchData
|
||
from models.v25_ensemble import V25Predictor, get_v25_predictor
|
||
try:
    from models.v27_predictor import (
        V27Predictor,
        compute_divergence,
        compute_value_edge,
    )
except ImportError:
    # The V27 model module could not be imported: expose inert stand-ins with
    # the same names so the rest of this module can still be loaded.

    class V27Predictor:  # type: ignore[no-redef]
        """Placeholder predictor that holds no models and predicts nothing."""

        def __init__(self):
            self.models = dict()

        def load_models(self):
            """Always report that nothing was loaded."""
            return False

        def predict_all(self, features):
            """Return an empty prediction mapping for any input."""
            return dict()

    def compute_divergence(*args, **kwargs):
        """Fallback: ignore all arguments and return an empty mapping."""
        return dict()

    def compute_value_edge(*args, **kwargs):
        """Fallback: ignore all arguments and return an empty mapping."""
        return dict()
|
||
from features.odds_band_analyzer import OddsBandAnalyzer
|
||
try:
    from models.basketball_v25 import (
        BasketballMatchPrediction,
        get_basketball_v25_predictor,
    )
except ImportError:
    # Basketball support is optional. Keep the names importable: the type
    # alias degrades to ``Any`` and the factory fails loudly when called.
    BasketballMatchPrediction = Any  # type: ignore[misc]

    def get_basketball_v25_predictor() -> Any:
        """Fallback factory: always raises because the predictor is missing.

        Raises:
            ImportError: unconditionally.
        """
        raise ImportError("Basketball predictor is not available")
|
||
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
|
||
from services.feature_enrichment import FeatureEnrichmentService
|
||
from services.betting_brain import BettingBrain
|
||
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
|
||
from services.match_commentary import generate_match_commentary
|
||
from utils.top_leagues import load_top_league_ids
|
||
from utils.league_reliability import load_league_reliability
|
||
from config.config_loader import build_threshold_dict, get_threshold_default
|
||
from models.calibration import get_calibrator
|
||
|
||
|
||
class DataLoaderMixin:
|
||
def _load_match_data(self, match_id: str) -> Optional[MatchData]:
    """Load one match and everything derived from it into a ``MatchData``.

    The row is looked up with ``_fetch_live_match`` first and, when that
    returns nothing, with ``_fetch_hist_match``. Odds, lineups, recent form
    and an estimated league position are then computed on the same cursor.

    Args:
        match_id: Identifier passed to the fetch queries.

    Returns:
        A populated ``MatchData``, or ``None`` when no row exists in either
        table or when either team id is missing.
    """
    # psycopg2's connection context manager only commits / rolls back the
    # transaction; it does NOT close the connection. Close it explicitly so
    # every call does not leave a server connection open.
    conn = psycopg2.connect(self.dsn)
    try:
        with conn:
            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                row = self._fetch_live_match(cur, match_id)
                if not row:
                    row = self._fetch_hist_match(cur, match_id)
                if not row:
                    return None

                home_team_id = row.get("home_team_id")
                away_team_id = row.get("away_team_id")
                if not home_team_id or not away_team_id:
                    # Hard gate: predictions with unknown teams are noisy and misleading.
                    return None

                status, state, substate = self._normalize_match_status(
                    row.get("status"),
                    row.get("state"),
                    row.get("substate"),
                    row.get("score_home"),
                    row.get("score_away"),
                )
                odds_data = self._extract_odds(cur, row)
                home_lineup, away_lineup, lineup_source, lineup_confidence = self._extract_lineups(cur, row)
                sidelined = self._parse_json_dict(row.get("sidelined"))
                match_date_ms = int(row.get("match_date_ms") or 0)
                league_id = str(row.get("league_id")) if row.get("league_id") else None
                home_id_str = str(home_team_id)
                away_id_str = str(away_team_id)

                home_goals_avg, home_conceded_avg = self._calculate_team_form(
                    cur=cur,
                    team_id=home_id_str,
                    before_date_ms=match_date_ms,
                )
                away_goals_avg, away_conceded_avg = self._calculate_team_form(
                    cur=cur,
                    team_id=away_id_str,
                    before_date_ms=match_date_ms,
                )
                home_position = self._estimate_league_position(
                    cur=cur,
                    team_id=home_id_str,
                    league_id=league_id,
                    before_date_ms=match_date_ms,
                )
                away_position = self._estimate_league_position(
                    cur=cur,
                    team_id=away_id_str,
                    league_id=league_id,
                    before_date_ms=match_date_ms,
                )

                return MatchData(
                    match_id=str(row["match_id"]),
                    home_team_id=home_id_str,
                    away_team_id=away_id_str,
                    home_team_name=row.get("home_team_name") or "Home",
                    away_team_name=row.get("away_team_name") or "Away",
                    match_date_ms=match_date_ms,
                    sport=str(row.get("sport") or "football").lower(),
                    league_id=league_id,
                    league_name=row.get("league_name") or "",
                    referee_name=row.get("referee_name"),
                    odds_data=odds_data,
                    home_lineup=home_lineup,
                    away_lineup=away_lineup,
                    sidelined_data=sidelined,
                    home_goals_avg=home_goals_avg,
                    home_conceded_avg=home_conceded_avg,
                    away_goals_avg=away_goals_avg,
                    away_conceded_avg=away_conceded_avg,
                    home_position=home_position,
                    away_position=away_position,
                    lineup_source=lineup_source,
                    status=status,
                    state=state,
                    substate=substate,
                    lineup_confidence=lineup_confidence,
                    source_table=str(row.get("source_table") or "matches"),
                    # Scores stay None (not 0) when the row has no score yet.
                    current_score_home=(
                        int(str(row.get("score_home")))
                        if row.get("score_home") is not None
                        else None
                    ),
                    current_score_away=(
                        int(str(row.get("score_away")))
                        if row.get("score_away") is not None
                        else None
                    ),
                )
    finally:
        conn.close()
|
||
|
||
def _fetch_live_match(self, cur: RealDictCursor, match_id: str) -> Optional[Dict[str, Any]]:
    """Fetch a single row from ``live_matches`` joined with team and league names.

    Args:
        cur: Open cursor used to run the query.
        match_id: Value matched against ``live_matches.id``.

    Returns:
        Whatever ``cur.fetchone()`` yields for the query: one row, or ``None``
        when no live match has that id.
    """
    live_match_sql = """
        SELECT
            lm.id as match_id,
            lm.home_team_id,
            lm.away_team_id,
            lm.league_id,
            lm.sport,
            lm.mst_utc as match_date_ms,
            lm.status,
            lm.state,
            lm.substate,
            lm.score_home,
            lm.score_away,
            lm.odds,
            lm.lineups,
            lm.sidelined,
            lm.referee_name,
            ht.name as home_team_name,
            at.name as away_team_name,
            l.name as league_name,
            'live_matches'::text as source_table
        FROM live_matches lm
        LEFT JOIN teams ht ON ht.id = lm.home_team_id
        LEFT JOIN teams at ON at.id = lm.away_team_id
        LEFT JOIN leagues l ON l.id = lm.league_id
        WHERE lm.id = %s
        LIMIT 1
        """
    query_args = (match_id,)
    cur.execute(live_match_sql, query_args)
    return cur.fetchone()
|
||
|
||
@staticmethod
|
||
def _normalize_match_status(
|
||
status: Any,
|
||
state: Any,
|
||
substate: Any,
|
||
score_home: Any,
|
||
score_away: Any,
|
||
) -> Tuple[str, Optional[str], Optional[str]]:
|
||
state_text = str(state or "").strip()
|
||
status_text = str(status or "").strip()
|
||
substate_text = str(substate or "").strip()
|
||
|
||
state_key = state_text.lower().replace("_", "").replace(" ", "")
|
||
status_key = status_text.lower().replace("_", "").replace(" ", "")
|
||
substate_key = substate_text.lower().replace("_", "").replace(" ", "")
|
||
|
||
live_tokens = {"live", "livegame", "firsthalf", "secondhalf", "halftime", "1h", "2h", "ht", "1q", "2q", "3q", "4q"}
|
||
finished_tokens = {"post", "postgame", "finished", "played", "ft", "ended", "aet", "pen", "penalties", "afterpenalties"}
|
||
pre_tokens = {"pre", "pregame", "scheduled", "ns", "notstarted", "timestamp"}
|
||
|
||
if state_key in live_tokens or status_key in live_tokens or substate_key in live_tokens:
|
||
return "LIVE", state_text or "live", substate_text or None
|
||
if state_key in finished_tokens or status_key in finished_tokens or substate_key in finished_tokens:
|
||
return "FT", state_text or "post", substate_text or None
|
||
if score_home is not None and score_away is not None and status_key not in pre_tokens:
|
||
return "FT", state_text or "post", substate_text or None
|
||
if state_key in pre_tokens or status_key in pre_tokens or substate_key in pre_tokens:
|
||
return "NS", state_text or "pre", substate_text or None
|
||
|
||
return status_text or "NS", state_text or None, substate_text or None
|
||
|
||
def _fetch_hist_match(self, cur: RealDictCursor, match_id: str) -> Optional[Dict[str, Any]]:
    """Fetch a single row from ``matches`` shaped like a live-match row.

    Columns this query does not read from ``matches`` (substate, odds,
    lineups, sidelined) are selected as typed NULLs so the result has the
    same keys as ``_fetch_live_match``. The referee name comes from
    ``match_officials`` rows with ``role_id = 1``.

    Args:
        cur: Open cursor used to run the query.
        match_id: Value matched against ``matches.id``.

    Returns:
        Whatever ``cur.fetchone()`` yields: one row, or ``None`` when no
        match has that id.
    """
    hist_match_sql = """
        SELECT
            m.id as match_id,
            m.home_team_id,
            m.away_team_id,
            m.league_id,
            m.sport,
            m.mst_utc as match_date_ms,
            m.status,
            m.state,
            NULL::text as substate,
            m.score_home,
            m.score_away,
            NULL::jsonb as odds,
            NULL::jsonb as lineups,
            NULL::jsonb as sidelined,
            ref.name as referee_name,
            ht.name as home_team_name,
            at.name as away_team_name,
            l.name as league_name,
            'matches'::text as source_table
        FROM matches m
        LEFT JOIN teams ht ON ht.id = m.home_team_id
        LEFT JOIN teams at ON at.id = m.away_team_id
        LEFT JOIN leagues l ON l.id = m.league_id
        LEFT JOIN match_officials ref ON ref.match_id = m.id AND ref.role_id = 1
        WHERE m.id = %s
        LIMIT 1
        """
    query_args = (match_id,)
    cur.execute(hist_match_sql, query_args)
    return cur.fetchone()
|
||
|
||
def _extract_odds(self, cur: RealDictCursor, row: Dict[str, Any]) -> Dict[str, float]:
    """Build the odds mapping for a match row.

    Steps, in order:
      1. Parse the row's ``odds`` JSON with ``_parse_odds_json``.
      2. If any key in ``self.RELATIONAL_ODDS_KEYS`` is absent, query the
         relational odds tables and merge the result without overwriting
         keys already present.
      3. Best-effort staleness check: flag ``_odds_stale`` when the newest
         ``odd_categories.updated_at`` for the match is more than 48 hours
         older than the database's ``NOW()``.
      4. Fill sport-specific defaults for missing headline odds.

    Args:
        cur: Open cursor, used for the fallback and staleness queries.
        row: Match row; reads ``odds``, ``sport``, ``match_id`` and
            ``match_date_ms``.

    Returns:
        Mapping of odds key to value (plus the ``_odds_stale`` flag when set).
    """
    odds_data = self._parse_odds_json(row.get("odds"))
    sport_key = str(row.get("sport") or "football").lower()

    missing_relational_keys = [k for k in self.RELATIONAL_ODDS_KEYS if k not in odds_data]
    if missing_relational_keys:
        # Fallback to relational odds tables when live odds JSON is incomplete.
        cur.execute(
            """
            SELECT oc.name as category_name, os.name as selection_name, os.odd_value
            FROM odd_categories oc
            JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
            WHERE oc.match_id = %s
            ORDER BY oc.db_id ASC, os.db_id ASC
            """,
            (row["match_id"],),
        )
        relational_rows = cur.fetchall()
        rel_odds = self._parse_relational_odds([dict(r) for r in relational_rows])
        if rel_odds:
            for key, value in rel_odds.items():
                odds_data.setdefault(key, value)

    # Odds staleness check: warn when the most recent odds update is more
    # than 48h old relative to the database clock.
    # A savepoint keeps a failure here (e.g. the column doesn't exist) from
    # aborting the surrounding transaction.
    savepoint_open = False
    try:
        cur.execute("SAVEPOINT odds_staleness_check")
        savepoint_open = True
        match_ts_ms = int(row.get("match_date_ms") or 0)
        if match_ts_ms > 0:
            cur.execute(
                """
                SELECT EXTRACT(EPOCH FROM (NOW() - MAX(oc.updated_at))) / 3600 AS hours_stale
                FROM odd_categories oc
                WHERE oc.match_id = %s AND oc.updated_at IS NOT NULL
                """,
                (row["match_id"],),
            )
            stale_row = cur.fetchone()
            if stale_row and stale_row.get("hours_stale") is not None:
                hours_stale = float(stale_row["hours_stale"])
                if hours_stale > 48:
                    print(f"⚠️ [DataLoader] Odds for {row['match_id']} are {hours_stale:.0f}h stale (threshold: 48h)")
                    odds_data["_odds_stale"] = True
        cur.execute("RELEASE SAVEPOINT odds_staleness_check")
        savepoint_open = False
    except Exception:
        # Deliberately best-effort: the check must never break odds loading.
        # Only roll back when the savepoint actually exists; issuing
        # ROLLBACK TO for a savepoint that was never created would raise a
        # second error out of this handler.
        if savepoint_open:
            cur.execute("ROLLBACK TO SAVEPOINT odds_staleness_check")  # restore transaction
            # The savepoint survives a ROLLBACK TO; release it so it does
            # not linger for the rest of the transaction.
            cur.execute("RELEASE SAVEPOINT odds_staleness_check")

    if sport_key == "basketball":
        # Reuse football aliases when source only publishes generic match-result naming.
        if "ml_h" not in odds_data and "ms_h" in odds_data:
            odds_data["ml_h"] = float(odds_data["ms_h"])
        if "ml_a" not in odds_data and "ms_a" in odds_data:
            odds_data["ml_a"] = float(odds_data["ms_a"])

        if "ml_h" not in odds_data:
            odds_data["ml_h"] = 1.90
        if "ml_a" not in odds_data:
            odds_data["ml_a"] = 1.90

        # Over/under prices are only defaulted when a total line is known.
        if "tot_line" in odds_data and "tot_o" not in odds_data:
            odds_data["tot_o"] = 1.90
        if "tot_line" in odds_data and "tot_u" not in odds_data:
            odds_data["tot_u"] = 1.90
    else:
        if "ms_h" not in odds_data:
            odds_data["ms_h"] = self.DEFAULT_MS_H
        if "ms_d" not in odds_data:
            odds_data["ms_d"] = self.DEFAULT_MS_D
        if "ms_a" not in odds_data:
            odds_data["ms_a"] = self.DEFAULT_MS_A

    return odds_data
|
||
|
||
def _extract_lineups(
    self,
    cur: RealDictCursor,
    row: Dict[str, Any],
) -> Tuple[Optional[List[str]], Optional[List[str]], str, float]:
    """Resolve home/away starting lineups, trying sources in priority order.

    Sources:
      1. The row's ``lineups`` JSON — only when status/state/substate mark
         the match as live or finished.
      2. ``match_player_participation`` starters for this match — same gate.
      3. A probable XI from ``_build_probable_xi`` for any side that still
         has fewer than 9 players.

    Args:
        cur: Open cursor used for the participation and probable-XI queries.
        row: Match row; reads ``lineups``, ``status``, ``state``,
            ``substate``, ``home_team_id``, ``away_team_id``, ``match_id``,
            ``match_date_ms`` and ``sidelined``.

    Returns:
        ``(home_lineup, away_lineup, source, confidence)`` where ``source`` is
        ``"confirmed_live"``, ``"confirmed_participation"``, ``"probable_xi"``
        or ``"none"``.
    """
    min_starters = 9

    def _usable(lineup: Optional[List[str]]) -> bool:
        return bool(lineup) and len(lineup) >= min_starters

    live_lineups = row.get("lineups")
    status_upper = str(row.get("status") or "").upper()
    state_upper = str(row.get("state") or "").upper()
    substate_upper = str(row.get("substate") or "").upper()
    can_trust_feed_lineups = (
        status_upper in {"LIVE", "1H", "2H", "HT", "FT", "FINISHED"}
        or state_upper in {"LIVE", "FIRSTHALF", "SECONDHALF", "POSTGAME", "POST_GAME"}
        or substate_upper in {"LIVE", "FIRSTHALF", "SECONDHALF"}
    )
    home, away = self._parse_lineups_json(live_lineups) if can_trust_feed_lineups else (None, None)
    if _usable(home) and _usable(away):
        return home, away, "confirmed_live", 1.0

    home_id = str(row["home_team_id"])
    away_id = str(row["away_team_id"])

    # Fallback 1: current match participation table.
    # Trust this only for live/finished matches; pre-match rows can be stale feed snapshots.
    if can_trust_feed_lineups:
        cur.execute(
            """
            SELECT team_id, player_id
            FROM match_player_participation
            WHERE match_id = %s
              AND is_starting = true
            """,
            (row["match_id"],),
        )
        rows = cur.fetchall()
        if rows:
            home_players = [str(r["player_id"]) for r in rows if str(r["team_id"]) == home_id]
            away_players = [str(r["player_id"]) for r in rows if str(r["team_id"]) == away_id]
            # A feed lineup that is present but too short would be thrown
            # away by fallback 2 anyway, so let a longer participation list
            # replace it instead of only filling completely empty sides.
            if not _usable(home) and len(home_players) > len(home or ()):
                home = home_players
            if not _usable(away) and len(away_players) > len(away or ()):
                away = away_players
            if _usable(home) and _usable(away):
                return home, away, "confirmed_participation", 0.98

    # Fallback 2: probable XI from historical starts before match date.
    before_date_ms = int(row.get("match_date_ms") or 0)
    sidelined = self._parse_json_dict(row.get("sidelined")) or {}
    home_excluded = self._sidelined_player_ids(sidelined.get("homeTeam"))
    away_excluded = self._sidelined_player_ids(sidelined.get("awayTeam"))
    used_probable = False
    # A side that already has a usable confirmed lineup is kept as-is and
    # must not limit the combined confidence; starting it at 0.0 forced the
    # min() below to 0.0 whenever only the other side was projected.
    home_conf = 1.0 if _usable(home) else 0.0
    away_conf = 1.0 if _usable(away) else 0.0
    if not _usable(home):
        home, home_conf = self._build_probable_xi(
            cur,
            home_id,
            before_date_ms,
            excluded_player_ids=home_excluded,
        )
        used_probable = used_probable or bool(home)
    if not _usable(away):
        away, away_conf = self._build_probable_xi(
            cur,
            away_id,
            before_date_ms,
            excluded_player_ids=away_excluded,
        )
        used_probable = used_probable or bool(away)

    if used_probable:
        # The weaker side bounds the overall confidence; a missing side is 0.0.
        inferred_conf = min(
            home_conf if home else 0.0,
            away_conf if away else 0.0,
        )
        return home, away, "probable_xi", inferred_conf
    return home, away, "none", 0.0
|
||
|
||
def _calculate_team_form(
    self,
    cur: RealDictCursor,
    team_id: str,
    before_date_ms: int,
    limit: int = 5,
) -> Tuple[float, float]:
    """Recency-weighted goals scored and conceded over a team's last matches.

    Considers up to ``limit`` matches with status ``'FT'`` and both scores
    present, played before ``before_date_ms``, newest first. The newest match
    has weight ``limit`` and each older one weighs one less.

    Args:
        cur: Open cursor used to run the query.
        team_id: Team id, compared as a string against each row's home team.
        before_date_ms: Upper bound (exclusive) on ``matches.mst_utc``.
        limit: Maximum number of matches to consider.

    Returns:
        ``(goals_for_avg, goals_against_avg)``; ``(1.5, 1.2)`` when the team
        id is empty, no matches are found, or the total weight is not
        positive.
    """
    if not team_id:
        return 1.5, 1.2

    recent_results_sql = """
        SELECT
            m.home_team_id,
            m.away_team_id,
            m.score_home,
            m.score_away
        FROM matches m
        WHERE (m.home_team_id = %s OR m.away_team_id = %s)
          AND m.status = 'FT'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
          AND m.mst_utc < %s
        ORDER BY m.mst_utc DESC
        LIMIT %s
        """
    cur.execute(recent_results_sql, (team_id, team_id, before_date_ms, limit))
    recent_matches = cur.fetchall()
    if not recent_matches:
        return 1.5, 1.2

    scored_sum = 0.0
    conceded_sum = 0.0
    weight_sum = 0.0
    for position, match in enumerate(recent_matches):
        weight = float(limit - position)
        # Orient the score line from this team's point of view.
        if str(match["home_team_id"]) == team_id:
            scored, conceded = match["score_home"], match["score_away"]
        else:
            scored, conceded = match["score_away"], match["score_home"]
        scored_sum += float(scored) * weight
        conceded_sum += float(conceded) * weight
        weight_sum += weight

    if weight_sum <= 0:
        return 1.5, 1.2
    return scored_sum / weight_sum, conceded_sum / weight_sum
|
||
|
||
def _estimate_league_position(
    self,
    cur: RealDictCursor,
    team_id: str,
    league_id: Optional[str],
    before_date_ms: int,
) -> int:
    """Estimate a team's rank from a points table built out of past results.

    Points (3 win / 1 draw / 0 loss) are summed per team over every ``'FT'``
    match of the league with both scores present and ``mst_utc`` before
    ``before_date_ms``; teams are ordered by points, highest first.

    Args:
        cur: Open cursor used to run the query.
        team_id: Team to locate, compared as a string.
        league_id: League whose matches are counted.
        before_date_ms: Upper bound (exclusive) on ``matches.mst_utc``.

    Returns:
        The team's 1-based position in that ordering. ``10`` when an id is
        missing, no rows come back, or any exception occurs;
        ``min(20, number_of_teams)`` when the team is not in the table.
    """
    if not (team_id and league_id):
        return 10

    standings_sql = """
        SELECT
            tm.team_id,
            SUM(tm.points)::int AS points
        FROM (
            SELECT
                m.home_team_id AS team_id,
                CASE
                    WHEN m.score_home > m.score_away THEN 3
                    WHEN m.score_home = m.score_away THEN 1
                    ELSE 0
                END AS points
            FROM matches m
            WHERE m.league_id = %s
              AND m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.mst_utc < %s
            UNION ALL
            SELECT
                m.away_team_id AS team_id,
                CASE
                    WHEN m.score_away > m.score_home THEN 3
                    WHEN m.score_away = m.score_home THEN 1
                    ELSE 0
                END AS points
            FROM matches m
            WHERE m.league_id = %s
              AND m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.mst_utc < %s
        ) tm
        GROUP BY tm.team_id
        ORDER BY points DESC
        """
    try:
        cur.execute(
            standings_sql,
            (league_id, before_date_ms, league_id, before_date_ms),
        )
        standings = cur.fetchall()
        if not standings:
            return 10
        rank = 0
        for entry in standings:
            rank += 1
            if str(entry["team_id"]) == team_id:
                return rank
        # Team has no counted result yet: place it at the bottom, capped at 20.
        return min(20, len(standings))
    except Exception:
        return 10
|
||
|
||
def _build_probable_xi(
    self,
    cur: RealDictCursor,
    team_id: str,
    before_date_ms: int,
    match_limit: int = 5,
    lookback_days: int = 370,
    max_staleness_days: int = 120,
    excluded_player_ids: Optional[Set[str]] = None,
) -> Tuple[Optional[List[str]], float]:
    """Project a likely starting XI from the team's recent starting lineups.

    The query collects starters from up to ``match_limit`` finished matches
    of the team (each with at least 9 recorded starters) inside the lookback
    window, skipping players who later appear for a different team in a
    finished match before ``before_date_ms``. Players are scored by how
    recently and how often they started, and the top 11 are returned.

    Args:
        cur: Open cursor used to run the query.
        team_id: Team whose lineup is projected.
        before_date_ms: Only matches strictly before this timestamp count.
        match_limit: Maximum number of recent matches to draw from.
        lookback_days: Width of the time window before ``before_date_ms``.
        max_staleness_days: Age of the newest match beyond which the
            projection is treated as stale (lower confidence cap).
        excluded_player_ids: Player ids that must not be selected.

    Returns:
        ``(lineup, confidence)``; ``(None, 0.0)`` when the team id is empty,
        the query returns nothing, or every starter is excluded. Confidence
        is rounded to 3 decimals and capped at 0.58 (stale) or 0.88.
    """
    if not team_id:
        return None, 0.0

    ms_per_day = 24 * 60 * 60 * 1000
    window_start_ms = max(0, before_date_ms - (lookback_days * ms_per_day))

    recent_starters_sql = """
        SELECT
            mpp.player_id,
            m.id AS match_id,
            m.mst_utc,
            m.home_team_id,
            m.away_team_id
        FROM match_player_participation mpp
        JOIN matches m ON m.id = mpp.match_id
        WHERE mpp.team_id = %s
          AND mpp.is_starting = true
          AND NOT EXISTS (
              SELECT 1
              FROM match_player_participation later_mpp
              JOIN matches later_m ON later_m.id = later_mpp.match_id
              WHERE later_mpp.player_id = mpp.player_id
                AND later_mpp.team_id <> %s
                AND later_m.mst_utc > m.mst_utc
                AND later_m.mst_utc < %s
                AND (
                    later_m.status = 'FT'
                    OR later_m.state = 'postGame'
                    OR (later_m.score_home IS NOT NULL AND later_m.score_away IS NOT NULL)
                )
          )
          AND m.id IN (
              SELECT m2.id
              FROM matches m2
              JOIN match_player_participation recent_mpp
                ON recent_mpp.match_id = m2.id
               AND recent_mpp.team_id = %s
               AND recent_mpp.is_starting = true
              WHERE (m2.home_team_id = %s OR m2.away_team_id = %s)
                AND (
                    m2.status = 'FT'
                    OR m2.state = 'postGame'
                    OR (m2.score_home IS NOT NULL AND m2.score_away IS NOT NULL)
                )
                AND m2.mst_utc < %s
                AND m2.mst_utc >= %s
              GROUP BY m2.id
              HAVING COUNT(recent_mpp.*) >= 9
              ORDER BY MAX(m2.mst_utc) DESC
              LIMIT %s
          )
        ORDER BY m.mst_utc DESC
        """
    query_args = (
        team_id,
        team_id,
        before_date_ms,
        team_id,
        team_id,
        team_id,
        before_date_ms,
        window_start_ms,
        match_limit,
    )
    cur.execute(recent_starters_sql, query_args)
    start_rows = cur.fetchall()
    if not start_rows:
        return None, 0.0

    newest_start_ms = max(int(entry.get("mst_utc") or 0) for entry in start_rows)
    age_days = (before_date_ms - newest_start_ms) / ms_per_day
    is_stale = age_days > max_staleness_days

    banned_ids = {str(pid) for pid in (excluded_player_ids or set()) if pid}

    # Number the matches 0, 1, 2, ... in the order they first appear in the
    # result set (the query sorts newest first).
    match_rank: Dict[str, int] = {}
    for entry in start_rows:
        match_rank.setdefault(str(entry["match_id"]), len(match_rank))

    # Extra credit for starting in the two most recent matches.
    recent_start_bonus = {0: 3.0, 1: 1.5}

    tallies: Dict[str, Dict[str, float]] = {}
    for entry in start_rows:
        pid = str(entry["player_id"])
        if pid in banned_ids:
            continue

        rank = match_rank.get(str(entry["match_id"]), match_limit)
        points = max(1.0, float(match_limit - rank)) + recent_start_bonus.get(rank, 0.0)

        tally = tallies.get(pid)
        if tally is None:
            tally = {"score": 0.0, "starts": 0.0, "last_seen_rank": float(rank)}
            tallies[pid] = tally
        tally["score"] += points
        tally["starts"] += 1.0
        tally["last_seen_rank"] = min(tally["last_seen_rank"], float(rank))

    if not tallies:
        return None, 0.0

    def _selection_order(item: Tuple[str, Dict[str, float]]) -> Tuple[float, float, float]:
        # Higher score first, then more starts, then most recently seen.
        stats = item[1]
        return stats["score"], stats["starts"], -stats["last_seen_rank"]

    ranked = sorted(tallies.items(), key=_selection_order, reverse=True)
    top_eleven = ranked[:11]
    lineup = [pid for pid, _ in top_eleven]

    coverage = min(1.0, len(lineup) / 11.0)
    history_score = min(1.0, max(1, len(match_rank)) / float(match_limit))
    repeat_starters = sum(1 for _, stats in top_eleven if stats["starts"] >= 2.0)
    core_stability = repeat_starters / 11.0

    staleness_factor = max(
        0.35,
        min(1.0, float(max_staleness_days) / max(age_days, 1.0)),
    )
    confidence = (
        (coverage * 0.45) + (history_score * 0.25) + (core_stability * 0.30)
    ) * staleness_factor
    if banned_ids:
        # Known absences make the projection slightly less certain.
        confidence *= 0.92

    ceiling = 0.58 if is_stale else 0.88
    return lineup or None, round(max(0.0, min(ceiling, confidence)), 3)
|
||
|
||
@staticmethod
|
||
def _sidelined_player_ids(team_data: Any) -> Set[str]:
|
||
if not isinstance(team_data, dict):
|
||
return set()
|
||
players = team_data.get("players")
|
||
if not isinstance(players, list):
|
||
return set()
|
||
|
||
ids: Set[str] = set()
|
||
for player in players:
|
||
if not isinstance(player, dict):
|
||
continue
|
||
player_id = (
|
||
player.get("playerId")
|
||
or player.get("player_id")
|
||
or player.get("id")
|
||
or player.get("personId")
|
||
)
|
||
if player_id:
|
||
ids.add(str(player_id))
|
||
return ids
|
||
|
||
def _parse_odds_json(self, odds_json: Any) -> Dict[str, float]:
    """Translate the feed's odds JSON into the flat odds-key mapping.

    The input is decoded with ``_parse_json_dict``; each entry maps a market
    (category) name to a dict of selections. The normalised category name
    picks one market family (first match wins) and the corresponding keys are
    filled through ``_selection_value`` with a default of ``0.0``. Basketball
    total and handicap categories are checked independently afterwards.

    Args:
        odds_json: Raw odds payload from the match row.

    Returns:
        Mapping of odds key to value; empty when the payload does not decode.
    """
    markets = self._parse_json_dict(odds_json)
    if markets is None:
        return {}

    # Accepted selection labels, shared by several markets.
    home_names, draw_names, away_names = ("1",), ("x", "0"), ("2",)
    over_names = ("üst", "ust", "over")
    under_names = ("alt", "under")

    parsed: Dict[str, float] = {}
    for category, selections in markets.items():
        if not isinstance(selections, dict):
            continue
        category_text = str(category or "")
        category_norm = self._normalize_text(category)

        # (output key, accepted selection labels) for the matched market.
        targets: Tuple[Tuple[str, Tuple[str, ...]], ...] = ()
        if category_norm in ("ms", "maç sonucu", "mac sonucu"):
            targets = (("ms_h", home_names), ("ms_d", draw_names), ("ms_a", away_names))
        elif "maç sonucu (uzt. dahil)" in category_norm or "mac sonucu (uzt. dahil)" in category_norm:
            targets = (("ml_h", home_names), ("ml_a", away_names))
        elif category_norm in ("1. yarı sonucu", "1. yari sonucu", "ilk yarı sonucu", "ilk yari sonucu", "iy sonucu"):
            targets = (("ht_h", home_names), ("ht_d", draw_names), ("ht_a", away_names))
        elif self._is_first_half_ou05_category(category_norm):
            targets = (("ht_ou05_o", over_names), ("ht_ou05_u", under_names))
        elif self._is_first_half_ou15_category(category_norm):
            targets = (("ht_ou15_o", over_names), ("ht_ou15_u", under_names))
        elif category_norm in ("2.5 alt/üst", "2,5 alt/üst"):
            targets = (("ou25_o", over_names), ("ou25_u", under_names))
        elif category_norm in ("1.5 alt/üst", "1,5 alt/üst"):
            targets = (("ou15_o", over_names), ("ou15_u", under_names))
        elif category_norm in ("3.5 alt/üst", "3,5 alt/üst"):
            targets = (("ou35_o", over_names), ("ou35_u", under_names))
        elif category_norm in ("karşılıklı gol", "karsilikli gol", "kg"):
            targets = (("btts_y", ("var", "yes")), ("btts_n", ("yok", "no")))
        elif category_norm in ("çifte şans", "cifte sans"):
            targets = (
                ("dc_1x", ("1-x", "1x")),
                ("dc_x2", ("x-2", "x2")),
                ("dc_12", ("1-2", "12")),
            )
        elif category_norm in ("tek/çift", "tek/cift"):
            targets = (("oe_odd", ("tek", "odd")), ("oe_even", ("çift", "cift", "even")))
        elif self._is_cards_ou_category(category_norm):
            targets = (("cards_o", over_names), ("cards_u", under_names))
        elif category_norm in (
            "ilk yarı/maç sonucu",
            "ilk yari/mac sonucu",
            "iy/ms",
        ):
            # Half-time/full-time: one key per "a/b" selection label.
            for sel_key, sel_val in selections.items():
                norm_sel = self._normalize_text(sel_key)
                if "/" in norm_sel:
                    odds_key = f"htft_{norm_sel.replace('/', '').lower()}"
                    parsed[odds_key] = self._to_float(sel_val, 0.0)

        for odds_key, accepted in targets:
            parsed[odds_key] = self._selection_value(selections, accepted, 0.0)

        # Basketball full-game total line, e.g. "Alt/Üst (163,5)"
        if self._is_basketball_total_category(category_norm):
            if "tot_line" not in parsed:
                line = self._extract_parenthesized_number(category_text)
                if line is not None:
                    parsed["tot_line"] = line
            # First total market seen wins; later ones do not overwrite.
            parsed.setdefault("tot_o", self._selection_value(selections, over_names, 0.0))
            parsed.setdefault("tot_u", self._selection_value(selections, under_names, 0.0))

        # Basketball spread, e.g. "Hnd. MS (0:5,5)"
        if any(tag in category_norm for tag in ("hnd. ms", "hand. ms", "hnd ms")):
            home_line = self._parse_handicap_home_line(category_text)
            if home_line is not None:
                parsed.setdefault("spread_home_line", home_line)
                self._set_basketball_handicap_odds(parsed, selections, home_line)
        elif self._is_football_handicap_category(category_norm):
            self._set_football_handicap_odds(parsed, selections)

    return parsed
|
||
|
||
def _parse_relational_odds(self, rows: List[Dict[str, Any]]) -> Dict[str, float]:
    """Translate relational odds rows into the flat odds-key mapping.

    Each row supplies ``category_name``, ``selection_name`` and ``odd_value``.
    Rows whose odd value is not positive are skipped. The normalised category
    picks one market family (first match wins); the normalised selection then
    decides which key of that family receives the value. Basketball total and
    handicap categories are checked independently afterwards.

    Args:
        rows: Plain dict rows from the odds category/selection join.

    Returns:
        Mapping of odds key to value for every recognised row.
    """
    # Exact selection label -> key suffix / key, per market family.
    result_suffix = {"1": "h", "x": "d", "0": "d", "2": "a"}
    double_chance_keys = {
        "1-x": "dc_1x",
        "1x": "dc_1x",
        "x-2": "dc_x2",
        "x2": "dc_x2",
        "1-2": "dc_12",
        "12": "dc_12",
    }
    odd_even_keys = {
        "tek": "oe_odd",
        "odd": "oe_odd",
        "çift": "oe_even",
        "cift": "oe_even",
        "even": "oe_even",
    }

    parsed: Dict[str, float] = {}
    for row in rows:
        category_name = str(row.get("category_name") or "")
        selection_name = str(row.get("selection_name") or "")
        category_norm = self._normalize_text(category_name)
        selection_norm = self._normalize_text(selection_name)
        odd_val = self._to_float(row.get("odd_value"), 0.0)
        if odd_val <= 0:
            continue

        # Over is tested before under, so a label matching both counts as over.
        if "üst" in selection_norm or "ust" in selection_norm or "over" in selection_norm:
            ou_side = "o"
        elif "alt" in selection_norm or "under" in selection_norm:
            ou_side = "u"
        else:
            ou_side = None

        # Key prefix of the over/under market matched by this row, if any.
        ou_prefix = None
        if category_norm in ("maç sonucu", "mac sonucu", "ms"):
            suffix = result_suffix.get(selection_norm)
            if suffix is not None:
                parsed["ms_" + suffix] = odd_val
        elif "maç sonucu (uzt. dahil)" in category_norm or "mac sonucu (uzt. dahil)" in category_norm:
            if selection_norm == "1":
                parsed.setdefault("ml_h", odd_val)
            elif selection_norm == "2":
                parsed.setdefault("ml_a", odd_val)
        elif category_norm in ("1. yarı sonucu", "1. yari sonucu", "ilk yarı sonucu", "ilk yari sonucu", "iy sonucu"):
            suffix = result_suffix.get(selection_norm)
            if suffix is not None:
                parsed["ht_" + suffix] = odd_val
        elif self._is_first_half_ou05_category(category_norm):
            ou_prefix = "ht_ou05"
        elif self._is_first_half_ou15_category(category_norm):
            ou_prefix = "ht_ou15"
        elif category_norm in ("2,5 alt/üst", "2.5 alt/üst"):
            ou_prefix = "ou25"
        elif category_norm in ("1,5 alt/üst", "1.5 alt/üst"):
            ou_prefix = "ou15"
        elif category_norm in ("3,5 alt/üst", "3.5 alt/üst"):
            ou_prefix = "ou35"
        elif category_norm in ("karşılıklı gol", "karsilikli gol", "kg"):
            if selection_norm == "var" or "yes" in selection_norm:
                parsed["btts_y"] = odd_val
            elif selection_norm == "yok" or "no" in selection_norm:
                parsed["btts_n"] = odd_val
        elif category_norm in ("çifte şans", "cifte sans"):
            dc_key = double_chance_keys.get(selection_norm)
            if dc_key is not None:
                parsed[dc_key] = odd_val
        elif category_norm in ("tek/çift", "tek/cift"):
            oe_key = odd_even_keys.get(selection_norm)
            if oe_key is not None:
                parsed[oe_key] = odd_val
        elif self._is_cards_ou_category(category_norm):
            ou_prefix = "cards"
        elif category_norm in (
            "ilk yarı/maç sonucu",
            "ilk yari/mac sonucu",
            "iy/ms",
        ):
            if "/" in selection_norm:
                odds_key = f"htft_{selection_norm.replace('/', '').lower()}"
                parsed[odds_key] = odd_val

        if ou_prefix is not None and ou_side is not None:
            parsed[f"{ou_prefix}_{ou_side}"] = odd_val

        # Basketball full-game total: first line and first prices seen win.
        if self._is_basketball_total_category(category_norm):
            if "tot_line" not in parsed:
                line = self._extract_parenthesized_number(category_name)
                if line is not None:
                    parsed["tot_line"] = line
            if ou_side is not None:
                parsed.setdefault("tot_" + ou_side, odd_val)

        # Basketball spread vs. football handicap.
        if any(tag in category_norm for tag in ("hnd. ms", "hand. ms", "hnd ms")):
            home_line = self._parse_handicap_home_line(category_name)
            if home_line is not None:
                parsed.setdefault("spread_home_line", home_line)
                # The handicap helpers take a selections mapping; wrap this
                # single row as a one-entry mapping.
                self._set_basketball_handicap_odds(parsed, {selection_name: odd_val}, home_line)
        elif self._is_football_handicap_category(category_norm):
            self._set_football_handicap_odds(parsed, {selection_name: odd_val})

    return parsed
|
||
|
||
def _is_basketball_total_category(self, category_norm: str) -> bool:
|
||
if "alt/üst" not in category_norm and "alt/ust" not in category_norm:
|
||
return False
|
||
banned = (
|
||
"1. yarı",
|
||
"1. yari",
|
||
"periyot",
|
||
"ev sahibi",
|
||
"deplasman",
|
||
)
|
||
return not any(token in category_norm for token in banned)
|
||
|
||
def _is_first_half_ou05_category(self, category_norm: str) -> bool:
|
||
if "alt/üst" not in category_norm and "alt/ust" not in category_norm:
|
||
return False
|
||
if not any(
|
||
token in category_norm
|
||
for token in ("1. yarı", "1. yari", "ilk yarı", "ilk yari")
|
||
):
|
||
if not re.search(r"\biy\b", category_norm):
|
||
return False
|
||
# Exclude team-specific first-half totals (home/away) and non-goal props.
|
||
if any(token in category_norm for token in ("ev sahibi", "deplasman", "korner", "kart")):
|
||
return False
|
||
# Match only exact 0.5 line (avoid false positives like 100,5 / 90,5 in basketball totals).
|
||
for token in re.findall(r"\d+(?:[.,]\d+)?", category_norm):
|
||
try:
|
||
if abs(float(token.replace(",", ".")) - 0.5) < 1e-9:
|
||
return True
|
||
except Exception:
|
||
continue
|
||
return False
|
||
|
||
def _is_first_half_ou15_category(self, category_norm: str) -> bool:
|
||
if "alt/üst" not in category_norm and "alt/ust" not in category_norm:
|
||
return False
|
||
if not any(
|
||
token in category_norm
|
||
for token in ("1. yarı", "1. yari", "ilk yarı", "ilk yari")
|
||
):
|
||
if not re.search(r"\biy\b", category_norm):
|
||
return False
|
||
if any(token in category_norm for token in ("ev sahibi", "deplasman", "korner", "kart")):
|
||
return False
|
||
for token in re.findall(r"\d+(?:[.,]\d+)?", category_norm):
|
||
try:
|
||
if abs(float(token.replace(",", ".")) - 1.5) < 1e-9:
|
||
return True
|
||
except Exception:
|
||
continue
|
||
return False
|
||
|
||
def _is_cards_ou_category(self, category_norm: str) -> bool:
|
||
if "kart" not in category_norm and "card" not in category_norm:
|
||
return False
|
||
return "alt/üst" in category_norm or "alt/ust" in category_norm
|
||
|
||
def _is_football_handicap_category(self, category_norm: str) -> bool:
|
||
if any(token in category_norm for token in ("hnd. ms", "hand. ms", "hnd ms")):
|
||
return False
|
||
return any(
|
||
token in category_norm
|
||
for token in (
|
||
"handikapli maç sonucu",
|
||
"handikapli mac sonucu",
|
||
"handikaplı maç sonucu",
|
||
"hnd. maç sonucu",
|
||
"hnd. mac sonucu",
|
||
"hnd maç sonucu",
|
||
"hnd mac sonucu",
|
||
)
|
||
)
|
||
|
||
def _extract_parenthesized_number(self, category_name: str) -> Optional[float]:
|
||
if not category_name:
|
||
return None
|
||
try:
|
||
left = category_name.find("(")
|
||
right = category_name.find(")", left + 1)
|
||
if left < 0 or right < 0:
|
||
return None
|
||
raw = category_name[left + 1 : right].strip().replace(",", ".")
|
||
out = float(raw)
|
||
return out if out > 0 else None
|
||
except Exception:
|
||
return None
|
||
|
||
def _parse_handicap_home_line(self, category_name: str) -> Optional[float]:
|
||
if not category_name:
|
||
return None
|
||
try:
|
||
left = category_name.find("(")
|
||
right = category_name.find(")", left + 1)
|
||
if left < 0 or right < 0:
|
||
return None
|
||
payload = category_name[left + 1 : right].strip().replace(",", ".")
|
||
if ":" not in payload:
|
||
return None
|
||
home_raw, away_raw = payload.split(":", 1)
|
||
home_hcp = float(home_raw.strip())
|
||
away_hcp = float(away_raw.strip())
|
||
if abs(home_hcp) < 1e-6 and away_hcp > 0:
|
||
return -away_hcp
|
||
if home_hcp > 0 and abs(away_hcp) < 1e-6:
|
||
return home_hcp
|
||
if abs(home_hcp - away_hcp) < 1e-6 and home_hcp > 0:
|
||
return 0.0
|
||
except Exception:
|
||
return None
|
||
return None
|
||
|
||
def _set_basketball_handicap_odds(
    self,
    out: Dict[str, float],
    selections: Dict[str, Any],
    home_line: float,
) -> None:
    """Fill ``spread_h`` / ``spread_a`` in ``out`` from handicap selections.

    Existing ``spread_h`` / ``spread_a`` entries are never overwritten
    (``setdefault`` is used throughout).

    Args:
        out: Odds dictionary updated in place.
        selections: Mapping of selection name to odd. Names are normalized
            with ``self._normalize_text`` and odds converted with
            ``self._to_float``; odds not greater than 1.0 are ignored.
            Anything that is not a dict makes the call a no-op.
        home_line: Signed home line; its sign decides how the "1", "2" and
            "+h" style selections are assigned to the two sides.
    """
    if not isinstance(selections, dict):
        return

    home_plus_odd = 0.0
    one_odd = 0.0
    two_odd = 0.0

    for key, value in selections.items():
        norm_key = self._normalize_text(key)
        odd = self._to_float(value, 0.0)
        if odd <= 1.0:
            continue
        if norm_key == "1":
            one_odd = odd
        elif norm_key == "2":
            two_odd = odd
        # Checked independently of the "1"/"2" test above: a name that
        # contains "+h" or ends in "h" is recorded as the "+h" entry.
        if "+h" in norm_key or norm_key.endswith("h"):
            home_plus_odd = odd

    if home_line < 0:
        # Home gives points. "1" normally means home -line covers.
        if one_odd > 1.0:
            out.setdefault("spread_h", one_odd)
        if home_plus_odd > 1.0:
            out.setdefault("spread_a", home_plus_odd)
        elif two_odd > 1.0:
            out.setdefault("spread_a", two_odd)
    elif home_line > 0:
        # Home receives points. +h entry or "1" means home side.
        if home_plus_odd > 1.0:
            out.setdefault("spread_h", home_plus_odd)
        elif one_odd > 1.0:
            out.setdefault("spread_h", one_odd)
        if two_odd > 1.0:
            out.setdefault("spread_a", two_odd)
    else:
        # Zero line: "1" and "2" are taken directly as the two sides.
        if one_odd > 1.0:
            out.setdefault("spread_h", one_odd)
        if two_odd > 1.0:
            out.setdefault("spread_a", two_odd)
|
||
|
||
def _set_football_handicap_odds(
    self,
    out: Dict[str, float],
    selections: Dict[str, Any],
) -> None:
    """Copy handicapped match-result odds from ``selections`` into ``out``.

    Args:
        out: Odds dictionary updated in place. Receives ``hcap_h`` for
            selection "1", ``hcap_d`` for "x" or "0", and ``hcap_a`` for
            "2"; a later matching selection overwrites an earlier one.
        selections: Mapping of selection name to odd. Names are normalized
            with ``self._normalize_text`` and odds converted with
            ``self._to_float``; odds not greater than 1.0 and unrecognized
            names are skipped. Anything that is not a dict makes the call
            a no-op.
    """
    if not isinstance(selections, dict):
        return

    target_by_selection = {
        "1": "hcap_h",
        "x": "hcap_d",
        "0": "hcap_d",
        "2": "hcap_a",
    }
    for raw_name, raw_odd in selections.items():
        selection = self._normalize_text(raw_name)
        price = self._to_float(raw_odd, 0.0)
        if price <= 1.0:
            continue
        target = target_by_selection.get(selection)
        if target is not None:
            out[target] = price
|
||
|
||
def _parse_lineups_json(
|
||
self,
|
||
lineups_json: Any,
|
||
) -> Tuple[Optional[List[str]], Optional[List[str]]]:
|
||
if isinstance(lineups_json, str):
|
||
try:
|
||
lineups_json = json.loads(lineups_json)
|
||
except Exception:
|
||
lineups_json = None
|
||
|
||
if not isinstance(lineups_json, dict):
|
||
return None, None
|
||
|
||
def parse_side(side: str) -> Optional[List[str]]:
|
||
# Try direct access first (home/away at root level)
|
||
side_obj = lineups_json.get(side)
|
||
|
||
# Fallback: Check if inside "stats" key (Mackolik format)
|
||
if not isinstance(side_obj, (dict, list)):
|
||
stats = lineups_json.get("stats")
|
||
if isinstance(stats, dict):
|
||
side_obj = stats.get(side)
|
||
|
||
if not isinstance(side_obj, (dict, list)):
|
||
return None
|
||
|
||
# Try standard formats (xi, starting, lineup)
|
||
entries = None
|
||
if isinstance(side_obj, dict):
|
||
entries = side_obj.get("xi") or side_obj.get("starting") or side_obj.get("lineup")
|
||
# If the dict itself contains player dicts (no wrapper keys)
|
||
if not entries and "position" in side_obj:
|
||
# side_obj is likely a single player dict, wrap it
|
||
entries = [side_obj]
|
||
elif isinstance(side_obj, list):
|
||
# side_obj is already a list of players
|
||
entries = side_obj
|
||
|
||
if not isinstance(entries, list):
|
||
return None
|
||
|
||
ids: List[str] = []
|
||
for p in entries:
|
||
if isinstance(p, dict):
|
||
player_id = p.get("id") or p.get("playerId") or p.get("personId")
|
||
if player_id:
|
||
ids.append(str(player_id))
|
||
elif p:
|
||
ids.append(str(p))
|
||
return ids or None
|
||
|
||
return parse_side("home"), parse_side("away")
|