"""
Feature Extractor - V2 Betting Engine

Pulls historical team stats, ELO, missing-player impact and live odds from
PostgreSQL and engineers a leakage-free feature vector for the ensemble model.

CRITICAL: Only pre-match data (matches before the target match) is used.
Post-match stats of the target match are NEVER included.
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
import logging
|
||
from dataclasses import dataclass, field
|
||
from typing import Any
|
||
|
||
import numpy as np
|
||
from sqlalchemy import text
|
||
from sqlalchemy.ext.asyncio import AsyncSession
|
||
|
||
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Number of most recent finished matches averaged by _rolling_team_stats.
ROLLING_WINDOW: int = 5
# Maximum number of previous head-to-head meetings read by _load_h2h_stats.
H2H_WINDOW: int = 10
# Upper cap, in days, applied to the rest-day value in _load_rest_days.
MAX_REST_DAYS: float = 14.0
|
||
|
||
|
||
@dataclass
class MatchFeatures:
    """Container for every engineered value describing a single match.

    All fields carry neutral defaults so a partially populated instance is
    still usable. Only the 24 columns listed by ``feature_names`` are fed to
    the ensemble model through ``to_model_array``; the remaining fields are
    context, raw odds for downstream calculations, data-quality information
    and metadata.
    """

    # --- Identity ---------------------------------------------------------
    match_id: str = ""
    home_team_id: str = ""
    away_team_id: str = ""

    # --- ELO, form, head-to-head, rest and lineup context -----------------
    home_elo: float = 1500.0
    away_elo: float = 1500.0
    elo_diff: float = 0.0
    missing_players_impact: float = 0.0
    home_form_score: float = 0.0
    away_form_score: float = 0.0
    h2h_home_win_rate: float = 0.5
    h2h_sample_size: int = 0
    home_rest_days: float = 7.0
    away_rest_days: float = 7.0
    rest_diff: float = 0.0
    home_lineup_availability: float = 1.0
    away_lineup_availability: float = 1.0

    # --- Home side rolling averages (last ROLLING_WINDOW matches) ---------
    home_avg_possession: float = 50.0
    home_avg_shots_on_target: float = 4.0
    home_avg_total_shots: float = 10.0
    home_avg_goals_scored: float = 1.3
    home_avg_goals_conceded: float = 1.1

    # --- Away side rolling averages (last ROLLING_WINDOW matches) ---------
    away_avg_possession: float = 50.0
    away_avg_shots_on_target: float = 4.0
    away_avg_total_shots: float = 10.0
    away_avg_goals_scored: float = 1.3
    away_avg_goals_conceded: float = 1.1

    # --- Bookmaker implied probabilities ----------------------------------
    implied_prob_home: float = 0.33
    implied_prob_draw: float = 0.33
    implied_prob_away: float = 0.33
    implied_prob_over25: float = 0.50
    implied_prob_under25: float = 0.50
    implied_prob_btts_yes: float = 0.50
    implied_prob_btts_no: float = 0.50

    # --- Raw decimal odds (kept for Edge/Kelly calculations downstream) ---
    odds_home: float = 2.50
    odds_draw: float = 3.20
    odds_away: float = 2.80
    odds_over25: float = 1.90
    odds_under25: float = 1.90
    odds_btts_yes: float = 1.85
    odds_btts_no: float = 1.95

    # --- Data quality -----------------------------------------------------
    data_quality_score: float = 0.5
    data_quality_flags: list[str] = field(default_factory=list)

    # --- Metadata and league / referee / squad profiles -------------------
    match_name: str = ""
    home_team_name: str = ""
    away_team_name: str = ""
    league_id: str = ""
    league_name: str = ""
    referee_name: str = ""
    match_date_ms: int = 0
    league_avg_goals: float = 2.6
    referee_avg_goals: float = 2.6
    referee_home_bias: float = 0.0
    home_squad_strength: float = 0.5
    away_squad_strength: float = 0.5
    home_key_players: float = 0.0
    away_key_players: float = 0.0

    def to_model_array(self) -> np.ndarray:
        """Return the 24-feature float64 vector the ensemble expects.

        Values are emitted in exactly the order given by ``feature_names``.
        """
        columns = MatchFeatures.feature_names()
        return np.array(
            [getattr(self, column) for column in columns],
            dtype=np.float64,
        )

    @staticmethod
    def feature_names() -> list[str]:
        """Return the model column names, in model-input order."""
        per_team = (
            "avg_possession",
            "avg_shots_on_target",
            "avg_total_shots",
            "avg_goals_scored",
            "avg_goals_conceded",
        )
        names = ["home_elo", "away_elo", "elo_diff", "missing_players_impact"]
        names += [f"home_{stat}" for stat in per_team]
        names += [f"away_{stat}" for stat in per_team]
        names += [
            "implied_prob_home",
            "implied_prob_draw",
            "implied_prob_away",
            "implied_prob_over25",
            "implied_prob_under25",
            "implied_prob_btts_yes",
            "implied_prob_btts_no",
        ]
        names += ["odds_home", "odds_draw", "odds_away"]
        return names
|
||
|
||
|
||
async def extract_features(session: AsyncSession, match_id: str) -> MatchFeatures | None:
    """Run the full extraction pipeline for one match.

    Loads the match header, then fills ELO/AI features, rolling team stats,
    rest days, head-to-head, league/referee/squad profiles, lineup context
    and odds. Every source that yields nothing appends a flag to
    ``data_quality_flags`` and lowers ``data_quality_score``.

    Returns:
        ``None`` when the match exists in neither ``live_matches`` nor
        ``matches``; a minimal ``MatchFeatures`` (quality 0.1) when the header
        lacks a team id; otherwise the populated ``MatchFeatures``.
    """
    feats = MatchFeatures(match_id=match_id)
    flags: list[str] = []

    # ---- Match header ----------------------------------------------------
    header = await _load_match_header(session, match_id)
    if header is None:
        logger.warning("Match %s not found in live_matches or matches.", match_id)
        return None

    feats.home_team_id = header["home_team_id"] or ""
    feats.away_team_id = header["away_team_id"] or ""
    feats.match_name = header.get("match_name", "") or ""
    feats.match_date_ms = int(header.get("mst_utc", 0) or 0)
    for attr, column in (
        ("home_team_name", "home_name"),
        ("away_team_name", "away_name"),
        ("league_id", "league_id"),
        ("league_name", "league_name"),
        ("referee_name", "referee_name"),
    ):
        setattr(feats, attr, header.get(column, "") or "")

    if not (feats.home_team_id and feats.away_team_id):
        # Without both team ids no per-team query can run; bail out early.
        logger.warning("Match %s missing team IDs.", match_id)
        flags.append("missing_team_ids")
        feats.data_quality_flags = flags
        feats.data_quality_score = 0.1
        return feats

    kickoff_ms = feats.match_date_ms

    # ---- Precomputed AI features (ELO, form, H2H) ------------------------
    ai = await _load_ai_features(session, match_id)
    if not ai:
        flags.append("missing_ai_features")
    else:
        feats.home_elo = float(ai["home_elo"] or 1500.0)
        feats.away_elo = float(ai["away_elo"] or 1500.0)
        feats.missing_players_impact = float(ai["missing_players_impact"] or 0.0)
        feats.home_form_score = float(ai["home_form_score"] or 0.0)
        feats.away_form_score = float(ai["away_form_score"] or 0.0)
        if ai.get("h2h_home_win_rate") is not None:
            feats.h2h_home_win_rate = float(ai["h2h_home_win_rate"])
        feats.h2h_sample_size = int(ai.get("h2h_total") or 0)

    feats.elo_diff = feats.home_elo - feats.away_elo

    # ---- Rolling team statistics -----------------------------------------
    home_rolling = await _rolling_team_stats(session, feats.home_team_id, kickoff_ms)
    away_rolling = await _rolling_team_stats(session, feats.away_team_id, kickoff_ms)
    rolling_keys = (
        "avg_possession",
        "avg_shots_on_target",
        "avg_total_shots",
        "avg_goals_scored",
        "avg_goals_conceded",
    )
    for side, rolling, missing_flag in (
        ("home", home_rolling, "missing_home_stats"),
        ("away", away_rolling, "missing_away_stats"),
    ):
        if rolling is None:
            flags.append(missing_flag)
            continue
        for key in rolling_keys:
            setattr(feats, f"{side}_{key}", rolling[key])

    # A form score of (effectively) zero falls back to average goal difference.
    if abs(feats.home_form_score) < 1e-6:
        feats.home_form_score = round(
            feats.home_avg_goals_scored - feats.home_avg_goals_conceded, 3
        )
    if abs(feats.away_form_score) < 1e-6:
        feats.away_form_score = round(
            feats.away_avg_goals_scored - feats.away_avg_goals_conceded, 3
        )

    # ---- Rest days -------------------------------------------------------
    home_rest = await _load_rest_days(session, feats.home_team_id, kickoff_ms)
    away_rest = await _load_rest_days(session, feats.away_team_id, kickoff_ms)
    if home_rest is None:
        flags.append("missing_home_rest")
    else:
        feats.home_rest_days = home_rest
    if away_rest is None:
        flags.append("missing_away_rest")
    else:
        feats.away_rest_days = away_rest
    feats.rest_diff = round(feats.home_rest_days - feats.away_rest_days, 3)

    # ---- Head-to-head (only when the AI row supplied no sample) ----------
    if feats.h2h_sample_size == 0:
        h2h = await _load_h2h_stats(
            session, feats.home_team_id, feats.away_team_id, kickoff_ms
        )
        if h2h is None:
            flags.append("missing_h2h")
        else:
            feats.h2h_home_win_rate = h2h["home_win_rate"]
            feats.h2h_sample_size = h2h["sample_size"]

    # ---- League profile --------------------------------------------------
    league_profile = await _load_league_profile(session, feats.league_id, kickoff_ms)
    if league_profile is None:
        flags.append("missing_league_profile")
    else:
        feats.league_avg_goals = league_profile["avg_goals"]

    # ---- Referee profile -------------------------------------------------
    referee_profile = await _load_referee_profile(
        session, feats.referee_name, kickoff_ms
    )
    if referee_profile is None:
        flags.append("missing_referee_profile")
    else:
        feats.referee_avg_goals = referee_profile["avg_goals"]
        feats.referee_home_bias = referee_profile["home_bias"]

    # ---- Squad profiles --------------------------------------------------
    home_squad = await _load_team_squad_profile(session, feats.home_team_id, kickoff_ms)
    away_squad = await _load_team_squad_profile(session, feats.away_team_id, kickoff_ms)
    if home_squad is None:
        flags.append("missing_home_squad_profile")
    else:
        feats.home_squad_strength = home_squad["squad_strength"]
        feats.home_key_players = home_squad["key_players"]
    if away_squad is None:
        flags.append("missing_away_squad_profile")
    else:
        feats.away_squad_strength = away_squad["squad_strength"]
        feats.away_key_players = away_squad["key_players"]

    # ---- Lineup / sidelined context --------------------------------------
    lineup_info = _extract_lineup_context(header)
    feats.home_lineup_availability = lineup_info["home_availability"]
    feats.away_lineup_availability = lineup_info["away_availability"]
    if not lineup_info["has_real_lineup_data"]:
        flags.append("missing_lineup_context")
    else:
        # Keep whichever is larger: the stored impact or the lineup-derived one.
        lineup_impact = round(
            (
                (1.0 - feats.home_lineup_availability)
                + (1.0 - feats.away_lineup_availability)
            ) / 2.0,
            4,
        )
        feats.missing_players_impact = max(feats.missing_players_impact, lineup_impact)

    # ---- Odds ------------------------------------------------------------
    if not await _extract_odds(session, match_id, feats):
        flags.append("missing_odds")

    # ---- Data-quality score ----------------------------------------------
    penalty_map = {
        "missing_team_ids": 0.5,
        "missing_ai_features": 0.05,
        "missing_home_stats": 0.15,
        "missing_away_stats": 0.15,
        "missing_home_rest": 0.05,
        "missing_away_rest": 0.05,
        "missing_h2h": 0.05,
        "missing_league_profile": 0.04,
        "missing_referee_profile": 0.04,
        "missing_home_squad_profile": 0.06,
        "missing_away_squad_profile": 0.06,
        "missing_lineup_context": 0.05,
        "missing_odds": 0.2,
    }
    quality = 1.0
    for raised in flags:
        # Unknown flags cost a default 0.05.
        quality -= penalty_map.get(raised, 0.05)
    feats.data_quality_score = max(0.0, round(quality, 2))
    feats.data_quality_flags = flags

    return feats
|
||
|
||
|
||
async def _load_match_header(
    session: AsyncSession, match_id: str,
) -> dict[str, Any] | None:
    """Fetch the header row for a match: live_matches first, then matches.

    Both queries expose the same column set. The ``matches`` variant takes the
    referee name from ``match_officials`` (role_id = 1) and returns NULL for
    ``lineups`` and ``sidelined``.

    Returns:
        The first row found, as a plain dict, or ``None`` if neither table
        contains the match.
    """
    live_sql = """
        SELECT
            m.id,
            m.home_team_id,
            m.away_team_id,
            m.match_name,
            m.mst_utc,
            m.sport,
            m.league_id,
            m.referee_name,
            m.lineups,
            m.sidelined,
            ht.name AS home_name,
            at.name AS away_name,
            l.name AS league_name
        FROM live_matches m
        LEFT JOIN teams ht ON ht.id = m.home_team_id
        LEFT JOIN teams at ON at.id = m.away_team_id
        LEFT JOIN leagues l ON l.id = m.league_id
        WHERE m.id = :match_id
        LIMIT 1
    """
    archive_sql = """
        SELECT
            m.id,
            m.home_team_id,
            m.away_team_id,
            m.match_name,
            m.mst_utc,
            m.sport,
            m.league_id,
            ref.name AS referee_name,
            NULL AS lineups,
            NULL AS sidelined,
            ht.name AS home_name,
            at.name AS away_name,
            l.name AS league_name
        FROM matches m
        LEFT JOIN teams ht ON ht.id = m.home_team_id
        LEFT JOIN teams at ON at.id = m.away_team_id
        LEFT JOIN leagues l ON l.id = m.league_id
        LEFT JOIN match_officials ref ON ref.match_id = m.id AND ref.role_id = 1
        WHERE m.id = :match_id
        LIMIT 1
    """
    params = {"match_id": match_id}
    # Order matters: the live table takes precedence over the archive table.
    for sql in (live_sql, archive_sql):
        outcome = await session.execute(text(sql), params)
        header = outcome.mappings().first()
        if header:
            return dict(header)
    return None
|
||
|
||
|
||
async def _load_ai_features(
    session: AsyncSession, match_id: str,
) -> dict[str, Any] | None:
    """Return the ``football_ai_features`` row for a match, or ``None``.

    The row carries ELO ratings, missing-player impact, form scores and the
    head-to-head win rate / total columns selected below.
    """
    statement = text("""
        SELECT
            home_elo,
            away_elo,
            missing_players_impact,
            home_form_score,
            away_form_score,
            h2h_home_win_rate,
            h2h_total
        FROM football_ai_features
        WHERE match_id = :match_id
        LIMIT 1
    """)
    outcome = await session.execute(statement, {"match_id": match_id})
    record = outcome.mappings().first()
    if not record:
        return None
    return dict(record)
|
||
|
||
|
||
async def _rolling_team_stats(
    session: AsyncSession,
    team_id: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Average a team's stats over its last ``ROLLING_WINDOW`` scored matches.

    Only football matches with ``mst_utc`` strictly before ``before_mst_utc``,
    both scores present, and a matching ``football_team_stats`` row are used.
    Goals are attributed from the team's own perspective regardless of venue.

    Returns:
        A dict of five averages rounded to two decimals, or ``None`` when no
        qualifying match exists.
    """
    statement = text("""
        WITH recent AS (
            SELECT
                m.id AS match_id,
                m.home_team_id,
                m.away_team_id,
                m.score_home,
                m.score_away,
                ts.possession_percentage,
                ts.shots_on_target,
                ts.total_shots
            FROM matches m
            JOIN football_team_stats ts ON ts.match_id = m.id AND ts.team_id = :team_id
            WHERE (m.home_team_id = :team_id OR m.away_team_id = :team_id)
              AND m.mst_utc < :before_ts
              AND m.sport = 'football'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
            ORDER BY m.mst_utc DESC
            LIMIT :window
        )
        SELECT
            COALESCE(AVG(possession_percentage), 50.0) AS avg_possession,
            COALESCE(AVG(shots_on_target), 4.0) AS avg_shots_on_target,
            COALESCE(AVG(total_shots), 10.0) AS avg_total_shots,
            COALESCE(AVG(
                CASE
                    WHEN home_team_id = :team_id THEN score_home
                    ELSE score_away
                END
            ), 1.3) AS avg_goals_scored,
            COALESCE(AVG(
                CASE
                    WHEN home_team_id = :team_id THEN score_away
                    ELSE score_home
                END
            ), 1.1) AS avg_goals_conceded,
            COUNT(*) AS match_count
        FROM recent
    """)
    bind = {"team_id": team_id, "before_ts": before_mst_utc, "window": ROLLING_WINDOW}
    summary = (await session.execute(statement, bind)).mappings().first()

    # The aggregate yields a row even with no matches, so check the count too.
    if summary is None or int(summary["match_count"]) == 0:
        return None

    stat_columns = (
        "avg_possession",
        "avg_shots_on_target",
        "avg_total_shots",
        "avg_goals_scored",
        "avg_goals_conceded",
    )
    return {column: round(float(summary[column]), 2) for column in stat_columns}
|
||
|
||
|
||
async def _load_rest_days(
    session: AsyncSession,
    team_id: str,
    before_mst_utc: int,
) -> float | None:
    """Return days between the team's previous football match and this one.

    The gap is computed from ``mst_utc`` values divided by 86,400,000 (the
    number of milliseconds in a day), floored at 0 and capped at
    ``MAX_REST_DAYS``, then rounded to three decimals.

    Returns:
        The clamped rest-day figure, or ``None`` when the team has no earlier
        match on record.
    """
    statement = text("""
        SELECT m.mst_utc
        FROM matches m
        WHERE (m.home_team_id = :team_id OR m.away_team_id = :team_id)
          AND m.mst_utc < :before_ts
          AND m.sport = 'football'
        ORDER BY m.mst_utc DESC
        LIMIT 1
    """)
    outcome = await session.execute(
        statement, {"team_id": team_id, "before_ts": before_mst_utc}
    )
    previous_kickoff = outcome.scalar_one_or_none()
    if previous_kickoff is None:
        return None

    elapsed_days = (float(before_mst_utc) - float(previous_kickoff)) / 86400000.0
    non_negative = max(0.0, elapsed_days)
    return round(min(non_negative, MAX_REST_DAYS), 3)
|
||
|
||
|
||
async def _load_h2h_stats(
    session: AsyncSession,
    home_team_id: str,
    away_team_id: str,
    before_mst_utc: int,
) -> dict[str, float | int] | None:
    """Summarise the last ``H2H_WINDOW`` meetings between the two teams.

    Meetings in either venue orientation are included; each result is
    re-oriented so that "home" means the current match's home team. A win
    counts 1 and a draw counts 0.5 towards the win rate.

    Returns:
        ``{"home_win_rate": float, "sample_size": int}`` or ``None`` when no
        scored meeting precedes ``before_mst_utc``.
    """
    statement = text("""
        SELECT
            m.home_team_id,
            m.away_team_id,
            m.score_home,
            m.score_away
        FROM matches m
        WHERE m.sport = 'football'
          AND m.mst_utc < :before_ts
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
          AND (
                (m.home_team_id = :home_team_id AND m.away_team_id = :away_team_id)
                OR
                (m.home_team_id = :away_team_id AND m.away_team_id = :home_team_id)
          )
        ORDER BY m.mst_utc DESC
        LIMIT :window
    """)
    bind = {
        "home_team_id": home_team_id,
        "away_team_id": away_team_id,
        "before_ts": before_mst_utc,
        "window": H2H_WINDOW,
    }
    meetings = (await session.execute(statement, bind)).mappings().all()
    if not meetings:
        return None

    credit = 0.0  # 1.0 per win of the current home team, 0.5 per draw
    played = 0
    for meeting in meetings:
        goals_home = meeting["score_home"]
        goals_away = meeting["score_away"]
        if goals_home is None or goals_away is None:
            continue
        played += 1

        # Re-orient the score to the current fixture's home/away sides.
        if meeting["home_team_id"] == home_team_id:
            ours, theirs = float(goals_home), float(goals_away)
        else:
            ours, theirs = float(goals_away), float(goals_home)

        if ours > theirs:
            credit += 1.0
        elif ours == theirs:
            credit += 0.5

    if played == 0:
        return None

    return {
        "home_win_rate": round(credit / played, 4),
        "sample_size": played,
    }
|
||
|
||
|
||
async def _load_league_profile(
    session: AsyncSession,
    league_id: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Return the league's average total goals over its last 100 FT matches.

    Only football matches with status 'FT', both scores present and
    ``mst_utc`` strictly before ``before_mst_utc`` are considered.

    Returns:
        ``{"avg_goals": float}`` rounded to three decimals, or ``None`` when
        ``league_id`` is empty or no qualifying match exists.
    """
    if not league_id:
        return None

    statement = text("""
        SELECT
            COALESCE(AVG(m.score_home + m.score_away), 2.6) AS avg_goals,
            COUNT(*) AS match_count
        FROM (
            SELECT score_home, score_away
            FROM matches
            WHERE league_id = :league_id
              AND sport = 'football'
              AND status = 'FT'
              AND score_home IS NOT NULL
              AND score_away IS NOT NULL
              AND mst_utc < :before_ts
            ORDER BY mst_utc DESC
            LIMIT 100
        ) m
    """)
    bind = {"league_id": league_id, "before_ts": before_mst_utc}
    summary = (await session.execute(statement, bind)).mappings().first()
    if summary is None:
        return None
    if int(summary["match_count"] or 0) == 0:
        return None
    return {"avg_goals": round(float(summary["avg_goals"]), 3)}
|
||
|
||
|
||
async def _load_referee_profile(
    session: AsyncSession,
    referee_name: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Profile a referee from their last 30 finished football matches.

    ``home_bias`` is the referee's home-win share minus the 0.46 baseline used
    by the query; ``avg_goals`` is the mean total goals per match. The referee
    is matched by name in ``match_officials`` with ``role_id = 1``.

    Returns:
        ``{"home_bias": float, "avg_goals": float}`` or ``None`` when the name
        is empty or no qualifying match exists.
    """
    if not referee_name:
        return None

    statement = text("""
        SELECT
            COALESCE(AVG(CASE WHEN score_home > score_away THEN 1.0 ELSE 0.0 END), 0.46) - 0.46 AS home_bias,
            COALESCE(AVG(score_home + score_away), 2.6) AS avg_goals,
            COUNT(*) AS match_count
        FROM (
            SELECT m.score_home, m.score_away
            FROM match_officials mo
            JOIN matches m ON m.id = mo.match_id
            WHERE mo.name = :referee_name
              AND mo.role_id = 1
              AND m.sport = 'football'
              AND m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.mst_utc < :before_ts
            ORDER BY m.mst_utc DESC
            LIMIT 30
        ) ref_matches
    """)
    bind = {"referee_name": referee_name, "before_ts": before_mst_utc}
    summary = (await session.execute(statement, bind)).mappings().first()
    if summary is None:
        return None
    if int(summary["match_count"] or 0) == 0:
        return None

    bias = round(float(summary["home_bias"]), 4)
    goals = round(float(summary["avg_goals"]), 3)
    return {"home_bias": bias, "avg_goals": goals}
|
||
|
||
|
||
async def _load_team_squad_profile(
    session: AsyncSession,
    team_id: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Score a team's squad from its last 8 finished football matches.

    The query scores each player as
    ``starts * 1.5 + substitute appearances * 0.5 + goals * 2.5 + assists * 1.5``
    and then aggregates over the 11 highest-scoring players.

    Args:
        session: Async SQLAlchemy session used to run the query.
        team_id: Team to profile; an empty value short-circuits to ``None``.
        before_mst_utc: Only matches with ``mst_utc`` strictly below this
            value are considered.

    Returns:
        ``{"squad_strength": float, "key_players": float}`` where
        ``squad_strength`` is the top-11 average score divided by 10 and
        clamped to [0, 1], and ``key_players`` is the number of top-11 players
        scoring at least 6.0. ``None`` when ``team_id`` is empty or the team
        has no qualifying recent match.
    """
    if not team_id:
        return None

    # NOTE(review): goal events whose subtype contains 'penaltı kaçırma' are
    # excluded from the goal count — presumably Turkish for "missed penalty";
    # confirm against the event feed.
    query = text("""
        WITH recent_matches AS (
            SELECT m.id, m.mst_utc
            FROM matches m
            WHERE (m.home_team_id = :team_id OR m.away_team_id = :team_id)
              AND m.sport = 'football'
              AND m.status = 'FT'
              AND m.mst_utc < :before_ts
            ORDER BY m.mst_utc DESC
            LIMIT 8
        ),
        player_base AS (
            SELECT
                mpp.player_id,
                COUNT(*)::float AS appearances,
                COUNT(*) FILTER (WHERE mpp.is_starting = true)::float AS starts
            FROM match_player_participation mpp
            JOIN recent_matches rm ON rm.id = mpp.match_id
            WHERE mpp.team_id = :team_id
            GROUP BY mpp.player_id
        ),
        player_goals AS (
            SELECT
                mpe.player_id,
                COUNT(*) FILTER (
                    WHERE mpe.event_type = 'goal'
                      AND COALESCE(mpe.event_subtype, '') NOT ILIKE '%penaltı kaçırma%'
                )::float AS goals,
                0.0::float AS assists
            FROM match_player_events mpe
            JOIN recent_matches rm ON rm.id = mpe.match_id
            WHERE mpe.team_id = :team_id
            GROUP BY mpe.player_id
            UNION ALL
            SELECT
                mpe.assist_player_id AS player_id,
                0.0::float AS goals,
                COUNT(*) FILTER (
                    WHERE mpe.event_type = 'goal'
                      AND mpe.assist_player_id IS NOT NULL
                )::float AS assists
            FROM match_player_events mpe
            JOIN recent_matches rm ON rm.id = mpe.match_id
            WHERE mpe.team_id = :team_id
              AND mpe.assist_player_id IS NOT NULL
            GROUP BY mpe.assist_player_id
        ),
        player_events AS (
            SELECT
                player_id,
                SUM(goals) AS goals,
                SUM(assists) AS assists
            FROM player_goals
            GROUP BY player_id
        ),
        player_scores AS (
            SELECT
                pb.player_id,
                (pb.starts * 1.5)
                + ((pb.appearances - pb.starts) * 0.5)
                + (COALESCE(pe.goals, 0.0) * 2.5)
                + (COALESCE(pe.assists, 0.0) * 1.5) AS score
            FROM player_base pb
            LEFT JOIN player_events pe ON pe.player_id = pb.player_id
        )
        SELECT
            COALESCE(AVG(top_players.score), 0.0) AS avg_top_score,
            COALESCE(COUNT(*) FILTER (WHERE top_players.score >= 6.0), 0) AS key_players,
            COALESCE((SELECT COUNT(*) FROM recent_matches), 0) AS match_count
        FROM (
            SELECT score
            FROM player_scores
            ORDER BY score DESC
            LIMIT 11
        ) top_players
    """)
    result = await session.execute(
        query,
        {"team_id": team_id, "before_ts": before_mst_utc},
    )
    row = result.mappings().first()
    # match_count counts recent_matches, not player rows, so "no data" here
    # means the team has no qualifying match at all.
    # NOTE(review): if matches exist but no participation rows do, this looks
    # like it yields squad_strength 0.0 rather than None — confirm that is the
    # intended signal versus the neutral 0.5 default.
    if row is None or int(row["match_count"] or 0) == 0:
        return None

    avg_top_score = float(row["avg_top_score"] or 0.0)
    return {
        # Scale the average score by 1/10 and clamp into [0, 1].
        "squad_strength": round(min(max(avg_top_score / 10.0, 0.0), 1.0), 4),
        "key_players": float(row["key_players"] or 0),
    }
|
||
|
||
|
||
def _safe_json(value: Any) -> dict[str, Any] | None:
|
||
if value is None:
|
||
return None
|
||
if isinstance(value, dict):
|
||
return value
|
||
if isinstance(value, str):
|
||
try:
|
||
parsed = json.loads(value)
|
||
except (TypeError, json.JSONDecodeError):
|
||
return None
|
||
return parsed if isinstance(parsed, dict) else None
|
||
return None
|
||
|
||
|
||
def _safe_list(value: Any) -> list[Any]:
|
||
if isinstance(value, list):
|
||
return value
|
||
return []
|
||
|
||
|
||
def _extract_lineup_context(match_row: dict[str, Any]) -> dict[str, float | bool]:
    """Derive lineup availability for both sides from the match header row.

    Reads the ``lineups`` and ``sidelined`` JSON payloads of ``match_row``.
    The starting-XI size comes from ``lineups[side]["xi"]``; the sidelined
    count is the larger of ``totalSidelined`` and the length of ``players``
    under ``sidelined["homeTeam"]`` / ``sidelined["awayTeam"]``.

    Malformed payloads are tolerated: a side that is JSON ``null`` or not an
    object is treated as empty, and a non-numeric ``totalSidelined`` counts as
    zero, so bad feed data cannot abort feature extraction.

    Returns:
        A dict with ``home_availability`` and ``away_availability`` (floats in
        [0, 1]) and ``has_real_lineup_data`` (True when any XI or sidelined
        count is positive).
    """

    def _section(container: dict[str, Any] | None, key: str) -> dict[str, Any]:
        # A key present with a null / non-object value must not reach .get().
        node = container.get(key) if container else None
        return node if isinstance(node, dict) else {}

    def _declared_count(raw: Any) -> int:
        # Feed values may be strings or junk; treat unparsable totals as 0.
        try:
            return int(raw or 0)
        except (TypeError, ValueError):
            return 0

    def _sidelined_count(team: dict[str, Any]) -> int:
        return max(
            _declared_count(team.get("totalSidelined")),
            len(_safe_list(team.get("players"))),
        )

    lineups = _safe_json(match_row.get("lineups"))
    sidelined = _safe_json(match_row.get("sidelined"))

    home_xi_count = len(_safe_list(_section(lineups, "home").get("xi")))
    away_xi_count = len(_safe_list(_section(lineups, "away").get("xi")))
    home_sidelined_count = _sidelined_count(_section(sidelined, "homeTeam"))
    away_sidelined_count = _sidelined_count(_section(sidelined, "awayTeam"))

    has_real_lineup_data = any(
        count > 0
        for count in (
            home_xi_count,
            away_xi_count,
            home_sidelined_count,
            away_sidelined_count,
        )
    )

    return {
        "home_availability": _compute_availability(home_xi_count, home_sidelined_count),
        "away_availability": _compute_availability(away_xi_count, away_sidelined_count),
        "has_real_lineup_data": has_real_lineup_data,
    }
|
||
|
||
|
||
def _compute_availability(xi_count: int, sidelined_count: int) -> float:
|
||
xi_ratio = min(max(xi_count / 11.0, 0.0), 1.0) if xi_count > 0 else 1.0
|
||
sidelined_penalty = min(max(sidelined_count / 11.0, 0.0), 1.0) * 0.35
|
||
return round(min(max(xi_ratio - sidelined_penalty, 0.0), 1.0), 4)
|
||
|
||
|
||
def _safe_odd(val: Any) -> float:
|
||
"""Parse an odds value that might be str, float, int, or None."""
|
||
if val is None:
|
||
return 0.0
|
||
try:
|
||
parsed = float(val)
|
||
return parsed if parsed > 1.0 else 0.0
|
||
except (ValueError, TypeError):
|
||
return 0.0
|
||
|
||
|
||
def _implied_prob(decimal_odd: float) -> float:
|
||
"""Convert decimal odds to implied probability, clamped [0, 1]."""
|
||
if decimal_odd <= 1.0:
|
||
return 0.0
|
||
return min(1.0, 1.0 / decimal_odd)
|
||
|
||
|
||
async def _extract_odds(
    session: AsyncSession,
    match_id: str,
    feats: MatchFeatures,
) -> bool:
    """Populate odds and implied probabilities on ``feats``.

    The live odds JSON is tried first; the relational odds tables are the
    fallback when it yields nothing. On success every ``implied_prob_*``
    field is recomputed from the matching ``odds_*`` field and rounded to
    four decimals.

    Returns:
        Whether any odds source produced data. ``feats`` is mutated in place.
    """
    found = False

    live_blob = await _load_live_odds_json(session, match_id)
    if live_blob:
        found = _parse_odds_json(live_blob, feats)

    if not found:
        found = await _load_relational_odds(session, match_id, feats)

    if not found:
        return found

    markets = ("home", "draw", "away", "over25", "under25", "btts_yes", "btts_no")
    for market in markets:
        decimal_odd = getattr(feats, f"odds_{market}")
        setattr(feats, f"implied_prob_{market}", round(_implied_prob(decimal_odd), 4))

    return found
|
||
|
||
|
||
async def _load_live_odds_json(
    session: AsyncSession, match_id: str,
) -> dict[str, Any] | None:
    """Fetch the ``odds`` column of a live match as a decoded JSON container.

    The column value may arrive already decoded or as a JSON string. Either
    way, only a dict or a list is passed back (note that a list can be
    returned despite the narrower annotation).

    Returns:
        The decoded container, or ``None`` when the row is missing, the column
        is NULL, the text is not valid JSON, or the value is another type.
    """
    statement = text("SELECT odds FROM live_matches WHERE id = :mid AND odds IS NOT NULL")
    outcome = await session.execute(statement, {"mid": match_id})
    payload = outcome.scalar_one_or_none()

    if isinstance(payload, str):
        try:
            payload = json.loads(payload)
        except (json.JSONDecodeError, TypeError):
            return None

    return payload if isinstance(payload, (dict, list)) else None
|
||
|
||
|
||
def _parse_odds_json(odds_blob: dict[str, Any] | list[Any], feats: MatchFeatures) -> bool:
    """Parse the Mackolik-style odds JSON structure into ``feats``.

    ``odds_blob`` is either a list of category objects or a dict holding them
    under ``"categories"`` (or ``"odds"``), as a list or as a dict of
    categories. Three markets are recognised by category name: match result
    (1/X/2), over/under 2.5 goals and both-teams-to-score. A field on
    ``feats`` is overwritten only when the selection parses to a valid odd,
    so existing values survive missing or suspended selections.

    Returns:
        True only when at least one valid odd was written to ``feats``. A
        matching category whose selections are all missing or invalid does
        not count, so the caller can still fall back to another odds source
        and flag missing odds.
    """
    raw_categories: Any = []
    if isinstance(odds_blob, list):
        raw_categories = odds_blob
    elif isinstance(odds_blob, dict):
        raw_categories = odds_blob.get("categories", odds_blob.get("odds", []))
        if isinstance(raw_categories, dict):
            raw_categories = list(raw_categories.values())
    if not isinstance(raw_categories, list):
        raw_categories = []
    categories = [item for item in raw_categories if isinstance(item, dict)]

    def _apply(attr: str, raw: Any) -> bool:
        # Write the odd only when it is usable; report whether it was written.
        odd = _safe_odd(raw)
        if odd > 1.0:
            setattr(feats, attr, odd)
            return True
        return False

    result_names = ("mac sonucu", "match result", "1x2", "maç sonucu")
    totals_names = (
        "2,5 alt/ust", "over/under 2.5", "2.5 alt/ust", "2,5 alt/üst", "2.5 alt/üst",
    )
    btts_names = ("karsilikli gol", "both teams to score", "btts", "karşılıklı gol")

    found_any = False
    for cat in categories:
        # str() guards against non-string names in the feed.
        cat_name = str(cat.get("name") or cat.get("cn") or "").strip().lower()
        selections = cat.get("selections") or cat.get("s") or []

        if cat_name in result_names:
            sels = _selections_to_map(selections)
            # Build a list (not a generator) so every selection is applied.
            applied = [
                _apply("odds_home", sels.get("1")),
                _apply("odds_draw", sels.get("x")),
                _apply("odds_away", sels.get("2")),
            ]
        elif cat_name in totals_names:
            sels = _selections_to_map(selections)
            applied = [
                _apply("odds_over25", sels.get("ust") or sels.get("over") or sels.get("üst")),
                _apply("odds_under25", sels.get("alt") or sels.get("under")),
            ]
        elif cat_name in btts_names:
            sels = _selections_to_map(selections)
            applied = [
                _apply("odds_btts_yes", sels.get("var") or sels.get("yes")),
                _apply("odds_btts_no", sels.get("yok") or sels.get("no")),
            ]
        else:
            continue

        if any(applied):
            found_any = True

    return found_any
|
||
|
||
|
||
def _selections_to_map(selections: list[Any] | dict[str, Any]) -> dict[str, Any]:
|
||
"""Normalize varied selection structures into {name_lower: odd_value}."""
|
||
result: dict[str, Any] = {}
|
||
if isinstance(selections, dict):
|
||
for key, value in selections.items():
|
||
result[str(key).strip().lower()] = value
|
||
elif isinstance(selections, list):
|
||
for sel in selections:
|
||
if isinstance(sel, dict):
|
||
name = (sel.get("name") or sel.get("n") or "").strip().lower()
|
||
value = sel.get("odd_value") or sel.get("ov") or sel.get("v")
|
||
if name:
|
||
result[name] = value
|
||
return result
|
||
|
||
|
||
async def _load_relational_odds(
    session: AsyncSession, match_id: str, feats: MatchFeatures,
) -> bool:
    """Fallback: load odds from ``odd_categories`` + ``odd_selections``.

    Reads the 'Maç Sonucu', '2,5 Alt/Üst' and 'Karşılıklı Gol' categories for
    the match and writes every valid selection onto the matching ``odds_*``
    field of ``feats``. Rows whose odd does not parse to a value above 1.0, or
    whose selection name is not recognised, are ignored.

    Returns:
        True only when at least one odd was written to ``feats``. Rows that
        exist but are all invalid or unrecognised return False, so the caller
        reports the odds as missing instead of trusting the defaults.
    """
    query = text("""
        SELECT oc.name AS cat_name, os.name AS sel_name, os.odd_value
        FROM odd_categories oc
        JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
        WHERE oc.match_id = :match_id
          AND oc.name IN ('Maç Sonucu', '2,5 Alt/Üst', 'Karşılıklı Gol')
    """)
    result = await session.execute(query, {"match_id": match_id})
    rows = result.mappings().all()
    if not rows:
        return False

    # (category, selection) -> feature attribute that receives the odd.
    targets: dict[tuple[str, str], str] = {
        ("Maç Sonucu", "1"): "odds_home",
        ("Maç Sonucu", "x"): "odds_draw",
        ("Maç Sonucu", "2"): "odds_away",
        ("2,5 Alt/Üst", "üst"): "odds_over25",
        ("2,5 Alt/Üst", "ust"): "odds_over25",
        ("2,5 Alt/Üst", "over"): "odds_over25",
        ("2,5 Alt/Üst", "alt"): "odds_under25",
        ("2,5 Alt/Üst", "under"): "odds_under25",
        ("Karşılıklı Gol", "var"): "odds_btts_yes",
        ("Karşılıklı Gol", "yes"): "odds_btts_yes",
        ("Karşılıklı Gol", "yok"): "odds_btts_no",
        ("Karşılıklı Gol", "no"): "odds_btts_no",
    }

    applied = False
    for row in rows:
        cat = (row["cat_name"] or "").strip()
        sel = (row["sel_name"] or "").strip().lower()
        value = _safe_odd(row["odd_value"])
        if value <= 1.0:
            continue

        attr = targets.get((cat, sel))
        if attr is None:
            continue
        setattr(feats, attr, value)
        applied = True

    return applied
|