first (part 2: other directories)
Deploy Iddaai Backend / build-and-deploy (push) Failing after 18s

This commit is contained in:
2026-04-16 15:11:25 +03:00
parent 7814e0bc6b
commit 2f0b85a0c7
203 changed files with 59989 additions and 0 deletions
+990
View File
@@ -0,0 +1,990 @@
"""
Feature Extractor - V2 Betting Engine
Pulls historical team stats, ELO, missing-player impact and live odds from
PostgreSQL and engineers a leakage-free feature vector for the ensemble model.
CRITICAL: Only pre-match data (matches before the target match) is used.
Post-match stats of the target match are NEVER included.
"""
from __future__ import annotations
import json
import logging
from dataclasses import dataclass, field
from typing import Any
import numpy as np
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Number of most recent finished matches averaged by _rolling_team_stats.
ROLLING_WINDOW: int = 5
# Maximum number of past head-to-head meetings read by _load_h2h_stats.
H2H_WINDOW: int = 10
# Upper cap (in days) applied to the value returned by _load_rest_days.
MAX_REST_DAYS: float = 14.0
@dataclass
class MatchFeatures:
    """Container for everything the extractor knows about one match.

    Every field carries a neutral default so that a partially populated
    instance is still usable. Only the 24 fields listed by
    ``feature_names`` are fed to the model through ``to_model_array``;
    the remaining fields are context and metadata.
    """

    # --- Identity ---
    match_id: str = ""
    home_team_id: str = ""
    away_team_id: str = ""

    # --- ELO and AI-derived features ---
    home_elo: float = 1500.0
    away_elo: float = 1500.0
    elo_diff: float = 0.0
    missing_players_impact: float = 0.0
    home_form_score: float = 0.0
    away_form_score: float = 0.0
    h2h_home_win_rate: float = 0.5
    h2h_sample_size: int = 0
    home_rest_days: float = 7.0
    away_rest_days: float = 7.0
    rest_diff: float = 0.0
    home_lineup_availability: float = 1.0
    away_lineup_availability: float = 1.0

    # --- Home side rolling averages (last 5 matches) ---
    home_avg_possession: float = 50.0
    home_avg_shots_on_target: float = 4.0
    home_avg_total_shots: float = 10.0
    home_avg_goals_scored: float = 1.3
    home_avg_goals_conceded: float = 1.1

    # --- Away side rolling averages (last 5 matches) ---
    away_avg_possession: float = 50.0
    away_avg_shots_on_target: float = 4.0
    away_avg_total_shots: float = 10.0
    away_avg_goals_scored: float = 1.3
    away_avg_goals_conceded: float = 1.1

    # --- Probabilities implied by the bookmaker odds ---
    implied_prob_home: float = 0.33
    implied_prob_draw: float = 0.33
    implied_prob_away: float = 0.33
    implied_prob_over25: float = 0.50
    implied_prob_under25: float = 0.50
    implied_prob_btts_yes: float = 0.50
    implied_prob_btts_no: float = 0.50

    # --- Raw decimal odds (kept for Edge/Kelly calculations downstream) ---
    odds_home: float = 2.50
    odds_draw: float = 3.20
    odds_away: float = 2.80
    odds_over25: float = 1.90
    odds_under25: float = 1.90
    odds_btts_yes: float = 1.85
    odds_btts_no: float = 1.95

    # --- Data quality ---
    data_quality_score: float = 0.5
    data_quality_flags: list[str] = field(default_factory=list)

    # --- Metadata and contextual profiles ---
    match_name: str = ""
    home_team_name: str = ""
    away_team_name: str = ""
    league_id: str = ""
    league_name: str = ""
    referee_name: str = ""
    match_date_ms: int = 0
    league_avg_goals: float = 2.6
    referee_avg_goals: float = 2.6
    referee_home_bias: float = 0.0
    home_squad_strength: float = 0.5
    away_squad_strength: float = 0.5
    home_key_players: float = 0.0
    away_key_players: float = 0.0

    def to_model_array(self) -> np.ndarray:
        """Return the 24 model inputs as a float64 vector.

        The element order is exactly the order of ``feature_names``.
        """
        ordered_values = [getattr(self, name) for name in self.feature_names()]
        return np.array(ordered_values, dtype=np.float64)

    @staticmethod
    def feature_names() -> list[str]:
        """Return the names of the model inputs, in vector order."""
        return [
            "home_elo",
            "away_elo",
            "elo_diff",
            "missing_players_impact",
            "home_avg_possession",
            "home_avg_shots_on_target",
            "home_avg_total_shots",
            "home_avg_goals_scored",
            "home_avg_goals_conceded",
            "away_avg_possession",
            "away_avg_shots_on_target",
            "away_avg_total_shots",
            "away_avg_goals_scored",
            "away_avg_goals_conceded",
            "implied_prob_home",
            "implied_prob_draw",
            "implied_prob_away",
            "implied_prob_over25",
            "implied_prob_under25",
            "implied_prob_btts_yes",
            "implied_prob_btts_no",
            "odds_home",
            "odds_draw",
            "odds_away",
        ]
async def extract_features(session: AsyncSession, match_id: str) -> MatchFeatures | None:
    """Build the complete feature set for one match.

    Loads the match header and then fills a ``MatchFeatures`` instance step
    by step from the loader helpers in this module. A source that yields
    nothing leaves the dataclass default in place and records a
    ``missing_*`` flag; the collected flags are finally converted into
    ``data_quality_score``.

    Args:
        session: Async SQLAlchemy session used for every query.
        match_id: Identifier of the target match.

    Returns:
        ``None`` when no match header is found. A ``MatchFeatures`` with
        quality score 0.1 when either team ID is missing. Otherwise the
        populated feature object.
    """
    header = await _load_match_header(session, match_id)
    if header is None:
        logger.warning("Match %s not found in live_matches or matches.", match_id)
        return None

    feats = MatchFeatures(match_id=match_id)
    issues: list[str] = []

    # Identity and metadata copied from the header row.
    feats.home_team_id = header["home_team_id"] or ""
    feats.away_team_id = header["away_team_id"] or ""
    feats.match_name = header.get("match_name", "") or ""
    feats.match_date_ms = int(header.get("mst_utc", 0) or 0)
    feats.home_team_name = header.get("home_name", "") or ""
    feats.away_team_name = header.get("away_name", "") or ""
    feats.league_id = header.get("league_id", "") or ""
    feats.league_name = header.get("league_name", "") or ""
    feats.referee_name = header.get("referee_name", "") or ""

    # Without both team IDs none of the team-based loaders can run.
    if not (feats.home_team_id and feats.away_team_id):
        logger.warning("Match %s missing team IDs.", match_id)
        issues.append("missing_team_ids")
        feats.data_quality_flags = issues
        feats.data_quality_score = 0.1
        return feats

    home_id = feats.home_team_id
    away_id = feats.away_team_id
    cutoff_ms = feats.match_date_ms

    # Precomputed AI features (ELO, form, head-to-head).
    ai = await _load_ai_features(session, match_id)
    if not ai:
        issues.append("missing_ai_features")
    else:
        feats.home_elo = float(ai["home_elo"] or 1500.0)
        feats.away_elo = float(ai["away_elo"] or 1500.0)
        feats.missing_players_impact = float(ai["missing_players_impact"] or 0.0)
        feats.home_form_score = float(ai["home_form_score"] or 0.0)
        feats.away_form_score = float(ai["away_form_score"] or 0.0)
        stored_h2h_rate = ai.get("h2h_home_win_rate")
        if stored_h2h_rate is not None:
            feats.h2h_home_win_rate = float(stored_h2h_rate)
            feats.h2h_sample_size = int(ai.get("h2h_total") or 0)
    feats.elo_diff = feats.home_elo - feats.away_elo

    # Rolling per-team averages over matches played before the target match.
    home_stats = await _rolling_team_stats(session, home_id, cutoff_ms)
    away_stats = await _rolling_team_stats(session, away_id, cutoff_ms)
    if home_stats is None:
        issues.append("missing_home_stats")
    else:
        feats.home_avg_possession = home_stats["avg_possession"]
        feats.home_avg_shots_on_target = home_stats["avg_shots_on_target"]
        feats.home_avg_total_shots = home_stats["avg_total_shots"]
        feats.home_avg_goals_scored = home_stats["avg_goals_scored"]
        feats.home_avg_goals_conceded = home_stats["avg_goals_conceded"]
    if away_stats is None:
        issues.append("missing_away_stats")
    else:
        feats.away_avg_possession = away_stats["avg_possession"]
        feats.away_avg_shots_on_target = away_stats["avg_shots_on_target"]
        feats.away_avg_total_shots = away_stats["avg_total_shots"]
        feats.away_avg_goals_scored = away_stats["avg_goals_scored"]
        feats.away_avg_goals_conceded = away_stats["avg_goals_conceded"]

    # A form score of (almost) zero is replaced by the rolling goal difference.
    if abs(feats.home_form_score) < 1e-6:
        home_goal_diff = feats.home_avg_goals_scored - feats.home_avg_goals_conceded
        feats.home_form_score = round(home_goal_diff, 3)
    if abs(feats.away_form_score) < 1e-6:
        away_goal_diff = feats.away_avg_goals_scored - feats.away_avg_goals_conceded
        feats.away_form_score = round(away_goal_diff, 3)

    # Rest days since each team's previous match.
    home_rest = await _load_rest_days(session, home_id, cutoff_ms)
    away_rest = await _load_rest_days(session, away_id, cutoff_ms)
    if home_rest is None:
        issues.append("missing_home_rest")
    else:
        feats.home_rest_days = home_rest
    if away_rest is None:
        issues.append("missing_away_rest")
    else:
        feats.away_rest_days = away_rest
    feats.rest_diff = round(feats.home_rest_days - feats.away_rest_days, 3)

    # Head-to-head is computed from match history only when the AI row
    # supplied no sample.
    if feats.h2h_sample_size == 0:
        h2h = await _load_h2h_stats(session, home_id, away_id, cutoff_ms)
        if h2h is None:
            issues.append("missing_h2h")
        else:
            feats.h2h_home_win_rate = h2h["home_win_rate"]
            feats.h2h_sample_size = h2h["sample_size"]

    league = await _load_league_profile(session, feats.league_id, cutoff_ms)
    if league is None:
        issues.append("missing_league_profile")
    else:
        feats.league_avg_goals = league["avg_goals"]

    referee = await _load_referee_profile(session, feats.referee_name, cutoff_ms)
    if referee is None:
        issues.append("missing_referee_profile")
    else:
        feats.referee_avg_goals = referee["avg_goals"]
        feats.referee_home_bias = referee["home_bias"]

    home_squad = await _load_team_squad_profile(session, home_id, cutoff_ms)
    away_squad = await _load_team_squad_profile(session, away_id, cutoff_ms)
    if home_squad is None:
        issues.append("missing_home_squad_profile")
    else:
        feats.home_squad_strength = home_squad["squad_strength"]
        feats.home_key_players = home_squad["key_players"]
    if away_squad is None:
        issues.append("missing_away_squad_profile")
    else:
        feats.away_squad_strength = away_squad["squad_strength"]
        feats.away_key_players = away_squad["key_players"]

    # Lineup / sidelined context taken from the header row itself.
    lineup = _extract_lineup_context(header)
    feats.home_lineup_availability = lineup["home_availability"]
    feats.away_lineup_availability = lineup["away_availability"]
    if not lineup["has_real_lineup_data"]:
        issues.append("missing_lineup_context")
    else:
        mean_unavailability = (
            (1.0 - feats.home_lineup_availability)
            + (1.0 - feats.away_lineup_availability)
        ) / 2.0
        # Keep whichever impact estimate is larger.
        feats.missing_players_impact = max(
            feats.missing_players_impact,
            round(mean_unavailability, 4),
        )

    if not await _extract_odds(session, match_id, feats):
        issues.append("missing_odds")

    # Start from a perfect score and subtract a penalty per recorded flag.
    penalties = {
        "missing_team_ids": 0.5,
        "missing_ai_features": 0.05,
        "missing_home_stats": 0.15,
        "missing_away_stats": 0.15,
        "missing_home_rest": 0.05,
        "missing_away_rest": 0.05,
        "missing_h2h": 0.05,
        "missing_league_profile": 0.04,
        "missing_referee_profile": 0.04,
        "missing_home_squad_profile": 0.06,
        "missing_away_squad_profile": 0.06,
        "missing_lineup_context": 0.05,
        "missing_odds": 0.2,
    }
    score = 1.0
    for issue in issues:
        score -= penalties.get(issue, 0.05)
    feats.data_quality_score = max(0.0, round(score, 2))
    feats.data_quality_flags = issues
    return feats
async def _load_match_header(
    session: AsyncSession, match_id: str,
) -> dict[str, Any] | None:
    """Fetch the header row of a match.

    ``live_matches`` is queried first and ``matches`` second; the first row
    found is returned as a plain dict. Both queries expose the same column
    names; the ``matches`` variant returns NULL for ``lineups`` and
    ``sidelined`` and takes the referee name from ``match_officials``
    (``role_id = 1``).

    Returns:
        The header row as a dict, or ``None`` when neither table has the match.
    """
    live_sql = """
        SELECT
        m.id,
        m.home_team_id,
        m.away_team_id,
        m.match_name,
        m.mst_utc,
        m.sport,
        m.league_id,
        m.referee_name,
        m.lineups,
        m.sidelined,
        ht.name AS home_name,
        at.name AS away_name,
        l.name AS league_name
        FROM live_matches m
        LEFT JOIN teams ht ON ht.id = m.home_team_id
        LEFT JOIN teams at ON at.id = m.away_team_id
        LEFT JOIN leagues l ON l.id = m.league_id
        WHERE m.id = :match_id
        LIMIT 1
    """
    archive_sql = """
        SELECT
        m.id,
        m.home_team_id,
        m.away_team_id,
        m.match_name,
        m.mst_utc,
        m.sport,
        m.league_id,
        ref.name AS referee_name,
        NULL AS lineups,
        NULL AS sidelined,
        ht.name AS home_name,
        at.name AS away_name,
        l.name AS league_name
        FROM matches m
        LEFT JOIN teams ht ON ht.id = m.home_team_id
        LEFT JOIN teams at ON at.id = m.away_team_id
        LEFT JOIN leagues l ON l.id = m.league_id
        LEFT JOIN match_officials ref ON ref.match_id = m.id AND ref.role_id = 1
        WHERE m.id = :match_id
        LIMIT 1
    """
    # Order matters: the live table takes precedence over the archive table.
    for sql in (live_sql, archive_sql):
        outcome = await session.execute(text(sql), {"match_id": match_id})
        header = outcome.mappings().first()
        if header:
            return dict(header)
    return None
async def _load_ai_features(
    session: AsyncSession, match_id: str,
) -> dict[str, Any] | None:
    """Fetch the ``football_ai_features`` row stored for a match.

    Returns:
        The row (ELO values, missing-player impact, form scores and
        head-to-head figures) as a dict, or ``None`` when no row exists.
    """
    stmt = text("""
        SELECT
        home_elo,
        away_elo,
        missing_players_impact,
        home_form_score,
        away_form_score,
        h2h_home_win_rate,
        h2h_total
        FROM football_ai_features
        WHERE match_id = :match_id
        LIMIT 1
    """)
    outcome = await session.execute(stmt, {"match_id": match_id})
    record = outcome.mappings().first()
    if not record:
        return None
    return dict(record)
async def _rolling_team_stats(
    session: AsyncSession,
    team_id: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Average a team's stats over its most recent scored matches.

    Considers football matches with ``mst_utc`` strictly below
    ``before_mst_utc`` that have both scores set and a matching
    ``football_team_stats`` row, newest first, limited to
    ``ROLLING_WINDOW`` rows.

    Returns:
        A dict with the five averages rounded to two decimals, or ``None``
        when no qualifying match exists.
    """
    stmt = text("""
        WITH recent AS (
        SELECT
        m.id AS match_id,
        m.home_team_id,
        m.away_team_id,
        m.score_home,
        m.score_away,
        ts.possession_percentage,
        ts.shots_on_target,
        ts.total_shots
        FROM matches m
        JOIN football_team_stats ts ON ts.match_id = m.id AND ts.team_id = :team_id
        WHERE (m.home_team_id = :team_id OR m.away_team_id = :team_id)
        AND m.mst_utc < :before_ts
        AND m.sport = 'football'
        AND m.score_home IS NOT NULL
        AND m.score_away IS NOT NULL
        ORDER BY m.mst_utc DESC
        LIMIT :window
        )
        SELECT
        COALESCE(AVG(possession_percentage), 50.0) AS avg_possession,
        COALESCE(AVG(shots_on_target), 4.0) AS avg_shots_on_target,
        COALESCE(AVG(total_shots), 10.0) AS avg_total_shots,
        COALESCE(AVG(
        CASE
        WHEN home_team_id = :team_id THEN score_home
        ELSE score_away
        END
        ), 1.3) AS avg_goals_scored,
        COALESCE(AVG(
        CASE
        WHEN home_team_id = :team_id THEN score_away
        ELSE score_home
        END
        ), 1.1) AS avg_goals_conceded,
        COUNT(*) AS match_count
        FROM recent
    """)
    params = {
        "team_id": team_id,
        "before_ts": before_mst_utc,
        "window": ROLLING_WINDOW,
    }
    summary = (await session.execute(stmt, params)).mappings().first()
    # The aggregate still yields a row for an empty window, so the count
    # is what tells whether any match contributed.
    if summary is None or int(summary["match_count"]) == 0:
        return None
    metric_keys = (
        "avg_possession",
        "avg_shots_on_target",
        "avg_total_shots",
        "avg_goals_scored",
        "avg_goals_conceded",
    )
    return {key: round(float(summary[key]), 2) for key in metric_keys}
async def _load_rest_days(
    session: AsyncSession,
    team_id: str,
    before_mst_utc: int,
) -> float | None:
    """Return the days between a team's previous match and the cutoff.

    The previous match is the latest football match of the team whose
    ``mst_utc`` is strictly below ``before_mst_utc``. The timestamp
    difference is divided by 86,400,000 (milliseconds per day), floored at
    zero and capped at ``MAX_REST_DAYS``.

    Returns:
        Rest days rounded to three decimals, or ``None`` when the team has
        no earlier match.
    """
    stmt = text("""
        SELECT m.mst_utc
        FROM matches m
        WHERE (m.home_team_id = :team_id OR m.away_team_id = :team_id)
        AND m.mst_utc < :before_ts
        AND m.sport = 'football'
        ORDER BY m.mst_utc DESC
        LIMIT 1
    """)
    params = {"team_id": team_id, "before_ts": before_mst_utc}
    previous_ts = (await session.execute(stmt, params)).scalar_one_or_none()
    if previous_ts is None:
        return None
    elapsed_days = (float(before_mst_utc) - float(previous_ts)) / 86400000.0
    bounded_days = min(max(0.0, elapsed_days), MAX_REST_DAYS)
    return round(bounded_days, 3)
async def _load_h2h_stats(
    session: AsyncSession,
    home_team_id: str,
    away_team_id: str,
    before_mst_utc: int,
) -> dict[str, float | int] | None:
    """Summarise past meetings of the two teams from the home team's view.

    Reads up to ``H2H_WINDOW`` scored football matches between the two teams
    (either venue) played before ``before_mst_utc``. Scores are re-oriented
    so that "home" always means ``home_team_id``.

    Returns:
        ``{"home_win_rate": ..., "sample_size": ...}`` where a draw counts as
        half a win, or ``None`` when there is no usable meeting.
    """
    stmt = text("""
        SELECT
        m.home_team_id,
        m.away_team_id,
        m.score_home,
        m.score_away
        FROM matches m
        WHERE m.sport = 'football'
        AND m.mst_utc < :before_ts
        AND m.score_home IS NOT NULL
        AND m.score_away IS NOT NULL
        AND (
        (m.home_team_id = :home_team_id AND m.away_team_id = :away_team_id)
        OR
        (m.home_team_id = :away_team_id AND m.away_team_id = :home_team_id)
        )
        ORDER BY m.mst_utc DESC
        LIMIT :window
    """)
    params = {
        "home_team_id": home_team_id,
        "away_team_id": away_team_id,
        "before_ts": before_mst_utc,
        "window": H2H_WINDOW,
    }
    meetings = (await session.execute(stmt, params)).mappings().all()

    # Defensive filter: ignore rows that still lack a score.
    scored = [
        meeting
        for meeting in meetings
        if meeting["score_home"] is not None and meeting["score_away"] is not None
    ]
    if not scored:
        return None

    wins = 0.0
    level = 0.0
    for meeting in scored:
        if meeting["home_team_id"] == home_team_id:
            goals_for = float(meeting["score_home"])
            goals_against = float(meeting["score_away"])
        else:
            # Venue was reversed in this meeting: swap the perspective.
            goals_for = float(meeting["score_away"])
            goals_against = float(meeting["score_home"])
        if goals_for > goals_against:
            wins += 1.0
        elif goals_for == goals_against:
            level += 1.0

    sample_size = len(scored)
    # A draw contributes half a win instead of being discarded.
    rate = round((wins + level * 0.5) / sample_size, 4)
    return {"home_win_rate": rate, "sample_size": sample_size}
async def _load_league_profile(
    session: AsyncSession,
    league_id: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Return the average total goals of a league's recent matches.

    Averages ``score_home + score_away`` over the latest 100 football
    matches of the league with status ``'FT'``, both scores set and
    ``mst_utc`` below ``before_mst_utc``.

    Returns:
        ``{"avg_goals": ...}`` rounded to three decimals, or ``None`` when
        ``league_id`` is empty or no match qualifies.
    """
    if not league_id:
        return None
    stmt = text("""
        SELECT
        COALESCE(AVG(m.score_home + m.score_away), 2.6) AS avg_goals,
        COUNT(*) AS match_count
        FROM (
        SELECT score_home, score_away
        FROM matches
        WHERE league_id = :league_id
        AND sport = 'football'
        AND status = 'FT'
        AND score_home IS NOT NULL
        AND score_away IS NOT NULL
        AND mst_utc < :before_ts
        ORDER BY mst_utc DESC
        LIMIT 100
        ) m
    """)
    params = {"league_id": league_id, "before_ts": before_mst_utc}
    summary = (await session.execute(stmt, params)).mappings().first()
    if summary is None:
        return None
    if int(summary["match_count"] or 0) == 0:
        return None
    return {"avg_goals": round(float(summary["avg_goals"]), 3)}
async def _load_referee_profile(
    session: AsyncSession,
    referee_name: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Return goal and home-win tendencies for a referee.

    Uses the latest 30 finished (``'FT'``) football matches before
    ``before_mst_utc`` in which ``match_officials`` lists the given name
    with ``role_id = 1``. ``home_bias`` is the home-win share minus the
    0.46 baseline hard-coded in the query.

    Returns:
        ``{"home_bias": ..., "avg_goals": ...}``, or ``None`` when the name
        is empty or no match qualifies.
    """
    if not referee_name:
        return None
    stmt = text("""
        SELECT
        COALESCE(AVG(CASE WHEN score_home > score_away THEN 1.0 ELSE 0.0 END), 0.46) - 0.46 AS home_bias,
        COALESCE(AVG(score_home + score_away), 2.6) AS avg_goals,
        COUNT(*) AS match_count
        FROM (
        SELECT m.score_home, m.score_away
        FROM match_officials mo
        JOIN matches m ON m.id = mo.match_id
        WHERE mo.name = :referee_name
        AND mo.role_id = 1
        AND m.sport = 'football'
        AND m.status = 'FT'
        AND m.score_home IS NOT NULL
        AND m.score_away IS NOT NULL
        AND m.mst_utc < :before_ts
        ORDER BY m.mst_utc DESC
        LIMIT 30
        ) ref_matches
    """)
    params = {"referee_name": referee_name, "before_ts": before_mst_utc}
    summary = (await session.execute(stmt, params)).mappings().first()
    if summary is None:
        return None
    if int(summary["match_count"] or 0) == 0:
        return None
    bias = round(float(summary["home_bias"]), 4)
    goals = round(float(summary["avg_goals"]), 3)
    return {"home_bias": bias, "avg_goals": goals}
async def _load_team_squad_profile(
    session: AsyncSession,
    team_id: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Score a team's squad from its last eight finished matches.

    Per player the query computes
    ``starts * 1.5 + substitute appearances * 0.5 + goals * 2.5 + assists * 1.5``
    and then looks at the eleven highest-scoring players.

    Returns:
        ``squad_strength``: the top-eleven average score divided by 10 and
        clamped to [0, 1]; ``key_players``: how many of those players score
        at least 6.0. ``None`` when ``team_id`` is empty or the team has no
        qualifying match.
    """
    if not team_id:
        return None
    stmt = text("""
        WITH recent_matches AS (
        SELECT m.id, m.mst_utc
        FROM matches m
        WHERE (m.home_team_id = :team_id OR m.away_team_id = :team_id)
        AND m.sport = 'football'
        AND m.status = 'FT'
        AND m.mst_utc < :before_ts
        ORDER BY m.mst_utc DESC
        LIMIT 8
        ),
        player_base AS (
        SELECT
        mpp.player_id,
        COUNT(*)::float AS appearances,
        COUNT(*) FILTER (WHERE mpp.is_starting = true)::float AS starts
        FROM match_player_participation mpp
        JOIN recent_matches rm ON rm.id = mpp.match_id
        WHERE mpp.team_id = :team_id
        GROUP BY mpp.player_id
        ),
        player_goals AS (
        SELECT
        mpe.player_id,
        COUNT(*) FILTER (
        WHERE mpe.event_type = 'goal'
        AND COALESCE(mpe.event_subtype, '') NOT ILIKE '%penaltı kaçırma%'
        )::float AS goals,
        0.0::float AS assists
        FROM match_player_events mpe
        JOIN recent_matches rm ON rm.id = mpe.match_id
        WHERE mpe.team_id = :team_id
        GROUP BY mpe.player_id
        UNION ALL
        SELECT
        mpe.assist_player_id AS player_id,
        0.0::float AS goals,
        COUNT(*) FILTER (
        WHERE mpe.event_type = 'goal'
        AND mpe.assist_player_id IS NOT NULL
        )::float AS assists
        FROM match_player_events mpe
        JOIN recent_matches rm ON rm.id = mpe.match_id
        WHERE mpe.team_id = :team_id
        AND mpe.assist_player_id IS NOT NULL
        GROUP BY mpe.assist_player_id
        ),
        player_events AS (
        SELECT
        player_id,
        SUM(goals) AS goals,
        SUM(assists) AS assists
        FROM player_goals
        GROUP BY player_id
        ),
        player_scores AS (
        SELECT
        pb.player_id,
        (pb.starts * 1.5)
        + ((pb.appearances - pb.starts) * 0.5)
        + (COALESCE(pe.goals, 0.0) * 2.5)
        + (COALESCE(pe.assists, 0.0) * 1.5) AS score
        FROM player_base pb
        LEFT JOIN player_events pe ON pe.player_id = pb.player_id
        )
        SELECT
        COALESCE(AVG(top_players.score), 0.0) AS avg_top_score,
        COALESCE(COUNT(*) FILTER (WHERE top_players.score >= 6.0), 0) AS key_players,
        COALESCE((SELECT COUNT(*) FROM recent_matches), 0) AS match_count
        FROM (
        SELECT score
        FROM player_scores
        ORDER BY score DESC
        LIMIT 11
        ) top_players
    """)
    params = {"team_id": team_id, "before_ts": before_mst_utc}
    summary = (await session.execute(stmt, params)).mappings().first()
    if summary is None:
        return None
    if int(summary["match_count"] or 0) == 0:
        return None
    top_score = float(summary["avg_top_score"] or 0.0)
    # Scale the raw score to a 0..1 strength value.
    strength = min(max(top_score / 10.0, 0.0), 1.0)
    return {
        "squad_strength": round(strength, 4),
        "key_players": float(summary["key_players"] or 0),
    }
def _safe_json(value: Any) -> dict[str, Any] | None:
    """Coerce a value into a dict, or return ``None``.

    A dict is returned unchanged. A string is parsed as JSON and returned
    only when the parsed value is a dict. Everything else, including
    ``None``, unparsable strings and JSON that is not an object, yields
    ``None``.
    """
    if isinstance(value, dict):
        return value
    if not isinstance(value, str):
        return None
    try:
        decoded = json.loads(value)
    except (TypeError, json.JSONDecodeError):
        return None
    if isinstance(decoded, dict):
        return decoded
    return None
def _safe_list(value: Any) -> list[Any]:
    """Return ``value`` itself when it is a list, otherwise a new empty list."""
    return value if isinstance(value, list) else []
def _extract_lineup_context(match_row: dict[str, Any]) -> dict[str, float | bool]:
    """Derive lineup availability from a match header row.

    Reads the ``lineups`` and ``sidelined`` entries of ``match_row`` (dicts
    or JSON strings), counts the starting eleven under ``home``/``away`` ->
    ``xi`` and the sidelined players under ``homeTeam``/``awayTeam``, and
    turns the counts into availability ratios.

    Malformed payloads are tolerated: a nested node that is missing, JSON
    null or not an object counts as empty, and a ``totalSidelined`` value
    that cannot be converted to an integer counts as zero. Neither case
    raises.

    Args:
        match_row: Header row as returned by the match-header loader.

    Returns:
        A dict with ``home_availability`` and ``away_availability`` (floats)
        and ``has_real_lineup_data`` (True when any of the four counts is
        positive).
    """

    def _child(parent: dict[str, Any] | None, key: str) -> dict[str, Any]:
        # JSON null or a scalar where an object is expected must not crash.
        node = parent.get(key) if parent else None
        return node if isinstance(node, dict) else {}

    def _sidelined_total(team: dict[str, Any]) -> int:
        try:
            declared = int(team.get("totalSidelined") or 0)
        except (TypeError, ValueError, OverflowError):
            declared = 0
        # Trust whichever is larger: the declared total or the listed players.
        return max(declared, len(_safe_list(team.get("players"))))

    lineups = _safe_json(match_row.get("lineups"))
    sidelined = _safe_json(match_row.get("sidelined"))

    home_xi_count = len(_safe_list(_child(lineups, "home").get("xi")))
    away_xi_count = len(_safe_list(_child(lineups, "away").get("xi")))
    home_sidelined_count = _sidelined_total(_child(sidelined, "homeTeam"))
    away_sidelined_count = _sidelined_total(_child(sidelined, "awayTeam"))

    has_real_lineup_data = any(
        count > 0
        for count in (
            home_xi_count,
            away_xi_count,
            home_sidelined_count,
            away_sidelined_count,
        )
    )
    return {
        "home_availability": _compute_availability(home_xi_count, home_sidelined_count),
        "away_availability": _compute_availability(away_xi_count, away_sidelined_count),
        "has_real_lineup_data": has_real_lineup_data,
    }
def _compute_availability(xi_count: int, sidelined_count: int) -> float:
    """Turn lineup counts into an availability ratio in [0, 1].

    The base ratio is ``xi_count / 11`` (1.0 when no starting eleven is
    known). From it a penalty of up to 0.35 is subtracted, proportional to
    ``sidelined_count / 11``. The result is rounded to four decimals.
    """

    def _unit(amount: float) -> float:
        # Clamp to the closed interval [0, 1].
        return min(max(amount, 0.0), 1.0)

    if xi_count > 0:
        base_ratio = _unit(xi_count / 11.0)
    else:
        base_ratio = 1.0
    penalty = _unit(sidelined_count / 11.0) * 0.35
    return round(_unit(base_ratio - penalty), 4)
def _safe_odd(val: Any) -> float:
    """Parse a decimal-odds value that may be str, float, int or None.

    Args:
        val: Raw odds value.

    Returns:
        The value as a float when it is a finite number strictly greater
        than 1.0; otherwise 0.0. The 0.0 sentinel covers ``None``,
        unparsable input, values of 1.0 or less, NaN and infinity, so a
        non-finite "price" can never reach the feature vector.
    """
    if val is None:
        return 0.0
    try:
        parsed = float(val)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: integers too large to be represented as a float.
        return 0.0
    # The chained comparison is False for NaN and for +inf, which float()
    # happily produces from strings such as "nan" or "inf".
    return parsed if 1.0 < parsed < float("inf") else 0.0
def _implied_prob(decimal_odd: float) -> float:
    """Return the probability implied by decimal odds.

    Odds of 1.0 or less yield 0.0; otherwise the result is ``1 / odds``,
    capped at 1.0.
    """
    return 0.0 if decimal_odd <= 1.0 else min(1.0, 1.0 / decimal_odd)
async def _extract_odds(
    session: AsyncSession,
    match_id: str,
    feats: MatchFeatures,
) -> bool:
    """Populate odds and implied probabilities on ``feats``.

    The live odds JSON is tried first; if it yields nothing the relational
    odds tables are used as fallback. When either source succeeds, all seven
    ``implied_prob_*`` fields are recomputed from the corresponding
    ``odds_*`` fields and rounded to four decimals.

    Returns:
        True when one of the two sources reported odds, False otherwise
        (``feats`` then keeps its implied-probability values).
    """
    live_blob = await _load_live_odds_json(session, match_id)
    found = _parse_odds_json(live_blob, feats) if live_blob else False
    if not found:
        found = await _load_relational_odds(session, match_id, feats)
    if not found:
        return found

    markets = ("home", "draw", "away", "over25", "under25", "btts_yes", "btts_no")
    for market in markets:
        price = getattr(feats, f"odds_{market}")
        setattr(feats, f"implied_prob_{market}", round(_implied_prob(price), 4))
    return found
async def _load_live_odds_json(
    session: AsyncSession, match_id: str,
) -> dict[str, Any] | list[Any] | None:
    """Load the ``odds`` column of a live match as parsed JSON.

    The column value may already be a dict/list or may be a JSON string, in
    which case it is parsed here.

    Returns:
        The odds payload as a dict or a list, or ``None`` when the match has
        no odds, the string is not valid JSON, or the payload is of any
        other type. The annotation includes ``list`` because list payloads
        are passed through and are accepted by the odds parser.
    """
    stmt = text("SELECT odds FROM live_matches WHERE id = :mid AND odds IS NOT NULL")
    outcome = await session.execute(stmt, {"mid": match_id})
    payload = outcome.scalar_one_or_none()
    if isinstance(payload, str):
        try:
            payload = json.loads(payload)
        except (json.JSONDecodeError, TypeError):
            return None
    # Covers None as well as unexpected scalar payloads.
    return payload if isinstance(payload, (dict, list)) else None
def _parse_odds_json(odds_blob: dict[str, Any] | list[Any], feats: MatchFeatures) -> bool:
    """Copy 1X2, over/under 2.5 and BTTS prices from an odds JSON blob.

    Accepted shapes: a list of category dicts, or a dict holding the
    categories under ``"categories"`` or ``"odds"`` (as a list or as a dict
    of categories). Each category is identified by ``name``/``cn`` and
    carries its prices under ``selections``/``s``. The original code
    describes this as the Mackolik-style structure.

    Only valid prices (greater than 1.0) overwrite a field on ``feats``;
    invalid or missing ones leave the current value untouched.

    Returns:
        True only when at least one valid price was written. A recognised
        market whose selections contain no usable price does not count, so
        the caller can still fall back to another odds source and flag
        missing odds.
    """
    if isinstance(odds_blob, list):
        raw_categories: Any = odds_blob
    elif isinstance(odds_blob, dict):
        raw_categories = odds_blob.get("categories", odds_blob.get("odds", []))
        if isinstance(raw_categories, dict):
            raw_categories = list(raw_categories.values())
    else:
        raw_categories = []
    if not isinstance(raw_categories, list):
        raw_categories = []
    categories = [item for item in raw_categories if isinstance(item, dict)]

    def _store(attr: str, raw_price: Any) -> bool:
        # Write the price only when it is valid; report whether it was written.
        price = _safe_odd(raw_price)
        if price <= 0.0:
            return False
        setattr(feats, attr, price)
        return True

    found_any = False
    for cat in categories:
        # str() guards against non-string names in the payload.
        cat_name = str(cat.get("name") or cat.get("cn") or "").strip().lower()
        selections = cat.get("selections") or cat.get("s") or []
        if cat_name in ("mac sonucu", "match result", "1x2", "maç sonucu"):
            sels = _selections_to_map(selections)
            written = [
                _store("odds_home", sels.get("1")),
                _store("odds_draw", sels.get("x")),
                _store("odds_away", sels.get("2")),
            ]
        elif cat_name in ("2,5 alt/ust", "over/under 2.5", "2.5 alt/ust", "2,5 alt/üst", "2.5 alt/üst"):
            sels = _selections_to_map(selections)
            written = [
                _store("odds_over25", sels.get("ust") or sels.get("over") or sels.get("üst")),
                _store("odds_under25", sels.get("alt") or sels.get("under")),
            ]
        elif cat_name in ("karsilikli gol", "both teams to score", "btts", "karşılıklı gol"):
            sels = _selections_to_map(selections)
            written = [
                _store("odds_btts_yes", sels.get("var") or sels.get("yes")),
                _store("odds_btts_no", sels.get("yok") or sels.get("no")),
            ]
        else:
            continue
        found_any = found_any or any(written)
    return found_any
def _selections_to_map(selections: list[Any] | dict[str, Any]) -> dict[str, Any]:
    """Normalize varied selection structures into ``{name_lower: odd_value}``.

    Args:
        selections: Either a mapping of selection name to price, or a list
            of dicts carrying the name under ``name``/``n`` and the price
            under ``odd_value``/``ov``/``v``.

    Returns:
        A dict keyed by the stripped, lower-cased selection name. Names are
        converted with ``str()`` in both branches, so numeric names such as
        ``1`` and ``2`` are handled like ``"1"`` and ``"2"`` instead of
        raising. List items that are not dicts or have no name are skipped;
        any other input type yields an empty dict.
    """
    normalized: dict[str, Any] = {}
    if isinstance(selections, dict):
        for key, value in selections.items():
            normalized[str(key).strip().lower()] = value
        return normalized
    if not isinstance(selections, list):
        return normalized
    for entry in selections:
        if not isinstance(entry, dict):
            continue
        label = str(entry.get("name") or entry.get("n") or "").strip().lower()
        if not label:
            continue
        normalized[label] = entry.get("odd_value") or entry.get("ov") or entry.get("v")
    return normalized
async def _load_relational_odds(
    session: AsyncSession, match_id: str, feats: MatchFeatures,
) -> bool:
    """Fallback: load odds from ``odd_categories`` + ``odd_selections``.

    Reads the selections of the three supported categories ('Maç Sonucu',
    '2,5 Alt/Üst', 'Karşılıklı Gol') and writes every valid price (greater
    than 1.0) into the matching ``odds_*`` field of ``feats``.

    Returns:
        True only when at least one price was written. Rows that exist but
        carry no valid or recognised selection do not count, so the caller
        can flag the odds as missing instead of silently keeping defaults.
    """
    stmt = text("""
        SELECT oc.name AS cat_name, os.name AS sel_name, os.odd_value
        FROM odd_categories oc
        JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
        WHERE oc.match_id = :match_id
        AND oc.name IN ('Maç Sonucu', '2,5 Alt/Üst', 'Karşılıklı Gol')
    """)
    rows = (await session.execute(stmt, {"match_id": match_id})).mappings().all()
    if not rows:
        return False

    # (category, lower-cased selection name) -> MatchFeatures attribute.
    targets = {
        ("Maç Sonucu", "1"): "odds_home",
        ("Maç Sonucu", "x"): "odds_draw",
        ("Maç Sonucu", "2"): "odds_away",
        ("2,5 Alt/Üst", "üst"): "odds_over25",
        ("2,5 Alt/Üst", "ust"): "odds_over25",
        ("2,5 Alt/Üst", "over"): "odds_over25",
        ("2,5 Alt/Üst", "alt"): "odds_under25",
        ("2,5 Alt/Üst", "under"): "odds_under25",
        ("Karşılıklı Gol", "var"): "odds_btts_yes",
        ("Karşılıklı Gol", "yes"): "odds_btts_yes",
        ("Karşılıklı Gol", "yok"): "odds_btts_no",
        ("Karşılıklı Gol", "no"): "odds_btts_no",
    }

    applied = False
    for row in rows:
        price = _safe_odd(row["odd_value"])
        if price <= 1.0:
            # Invalid or missing price: keep whatever feats already holds.
            continue
        category = (row["cat_name"] or "").strip()
        selection = (row["sel_name"] or "").strip().lower()
        attr = targets.get((category, selection))
        if attr is None:
            continue
        setattr(feats, attr, price)
        applied = True
    return applied