"""
Feature Extractor - V2 Betting Engine

Pulls historical team stats, ELO, missing-player impact and live odds
from PostgreSQL and engineers a leakage-free feature vector for the
ensemble model.

CRITICAL: Only pre-match data (matches before the target match) is used.
Post-match stats of the target match are NEVER included.
"""

from __future__ import annotations

import json
import logging
import math
from dataclasses import dataclass, field
from typing import Any

import numpy as np
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession

logger = logging.getLogger(__name__)

ROLLING_WINDOW: int = 5
H2H_WINDOW: int = 10
MAX_REST_DAYS: float = 14.0

# Penalty subtracted from the data-quality score for each raised flag.
_QUALITY_PENALTIES: dict[str, float] = {
    "missing_team_ids": 0.5,
    "missing_ai_features": 0.05,
    "missing_home_stats": 0.15,
    "missing_away_stats": 0.15,
    "missing_home_rest": 0.05,
    "missing_away_rest": 0.05,
    "missing_h2h": 0.05,
    "missing_league_profile": 0.04,
    "missing_referee_profile": 0.04,
    "missing_home_squad_profile": 0.06,
    "missing_away_squad_profile": 0.06,
    "missing_lineup_context": 0.05,
    "missing_odds": 0.2,
}
# Penalty applied to a flag that has no entry in the table above.
_DEFAULT_QUALITY_PENALTY: float = 0.05

# Lower-cased selection name -> MatchFeatures attribute, one table per market.
_MARKET_1X2: dict[str, str] = {
    "1": "odds_home",
    "x": "odds_draw",
    "2": "odds_away",
}
_MARKET_OU25: dict[str, str] = {
    "ust": "odds_over25",
    "üst": "odds_over25",
    "over": "odds_over25",
    "alt": "odds_under25",
    "under": "odds_under25",
}
_MARKET_BTTS: dict[str, str] = {
    "var": "odds_btts_yes",
    "yes": "odds_btts_yes",
    "yok": "odds_btts_no",
    "no": "odds_btts_no",
}

# Lower-cased category names accepted in the live odds JSON.
_JSON_MARKETS: dict[str, dict[str, str]] = {
    "mac sonucu": _MARKET_1X2,
    "match result": _MARKET_1X2,
    "1x2": _MARKET_1X2,
    "maç sonucu": _MARKET_1X2,
    "2,5 alt/ust": _MARKET_OU25,
    "over/under 2.5": _MARKET_OU25,
    "2.5 alt/ust": _MARKET_OU25,
    "2,5 alt/üst": _MARKET_OU25,
    "2.5 alt/üst": _MARKET_OU25,
    "karsilikli gol": _MARKET_BTTS,
    "both teams to score": _MARKET_BTTS,
    "btts": _MARKET_BTTS,
    "karşılıklı gol": _MARKET_BTTS,
}

# Exact category names queried from the relational odds tables.
_RELATIONAL_MARKETS: dict[str, dict[str, str]] = {
    "Maç Sonucu": _MARKET_1X2,
    "2,5 Alt/Üst": _MARKET_OU25,
    "Karşılıklı Gol": _MARKET_BTTS,
}


@dataclass
class MatchFeatures:
    """Structured feature vector ready for the ensemble model.

    Every field has a neutral default so a partially populated instance
    is still usable; ``data_quality_flags`` records which inputs were
    missing and ``data_quality_score`` summarises them.
    """

    match_id: str = ""
    home_team_id: str = ""
    away_team_id: str = ""

    # ELO & AI features
    home_elo: float = 1500.0
    away_elo: float = 1500.0
    elo_diff: float = 0.0
    missing_players_impact: float = 0.0
    home_form_score: float = 0.0
    away_form_score: float = 0.0
    h2h_home_win_rate: float = 0.5
    h2h_sample_size: int = 0
    home_rest_days: float = 7.0
    away_rest_days: float = 7.0
    rest_diff: float = 0.0
    home_lineup_availability: float = 1.0
    away_lineup_availability: float = 1.0

    # Rolling averages - Home (last 5 matches)
    home_avg_possession: float = 50.0
    home_avg_shots_on_target: float = 4.0
    home_avg_total_shots: float = 10.0
    home_avg_goals_scored: float = 1.3
    home_avg_goals_conceded: float = 1.1

    # Rolling averages - Away (last 5 matches)
    away_avg_possession: float = 50.0
    away_avg_shots_on_target: float = 4.0
    away_avg_total_shots: float = 10.0
    away_avg_goals_scored: float = 1.3
    away_avg_goals_conceded: float = 1.1

    # Implied probabilities from bookmaker odds
    implied_prob_home: float = 0.33
    implied_prob_draw: float = 0.33
    implied_prob_away: float = 0.33
    implied_prob_over25: float = 0.50
    implied_prob_under25: float = 0.50
    implied_prob_btts_yes: float = 0.50
    implied_prob_btts_no: float = 0.50

    # Raw decimal odds (for Edge/Kelly calculations downstream)
    odds_home: float = 2.50
    odds_draw: float = 3.20
    odds_away: float = 2.80
    odds_over25: float = 1.90
    odds_under25: float = 1.90
    odds_btts_yes: float = 1.85
    odds_btts_no: float = 1.95

    # Data quality
    data_quality_score: float = 0.5
    data_quality_flags: list[str] = field(default_factory=list)

    # Metadata
    match_name: str = ""
    home_team_name: str = ""
    away_team_name: str = ""
    league_id: str = ""
    league_name: str = ""
    referee_name: str = ""
    match_date_ms: int = 0
    league_avg_goals: float = 2.6
    referee_avg_goals: float = 2.6
    referee_home_bias: float = 0.0
    home_squad_strength: float = 0.5
    away_squad_strength: float = 0.5
    home_key_players: float = 0.0
    away_key_players: float = 0.0

    def to_model_array(self) -> np.ndarray:
        """Return the 24-feature vector the ensemble expects.

        The values are read in the order given by :meth:`feature_names`,
        so the array and its column names cannot drift apart.
        """
        return np.array(
            [getattr(self, name) for name in self.feature_names()],
            dtype=np.float64,
        )

    @staticmethod
    def feature_names() -> list[str]:
        """Return the attribute names of the model features, in model order."""
        return [
            "home_elo",
            "away_elo",
            "elo_diff",
            "missing_players_impact",
            "home_avg_possession",
            "home_avg_shots_on_target",
            "home_avg_total_shots",
            "home_avg_goals_scored",
            "home_avg_goals_conceded",
            "away_avg_possession",
            "away_avg_shots_on_target",
            "away_avg_total_shots",
            "away_avg_goals_scored",
            "away_avg_goals_conceded",
            "implied_prob_home",
            "implied_prob_draw",
            "implied_prob_away",
            "implied_prob_over25",
            "implied_prob_under25",
            "implied_prob_btts_yes",
            "implied_prob_btts_no",
            "odds_home",
            "odds_draw",
            "odds_away",
        ]


async def extract_features(session: AsyncSession, match_id: str) -> MatchFeatures | None:
    """Master extraction pipeline.

    Loads the match header, then enriches a :class:`MatchFeatures` with
    AI features, rolling stats, rest days, head-to-head, league, referee
    and squad profiles, lineup context and odds. Each missing input adds
    a flag; the flags drive ``data_quality_score``.

    Args:
        session: Open async database session used for every query.
        match_id: Identifier of the target match.

    Returns:
        The populated features, or ``None`` when the match is found in
        neither ``live_matches`` nor ``matches``. When the match exists
        but lacks a team id, a mostly-default instance with quality
        score ``0.1`` is returned.
    """
    feats = MatchFeatures(match_id=match_id)
    flags: list[str] = []

    match_row = await _load_match_header(session, match_id)
    if match_row is None:
        logger.warning("Match %s not found in live_matches or matches.", match_id)
        return None

    feats.home_team_id = match_row["home_team_id"] or ""
    feats.away_team_id = match_row["away_team_id"] or ""
    feats.match_name = match_row.get("match_name", "") or ""
    feats.match_date_ms = int(match_row.get("mst_utc", 0) or 0)
    feats.home_team_name = match_row.get("home_name", "") or ""
    feats.away_team_name = match_row.get("away_name", "") or ""
    feats.league_id = match_row.get("league_id", "") or ""
    feats.league_name = match_row.get("league_name", "") or ""
    feats.referee_name = match_row.get("referee_name", "") or ""

    if not feats.home_team_id or not feats.away_team_id:
        # Nothing team-specific can be computed; bail out with a low score.
        logger.warning("Match %s missing team IDs.", match_id)
        flags.append("missing_team_ids")
        feats.data_quality_flags = flags
        feats.data_quality_score = 0.1
        return feats

    ai_row = await _load_ai_features(session, match_id)
    if ai_row:
        feats.home_elo = float(ai_row["home_elo"] or 1500.0)
        feats.away_elo = float(ai_row["away_elo"] or 1500.0)
        feats.missing_players_impact = float(ai_row["missing_players_impact"] or 0.0)
        feats.home_form_score = float(ai_row["home_form_score"] or 0.0)
        feats.away_form_score = float(ai_row["away_form_score"] or 0.0)
        if ai_row.get("h2h_home_win_rate") is not None:
            feats.h2h_home_win_rate = float(ai_row["h2h_home_win_rate"])
            feats.h2h_sample_size = int(ai_row.get("h2h_total") or 0)
    else:
        flags.append("missing_ai_features")
    feats.elo_diff = feats.home_elo - feats.away_elo

    sides = ("home", "away")

    # Queries are awaited one at a time: they all share the same session.
    for side in sides:
        rolling = await _rolling_team_stats(
            session,
            getattr(feats, f"{side}_team_id"),
            feats.match_date_ms,
        )
        if rolling is None:
            flags.append(f"missing_{side}_stats")
            continue
        # Keys are "avg_*"; the matching attributes are "<side>_avg_*".
        for key, value in rolling.items():
            setattr(feats, f"{side}_{key}", value)

    # A (near-)zero form score is treated as "not provided" and replaced
    # by the rolling goal difference.
    for side in sides:
        if abs(getattr(feats, f"{side}_form_score")) < 1e-6:
            goal_diff = (
                getattr(feats, f"{side}_avg_goals_scored")
                - getattr(feats, f"{side}_avg_goals_conceded")
            )
            setattr(feats, f"{side}_form_score", round(goal_diff, 3))

    for side in sides:
        rest_days = await _load_rest_days(
            session,
            getattr(feats, f"{side}_team_id"),
            feats.match_date_ms,
        )
        if rest_days is not None:
            setattr(feats, f"{side}_rest_days", rest_days)
        else:
            flags.append(f"missing_{side}_rest")
    feats.rest_diff = round(feats.home_rest_days - feats.away_rest_days, 3)

    # Only compute head-to-head locally when the AI row did not supply it.
    if feats.h2h_sample_size == 0:
        h2h = await _load_h2h_stats(
            session,
            feats.home_team_id,
            feats.away_team_id,
            feats.match_date_ms,
        )
        if h2h is not None:
            feats.h2h_home_win_rate = h2h["home_win_rate"]
            feats.h2h_sample_size = h2h["sample_size"]
        else:
            flags.append("missing_h2h")

    league_profile = await _load_league_profile(
        session,
        feats.league_id,
        feats.match_date_ms,
    )
    if league_profile is not None:
        feats.league_avg_goals = league_profile["avg_goals"]
    else:
        flags.append("missing_league_profile")

    referee_profile = await _load_referee_profile(
        session,
        feats.referee_name,
        feats.match_date_ms,
    )
    if referee_profile is not None:
        feats.referee_avg_goals = referee_profile["avg_goals"]
        feats.referee_home_bias = referee_profile["home_bias"]
    else:
        flags.append("missing_referee_profile")

    for side in sides:
        squad = await _load_team_squad_profile(
            session,
            getattr(feats, f"{side}_team_id"),
            feats.match_date_ms,
        )
        if squad is not None:
            setattr(feats, f"{side}_squad_strength", squad["squad_strength"])
            setattr(feats, f"{side}_key_players", squad["key_players"])
        else:
            flags.append(f"missing_{side}_squad_profile")

    lineup_info = _extract_lineup_context(match_row)
    feats.home_lineup_availability = lineup_info["home_availability"]
    feats.away_lineup_availability = lineup_info["away_availability"]
    if lineup_info["has_real_lineup_data"]:
        # Never lower the AI-provided impact; only raise it when the
        # lineup data indicates more unavailability.
        lineup_impact = round(
            (
                (1.0 - feats.home_lineup_availability)
                + (1.0 - feats.away_lineup_availability)
            )
            / 2.0,
            4,
        )
        feats.missing_players_impact = max(feats.missing_players_impact, lineup_impact)
    else:
        flags.append("missing_lineup_context")

    if not await _extract_odds(session, match_id, feats):
        flags.append("missing_odds")

    quality = 1.0
    for flag in flags:
        quality -= _QUALITY_PENALTIES.get(flag, _DEFAULT_QUALITY_PENALTY)
    feats.data_quality_score = max(0.0, round(quality, 2))
    feats.data_quality_flags = flags
    return feats


async def _load_match_header(
    session: AsyncSession,
    match_id: str,
) -> dict[str, Any] | None:
    """Try live_matches first, then matches table.

    Both queries return the same column set; the ``matches`` variant has
    no lineup/sidelined data (selected as NULL) and takes the referee
    name from ``match_officials`` with ``role_id = 1``.

    Returns:
        The first matching row as a plain dict, or ``None`` if neither
        table has the match.
    """
    live_sql = """
        SELECT
            m.id,
            m.home_team_id,
            m.away_team_id,
            m.match_name,
            m.mst_utc,
            m.sport,
            m.league_id,
            m.referee_name,
            m.lineups,
            m.sidelined,
            ht.name AS home_name,
            at.name AS away_name,
            l.name AS league_name
        FROM live_matches m
        LEFT JOIN teams ht ON ht.id = m.home_team_id
        LEFT JOIN teams at ON at.id = m.away_team_id
        LEFT JOIN leagues l ON l.id = m.league_id
        WHERE m.id = :match_id
        LIMIT 1
    """
    archive_sql = """
        SELECT
            m.id,
            m.home_team_id,
            m.away_team_id,
            m.match_name,
            m.mst_utc,
            m.sport,
            m.league_id,
            ref.name AS referee_name,
            NULL AS lineups,
            NULL AS sidelined,
            ht.name AS home_name,
            at.name AS away_name,
            l.name AS league_name
        FROM matches m
        LEFT JOIN teams ht ON ht.id = m.home_team_id
        LEFT JOIN teams at ON at.id = m.away_team_id
        LEFT JOIN leagues l ON l.id = m.league_id
        LEFT JOIN match_officials ref
            ON ref.match_id = m.id AND ref.role_id = 1
        WHERE m.id = :match_id
        LIMIT 1
    """
    for sql in (live_sql, archive_sql):
        result = await session.execute(text(sql), {"match_id": match_id})
        row = result.mappings().first()
        if row:
            return dict(row)
    return None


async def _load_ai_features(
    session: AsyncSession,
    match_id: str,
) -> dict[str, Any] | None:
    """Load the ``football_ai_features`` row for the match, if any.

    Returns:
        A dict with the ELO, missing-player, form and head-to-head
        columns, or ``None`` when no row exists.
    """
    query = text("""
        SELECT
            home_elo,
            away_elo,
            missing_players_impact,
            home_form_score,
            away_form_score,
            h2h_home_win_rate,
            h2h_total
        FROM football_ai_features
        WHERE match_id = :match_id
        LIMIT 1
    """)
    result = await session.execute(query, {"match_id": match_id})
    row = result.mappings().first()
    return dict(row) if row else None


async def _rolling_team_stats(
    session: AsyncSession,
    team_id: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Calculate rolling averages from the team's last N finished matches.

    Only football matches strictly before ``before_mst_utc`` with both
    scores present are considered, and only those that have a
    ``football_team_stats`` row for the team (inner join). N is
    ``ROLLING_WINDOW``.

    Returns:
        Averages rounded to 2 decimals keyed ``avg_possession``,
        ``avg_shots_on_target``, ``avg_total_shots``,
        ``avg_goals_scored`` and ``avg_goals_conceded``; ``None`` when
        no qualifying match exists.
    """
    query = text("""
        WITH recent AS (
            SELECT
                m.id AS match_id,
                m.home_team_id,
                m.away_team_id,
                m.score_home,
                m.score_away,
                ts.possession_percentage,
                ts.shots_on_target,
                ts.total_shots
            FROM matches m
            JOIN football_team_stats ts
                ON ts.match_id = m.id AND ts.team_id = :team_id
            WHERE (m.home_team_id = :team_id OR m.away_team_id = :team_id)
              AND m.mst_utc < :before_ts
              AND m.sport = 'football'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
            ORDER BY m.mst_utc DESC
            LIMIT :window
        )
        SELECT
            COALESCE(AVG(possession_percentage), 50.0) AS avg_possession,
            COALESCE(AVG(shots_on_target), 4.0) AS avg_shots_on_target,
            COALESCE(AVG(total_shots), 10.0) AS avg_total_shots,
            COALESCE(AVG(
                CASE WHEN home_team_id = :team_id
                     THEN score_home ELSE score_away END
            ), 1.3) AS avg_goals_scored,
            COALESCE(AVG(
                CASE WHEN home_team_id = :team_id
                     THEN score_away ELSE score_home END
            ), 1.1) AS avg_goals_conceded,
            COUNT(*) AS match_count
        FROM recent
    """)
    result = await session.execute(
        query,
        {"team_id": team_id, "before_ts": before_mst_utc, "window": ROLLING_WINDOW},
    )
    row = result.mappings().first()
    if row is None or int(row["match_count"]) == 0:
        return None

    stat_keys = (
        "avg_possession",
        "avg_shots_on_target",
        "avg_total_shots",
        "avg_goals_scored",
        "avg_goals_conceded",
    )
    return {key: round(float(row[key]), 2) for key in stat_keys}


async def _load_rest_days(
    session: AsyncSession,
    team_id: str,
    before_mst_utc: int,
) -> float | None:
    """Return days since the team's previous football match.

    Timestamps are treated as milliseconds (the difference is divided by
    86,400,000). The result is clamped to ``[0, MAX_REST_DAYS]`` and
    rounded to 3 decimals.

    Returns:
        The rest days, or ``None`` when the team has no earlier match.
    """
    query = text("""
        SELECT m.mst_utc
        FROM matches m
        WHERE (m.home_team_id = :team_id OR m.away_team_id = :team_id)
          AND m.mst_utc < :before_ts
          AND m.sport = 'football'
        ORDER BY m.mst_utc DESC
        LIMIT 1
    """)
    result = await session.execute(
        query,
        {"team_id": team_id, "before_ts": before_mst_utc},
    )
    last_match_ts = result.scalar_one_or_none()
    if last_match_ts is None:
        return None

    rest_days = max(0.0, (float(before_mst_utc) - float(last_match_ts)) / 86400000.0)
    return round(min(rest_days, MAX_REST_DAYS), 3)


async def _load_h2h_stats(
    session: AsyncSession,
    home_team_id: str,
    away_team_id: str,
    before_mst_utc: int,
) -> dict[str, float | int] | None:
    """Compute the head-to-head record from the current home team's view.

    Looks at up to ``H2H_WINDOW`` scored football meetings between the
    two teams (either venue) strictly before ``before_mst_utc``. Scores
    are re-oriented so "home" always means ``home_team_id``.

    Returns:
        ``{"home_win_rate": float, "sample_size": int}`` where a draw
        counts as half a win, or ``None`` when there are no meetings.
    """
    query = text("""
        SELECT
            m.home_team_id,
            m.away_team_id,
            m.score_home,
            m.score_away
        FROM matches m
        WHERE m.sport = 'football'
          AND m.mst_utc < :before_ts
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
          AND (
              (m.home_team_id = :home_team_id AND m.away_team_id = :away_team_id)
              OR
              (m.home_team_id = :away_team_id AND m.away_team_id = :home_team_id)
          )
        ORDER BY m.mst_utc DESC
        LIMIT :window
    """)
    result = await session.execute(
        query,
        {
            "home_team_id": home_team_id,
            "away_team_id": away_team_id,
            "before_ts": before_mst_utc,
            "window": H2H_WINDOW,
        },
    )
    rows = result.mappings().all()
    if not rows:
        return None

    home_wins = 0.0
    draws = 0.0
    sample_size = 0
    for row in rows:
        score_home = row["score_home"]
        score_away = row["score_away"]
        if score_home is None or score_away is None:
            continue
        sample_size += 1

        # Flip the score when the current home team played away.
        same_venue = row["home_team_id"] == home_team_id
        current_home_score = float(score_home) if same_venue else float(score_away)
        current_away_score = float(score_away) if same_venue else float(score_home)

        if current_home_score > current_away_score:
            home_wins += 1.0
        elif current_home_score == current_away_score:
            draws += 1.0

    if sample_size == 0:
        return None

    # Count draws as a half-win signal instead of throwing them away.
    home_win_rate = round((home_wins + draws * 0.5) / sample_size, 4)
    return {
        "home_win_rate": home_win_rate,
        "sample_size": sample_size,
    }


async def _load_league_profile(
    session: AsyncSession,
    league_id: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Return the league's average total goals over its last 100 matches.

    Only football matches with status ``'FT'``, both scores present and
    a timestamp strictly before ``before_mst_utc`` are used.

    Returns:
        ``{"avg_goals": float}`` rounded to 3 decimals, or ``None`` when
        ``league_id`` is empty or no match qualifies.
    """
    if not league_id:
        return None

    query = text("""
        SELECT
            COALESCE(AVG(m.score_home + m.score_away), 2.6) AS avg_goals,
            COUNT(*) AS match_count
        FROM (
            SELECT score_home, score_away
            FROM matches
            WHERE league_id = :league_id
              AND sport = 'football'
              AND status = 'FT'
              AND score_home IS NOT NULL
              AND score_away IS NOT NULL
              AND mst_utc < :before_ts
            ORDER BY mst_utc DESC
            LIMIT 100
        ) m
    """)
    result = await session.execute(
        query,
        {"league_id": league_id, "before_ts": before_mst_utc},
    )
    row = result.mappings().first()
    if row is None or int(row["match_count"] or 0) == 0:
        return None
    return {"avg_goals": round(float(row["avg_goals"]), 3)}


async def _load_referee_profile(
    session: AsyncSession,
    referee_name: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Return goal and home-win tendencies for a referee.

    Uses the referee's last 30 finished football matches (official with
    ``role_id = 1``) strictly before ``before_mst_utc``. ``home_bias``
    is the home-win share minus the 0.46 baseline used in the query.

    Returns:
        ``{"home_bias": float, "avg_goals": float}``, or ``None`` when
        ``referee_name`` is empty or no match qualifies.
    """
    if not referee_name:
        return None

    query = text("""
        SELECT
            COALESCE(AVG(
                CASE WHEN score_home > score_away THEN 1.0 ELSE 0.0 END
            ), 0.46) - 0.46 AS home_bias,
            COALESCE(AVG(score_home + score_away), 2.6) AS avg_goals,
            COUNT(*) AS match_count
        FROM (
            SELECT m.score_home, m.score_away
            FROM match_officials mo
            JOIN matches m ON m.id = mo.match_id
            WHERE mo.name = :referee_name
              AND mo.role_id = 1
              AND m.sport = 'football'
              AND m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.mst_utc < :before_ts
            ORDER BY m.mst_utc DESC
            LIMIT 30
        ) ref_matches
    """)
    result = await session.execute(
        query,
        {"referee_name": referee_name, "before_ts": before_mst_utc},
    )
    row = result.mappings().first()
    if row is None or int(row["match_count"] or 0) == 0:
        return None
    return {
        "home_bias": round(float(row["home_bias"]), 4),
        "avg_goals": round(float(row["avg_goals"]), 3),
    }


async def _load_team_squad_profile(
    session: AsyncSession,
    team_id: str,
    before_mst_utc: int,
) -> dict[str, float] | None:
    """Score the team's squad from its last 8 finished matches.

    Each player is scored from starts (1.5), substitute appearances
    (0.5), goals (2.5) and assists (1.5). The top 11 scores are averaged
    into ``squad_strength`` (average / 10, clamped to ``[0, 1]``), and
    ``key_players`` counts top-11 players scoring at least 6.0.

    Returns:
        ``{"squad_strength": float, "key_players": float}``, or ``None``
        when ``team_id`` is empty or the team has no recent match.
    """
    if not team_id:
        return None

    query = text("""
        WITH recent_matches AS (
            SELECT m.id, m.mst_utc
            FROM matches m
            WHERE (m.home_team_id = :team_id OR m.away_team_id = :team_id)
              AND m.sport = 'football'
              AND m.status = 'FT'
              AND m.mst_utc < :before_ts
            ORDER BY m.mst_utc DESC
            LIMIT 8
        ),
        player_base AS (
            SELECT
                mpp.player_id,
                COUNT(*)::float AS appearances,
                COUNT(*) FILTER (WHERE mpp.is_starting = true)::float AS starts
            FROM match_player_participation mpp
            JOIN recent_matches rm ON rm.id = mpp.match_id
            WHERE mpp.team_id = :team_id
            GROUP BY mpp.player_id
        ),
        player_goals AS (
            SELECT
                mpe.player_id,
                COUNT(*) FILTER (
                    WHERE mpe.event_type = 'goal'
                      AND COALESCE(mpe.event_subtype, '') NOT ILIKE '%penaltı kaçırma%'
                )::float AS goals,
                0.0::float AS assists
            FROM match_player_events mpe
            JOIN recent_matches rm ON rm.id = mpe.match_id
            WHERE mpe.team_id = :team_id
            GROUP BY mpe.player_id

            UNION ALL

            SELECT
                mpe.assist_player_id AS player_id,
                0.0::float AS goals,
                COUNT(*) FILTER (
                    WHERE mpe.event_type = 'goal'
                      AND mpe.assist_player_id IS NOT NULL
                )::float AS assists
            FROM match_player_events mpe
            JOIN recent_matches rm ON rm.id = mpe.match_id
            WHERE mpe.team_id = :team_id
              AND mpe.assist_player_id IS NOT NULL
            GROUP BY mpe.assist_player_id
        ),
        player_events AS (
            SELECT
                player_id,
                SUM(goals) AS goals,
                SUM(assists) AS assists
            FROM player_goals
            GROUP BY player_id
        ),
        player_scores AS (
            SELECT
                pb.player_id,
                (pb.starts * 1.5)
                + ((pb.appearances - pb.starts) * 0.5)
                + (COALESCE(pe.goals, 0.0) * 2.5)
                + (COALESCE(pe.assists, 0.0) * 1.5) AS score
            FROM player_base pb
            LEFT JOIN player_events pe ON pe.player_id = pb.player_id
        )
        SELECT
            COALESCE(AVG(top_players.score), 0.0) AS avg_top_score,
            COALESCE(COUNT(*) FILTER (WHERE top_players.score >= 6.0), 0) AS key_players,
            COALESCE((SELECT COUNT(*) FROM recent_matches), 0) AS match_count
        FROM (
            SELECT score
            FROM player_scores
            ORDER BY score DESC
            LIMIT 11
        ) top_players
    """)
    result = await session.execute(
        query,
        {"team_id": team_id, "before_ts": before_mst_utc},
    )
    row = result.mappings().first()
    if row is None or int(row["match_count"] or 0) == 0:
        return None

    avg_top_score = float(row["avg_top_score"] or 0.0)
    return {
        "squad_strength": round(min(max(avg_top_score / 10.0, 0.0), 1.0), 4),
        "key_players": float(row["key_players"] or 0),
    }


def _safe_json(value: Any) -> dict[str, Any] | None:
    """Return ``value`` as a dict, decoding a JSON string if needed.

    Anything that is not a dict (or does not decode to one) yields
    ``None``.
    """
    if isinstance(value, dict):
        return value
    if isinstance(value, str):
        try:
            parsed = json.loads(value)
        except (TypeError, json.JSONDecodeError):
            return None
        return parsed if isinstance(parsed, dict) else None
    return None


def _safe_list(value: Any) -> list[Any]:
    """Return ``value`` if it is a list, otherwise an empty list."""
    return value if isinstance(value, list) else []


def _safe_dict(value: Any) -> dict[str, Any]:
    """Return ``value`` if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _safe_count(value: Any) -> int:
    """Parse a count that may be None, numeric or a string; 0 on failure."""
    try:
        return int(value or 0)
    except (TypeError, ValueError, OverflowError):
        return 0


def _extract_lineup_context(match_row: dict[str, Any]) -> dict[str, float | bool]:
    """Derive lineup availability for both teams from the match header.

    Reads the ``lineups`` (``home``/``away`` -> ``xi``) and ``sidelined``
    (``homeTeam``/``awayTeam`` -> ``totalSidelined``/``players``) JSON
    payloads. Missing, null or malformed nodes are treated as "no data"
    instead of raising.

    Returns:
        ``home_availability`` and ``away_availability`` in ``[0, 1]``,
        plus ``has_real_lineup_data`` which is true when any starting-XI
        or sidelined count is positive.
    """
    lineups = _safe_json(match_row.get("lineups")) or {}
    sidelined = _safe_json(match_row.get("sidelined")) or {}

    def xi_count(side: str) -> int:
        return len(_safe_list(_safe_dict(lineups.get(side)).get("xi")))

    def sidelined_count(team_key: str) -> int:
        team = _safe_dict(sidelined.get(team_key))
        # Trust whichever is larger: the declared total or the listed players.
        return max(
            _safe_count(team.get("totalSidelined")),
            len(_safe_list(team.get("players"))),
        )

    home_xi_count = xi_count("home")
    away_xi_count = xi_count("away")
    home_sidelined_count = sidelined_count("homeTeam")
    away_sidelined_count = sidelined_count("awayTeam")

    has_real_lineup_data = any(
        count > 0
        for count in (
            home_xi_count,
            away_xi_count,
            home_sidelined_count,
            away_sidelined_count,
        )
    )
    return {
        "home_availability": _compute_availability(home_xi_count, home_sidelined_count),
        "away_availability": _compute_availability(away_xi_count, away_sidelined_count),
        "has_real_lineup_data": has_real_lineup_data,
    }


def _compute_availability(xi_count: int, sidelined_count: int) -> float:
    """Combine starting-XI size and sidelined count into a [0, 1] score.

    An unknown XI (count 0) is treated as fully available; each
    sidelined player costs ``0.35 / 11``, capped at 0.35 in total.
    """
    xi_ratio = min(max(xi_count / 11.0, 0.0), 1.0) if xi_count > 0 else 1.0
    sidelined_penalty = min(max(sidelined_count / 11.0, 0.0), 1.0) * 0.35
    return round(min(max(xi_ratio - sidelined_penalty, 0.0), 1.0), 4)


def _safe_odd(val: Any) -> float:
    """Parse an odds value that might be str, float, int, or None.

    Returns:
        The decimal odd when it is finite and greater than 1.0,
        otherwise ``0.0``.
    """
    if val is None:
        return 0.0
    try:
        parsed = float(val)
    except (ValueError, TypeError):
        return 0.0
    # inf/nan would otherwise flow straight into the feature vector.
    if not math.isfinite(parsed):
        return 0.0
    return parsed if parsed > 1.0 else 0.0


def _implied_prob(decimal_odd: float) -> float:
    """Convert decimal odds to implied probability, clamped [0, 1]."""
    if decimal_odd <= 1.0:
        return 0.0
    return min(1.0, 1.0 / decimal_odd)


def _apply_odd(
    feats: MatchFeatures,
    market: dict[str, str],
    selection: str,
    raw_value: Any,
) -> bool:
    """Store one odd on ``feats`` if the selection and value are usable.

    Returns:
        ``True`` when ``selection`` belongs to ``market`` and
        ``raw_value`` parses to a valid odd; ``feats`` is untouched
        otherwise.
    """
    attr = market.get(selection)
    if attr is None:
        return False
    value = _safe_odd(raw_value)
    if value <= 1.0:
        return False
    setattr(feats, attr, value)
    return True


async def _extract_odds(
    session: AsyncSession,
    match_id: str,
    feats: MatchFeatures,
) -> bool:
    """Extract odds from live JSON first, then relational tables.

    When any odd is found, all implied probabilities on ``feats`` are
    recomputed from its (possibly still default) decimal odds.

    Returns:
        ``True`` when at least one usable odd was loaded.
    """
    found = False
    odds_json = await _load_live_odds_json(session, match_id)
    if odds_json:
        found = _parse_odds_json(odds_json, feats)
    if not found:
        found = await _load_relational_odds(session, match_id, feats)

    if found:
        for market in ("home", "draw", "away", "over25", "under25", "btts_yes", "btts_no"):
            odd = getattr(feats, f"odds_{market}")
            setattr(feats, f"implied_prob_{market}", round(_implied_prob(odd), 4))
    return found


async def _load_live_odds_json(
    session: AsyncSession,
    match_id: str,
) -> dict[str, Any] | list[Any] | None:
    """Load the ``odds`` column of the live match as decoded JSON.

    The column may arrive as a JSON string or an already-decoded
    dict/list.

    Returns:
        The dict or list payload, or ``None`` when there is no row, the
        string is not valid JSON, or the value has another type.
    """
    query = text("SELECT odds FROM live_matches WHERE id = :mid AND odds IS NOT NULL")
    result = await session.execute(query, {"mid": match_id})
    payload = result.scalar_one_or_none()

    if isinstance(payload, str):
        try:
            payload = json.loads(payload)
        except (json.JSONDecodeError, TypeError):
            return None
    if isinstance(payload, (dict, list)):
        return payload
    return None


def _parse_odds_json(odds_blob: dict[str, Any] | list[Any], feats: MatchFeatures) -> bool:
    """Parse the Mackolik-style odds JSON structure.

    Accepts either a list of category dicts, or a dict holding them
    under ``"categories"`` (or ``"odds"``) as a list or a dict of dicts.
    Recognised markets are 1X2, over/under 2.5 and both-teams-to-score.

    Returns:
        ``True`` only when at least one valid odd was written to
        ``feats``; a recognised category with no usable selection does
        not count, so the caller can still fall back to other sources.
    """
    raw_categories: Any = odds_blob
    if isinstance(odds_blob, dict):
        raw_categories = odds_blob.get("categories", odds_blob.get("odds", []))
        if isinstance(raw_categories, dict):
            raw_categories = list(raw_categories.values())
    if not isinstance(raw_categories, list):
        return False

    found_any = False
    for cat in raw_categories:
        if not isinstance(cat, dict):
            continue
        cat_name = str(cat.get("name") or cat.get("cn") or "").strip().lower()
        market = _JSON_MARKETS.get(cat_name)
        if market is None:
            continue
        selections = _selections_to_map(cat.get("selections") or cat.get("s") or [])
        for sel_name, raw_value in selections.items():
            if _apply_odd(feats, market, sel_name, raw_value):
                found_any = True
    return found_any


def _selections_to_map(selections: list[Any] | dict[str, Any]) -> dict[str, Any]:
    """Normalize varied selection structures into {name_lower: odd_value}.

    A dict is taken as ``{name: value}``; a list is expected to hold
    dicts with the name under ``name``/``n`` and the value under
    ``odd_value``/``ov``/``v``. Unnamed entries are skipped.
    """
    if isinstance(selections, dict):
        return {str(key).strip().lower(): value for key, value in selections.items()}

    result: dict[str, Any] = {}
    if isinstance(selections, list):
        for sel in selections:
            if not isinstance(sel, dict):
                continue
            name = str(sel.get("name") or sel.get("n") or "").strip().lower()
            if name:
                result[name] = sel.get("odd_value") or sel.get("ov") or sel.get("v")
    return result


async def _load_relational_odds(
    session: AsyncSession,
    match_id: str,
    feats: MatchFeatures,
) -> bool:
    """Fallback: load odds from odd_categories + odd_selections.

    Returns:
        ``True`` only when at least one valid odd was written to
        ``feats``; rows whose value is not a valid odd or whose
        selection name is not recognised are ignored.
    """
    query = text("""
        SELECT
            oc.name AS cat_name,
            os.name AS sel_name,
            os.odd_value
        FROM odd_categories oc
        JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
        WHERE oc.match_id = :match_id
          AND oc.name IN ('Maç Sonucu', '2,5 Alt/Üst', 'Karşılıklı Gol')
    """)
    result = await session.execute(query, {"match_id": match_id})

    found = False
    for row in result.mappings().all():
        market = _RELATIONAL_MARKETS.get((row["cat_name"] or "").strip())
        if market is None:
            continue
        selection = (row["sel_name"] or "").strip().lower()
        if _apply_odd(feats, market, selection, row["odd_value"]):
            found = True
    return found