""" Feature Adapter for XGBoost Inference ===================================== Bridges the gap between V20 Engine outputs (CalculationContext) and XGBoost Models. Constructs the exact 44-feature vector used in training. """ from __future__ import annotations import os from typing import Any import psycopg2 from psycopg2.extensions import connection as PgConnection import pandas as pd import numpy as np from data.db import get_clean_dsn # Feature definitions (Must match train_xgboost_markets.py) # NOTE: 68 features - matching the trained XGBoost models FEATURES = [ # ELO "home_overall_elo", "away_overall_elo", "elo_diff", "home_home_elo", "away_away_elo", "form_elo_diff", # Form "home_goals_avg", "home_conceded_avg", "away_goals_avg", "away_conceded_avg", "home_clean_sheet_rate", "away_clean_sheet_rate", "home_scoring_rate", "away_scoring_rate", "home_winning_streak", "away_winning_streak", # H2H "h2h_home_win_rate", "h2h_draw_rate", "h2h_avg_goals", "h2h_btts_rate", "h2h_over25_rate", # Stats "home_avg_possession", "away_avg_possession", "home_avg_shots_on_target", "away_avg_shots_on_target", "home_shot_conversion", "away_shot_conversion", # Odds (Implicit market wisdom) "odds_ms_h", "odds_ms_d", "odds_ms_a", "implied_home", "implied_draw", "implied_away", "odds_ht_ms_h", "odds_ht_ms_d", "odds_ht_ms_a", "odds_ou05_o", "odds_ou05_u", "odds_ou15_o", "odds_ou15_u", "odds_ou25_o", "odds_ou25_u", "odds_ou35_o", "odds_ou35_u", "odds_ht_ou05_o", "odds_ht_ou05_u", "odds_ht_ou15_o", "odds_ht_ou15_u", "odds_btts_y", "odds_btts_n", # League/Context "league_avg_goals", "league_zero_goal_rate", "home_xga", "away_xga", # Upset features "upset_atmosphere", "upset_motivation", "upset_fatigue", "upset_potential", # Referee features "referee_home_bias", "referee_avg_goals", "referee_cards_total", "referee_avg_yellow", "referee_experience", # Momentum features "home_momentum_score", "away_momentum_score", "momentum_diff", ] class FeatureAdapter: """ Adapter to convert V20 context into XGBoost-compatible features. """ def __init__(self) -> None: self.conn: PgConnection | None = None self._connect_db() self.league_stats_cache: dict[str, dict[str, float]] = {} def _connect_db(self) -> None: try: # FeatureAdapter uses DB only for optional league stats enrichment. # Keep startup non-blocking when DB/tunnel is unavailable. if not os.getenv("DATABASE_URL", "").strip(): return self.conn = psycopg2.connect(get_clean_dsn()) except Exception as e: print(f"⚠️ FeatureAdapter DB connection failed: {e}") def get_features(self, ctx: Any) -> pd.DataFrame: """ Construct feature vector from CalculationContext. Returns a DataFrame with 1 row and correct columns. """ raw = ctx.team_pred.raw_features odds = ctx.odds_data or {} upset_features = getattr(ctx, "upset_features", {}) or {} momentum_features = getattr(ctx, "momentum_features", {}) or {} referee_features = getattr(ctx, "referee_features", {}) or {} # 1. Odds Features ms_h = float(odds.get("ms_h") or 0) ms_d = float(odds.get("ms_d") or 0) ms_a = float(odds.get("ms_a") or 0) implied_home, implied_draw, implied_away = 0.33, 0.33, 0.33 if ms_h > 0 and ms_d > 0 and ms_a > 0: raw_sum = 1/ms_h + 1/ms_d + 1/ms_a implied_home = (1/ms_h) / raw_sum implied_draw = (1/ms_d) / raw_sum implied_away = (1/ms_a) / raw_sum # 2. League Features # Using ctx.league_id if available, or just defaults league_stats = self._get_league_stats(ctx.league_id) # 3. Assemble Dictionary row = { # ELO (Explicit float casting) "home_overall_elo": float(raw.get("home_overall_elo") or 1500), "away_overall_elo": float(raw.get("away_overall_elo") or 1500), "elo_diff": float(raw.get("elo_diff") or 0), "home_home_elo": float(raw.get("home_home_elo") or 1500), "away_away_elo": float(raw.get("away_away_elo") or 1500), "form_elo_diff": float(raw.get("form_elo_diff") or 0), # Form (Explicit float casting) "home_goals_avg": float(raw.get("home_goals_avg") or 1.3), "home_conceded_avg": float(raw.get("home_conceded_avg") or 1.2), "away_goals_avg": float(raw.get("away_goals_avg") or 1.2), "away_conceded_avg": float(raw.get("away_conceded_avg") or 1.4), "home_clean_sheet_rate": float(raw.get("home_clean_sheet_rate") or 0.2), "away_clean_sheet_rate": float(raw.get("away_clean_sheet_rate") or 0.2), "home_scoring_rate": float(raw.get("home_scoring_rate") or 0.8), "away_scoring_rate": float(raw.get("away_scoring_rate") or 0.8), "home_winning_streak": float(raw.get("home_winning_streak") or 0), "away_winning_streak": float(raw.get("away_winning_streak") or 0), # H2H (Explicit float casting) "h2h_home_win_rate": float(raw.get("h2h_home_win_rate") or 0.33), "h2h_draw_rate": float(raw.get("h2h_draw_rate") or 0.33), "h2h_avg_goals": float(raw.get("h2h_avg_goals") or 2.5), "h2h_btts_rate": float(raw.get("h2h_btts_rate") or 0.5), "h2h_over25_rate": float(raw.get("h2h_over25_rate") or 0.5), # Stats (Explicit float casting to avoid XGBoost 'object' error) "home_avg_possession": float(raw.get("home_avg_possession") or 0.5), "away_avg_possession": float(raw.get("away_avg_possession") or 0.5), "home_avg_shots_on_target": float(raw.get("home_avg_shots_on_target") or 4.0), "away_avg_shots_on_target": float(raw.get("away_avg_shots_on_target") or 3.5), "home_shot_conversion": float(raw.get("home_shot_conversion") or 0.1), "away_shot_conversion": float(raw.get("away_shot_conversion") or 0.1), # Odds "odds_ms_h": ms_h, "odds_ms_d": ms_d, "odds_ms_a": ms_a, "implied_home": implied_home, "implied_draw": implied_draw, "implied_away": implied_away, "odds_ht_ms_h": float(odds.get("ht_ms_h") or 0.0), "odds_ht_ms_d": float(odds.get("ht_ms_d") or 0.0), "odds_ht_ms_a": float(odds.get("ht_ms_a") or 0.0), "odds_ou05_o": float(odds.get("ou05_o") or 0.0), "odds_ou05_u": float(odds.get("ou05_u") or 0.0), "odds_ou15_o": float(odds.get("ou15_o") or 0.0), "odds_ou15_u": float(odds.get("ou15_u") or 0.0), "odds_ou25_o": float(odds.get("ou25_o") or 0.0), "odds_ou25_u": float(odds.get("ou25_u") or 0.0), "odds_ou35_o": float(odds.get("ou35_o") or 0.0), "odds_ou35_u": float(odds.get("ou35_u") or 0.0), "odds_ht_ou05_o": float(odds.get("ht_ou05_o") or 0.0), "odds_ht_ou05_u": float(odds.get("ht_ou05_u") or 0.0), "odds_ht_ou15_o": float(odds.get("ht_ou15_o") or 0.0), "odds_ht_ou15_u": float(odds.get("ht_ou15_u") or 0.0), "odds_btts_y": float(odds.get("btts_y") or 0.0), "odds_btts_n": float(odds.get("btts_n") or 0.0), # League/Def "league_avg_goals": float(league_stats.get("avg_goals") or 2.7), "league_zero_goal_rate": float(league_stats.get("zero_rate") or 0.07), "home_xga": float(raw.get("home_xga") or 1.2), "away_xga": float(raw.get("away_xga") or 1.4), # Upset features (default values - computed separately in upset_engine_v2) "upset_atmosphere": float(raw.get("upset_atmosphere") or 0.0), "upset_motivation": float(raw.get("upset_motivation") or 0.0), "upset_fatigue": float(raw.get("upset_fatigue") or 0.0), "upset_potential": float(raw.get("upset_potential") or 0.0), # Referee features (default values) "referee_home_bias": float(raw.get("referee_home_bias") or 0.0), "referee_avg_goals": float(raw.get("referee_avg_goals") or 2.5), "referee_cards_total": float(raw.get("referee_cards_total") or 4.0), "referee_avg_yellow": float(raw.get("referee_avg_yellow") or 3.0), "referee_experience": float(raw.get("referee_experience") or 0), # Momentum features (default values) "home_momentum_score": float(raw.get("home_momentum_score") or 0.0), "away_momentum_score": float(raw.get("away_momentum_score") or 0.0), "momentum_diff": float(raw.get("momentum_diff") or 0.0), } # Return as DataFrame (cols sorted by FEATURES list to ensure alignment) df = pd.DataFrame([row], columns=FEATURES) return df def _get_league_stats(self, league_id: str | None) -> dict[str, float]: """Get cached league stats or default.""" if not league_id: return {"avg_goals": 2.7, "zero_rate": 0.07} if league_id in self.league_stats_cache: return self.league_stats_cache[league_id] if self.conn: try: with self.conn.cursor() as cur: cur.execute(""" SELECT AVG(score_home + score_away), AVG(CASE WHEN score_home=0 AND score_away=0 THEN 1.0 ELSE 0.0 END) FROM matches WHERE league_id = %s AND status = 'FT' AND mst_utc > EXTRACT(EPOCH FROM NOW() - INTERVAL '1 year') """, (league_id,)) res = cur.fetchone() if res and res[0]: stats = { "avg_goals": float(res[0]), "zero_rate": float(res[1]) } self.league_stats_cache[league_id] = stats return stats except Exception: pass # Default fallback return {"avg_goals": 2.7, "zero_rate": 0.07} # Singleton _adapter: FeatureAdapter | None = None def get_feature_adapter() -> FeatureAdapter: global _adapter if _adapter is None: _adapter = FeatureAdapter() return _adapter