This commit is contained in:
Executable
+256
@@ -0,0 +1,256 @@
|
||||
"""
|
||||
Feature Adapter for XGBoost Inference
|
||||
=====================================
|
||||
Bridges the gap between V20 Engine outputs (CalculationContext) and XGBoost Models.
|
||||
Constructs the exact feature vector (the columns listed in FEATURES) used in training.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extensions import connection as PgConnection
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
|
||||
# Feature definitions (Must match train_xgboost_markets.py)
|
||||
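# NOTE: FEATURES below lists 66 names (this note previously said 68); order and count must match the trained XGBoost models - verify against the training script.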
|
||||
# Ordered column names of the model input row. The order is significant:
# get_features() builds its DataFrame with ``columns=FEATURES``.
FEATURES = [
    # --- ELO ratings ---
    "home_overall_elo",
    "away_overall_elo",
    "elo_diff",
    "home_home_elo",
    "away_away_elo",
    "form_elo_diff",
    # --- Recent form ---
    "home_goals_avg",
    "home_conceded_avg",
    "away_goals_avg",
    "away_conceded_avg",
    "home_clean_sheet_rate",
    "away_clean_sheet_rate",
    "home_scoring_rate",
    "away_scoring_rate",
    "home_winning_streak",
    "away_winning_streak",
    # --- Head-to-head ---
    "h2h_home_win_rate",
    "h2h_draw_rate",
    "h2h_avg_goals",
    "h2h_btts_rate",
    "h2h_over25_rate",
    # --- Match statistics ---
    "home_avg_possession",
    "away_avg_possession",
    "home_avg_shots_on_target",
    "away_avg_shots_on_target",
    "home_shot_conversion",
    "away_shot_conversion",
    # --- Odds (implicit market wisdom): full-time result ---
    "odds_ms_h",
    "odds_ms_d",
    "odds_ms_a",
    "implied_home",
    "implied_draw",
    "implied_away",
    # --- Odds: half-time result ---
    "odds_ht_ms_h",
    "odds_ht_ms_d",
    "odds_ht_ms_a",
    # --- Odds: over/under ---
    "odds_ou05_o",
    "odds_ou05_u",
    "odds_ou15_o",
    "odds_ou15_u",
    "odds_ou25_o",
    "odds_ou25_u",
    "odds_ou35_o",
    "odds_ou35_u",
    # --- Odds: half-time over/under ---
    "odds_ht_ou05_o",
    "odds_ht_ou05_u",
    "odds_ht_ou15_o",
    "odds_ht_ou15_u",
    # --- Odds: both teams to score ---
    "odds_btts_y",
    "odds_btts_n",
    # --- League / context ---
    "league_avg_goals",
    "league_zero_goal_rate",
    "home_xga",
    "away_xga",
    # --- Upset features ---
    "upset_atmosphere",
    "upset_motivation",
    "upset_fatigue",
    "upset_potential",
    # --- Referee features ---
    "referee_home_bias",
    "referee_avg_goals",
    "referee_cards_total",
    "referee_avg_yellow",
    "referee_experience",
    # --- Momentum features ---
    "home_momentum_score",
    "away_momentum_score",
    "momentum_diff",
]
|
||||
|
||||
class FeatureAdapter:
|
||||
"""
|
||||
Adapter to convert V20 context into XGBoost-compatible features.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.conn: PgConnection | None = None
|
||||
self._connect_db()
|
||||
self.league_stats_cache: dict[str, dict[str, float]] = {}
|
||||
|
||||
def _connect_db(self) -> None:
|
||||
try:
|
||||
# FeatureAdapter uses DB only for optional league stats enrichment.
|
||||
# Keep startup non-blocking when DB/tunnel is unavailable.
|
||||
if not os.getenv("DATABASE_URL", "").strip():
|
||||
return
|
||||
self.conn = psycopg2.connect(get_clean_dsn())
|
||||
except Exception as e:
|
||||
print(f"⚠️ FeatureAdapter DB connection failed: {e}")
|
||||
|
||||
def get_features(self, ctx: Any) -> pd.DataFrame:
|
||||
"""
|
||||
Construct feature vector from CalculationContext.
|
||||
Returns a DataFrame with 1 row and correct columns.
|
||||
"""
|
||||
raw = ctx.team_pred.raw_features
|
||||
odds = ctx.odds_data or {}
|
||||
upset_features = getattr(ctx, "upset_features", {}) or {}
|
||||
momentum_features = getattr(ctx, "momentum_features", {}) or {}
|
||||
referee_features = getattr(ctx, "referee_features", {}) or {}
|
||||
|
||||
# 1. Odds Features
|
||||
ms_h = float(odds.get("ms_h") or 0)
|
||||
ms_d = float(odds.get("ms_d") or 0)
|
||||
ms_a = float(odds.get("ms_a") or 0)
|
||||
|
||||
implied_home, implied_draw, implied_away = 0.33, 0.33, 0.33
|
||||
if ms_h > 0 and ms_d > 0 and ms_a > 0:
|
||||
raw_sum = 1/ms_h + 1/ms_d + 1/ms_a
|
||||
implied_home = (1/ms_h) / raw_sum
|
||||
implied_draw = (1/ms_d) / raw_sum
|
||||
implied_away = (1/ms_a) / raw_sum
|
||||
|
||||
# 2. League Features
|
||||
# Using ctx.league_id if available, or just defaults
|
||||
league_stats = self._get_league_stats(ctx.league_id)
|
||||
|
||||
# 3. Assemble Dictionary
|
||||
row = {
|
||||
# ELO (Explicit float casting)
|
||||
"home_overall_elo": float(raw.get("home_overall_elo") or 1500),
|
||||
"away_overall_elo": float(raw.get("away_overall_elo") or 1500),
|
||||
"elo_diff": float(raw.get("elo_diff") or 0),
|
||||
"home_home_elo": float(raw.get("home_home_elo") or 1500),
|
||||
"away_away_elo": float(raw.get("away_away_elo") or 1500),
|
||||
"form_elo_diff": float(raw.get("form_elo_diff") or 0),
|
||||
|
||||
# Form (Explicit float casting)
|
||||
"home_goals_avg": float(raw.get("home_goals_avg") or 1.3),
|
||||
"home_conceded_avg": float(raw.get("home_conceded_avg") or 1.2),
|
||||
"away_goals_avg": float(raw.get("away_goals_avg") or 1.2),
|
||||
"away_conceded_avg": float(raw.get("away_conceded_avg") or 1.4),
|
||||
"home_clean_sheet_rate": float(raw.get("home_clean_sheet_rate") or 0.2),
|
||||
"away_clean_sheet_rate": float(raw.get("away_clean_sheet_rate") or 0.2),
|
||||
"home_scoring_rate": float(raw.get("home_scoring_rate") or 0.8),
|
||||
"away_scoring_rate": float(raw.get("away_scoring_rate") or 0.8),
|
||||
"home_winning_streak": float(raw.get("home_winning_streak") or 0),
|
||||
"away_winning_streak": float(raw.get("away_winning_streak") or 0),
|
||||
|
||||
# H2H (Explicit float casting)
|
||||
"h2h_home_win_rate": float(raw.get("h2h_home_win_rate") or 0.33),
|
||||
"h2h_draw_rate": float(raw.get("h2h_draw_rate") or 0.33),
|
||||
"h2h_avg_goals": float(raw.get("h2h_avg_goals") or 2.5),
|
||||
"h2h_btts_rate": float(raw.get("h2h_btts_rate") or 0.5),
|
||||
"h2h_over25_rate": float(raw.get("h2h_over25_rate") or 0.5),
|
||||
|
||||
# Stats (Explicit float casting to avoid XGBoost 'object' error)
|
||||
"home_avg_possession": float(raw.get("home_avg_possession") or 0.5),
|
||||
"away_avg_possession": float(raw.get("away_avg_possession") or 0.5),
|
||||
"home_avg_shots_on_target": float(raw.get("home_avg_shots_on_target") or 4.0),
|
||||
"away_avg_shots_on_target": float(raw.get("away_avg_shots_on_target") or 3.5),
|
||||
"home_shot_conversion": float(raw.get("home_shot_conversion") or 0.1),
|
||||
"away_shot_conversion": float(raw.get("away_shot_conversion") or 0.1),
|
||||
|
||||
# Odds
|
||||
"odds_ms_h": ms_h,
|
||||
"odds_ms_d": ms_d,
|
||||
"odds_ms_a": ms_a,
|
||||
"implied_home": implied_home,
|
||||
"implied_draw": implied_draw,
|
||||
"implied_away": implied_away,
|
||||
|
||||
"odds_ht_ms_h": float(odds.get("ht_ms_h") or 0.0),
|
||||
"odds_ht_ms_d": float(odds.get("ht_ms_d") or 0.0),
|
||||
"odds_ht_ms_a": float(odds.get("ht_ms_a") or 0.0),
|
||||
|
||||
"odds_ou05_o": float(odds.get("ou05_o") or 0.0),
|
||||
"odds_ou05_u": float(odds.get("ou05_u") or 0.0),
|
||||
"odds_ou15_o": float(odds.get("ou15_o") or 0.0),
|
||||
"odds_ou15_u": float(odds.get("ou15_u") or 0.0),
|
||||
"odds_ou25_o": float(odds.get("ou25_o") or 0.0),
|
||||
"odds_ou25_u": float(odds.get("ou25_u") or 0.0),
|
||||
"odds_ou35_o": float(odds.get("ou35_o") or 0.0),
|
||||
"odds_ou35_u": float(odds.get("ou35_u") or 0.0),
|
||||
|
||||
"odds_ht_ou05_o": float(odds.get("ht_ou05_o") or 0.0),
|
||||
"odds_ht_ou05_u": float(odds.get("ht_ou05_u") or 0.0),
|
||||
"odds_ht_ou15_o": float(odds.get("ht_ou15_o") or 0.0),
|
||||
"odds_ht_ou15_u": float(odds.get("ht_ou15_u") or 0.0),
|
||||
|
||||
"odds_btts_y": float(odds.get("btts_y") or 0.0),
|
||||
"odds_btts_n": float(odds.get("btts_n") or 0.0),
|
||||
|
||||
# League/Def
|
||||
"league_avg_goals": float(league_stats.get("avg_goals") or 2.7),
|
||||
"league_zero_goal_rate": float(league_stats.get("zero_rate") or 0.07),
|
||||
"home_xga": float(raw.get("home_xga") or 1.2),
|
||||
"away_xga": float(raw.get("away_xga") or 1.4),
|
||||
|
||||
# Upset features (default values - computed separately in upset_engine_v2)
|
||||
"upset_atmosphere": float(raw.get("upset_atmosphere") or 0.0),
|
||||
"upset_motivation": float(raw.get("upset_motivation") or 0.0),
|
||||
"upset_fatigue": float(raw.get("upset_fatigue") or 0.0),
|
||||
"upset_potential": float(raw.get("upset_potential") or 0.0),
|
||||
|
||||
# Referee features (default values)
|
||||
"referee_home_bias": float(raw.get("referee_home_bias") or 0.0),
|
||||
"referee_avg_goals": float(raw.get("referee_avg_goals") or 2.5),
|
||||
"referee_cards_total": float(raw.get("referee_cards_total") or 4.0),
|
||||
"referee_avg_yellow": float(raw.get("referee_avg_yellow") or 3.0),
|
||||
"referee_experience": float(raw.get("referee_experience") or 0),
|
||||
|
||||
# Momentum features (default values)
|
||||
"home_momentum_score": float(raw.get("home_momentum_score") or 0.0),
|
||||
"away_momentum_score": float(raw.get("away_momentum_score") or 0.0),
|
||||
"momentum_diff": float(raw.get("momentum_diff") or 0.0),
|
||||
}
|
||||
|
||||
# Return as DataFrame (cols sorted by FEATURES list to ensure alignment)
|
||||
df = pd.DataFrame([row], columns=FEATURES)
|
||||
return df
|
||||
|
||||
def _get_league_stats(self, league_id: str | None) -> dict[str, float]:
|
||||
"""Get cached league stats or default."""
|
||||
if not league_id:
|
||||
return {"avg_goals": 2.7, "zero_rate": 0.07}
|
||||
|
||||
if league_id in self.league_stats_cache:
|
||||
return self.league_stats_cache[league_id]
|
||||
|
||||
if self.conn:
|
||||
try:
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute("""
|
||||
SELECT AVG(score_home + score_away),
|
||||
AVG(CASE WHEN score_home=0 AND score_away=0 THEN 1.0 ELSE 0.0 END)
|
||||
FROM matches
|
||||
WHERE league_id = %s AND status = 'FT'
|
||||
AND mst_utc > EXTRACT(EPOCH FROM NOW() - INTERVAL '1 year')
|
||||
""", (league_id,))
|
||||
res = cur.fetchone()
|
||||
if res and res[0]:
|
||||
stats = {
|
||||
"avg_goals": float(res[0]),
|
||||
"zero_rate": float(res[1])
|
||||
}
|
||||
self.league_stats_cache[league_id] = stats
|
||||
return stats
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Default fallback
|
||||
return {"avg_goals": 2.7, "zero_rate": 0.07}
|
||||
|
||||
# Singleton
|
||||
# Process-wide adapter instance, created on first use.
_adapter: FeatureAdapter | None = None


def get_feature_adapter() -> FeatureAdapter:
    """Return the shared FeatureAdapter, constructing it on the first call."""
    global _adapter
    if _adapter is not None:
        return _adapter
    _adapter = FeatureAdapter()
    return _adapter
|
||||
Reference in New Issue
Block a user