"""Feature Builder Mixin — V25/V28 feature vector assembly. Auto-extracted mixin module — split from services/single_match_orchestrator.py. All methods here are composed into SingleMatchOrchestrator via inheritance. `self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are initialised in the main __init__. """ from __future__ import annotations import json import re import time import math import os import pickle from collections import defaultdict from typing import Any, Dict, List, Optional, Set, Tuple, overload import pandas as pd import numpy as np import psycopg2 from psycopg2.extras import RealDictCursor from data.db import get_clean_dsn from schemas.prediction import FullMatchPrediction from schemas.match_data import MatchData from models.v25_ensemble import V25Predictor, get_v25_predictor try: from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge except ImportError: class V27Predictor: # type: ignore[no-redef] def __init__(self): self.models = {} def load_models(self): return False def predict_all(self, features): return {} def compute_divergence(*args, **kwargs): return {} def compute_value_edge(*args, **kwargs): return {} from features.odds_band_analyzer import OddsBandAnalyzer try: from models.basketball_v25 import ( BasketballMatchPrediction, get_basketball_v25_predictor, ) except ImportError: BasketballMatchPrediction = Any # type: ignore[misc] def get_basketball_v25_predictor() -> Any: raise ImportError("Basketball predictor is not available") from core.engines.player_predictor import PlayerPrediction, get_player_predictor from features.upset_engine import get_upset_engine from services.feature_enrichment import FeatureEnrichmentService from services.betting_brain import BettingBrain from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine from services.match_commentary import generate_match_commentary from utils.top_leagues import load_top_league_ids from utils.league_reliability import load_league_reliability from config.config_loader import build_threshold_dict, get_threshold_default from models.calibration import get_calibrator class FeatureBuilderMixin: def _build_v25_features(self, data: MatchData) -> Dict[str, float]: """ Build the single authoritative V25 pre-match feature vector. """ odds = self._sanitize_v25_odds(data.odds_data or {}) ms_h = float(odds.get('ms_h') or 0) ms_d = float(odds.get('ms_d') or 0) ms_a = float(odds.get('ms_a') or 0) # Implied probabilities (vig-normalised) implied_home, implied_draw, implied_away = 0.33, 0.33, 0.33 if ms_h > 0 and ms_d > 0 and ms_a > 0: raw_sum = 1 / ms_h + 1 / ms_d + 1 / ms_a implied_home = (1 / ms_h) / raw_sum implied_draw = (1 / ms_d) / raw_sum implied_away = (1 / ms_a) / raw_sum upset_potential = max( 0.0, min( 1.0, 1.0 - abs(implied_home - implied_away) + (implied_draw * 0.35), ), ) # All enrichment queries in a single DB connection home_elo, away_elo = 1500.0, 1500.0 home_venue_elo, away_venue_elo = 1500.0, 1500.0 home_form_elo_val, away_form_elo_val = 1500.0, 1500.0 enr = self.enrichment # Defaults — overridden by successful queries home_stats = dict(enr._DEFAULT_TEAM_STATS) away_stats = dict(enr._DEFAULT_TEAM_STATS) h2h = dict(enr._DEFAULT_H2H) home_form = dict(enr._DEFAULT_FORM) away_form = dict(enr._DEFAULT_FORM) ref = dict(enr._DEFAULT_REFEREE) league = dict(enr._DEFAULT_LEAGUE) home_momentum, away_momentum = 0.0, 0.0 home_rolling = dict(enr._DEFAULT_ROLLING) away_rolling = dict(enr._DEFAULT_ROLLING) home_venue = dict(enr._DEFAULT_VENUE) away_venue = dict(enr._DEFAULT_VENUE) home_rest, away_rest = 7.0, 7.0 odds_band_features = {} enrichment_failures = [] try: with psycopg2.connect(self.dsn) as conn: with conn.cursor(cursor_factory=RealDictCursor) as cur: # ELO try: cur.execute( "SELECT home_elo, away_elo, " " home_home_elo, away_away_elo, " " home_form_elo, away_form_elo " "FROM football_ai_features " "WHERE match_id = %s LIMIT 1", (data.match_id,), ) elo_row = cur.fetchone() if elo_row: home_elo = float(elo_row.get('home_elo') or 1500.0) away_elo = float(elo_row.get('away_elo') or 1500.0) home_venue_elo = float(elo_row.get('home_home_elo') or home_elo) away_venue_elo = float(elo_row.get('away_away_elo') or away_elo) home_form_elo_val = float(elo_row.get('home_form_elo') or home_elo) away_form_elo_val = float(elo_row.get('away_form_elo') or away_elo) else: cur.execute( "SELECT team_id, overall_elo, home_elo, away_elo, form_elo " "FROM team_elo_ratings WHERE team_id IN (%s, %s)", (data.home_team_id, data.away_team_id), ) by_team = {str(r.get("team_id")): r for r in cur.fetchall()} home_row = by_team.get(str(data.home_team_id)) away_row = by_team.get(str(data.away_team_id)) if home_row: home_elo = float(home_row.get("overall_elo") or 1500.0) home_venue_elo = float(home_row.get("home_elo") or home_elo) home_form_elo_val = float(home_row.get("form_elo") or home_elo) if away_row: away_elo = float(away_row.get("overall_elo") or 1500.0) away_venue_elo = float(away_row.get("away_elo") or away_elo) away_form_elo_val = float(away_row.get("form_elo") or away_elo) setattr(data, "feature_source", "football_ai_features" if elo_row else "live_prematch_enrichment") # Staleness check: both teams at exact 1500 → ELO was never computed if home_elo == 1500.0 and away_elo == 1500.0: enrichment_failures.append("elo_stale:both_teams_at_default_1500") except Exception as e: enrichment_failures.append(f"elo:{e}") setattr(data, "feature_source", "fallback_defaults") # Team stats try: home_stats = enr.compute_team_stats(cur, data.home_team_id, data.match_date_ms) away_stats = enr.compute_team_stats(cur, data.away_team_id, data.match_date_ms) except Exception as e: enrichment_failures.append(f"team_stats:{e}") # H2H try: h2h = enr.compute_h2h(cur, data.home_team_id, data.away_team_id, data.match_date_ms) except Exception as e: enrichment_failures.append(f"h2h:{e}") # Form try: home_form = enr.compute_form_streaks(cur, data.home_team_id, data.match_date_ms) away_form = enr.compute_form_streaks(cur, data.away_team_id, data.match_date_ms) except Exception as e: enrichment_failures.append(f"form:{e}") # Referee try: ref = enr.compute_referee_stats(cur, data.referee_name, data.match_date_ms) except Exception as e: enrichment_failures.append(f"referee:{e}") # League try: league = enr.compute_league_averages(cur, data.league_id, data.match_date_ms) except Exception as e: enrichment_failures.append(f"league:{e}") # Momentum try: home_momentum = enr.compute_momentum(cur, data.home_team_id, data.match_date_ms) away_momentum = enr.compute_momentum(cur, data.away_team_id, data.match_date_ms) except Exception as e: enrichment_failures.append(f"momentum:{e}") # V27 Rolling + Venue + Rest try: home_rolling = enr.compute_rolling_stats(cur, data.home_team_id, data.match_date_ms) away_rolling = enr.compute_rolling_stats(cur, data.away_team_id, data.match_date_ms) home_venue = enr.compute_venue_stats(cur, data.home_team_id, data.match_date_ms, is_home=True) away_venue = enr.compute_venue_stats(cur, data.away_team_id, data.match_date_ms, is_home=False) home_rest = enr.compute_days_rest(cur, data.home_team_id, data.match_date_ms) away_rest = enr.compute_days_rest(cur, data.away_team_id, data.match_date_ms) except Exception as e: enrichment_failures.append(f"rolling/venue:{e}") # V28 Odds-Band try: odds_band_features = self.odds_band_analyzer.compute_all( cur=cur, home_team_id=data.home_team_id, away_team_id=data.away_team_id, league_id=data.league_id, odds=odds, before_ts=data.match_date_ms, referee_name=data.referee_name, ) except Exception as e: enrichment_failures.append(f"odds_band:{e}") except Exception as e: enrichment_failures.append(f"db_connection:{e}") setattr(data, "feature_source", "fallback_defaults") setattr(data, "odds_band_features", odds_band_features) if enrichment_failures: print(f"⚠️ Enrichment partial failures for {data.match_id}: {', '.join(enrichment_failures)}") # ── Cup game detection (used by upset engine + elo dampening below) ── _league_name_lower = (getattr(data, 'league_name', '') or '').lower() _cup_keywords = ("kupa", "cup", "coupe", "copa", "coppa", "pokal", "trophy", "shield", "ziraat", "süper kupa", "super cup", "beker", "taça", "taca") _is_cup_match = any(kw in _league_name_lower for kw in _cup_keywords) # ── League size hint: top European leagues 18-20 teams, lower 16-24 ── # We don't have a per-league team count, so fall back to 20 (standard). # When standings infra lands this should pull from seasons table. _league_total_teams = 20 # Upset engine features upset_atmosphere, upset_motivation, upset_fatigue = 0.0, 0.0, 0.0 try: upset_engine = get_upset_engine() # Use the real position estimates from data_loader; fall back to mid- # table (10) only when the loader couldn't compute one. Hardcoding 10 # for every team made motivation_score collapse to 0 for everyone. _home_pos = getattr(data, 'home_position', None) _away_pos = getattr(data, 'away_position', None) if _home_pos is None or _home_pos <= 0: _home_pos = 10 if _away_pos is None or _away_pos <= 0: _away_pos = 10 upset_feats = upset_engine.get_features( home_team_name=getattr(data, 'home_team_name', '') or '', home_team_id=data.home_team_id, away_team_name=getattr(data, 'away_team_name', '') or '', league_name=getattr(data, 'league_name', '') or '', home_position=_home_pos, away_position=_away_pos, match_date_ms=data.match_date_ms, is_cup_match=_is_cup_match, home_days_rest=int(home_rest), away_days_rest=int(away_rest), total_teams=_league_total_teams, ) upset_atmosphere = upset_feats.get('upset_atmosphere', 0.0) upset_motivation = upset_feats.get('upset_motivation', 0.0) upset_fatigue = upset_feats.get('upset_fatigue', 0.0) except Exception as e: print(f"⚠️ Upset engine failed: {e}") odds_presence = { 'odds_ms_h_present': 1.0 if ms_h > 1.01 else 0.0, 'odds_ms_d_present': 1.0 if ms_d > 1.01 else 0.0, 'odds_ms_a_present': 1.0 if ms_a > 1.01 else 0.0, 'odds_ht_ms_h_present': 1.0 if float(odds.get('ht_h') or 0) > 1.01 else 0.0, 'odds_ht_ms_d_present': 1.0 if float(odds.get('ht_d') or 0) > 1.01 else 0.0, 'odds_ht_ms_a_present': 1.0 if float(odds.get('ht_a') or 0) > 1.01 else 0.0, 'odds_ou05_o_present': 1.0 if float(odds.get('ou05_o') or 0) > 1.01 else 0.0, 'odds_ou05_u_present': 1.0 if float(odds.get('ou05_u') or 0) > 1.01 else 0.0, 'odds_ou15_o_present': 1.0 if float(odds.get('ou15_o') or 0) > 1.01 else 0.0, 'odds_ou15_u_present': 1.0 if float(odds.get('ou15_u') or 0) > 1.01 else 0.0, 'odds_ou25_o_present': 1.0 if float(odds.get('ou25_o') or 0) > 1.01 else 0.0, 'odds_ou25_u_present': 1.0 if float(odds.get('ou25_u') or 0) > 1.01 else 0.0, 'odds_ou35_o_present': 1.0 if float(odds.get('ou35_o') or 0) > 1.01 else 0.0, 'odds_ou35_u_present': 1.0 if float(odds.get('ou35_u') or 0) > 1.01 else 0.0, 'odds_ht_ou05_o_present': 1.0 if float(odds.get('ht_ou05_o') or 0) > 1.01 else 0.0, 'odds_ht_ou05_u_present': 1.0 if float(odds.get('ht_ou05_u') or 0) > 1.01 else 0.0, 'odds_ht_ou15_o_present': 1.0 if float(odds.get('ht_ou15_o') or 0) > 1.01 else 0.0, 'odds_ht_ou15_u_present': 1.0 if float(odds.get('ht_ou15_u') or 0) > 1.01 else 0.0, 'odds_btts_y_present': 1.0 if float(odds.get('btts_y') or 0) > 1.01 else 0.0, 'odds_btts_n_present': 1.0 if float(odds.get('btts_n') or 0) > 1.01 else 0.0, } # ── Calendar features (V27) ── import datetime match_dt = datetime.datetime.utcfromtimestamp(data.match_date_ms / 1000) match_month = match_dt.month is_season_start = 1.0 if match_month in (7, 8, 9) else 0.0 is_season_end = 1.0 if match_month in (5, 6) else 0.0 # ── Derived / Interaction features (V27) ── # Cup games: home ELO advantage is ~30% weaker (rotation, lower motivation) # Uses _is_cup_match computed earlier (before upset engine call). elo_diff = (home_elo - away_elo) * (0.70 if _is_cup_match else 1.0) form_elo_diff = home_form_elo_val - away_form_elo_val attack_vs_defense_home = data.home_goals_avg - data.away_conceded_avg attack_vs_defense_away = data.away_goals_avg - data.home_conceded_avg xga_home = data.home_conceded_avg xga_away = data.away_conceded_avg xg_diff = xga_home - xga_away mom_diff = home_momentum - away_momentum form_momentum_interaction = mom_diff * form_elo_diff / 1000.0 elo_form_consistency = 1.0 - abs(elo_diff - form_elo_diff) / max(abs(elo_diff), 100.0) upset_x_elo_gap = upset_potential * abs(elo_diff) / 500.0 return { # META (1) 'mst_utc': float(data.match_date_ms), # ELO (8) 'home_overall_elo': home_elo, 'away_overall_elo': away_elo, 'elo_diff': elo_diff, 'home_home_elo': home_venue_elo, 'away_away_elo': away_venue_elo, 'home_form_elo': home_form_elo_val, 'away_form_elo': away_form_elo_val, 'form_elo_diff': form_elo_diff, # Form (12) 'home_goals_avg': data.home_goals_avg, 'home_conceded_avg': data.home_conceded_avg, 'away_goals_avg': data.away_goals_avg, 'away_conceded_avg': data.away_conceded_avg, 'home_clean_sheet_rate': home_form['clean_sheet_rate'], 'away_clean_sheet_rate': away_form['clean_sheet_rate'], 'home_scoring_rate': home_form['scoring_rate'], 'away_scoring_rate': away_form['scoring_rate'], 'home_winning_streak': home_form['winning_streak'], 'away_winning_streak': away_form['winning_streak'], 'home_unbeaten_streak': home_form['unbeaten_streak'], 'away_unbeaten_streak': away_form['unbeaten_streak'], # H2H (10 — original 6 + V27 expanded 4) 'h2h_total_matches': h2h['total_matches'], 'h2h_home_win_rate': h2h['home_win_rate'], 'h2h_draw_rate': h2h['draw_rate'], 'h2h_avg_goals': h2h['avg_goals'], 'h2h_btts_rate': h2h['btts_rate'], 'h2h_over25_rate': h2h['over25_rate'], 'h2h_home_goals_avg': h2h['home_goals_avg'], 'h2h_away_goals_avg': h2h['away_goals_avg'], 'h2h_recent_trend': h2h['recent_trend'], 'h2h_venue_advantage': h2h['venue_advantage'], # Stats (8) 'home_avg_possession': home_stats['avg_possession'], 'away_avg_possession': away_stats['avg_possession'], 'home_avg_shots_on_target': home_stats['avg_shots_on_target'], 'away_avg_shots_on_target': away_stats['avg_shots_on_target'], 'home_shot_conversion': home_stats['shot_conversion'], 'away_shot_conversion': away_stats['shot_conversion'], 'home_avg_corners': home_stats['avg_corners'], 'away_avg_corners': away_stats['avg_corners'], # Odds (24) 'odds_ms_h': ms_h, 'odds_ms_d': ms_d, 'odds_ms_a': ms_a, 'implied_home': implied_home, 'implied_draw': implied_draw, 'implied_away': implied_away, 'odds_ht_ms_h': float(odds.get('ht_h') or 0), 'odds_ht_ms_d': float(odds.get('ht_d') or 0), 'odds_ht_ms_a': float(odds.get('ht_a') or 0), 'odds_ou05_o': float(odds.get('ou05_o') or 0), 'odds_ou05_u': float(odds.get('ou05_u') or 0), 'odds_ou15_o': float(odds.get('ou15_o') or 0), 'odds_ou15_u': float(odds.get('ou15_u') or 0), 'odds_ou25_o': float(odds.get('ou25_o') or 0), 'odds_ou25_u': float(odds.get('ou25_u') or 0), 'odds_ou35_o': float(odds.get('ou35_o') or 0), 'odds_ou35_u': float(odds.get('ou35_u') or 0), 'odds_ht_ou05_o': float(odds.get('ht_ou05_o') or 0), 'odds_ht_ou05_u': float(odds.get('ht_ou05_u') or 0), 'odds_ht_ou15_o': float(odds.get('ht_ou15_o') or 0), 'odds_ht_ou15_u': float(odds.get('ht_ou15_u') or 0), 'odds_btts_y': float(odds.get('btts_y') or 0), 'odds_btts_n': float(odds.get('btts_n') or 0), **odds_presence, # League (9 — original 2 + V27 expanded 5 + xga 2) 'home_xga': xga_home, 'away_xga': xga_away, 'league_avg_goals': league['avg_goals'], 'league_zero_goal_rate': league['zero_goal_rate'], 'league_home_win_rate': league['home_win_rate'], 'league_draw_rate': league['draw_rate'], 'league_btts_rate': league['btts_rate'], 'league_ou25_rate': league['ou25_rate'], 'league_reliability_score': league['reliability_score'], # Upset (4) 'upset_atmosphere': upset_atmosphere, 'upset_motivation': upset_motivation, 'upset_fatigue': upset_fatigue, 'upset_potential': upset_potential, # Referee (5) 'referee_home_bias': ref['home_bias'], 'referee_avg_goals': ref['avg_goals'], 'referee_cards_total': ref['cards_total'], 'referee_avg_yellow': ref['avg_yellow'], 'referee_experience': ref['experience'], # Momentum (3) 'home_momentum_score': home_momentum, 'away_momentum_score': away_momentum, 'momentum_diff': mom_diff, # ── V27 Rolling Stats (13) ── 'home_rolling5_goals': home_rolling['rolling5_goals'], 'home_rolling5_conceded': home_rolling['rolling5_conceded'], 'home_rolling10_goals': home_rolling['rolling10_goals'], 'home_rolling10_conceded': home_rolling['rolling10_conceded'], 'home_rolling20_goals': home_rolling['rolling20_goals'], 'home_rolling20_conceded': home_rolling['rolling20_conceded'], 'away_rolling5_goals': away_rolling['rolling5_goals'], 'away_rolling5_conceded': away_rolling['rolling5_conceded'], 'away_rolling10_goals': away_rolling['rolling10_goals'], 'away_rolling10_conceded': away_rolling['rolling10_conceded'], 'home_rolling5_cs': home_rolling['rolling5_cs'], 'away_rolling5_cs': away_rolling['rolling5_cs'], # ── V27 Venue Stats (4) ── 'home_venue_goals': home_venue['venue_goals'], 'home_venue_conceded': home_venue['venue_conceded'], 'away_venue_goals': away_venue['venue_goals'], 'away_venue_conceded': away_venue['venue_conceded'], # ── V27 Goal Trend (2) ── 'home_goal_trend': home_rolling['rolling5_goals'] - home_rolling['rolling10_goals'], 'away_goal_trend': away_rolling['rolling5_goals'] - away_rolling['rolling10_goals'], # ── V27 Calendar (4) ── 'home_days_rest': home_rest, 'away_days_rest': away_rest, 'match_month': float(match_month), 'is_season_start': is_season_start, 'is_season_end': is_season_end, # ── V27 Interaction (6) ── 'attack_vs_defense_home': attack_vs_defense_home, 'attack_vs_defense_away': attack_vs_defense_away, 'xg_diff': xg_diff, 'form_momentum_interaction': form_momentum_interaction, 'elo_form_consistency': elo_form_consistency, 'upset_x_elo_gap': upset_x_elo_gap, # Squad Features (9) — PlayerPredictorEngine **self._get_squad_features(data), # V28 Odds-Band Historical Performance Features **odds_band_features, } def _get_squad_features(self, data: MatchData) -> Dict[str, float]: """Non-fatal squad analysis with 12 player-level features.""" defaults = { 'home_squad_quality': 12.0, 'away_squad_quality': 12.0, 'squad_diff': 0.0, 'home_key_players': 3.0, 'away_key_players': 3.0, 'home_missing_impact': 0.0, 'away_missing_impact': 0.0, 'home_goals_form': 1.3, 'away_goals_form': 1.3, 'home_lineup_goals_per90': 0.0, 'away_lineup_goals_per90': 0.0, 'home_lineup_assists_per90': 0.0, 'away_lineup_assists_per90': 0.0, 'home_squad_continuity': 0.5, 'away_squad_continuity': 0.5, 'home_top_scorer_form': 0.0, 'away_top_scorer_form': 0.0, 'home_avg_player_exp': 0.0, 'away_avg_player_exp': 0.0, 'home_goals_diversity': 0.0, 'away_goals_diversity': 0.0, } try: engine = get_player_predictor() pred = engine.predict( match_id=data.match_id, home_team_id=data.home_team_id, away_team_id=data.away_team_id, home_lineup=data.home_lineup, away_lineup=data.away_lineup, sidelined_data=data.sidelined_data, ) result = { 'home_squad_quality': float(pred.home_squad_quality or 0.0), 'away_squad_quality': float(pred.away_squad_quality or 0.0), 'squad_diff': float(pred.squad_diff or 0.0), 'home_key_players': float(pred.home_key_players or 0), 'away_key_players': float(pred.away_key_players or 0), 'home_missing_impact': float(pred.home_missing_impact or 0.0), 'away_missing_impact': float(pred.away_missing_impact or 0.0), 'home_goals_form': float(pred.home_goals_form or 0.0), 'away_goals_form': float(pred.away_goals_form or 0.0), 'home_lineup_goals_per90': float(pred.home_lineup_goals_per90 or 0.0), 'away_lineup_goals_per90': float(pred.away_lineup_goals_per90 or 0.0), 'home_lineup_assists_per90': float(pred.home_lineup_assists_per90 or 0.0), 'away_lineup_assists_per90': float(pred.away_lineup_assists_per90 or 0.0), 'home_squad_continuity': float(pred.home_squad_continuity or 0.5), 'away_squad_continuity': float(pred.away_squad_continuity or 0.5), 'home_top_scorer_form': float(pred.home_top_scorer_form or 0), 'away_top_scorer_form': float(pred.away_top_scorer_form or 0), 'home_avg_player_exp': float(pred.home_avg_player_exp or 0.0), 'away_avg_player_exp': float(pred.away_avg_player_exp or 0.0), 'home_goals_diversity': float(pred.home_goals_diversity or 0.0), 'away_goals_diversity': float(pred.away_goals_diversity or 0.0), } for side in ('home', 'away'): sq = result[f'{side}_squad_quality'] if sq > 50 or sq < 0: print(f"🚨 SCALE MISMATCH: {side}_squad_quality={sq:.1f} " f"(expected 3-36). Check player_predictor formula!") return result except Exception as e: print(f"⚠️ Squad features failed: {e}") return defaults def _sanitize_v25_odds(self, odds_data: Dict[str, Any]) -> Dict[str, float]: sanitized: Dict[str, float] = {} for key in self.V25_ODDS_FEATURE_KEYS: sanitized[key] = self._real_market_odds(odds_data, key) for key in ("dc_1x", "dc_x2", "dc_12", "oe_odd", "oe_even", "cards_o", "cards_u", "hcap_h", "hcap_d", "hcap_a"): if key in odds_data: sanitized[key] = self._real_market_odds(odds_data, key) return sanitized