""" V27 Rolling Window Feature Calculator ====================================== Computes rolling averages over 5/10/20 match windows, with home/away splits and trend detection. """ from __future__ import annotations from typing import Dict, List, Tuple import math def calc_rolling_features( team_matches: List[Tuple], # [(mst, is_home, team_goals, opp_goals, opp_id), ...] before_date: int, team_is_home: bool, ) -> Dict[str, float]: """Calculate rolling window features for a team before a given date.""" valid = [m for m in team_matches if m[0] < before_date] defaults = { "rolling5_goals_avg": 1.3, "rolling5_conceded_avg": 1.2, "rolling10_goals_avg": 1.3, "rolling10_conceded_avg": 1.2, "rolling20_goals_avg": 1.3, "rolling20_conceded_avg": 1.2, "rolling5_clean_sheets": 0.25, "venue_goals_avg": 1.3, "venue_conceded_avg": 1.2, "goal_trend": 0.0, } if len(valid) < 3: return defaults result = {} for window in [5, 10, 20]: recent = valid[-window:] if len(valid) >= window else valid n = len(recent) g_sum = sum(m[2] for m in recent) c_sum = sum(m[3] for m in recent) result[f"rolling{window}_goals_avg"] = g_sum / n result[f"rolling{window}_conceded_avg"] = c_sum / n # Clean sheet rate (last 5) r5 = valid[-5:] if len(valid) >= 5 else valid result["rolling5_clean_sheets"] = sum(1 for m in r5 if m[3] == 0) / len(r5) # Venue-specific (home-only or away-only) venue_matches = [m for m in valid if m[1] == team_is_home] if venue_matches: vm = venue_matches[-10:] if len(venue_matches) >= 10 else venue_matches result["venue_goals_avg"] = sum(m[2] for m in vm) / len(vm) result["venue_conceded_avg"] = sum(m[3] for m in vm) / len(vm) else: result["venue_goals_avg"] = defaults["venue_goals_avg"] result["venue_conceded_avg"] = defaults["venue_conceded_avg"] # Goal trend: compare last 3 vs previous 3 if len(valid) >= 6: last3 = sum(m[2] for m in valid[-3:]) / 3 prev3 = sum(m[2] for m in valid[-6:-3]) / 3 result["goal_trend"] = last3 - prev3 else: result["goal_trend"] = 0.0 return result def calc_league_quality( all_matches: List[Tuple], # all FT matches in this league ) -> Dict[str, float]: """Calculate league-level quality features.""" defaults = { "league_home_win_rate": 0.45, "league_draw_rate": 0.25, "league_btts_rate": 0.50, "league_ou25_rate": 0.50, "league_reliability_score": 0.50, } if len(all_matches) < 20: return defaults n = len(all_matches) home_wins = sum(1 for m in all_matches if m[2] > m[3]) draws = sum(1 for m in all_matches if m[2] == m[3]) btts = sum(1 for m in all_matches if m[2] > 0 and m[3] > 0) ou25 = sum(1 for m in all_matches if (m[2] + m[3]) > 2.5) hw_rate = home_wins / n dr_rate = draws / n btts_rate = btts / n ou25_rate = ou25 / n # Reliability: leagues closer to averages are more predictable predictability = 1.0 - abs(hw_rate - 0.45) - abs(dr_rate - 0.27) * 0.5 reliability = max(0.2, min(0.95, predictability)) return { "league_home_win_rate": round(hw_rate, 4), "league_draw_rate": round(dr_rate, 4), "league_btts_rate": round(btts_rate, 4), "league_ou25_rate": round(ou25_rate, 4), "league_reliability_score": round(reliability, 4), } def calc_time_features( team_matches: List[Tuple], match_mst: int, ) -> Dict[str, float]: """Calculate time-based features.""" from datetime import datetime # Days since last match valid = [m for m in team_matches if m[0] < match_mst] if valid: last_mst = valid[-1][0] days_rest = (match_mst - last_mst) / 86_400_000 # ms to days days_rest = min(days_rest, 60.0) # cap at 60 days else: days_rest = 14.0 # Month and season flags try: dt = datetime.utcfromtimestamp(match_mst / 1000) month = dt.month is_season_start = 1.0 if month in (7, 8) else 0.0 is_season_end = 1.0 if month in (5, 6) else 0.0 except Exception: month = 6 is_season_start = 0.0 is_season_end = 0.0 return { "days_rest": round(days_rest, 2), "match_month": month, "is_season_start": is_season_start, "is_season_end": is_season_end, } def calc_advanced_h2h( team_matches: List[Tuple], home_id: int, away_id: int, before_date: int, ) -> Dict[str, float]: """Calculate advanced H2H features.""" defaults = { "h2h_home_goals_avg": 1.3, "h2h_away_goals_avg": 1.1, "h2h_recent_trend": 0.0, "h2h_venue_advantage": 0.0, } h2h = [m for m in team_matches if m[4] == away_id and m[0] < before_date] if not h2h: return defaults recent = h2h[-10:] home_goals_total = 0 away_goals_total = 0 venue_home_wins = 0 venue_total = 0 for mst, is_home, team_goals, opp_goals, _ in recent: if is_home: home_goals_total += team_goals away_goals_total += opp_goals venue_total += 1 if team_goals > opp_goals: venue_home_wins += 1 else: home_goals_total += opp_goals away_goals_total += team_goals n = len(recent) result = { "h2h_home_goals_avg": home_goals_total / n, "h2h_away_goals_avg": away_goals_total / n, "h2h_venue_advantage": venue_home_wins / venue_total if venue_total > 0 else 0.5, } # Recent trend: last 3 vs overall if len(h2h) >= 4: last3_pts = sum( 1.0 if m[2] > m[3] else (0.5 if m[2] == m[3] else 0.0) for m in h2h[-3:] ) / 3 overall_pts = sum( 1.0 if m[2] > m[3] else (0.5 if m[2] == m[3] else 0.0) for m in h2h ) / len(h2h) result["h2h_recent_trend"] = round(last3_pts - overall_pts, 4) else: result["h2h_recent_trend"] = 0.0 return result def calc_strength_diff( home_form: Dict[str, float], away_form: Dict[str, float], home_elo: Dict[str, float], away_elo: Dict[str, float], home_momentum: float, away_momentum: float, upset_potential: float, ) -> Dict[str, float]: """Calculate strength differential features.""" # Attack vs Defense mismatches h_attack = home_form.get("goals_avg", 1.3) a_defense = away_form.get("conceded_avg", 1.2) a_attack = away_form.get("goals_avg", 1.3) h_defense = home_form.get("conceded_avg", 1.2) atk_def_home = h_attack - a_defense # positive = home attack > away defense atk_def_away = a_attack - h_defense # XG diff approximation xg_diff = (h_attack + a_defense) / 2 - (a_attack + h_defense) / 2 # Form × Momentum interaction form_mom = (home_momentum - away_momentum) * ( home_form.get("scoring_rate", 0.75) - away_form.get("scoring_rate", 0.75) ) # ELO-Form consistency elo_diff = home_elo.get("overall", 1500) - away_elo.get("overall", 1500) form_diff = h_attack - a_attack elo_form_consistency = 1.0 if (elo_diff > 0 and form_diff > 0) or (elo_diff < 0 and form_diff < 0) else 0.0 # Upset × ELO gap elo_gap = abs(elo_diff) upset_x_elo = upset_potential * (elo_gap / 400.0) return { "attack_vs_defense_home": round(atk_def_home, 4), "attack_vs_defense_away": round(atk_def_away, 4), "xg_diff": round(xg_diff, 4), "form_momentum_interaction": round(form_mom, 4), "elo_form_consistency": elo_form_consistency, "upset_x_elo_gap": round(upset_x_elo, 4), }