gg
This commit is contained in:
@@ -0,0 +1,243 @@
|
||||
"""
|
||||
V27 Rolling Window Feature Calculator
|
||||
======================================
|
||||
Computes rolling averages over 5/10/20 match windows,
|
||||
with home/away splits and trend detection.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from typing import Dict, List, Tuple
|
||||
import math
|
||||
|
||||
|
||||
def calc_rolling_features(
    team_matches: List[Tuple],  # [(mst, is_home, team_goals, opp_goals, opp_id), ...]
    before_date: int,
    team_is_home: bool,
) -> Dict[str, float]:
    """Calculate rolling window features for a team before a given date.

    Args:
        team_matches: Match tuples laid out as
            ``(mst, is_home, team_goals, opp_goals, opp_id)``. The order of
            the list does not matter; matches are sorted by ``mst`` here.
        before_date: Only matches with ``mst`` strictly below this value
            are considered.
        team_is_home: Venue of the upcoming match; selects which past
            matches (``is_home == team_is_home``) feed the venue averages.

    Returns:
        A dict with goals/conceded averages over the last 5, 10 and 20
        matches, the clean-sheet rate over the last 5, venue-specific
        averages over the last 10 same-venue matches, and ``goal_trend``
        (mean goals of the last 3 matches minus the 3 before them).
        Fixed default values are returned when fewer than 3 matches qualify.
    """
    defaults = {
        "rolling5_goals_avg": 1.3, "rolling5_conceded_avg": 1.2,
        "rolling10_goals_avg": 1.3, "rolling10_conceded_avg": 1.2,
        "rolling20_goals_avg": 1.3, "rolling20_conceded_avg": 1.2,
        "rolling5_clean_sheets": 0.25,
        "venue_goals_avg": 1.3, "venue_conceded_avg": 1.2,
        "goal_trend": 0.0,
    }

    # Every "last N" slice below relies on chronological order, so enforce
    # it instead of trusting the caller. sorted() is stable, which keeps the
    # result unchanged for input that is already ordered.
    valid = sorted(
        (m for m in team_matches if m[0] < before_date),
        key=lambda m: m[0],
    )

    # Too little history for meaningful averages.
    if len(valid) < 3:
        return defaults

    result: Dict[str, float] = {}

    for window in (5, 10, 20):
        # A negative slice already yields the whole list when it is shorter
        # than the window, so no explicit length check is needed.
        recent = valid[-window:]
        n = len(recent)
        result[f"rolling{window}_goals_avg"] = sum(m[2] for m in recent) / n
        result[f"rolling{window}_conceded_avg"] = sum(m[3] for m in recent) / n

    # Clean sheet rate (last 5)
    r5 = valid[-5:]
    result["rolling5_clean_sheets"] = sum(1 for m in r5 if m[3] == 0) / len(r5)

    # Venue-specific (home-only or away-only), limited to the last 10
    venue_matches = [m for m in valid if m[1] == team_is_home][-10:]
    if venue_matches:
        count = len(venue_matches)
        result["venue_goals_avg"] = sum(m[2] for m in venue_matches) / count
        result["venue_conceded_avg"] = sum(m[3] for m in venue_matches) / count
    else:
        result["venue_goals_avg"] = defaults["venue_goals_avg"]
        result["venue_conceded_avg"] = defaults["venue_conceded_avg"]

    # Goal trend: compare last 3 vs previous 3
    if len(valid) >= 6:
        last3 = sum(m[2] for m in valid[-3:]) / 3
        prev3 = sum(m[2] for m in valid[-6:-3]) / 3
        result["goal_trend"] = last3 - prev3
    else:
        result["goal_trend"] = 0.0

    return result
|
||||
|
||||
|
||||
def calc_league_quality(
    all_matches: List[Tuple],  # all FT matches in this league
) -> Dict[str, float]:
    """Derive league-wide outcome rates and a reliability score.

    Index 2 of each match tuple is compared against index 3 to decide a
    home win, so they are treated as home and away goals respectively.

    Args:
        all_matches: Finished matches of a single league.

    Returns:
        Home-win, draw, both-teams-to-score and over-2.5-goals rates plus a
        reliability score clamped to ``[0.2, 0.95]``, all rounded to four
        decimals. With fewer than 20 matches a fixed set of default values
        is returned instead.
    """
    total = len(all_matches)

    # Sample too small to say anything about the league.
    if total < 20:
        return {
            "league_home_win_rate": 0.45,
            "league_draw_rate": 0.25,
            "league_btts_rate": 0.50,
            "league_ou25_rate": 0.50,
            "league_reliability_score": 0.50,
        }

    # Single pass over the matches, tallying every outcome of interest.
    home_win_count = 0
    draw_count = 0
    both_scored_count = 0
    over_25_count = 0
    for match in all_matches:
        home_goals = match[2]
        away_goals = match[3]
        if home_goals > away_goals:
            home_win_count += 1
        if home_goals == away_goals:
            draw_count += 1
        if home_goals > 0 and away_goals > 0:
            both_scored_count += 1
        if (home_goals + away_goals) > 2.5:
            over_25_count += 1

    hw_rate = home_win_count / total
    dr_rate = draw_count / total

    # Reliability: the closer the league sits to the reference home-win
    # (0.45) and draw (0.27) rates, the higher the score; the draw
    # deviation carries half weight.
    raw_score = 1.0 - abs(hw_rate - 0.45) - abs(dr_rate - 0.27) * 0.5
    clamped_score = max(0.2, min(0.95, raw_score))

    rates = {
        "league_home_win_rate": hw_rate,
        "league_draw_rate": dr_rate,
        "league_btts_rate": both_scored_count / total,
        "league_ou25_rate": over_25_count / total,
        "league_reliability_score": clamped_score,
    }
    return {name: round(value, 4) for name, value in rates.items()}
|
||||
|
||||
|
||||
def calc_time_features(
    team_matches: List[Tuple],
    match_mst: int,
) -> Dict[str, float]:
    """Calculate time-based features.

    Args:
        team_matches: Match tuples whose first element is the kickoff
            timestamp in milliseconds. The list may be in any order.
        match_mst: Kickoff timestamp of the match being described, in
            milliseconds since the Unix epoch.

    Returns:
        ``days_rest`` (days since the team's latest earlier match, capped at
        60 and defaulting to 14 when there is none), ``match_month`` (UTC
        calendar month) and the ``is_season_start`` / ``is_season_end``
        flags (months 7-8 and 5-6 respectively). If the timestamp cannot be
        converted to a date, the month falls back to 6 with both flags 0.0.
    """
    from datetime import datetime, timezone

    # Days since last match. Take the maximum earlier timestamp rather than
    # the last list element so the result does not depend on input order.
    earlier = [m[0] for m in team_matches if m[0] < match_mst]
    if earlier:
        days_rest = (match_mst - max(earlier)) / 86_400_000  # ms to days
        days_rest = min(days_rest, 60.0)  # cap at 60 days
    else:
        days_rest = 14.0

    # Month and season flags
    try:
        # Timezone-aware replacement for the deprecated utcfromtimestamp();
        # the resulting UTC calendar month is the same.
        month = datetime.fromtimestamp(match_mst / 1000, tz=timezone.utc).month
    except (OverflowError, OSError, ValueError, TypeError):
        # Out-of-range or malformed timestamp: keep the best-effort fallback.
        month = 6
        is_season_start = 0.0
        is_season_end = 0.0
    else:
        is_season_start = 1.0 if month in (7, 8) else 0.0
        is_season_end = 1.0 if month in (5, 6) else 0.0

    return {
        "days_rest": round(days_rest, 2),
        "match_month": month,
        "is_season_start": is_season_start,
        "is_season_end": is_season_end,
    }
|
||||
|
||||
|
||||
def calc_advanced_h2h(
    team_matches: List[Tuple],
    home_id: int,
    away_id: int,
    before_date: int,
) -> Dict[str, float]:
    """Calculate advanced H2H features.

    Args:
        team_matches: Match tuples laid out as
            ``(mst, is_home, team_goals, opp_goals, opp_id)`` -- presumably
            the home team's history (verify against the caller). The list
            may be in any order; meetings are sorted by ``mst`` here.
        home_id: Currently unused; kept so existing callers keep working.
        away_id: Opponent id used to select the head-to-head meetings.
        before_date: Only meetings with ``mst`` strictly below this value
            are considered.

    Returns:
        Average goals of the venue's home and away sides over the last 10
        meetings, the team's win rate in the meetings it hosted (0.5 when it
        hosted none of them), and ``h2h_recent_trend``: points per game
        (win 1, draw 0.5) over the last 3 meetings minus the same over all
        meetings, or 0.0 with fewer than 4 meetings. Fixed defaults are
        returned when there is no earlier meeting at all.
    """
    defaults = {
        "h2h_home_goals_avg": 1.3,
        "h2h_away_goals_avg": 1.1,
        "h2h_recent_trend": 0.0,
        "h2h_venue_advantage": 0.0,
    }

    # The "last 10" and "last 3" slices below only mean "most recent" when
    # the meetings are chronological, so sort explicitly (stable, hence a
    # no-op for input that is already ordered).
    h2h = sorted(
        (m for m in team_matches if m[4] == away_id and m[0] < before_date),
        key=lambda m: m[0],
    )
    if not h2h:
        return defaults

    recent = h2h[-10:]
    home_goals_total = 0
    away_goals_total = 0
    venue_home_wins = 0
    venue_total = 0

    for _mst, is_home, team_goals, opp_goals, _opp in recent:
        if is_home:
            # The team hosted: its goals are the home side's goals.
            home_goals_total += team_goals
            away_goals_total += opp_goals
            venue_total += 1
            if team_goals > opp_goals:
                venue_home_wins += 1
        else:
            # The team travelled: the opponent was the home side.
            home_goals_total += opp_goals
            away_goals_total += team_goals

    n = len(recent)
    result = {
        "h2h_home_goals_avg": home_goals_total / n,
        "h2h_away_goals_avg": away_goals_total / n,
        "h2h_venue_advantage": venue_home_wins / venue_total if venue_total > 0 else 0.5,
    }

    # Recent trend: last 3 vs overall
    if len(h2h) >= 4:
        points = [
            1.0 if m[2] > m[3] else (0.5 if m[2] == m[3] else 0.0)
            for m in h2h
        ]
        last3_pts = sum(points[-3:]) / 3
        overall_pts = sum(points) / len(points)
        result["h2h_recent_trend"] = round(last3_pts - overall_pts, 4)
    else:
        result["h2h_recent_trend"] = 0.0

    return result
|
||||
|
||||
|
||||
def calc_strength_diff(
    home_form: Dict[str, float],
    away_form: Dict[str, float],
    home_elo: Dict[str, float],
    away_elo: Dict[str, float],
    home_momentum: float,
    away_momentum: float,
    upset_potential: float,
) -> Dict[str, float]:
    """Combine form, ELO and momentum inputs into differential features.

    Args:
        home_form: Home team form; reads ``goals_avg`` (default 1.3),
            ``conceded_avg`` (default 1.2) and ``scoring_rate``
            (default 0.75).
        away_form: Away team form, read with the same keys and defaults.
        home_elo: Home team ratings; reads ``overall`` (default 1500).
        away_elo: Away team ratings; reads ``overall`` (default 1500).
        home_momentum: Momentum value of the home team.
        away_momentum: Momentum value of the away team.
        upset_potential: Upset score that gets scaled by the ELO gap.

    Returns:
        Six features. All are rounded to four decimals except
        ``elo_form_consistency``, which is exactly 1.0 or 0.0.
    """
    home_scored = home_form.get("goals_avg", 1.3)
    home_conceded = home_form.get("conceded_avg", 1.2)
    away_scored = away_form.get("goals_avg", 1.3)
    away_conceded = away_form.get("conceded_avg", 1.2)

    # Approximate expected-goals edge: each side's attack averaged with the
    # goals its opponent tends to concede.
    home_xg = (home_scored + away_conceded) / 2
    away_xg = (away_scored + home_conceded) / 2

    # Momentum gap multiplied by the scoring-rate gap.
    momentum_gap = home_momentum - away_momentum
    scoring_rate_gap = (
        home_form.get("scoring_rate", 0.75) - away_form.get("scoring_rate", 0.75)
    )

    # ELO and recent scoring form "agree" when both favour the same side;
    # a zero difference on either axis counts as no agreement.
    rating_diff = home_elo.get("overall", 1500) - away_elo.get("overall", 1500)
    scoring_diff = home_scored - away_scored
    both_favour_home = rating_diff > 0 and scoring_diff > 0
    both_favour_away = rating_diff < 0 and scoring_diff < 0
    if both_favour_home or both_favour_away:
        agreement = 1.0
    else:
        agreement = 0.0

    # Upset potential scaled by the rating gap in units of 400 ELO points.
    scaled_upset = upset_potential * (abs(rating_diff) / 400.0)

    features: Dict[str, float] = {}
    # Positive value = that side's attack outscores what the other concedes.
    features["attack_vs_defense_home"] = round(home_scored - away_conceded, 4)
    features["attack_vs_defense_away"] = round(away_scored - home_conceded, 4)
    features["xg_diff"] = round(home_xg - away_xg, 4)
    features["form_momentum_interaction"] = round(momentum_gap * scoring_rate_gap, 4)
    features["elo_form_consistency"] = agreement
    features["upset_x_elo_gap"] = round(scaled_upset, 4)
    return features
|
||||
Reference in New Issue
Block a user