This commit is contained in:
2026-04-22 02:17:02 +03:00
parent 2ccd6831eb
commit df428ed1e8
19 changed files with 6436 additions and 9 deletions
+243
View File
@@ -0,0 +1,243 @@
"""
V27 Rolling Window Feature Calculator
======================================
Computes rolling averages over 5/10/20 match windows,
with home/away splits and trend detection.
"""
from __future__ import annotations
from typing import Dict, List, Tuple
import math
def calc_rolling_features(
    team_matches: List[Tuple],  # [(mst, is_home, team_goals, opp_goals, opp_id), ...]
    before_date: int,
    team_is_home: bool,
) -> Dict[str, float]:
    """Build rolling-window form features from a team's earlier matches.

    Only matches whose timestamp (index 0) is strictly below ``before_date``
    are considered. "Last N" windows are taken from the tail of that filtered
    list, so the input is presumably ordered oldest-to-newest -- confirm
    against the caller.

    Args:
        team_matches: Match tuples laid out as
            ``(mst, is_home, team_goals, opp_goals, opp_id)``.
        before_date: Exclusive upper bound on ``mst``.
        team_is_home: Venue flag compared against each tuple's ``is_home``
            to select the venue-specific sample.

    Returns:
        A dict with goals/conceded averages for the 5, 10 and 20 match
        windows, the clean-sheet rate over the last 5, venue-specific
        averages over the last 10 same-venue matches, and ``goal_trend``
        (mean goals of the last 3 minus mean goals of the 3 before them).
        When fewer than 3 earlier matches exist, a fixed set of default
        values is returned instead.
    """
    history = [match for match in team_matches if match[0] < before_date]

    fallback = {
        "rolling5_goals_avg": 1.3, "rolling5_conceded_avg": 1.2,
        "rolling10_goals_avg": 1.3, "rolling10_conceded_avg": 1.2,
        "rolling20_goals_avg": 1.3, "rolling20_conceded_avg": 1.2,
        "rolling5_clean_sheets": 0.25,
        "venue_goals_avg": 1.3, "venue_conceded_avg": 1.2,
        "goal_trend": 0.0,
    }
    if len(history) < 3:
        return fallback

    features = {}

    # A negative slice already yields the whole list when it is shorter
    # than the window, so no explicit length check is needed.
    for window in (5, 10, 20):
        sample = history[-window:]
        played = len(sample)
        scored = sum(match[2] for match in sample)
        conceded = sum(match[3] for match in sample)
        features[f"rolling{window}_goals_avg"] = scored / played
        features[f"rolling{window}_conceded_avg"] = conceded / played

    # Share of the last five matches in which the opponent did not score.
    last_five = history[-5:]
    shutouts = [match for match in last_five if match[3] == 0]
    features["rolling5_clean_sheets"] = len(shutouts) / len(last_five)

    # Venue split: up to the ten most recent matches at the requested venue.
    same_venue = [match for match in history if match[1] == team_is_home][-10:]
    if not same_venue:
        features["venue_goals_avg"] = fallback["venue_goals_avg"]
        features["venue_conceded_avg"] = fallback["venue_conceded_avg"]
    else:
        venue_played = len(same_venue)
        features["venue_goals_avg"] = sum(match[2] for match in same_venue) / venue_played
        features["venue_conceded_avg"] = sum(match[3] for match in same_venue) / venue_played

    # Scoring trend needs two full groups of three matches.
    trend = 0.0
    if len(history) >= 6:
        newest_avg = sum(match[2] for match in history[-3:]) / 3
        older_avg = sum(match[2] for match in history[-6:-3]) / 3
        trend = newest_avg - older_avg
    features["goal_trend"] = trend

    return features
def calc_league_quality(
    all_matches: List[Tuple],  # all FT matches in this league
) -> Dict[str, float]:
    """Summarise league-wide outcome rates and a derived reliability score.

    Index 2 and index 3 of every tuple are read as the two sides' goals;
    the "home win" count is the number of matches where index 2 exceeds
    index 3, so index 2 is presumably the home side's goals -- confirm
    against the caller.

    Args:
        all_matches: Match tuples for one league.

    Returns:
        A dict with the home-win, draw, both-teams-scored and over-2.5-goals
        rates plus ``league_reliability_score``, all rounded to 4 decimals.
        With fewer than 20 matches a fixed set of defaults is returned.
    """
    total = len(all_matches)
    if total < 20:
        return {
            "league_home_win_rate": 0.45,
            "league_draw_rate": 0.25,
            "league_btts_rate": 0.50,
            "league_ou25_rate": 0.50,
            "league_reliability_score": 0.50,
        }

    home_wins = 0
    draws = 0
    both_scored = 0
    over_25 = 0
    for match in all_matches:
        first_goals = match[2]
        second_goals = match[3]
        if first_goals > second_goals:
            home_wins += 1
        if first_goals == second_goals:
            draws += 1
        if first_goals > 0 and second_goals > 0:
            both_scored += 1
        if (first_goals + second_goals) > 2.5:
            over_25 += 1

    home_win_rate = home_wins / total
    draw_rate = draws / total
    both_scored_rate = both_scored / total
    over_25_rate = over_25 / total

    # The score drops as the league's home-win and draw rates move away
    # from fixed baselines; the draw deviation carries half the weight.
    # NOTE(review): the draw baseline here is 0.27 while the default draw
    # rate above is 0.25 -- confirm which value is intended.
    raw_score = 1.0 - abs(home_win_rate - 0.45) - abs(draw_rate - 0.27) * 0.5
    # Clamp into [0.2, 0.95].
    reliability = max(0.2, min(0.95, raw_score))

    return {
        "league_home_win_rate": round(home_win_rate, 4),
        "league_draw_rate": round(draw_rate, 4),
        "league_btts_rate": round(both_scored_rate, 4),
        "league_ou25_rate": round(over_25_rate, 4),
        "league_reliability_score": round(reliability, 4),
    }
def calc_time_features(
    team_matches: List[Tuple],
    match_mst: int,
) -> Dict[str, float]:
    """Calculate time-based features for a match.

    Args:
        team_matches: Match tuples whose first element is the match
            timestamp in milliseconds. Order does not matter: the most
            recent earlier match is found by taking the maximum timestamp.
        match_mst: Timestamp of the match being described, in milliseconds
            since the Unix epoch.

    Returns:
        A dict with:
          * ``days_rest`` -- days since the team's most recent earlier
            match, capped at 60 and rounded to 2 decimals (14.0 when there
            is no earlier match);
          * ``match_month`` -- UTC calendar month of the match (6 when the
            timestamp cannot be converted);
          * ``is_season_start`` -- 1.0 for July/August, else 0.0;
          * ``is_season_end`` -- 1.0 for May/June, else 0.0.
        Both season flags are 0.0 when the timestamp cannot be converted.
    """
    from datetime import datetime, timezone

    # Days since last match. max() rather than "last element" so that an
    # unsorted history cannot produce an inflated rest period.
    earlier_stamps = [m[0] for m in team_matches if m[0] < match_mst]
    if earlier_stamps:
        days_rest = (match_mst - max(earlier_stamps)) / 86_400_000  # ms to days
        days_rest = min(days_rest, 60.0)  # cap at 60 days
    else:
        days_rest = 14.0

    # Month and season flags. A timezone-aware conversion replaces the
    # deprecated datetime.utcfromtimestamp; the resulting UTC month is the same.
    try:
        month = datetime.fromtimestamp(match_mst / 1000, tz=timezone.utc).month
    except (TypeError, ValueError, OverflowError, OSError):
        # Unusable or out-of-range timestamp: neutral month, no season flags.
        month = 6
        is_season_start = 0.0
        is_season_end = 0.0
    else:
        is_season_start = 1.0 if month in (7, 8) else 0.0
        is_season_end = 1.0 if month in (5, 6) else 0.0

    return {
        "days_rest": round(days_rest, 2),
        "match_month": month,
        "is_season_start": is_season_start,
        "is_season_end": is_season_end,
    }
def calc_advanced_h2h(
    team_matches: List[Tuple],
    home_id: int,
    away_id: int,
    before_date: int,
) -> Dict[str, float]:
    """Derive head-to-head features against one opponent.

    Matches are kept when their opponent id (index 4) equals ``away_id`` and
    their timestamp (index 0) is strictly below ``before_date``. Each tuple
    is unpacked as ``(mst, is_home, team_goals, opp_goals, opp_id)``. The
    "most recent" selections are taken from the tail of the list, so the
    input is presumably ordered oldest-to-newest -- confirm against the
    caller.

    Args:
        team_matches: Match tuples for the team whose perspective
            ``team_goals`` reflects.
        home_id: Not used by this function.
        away_id: Opponent id used to select the meetings.
        before_date: Exclusive upper bound on the match timestamp.

    Returns:
        A dict with:
          * ``h2h_home_goals_avg`` / ``h2h_away_goals_avg`` -- over the last
            10 meetings, goals credited to whichever side played at home /
            away in each meeting;
          * ``h2h_venue_advantage`` -- share of the team's home meetings it
            won (0.5 when it hosted none of them);
          * ``h2h_recent_trend`` -- mean points (win 1, draw 0.5, loss 0) in
            the last 3 meetings minus the mean over all meetings, rounded to
            4 decimals; 0.0 with fewer than 4 meetings.
        A fixed set of defaults is returned when there are no meetings.
    """

    def _points(match: Tuple) -> float:
        # Win = 1, draw = 0.5, loss = 0 from the team's perspective.
        if match[2] > match[3]:
            return 1.0
        if match[2] == match[3]:
            return 0.5
        return 0.0

    meetings = [
        match
        for match in team_matches
        if match[4] == away_id and match[0] < before_date
    ]
    if not meetings:
        return {
            "h2h_home_goals_avg": 1.3,
            "h2h_away_goals_avg": 1.1,
            "h2h_recent_trend": 0.0,
            "h2h_venue_advantage": 0.0,
        }

    latest = meetings[-10:]
    home_side_goals = 0
    away_side_goals = 0
    hosted = 0
    hosted_wins = 0
    for _, at_home, scored, conceded, _ in latest:
        if not at_home:
            # The team was the visitor: its goals count as away goals.
            home_side_goals += conceded
            away_side_goals += scored
            continue
        home_side_goals += scored
        away_side_goals += conceded
        hosted += 1
        if scored > conceded:
            hosted_wins += 1

    sample_size = len(latest)
    # NOTE(review): the no-home-meeting fallback is 0.5 here, whereas the
    # no-meetings default above is 0.0 -- confirm which is intended.
    if hosted > 0:
        venue_advantage = hosted_wins / hosted
    else:
        venue_advantage = 0.5

    features = {
        "h2h_home_goals_avg": home_side_goals / sample_size,
        "h2h_away_goals_avg": away_side_goals / sample_size,
        "h2h_venue_advantage": venue_advantage,
    }

    # Recent trend: the last three meetings against the full history.
    if len(meetings) < 4:
        features["h2h_recent_trend"] = 0.0
    else:
        recent_mean = sum(_points(match) for match in meetings[-3:]) / 3
        overall_mean = sum(_points(match) for match in meetings) / len(meetings)
        features["h2h_recent_trend"] = round(recent_mean - overall_mean, 4)

    return features
def calc_strength_diff(
    home_form: Dict[str, float],
    away_form: Dict[str, float],
    home_elo: Dict[str, float],
    away_elo: Dict[str, float],
    home_momentum: float,
    away_momentum: float,
    upset_potential: float,
) -> Dict[str, float]:
    """Combine form, ELO and momentum inputs into differential features.

    Args:
        home_form: Home-side form dict; reads ``goals_avg`` (default 1.3),
            ``conceded_avg`` (default 1.2) and ``scoring_rate``
            (default 0.75).
        away_form: Away-side form dict, read with the same keys/defaults.
        home_elo: Home-side ELO dict; reads ``overall`` (default 1500).
        away_elo: Away-side ELO dict; reads ``overall`` (default 1500).
        home_momentum: Home-side momentum value.
        away_momentum: Away-side momentum value.
        upset_potential: Upset score that is scaled by the ELO gap.

    Returns:
        A dict with ``attack_vs_defense_home``, ``attack_vs_defense_away``,
        ``xg_diff``, ``form_momentum_interaction``, ``elo_form_consistency``
        and ``upset_x_elo_gap``. All but the consistency flag are rounded to
        4 decimals.
    """
    home_attack = home_form.get("goals_avg", 1.3)
    away_defense = away_form.get("conceded_avg", 1.2)
    away_attack = away_form.get("goals_avg", 1.3)
    home_defense = home_form.get("conceded_avg", 1.2)

    # Rough expected-goals gap: each side's expected goals is the mean of
    # its own scoring average and the opponent's conceding average.
    home_expected = (home_attack + away_defense) / 2
    away_expected = (away_attack + home_defense) / 2
    expected_gap = home_expected - away_expected

    # Momentum gap multiplied by the scoring-rate gap.
    momentum_gap = home_momentum - away_momentum
    scoring_rate_gap = (
        home_form.get("scoring_rate", 0.75) - away_form.get("scoring_rate", 0.75)
    )
    interaction = momentum_gap * scoring_rate_gap

    # Flag is set only when the ELO gap and the attack gap are both
    # strictly positive or both strictly negative.
    rating_gap = home_elo.get("overall", 1500) - away_elo.get("overall", 1500)
    attack_gap = home_attack - away_attack
    if rating_gap > 0 and attack_gap > 0:
        agreement = 1.0
    elif rating_gap < 0 and attack_gap < 0:
        agreement = 1.0
    else:
        agreement = 0.0

    # Upset potential scaled by the absolute ELO gap in units of 400 points.
    scaled_upset = upset_potential * (abs(rating_gap) / 400.0)

    return {
        # Positive means the attack outscores what the opposing defence concedes.
        "attack_vs_defense_home": round(home_attack - away_defense, 4),
        "attack_vs_defense_away": round(away_attack - home_defense, 4),
        "xg_diff": round(expected_gap, 4),
        "form_momentum_interaction": round(interaction, 4),
        "elo_form_consistency": agreement,
        "upset_x_elo_gap": round(scaled_upset, 4),
    }