Files
iddaai-be/ai-engine/features/rolling_features.py
T
2026-04-22 02:17:02 +03:00

244 lines
7.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
V27 Rolling Window Feature Calculator
======================================
Computes rolling averages over 5/10/20 match windows,
with home/away splits and trend detection.
"""
from __future__ import annotations
from typing import Dict, List, Tuple
import math
def calc_rolling_features(
    team_matches: List[Tuple],  # [(mst, is_home, team_goals, opp_goals, opp_id), ...]
    before_date: int,
    team_is_home: bool,
) -> Dict[str, float]:
    """Build rolling-window scoring features from a team's earlier matches.

    Only matches whose timestamp (tuple index 0) is strictly smaller than
    ``before_date`` are considered. Windows are taken from the END of the
    filtered list, so the list is presumably ordered oldest-to-newest
    (NOTE(review): confirm with the caller; no sorting happens here).

    Args:
        team_matches: Match tuples laid out as
            ``(mst, is_home, team_goals, opp_goals, opp_id)``.
        before_date: Exclusive upper bound on the match timestamp.
        team_is_home: Venue flag used to select the venue-specific matches
            (rows whose ``is_home`` equals this value).

    Returns:
        A dict with goals/conceded averages over the last 5, 10 and 20
        matches, the clean-sheet rate over the last 5, venue-specific
        averages over the last 10 same-venue matches, and ``goal_trend``
        (mean goals of the last 3 minus mean goals of the 3 before them).
        Fixed fallback values are returned when fewer than 3 earlier
        matches exist.
    """
    fallback = {
        "rolling5_goals_avg": 1.3, "rolling5_conceded_avg": 1.2,
        "rolling10_goals_avg": 1.3, "rolling10_conceded_avg": 1.2,
        "rolling20_goals_avg": 1.3, "rolling20_conceded_avg": 1.2,
        "rolling5_clean_sheets": 0.25,
        "venue_goals_avg": 1.3, "venue_conceded_avg": 1.2,
        "goal_trend": 0.0,
    }

    history = [row for row in team_matches if row[0] < before_date]
    if len(history) < 3:
        return fallback

    features = {}

    # A negative slice start is clamped, so a short history simply yields
    # every available match for the larger windows.
    for span in (5, 10, 20):
        window_rows = history[-span:]
        count = len(window_rows)
        scored = sum(row[2] for row in window_rows)
        conceded = sum(row[3] for row in window_rows)
        features[f"rolling{span}_goals_avg"] = scored / count
        features[f"rolling{span}_conceded_avg"] = conceded / count

    # Share of the last (up to) five matches with zero goals conceded.
    last_five = history[-5:]
    shutouts = [row for row in last_five if row[3] == 0]
    features["rolling5_clean_sheets"] = len(shutouts) / len(last_five)

    # Venue split: only matches played at the same venue type, last 10.
    same_venue = [row for row in history if row[1] == team_is_home][-10:]
    if same_venue:
        venue_count = len(same_venue)
        features["venue_goals_avg"] = sum(row[2] for row in same_venue) / venue_count
        features["venue_conceded_avg"] = sum(row[3] for row in same_venue) / venue_count
    else:
        features["venue_goals_avg"] = fallback["venue_goals_avg"]
        features["venue_conceded_avg"] = fallback["venue_conceded_avg"]

    # Trend needs two full blocks of three matches; otherwise it is neutral.
    trend = 0.0
    if len(history) >= 6:
        newest_avg = sum(row[2] for row in history[-3:]) / 3
        earlier_avg = sum(row[2] for row in history[-6:-3]) / 3
        trend = newest_avg - earlier_avg
    features["goal_trend"] = trend

    return features
def calc_league_quality(
    all_matches: List[Tuple],  # all FT matches in this league
) -> Dict[str, float]:
    """Summarise league-wide outcome rates and a reliability score.

    Tuple indices 2 and 3 are compared as the two sides' goal counts;
    index 2 is counted as the home side (a "home win" is ``m[2] > m[3]``).
    NOTE(review): confirm the tuple layout matches what the caller passes.

    Args:
        all_matches: Finished matches of a single league.

    Returns:
        Home-win, draw, both-teams-scored and over-2.5-goals rates plus a
        reliability score clamped to [0.2, 0.95], all rounded to 4 decimals.
        With fewer than 20 matches a fixed set of fallback values is
        returned instead.
    """
    total = len(all_matches)
    if total < 20:
        return {
            "league_home_win_rate": 0.45,
            "league_draw_rate": 0.25,
            "league_btts_rate": 0.50,
            "league_ou25_rate": 0.50,
            "league_reliability_score": 0.50,
        }

    # Single pass over the matches, tallying each outcome independently.
    home_wins = 0
    draws = 0
    both_scored = 0
    over_25 = 0
    for row in all_matches:
        first_goals, second_goals = row[2], row[3]
        if first_goals > second_goals:
            home_wins += 1
        if first_goals == second_goals:
            draws += 1
        if first_goals > 0 and second_goals > 0:
            both_scored += 1
        if (first_goals + second_goals) > 2.5:
            over_25 += 1

    hw_rate = home_wins / total
    dr_rate = draws / total

    # The further the league sits from a 45% home-win / 27% draw profile,
    # the lower its score; the draw deviation counts half as much.
    predictability = 1.0 - abs(hw_rate - 0.45) - abs(dr_rate - 0.27) * 0.5
    capped = min(0.95, predictability)
    reliability = max(0.2, capped)

    return {
        "league_home_win_rate": round(hw_rate, 4),
        "league_draw_rate": round(dr_rate, 4),
        "league_btts_rate": round(both_scored / total, 4),
        "league_ou25_rate": round(over_25 / total, 4),
        "league_reliability_score": round(reliability, 4),
    }
def calc_time_features(
    team_matches: List[Tuple],
    match_mst: int,
) -> Dict[str, float]:
    """Calculate time-based features for a match.

    Timestamps are epoch milliseconds (they are divided by 86_400_000 to
    get days and by 1000 to get seconds).

    Args:
        team_matches: Match tuples whose index 0 is the match timestamp.
        match_mst: Timestamp of the match the features are built for.

    Returns:
        A dict with:
        - ``days_rest``: days since the most recent earlier match, capped
          at 60 and rounded to 2 decimals; 14.0 when there is no earlier
          match.
        - ``match_month``: UTC calendar month (1-12) of ``match_mst``.
        - ``is_season_start``: 1.0 for July/August, else 0.0.
        - ``is_season_end``: 1.0 for May/June, else 0.0.
        If the timestamp cannot be converted to a date, the month falls
        back to 6 with both season flags set to 0.0.
    """
    from datetime import datetime, timezone

    ms_per_day = 86_400_000
    max_rest_days = 60.0

    # Take the latest earlier timestamp explicitly rather than relying on
    # the list being ordered, so unsorted input cannot inflate the rest
    # period. For chronologically sorted input this equals the last entry.
    earlier = [m[0] for m in team_matches if m[0] < match_mst]
    if earlier:
        days_rest = (match_mst - max(earlier)) / ms_per_day
        days_rest = min(days_rest, max_rest_days)
    else:
        days_rest = 14.0

    # Timezone-aware conversion; datetime.utcfromtimestamp() is deprecated
    # since Python 3.12. Only conversion failures (bad type or out-of-range
    # timestamp) trigger the neutral fallback.
    try:
        month = datetime.fromtimestamp(match_mst / 1000, tz=timezone.utc).month
    except (TypeError, ValueError, OverflowError, OSError):
        month = 6
        is_season_start = 0.0
        is_season_end = 0.0
    else:
        is_season_start = 1.0 if month in (7, 8) else 0.0
        is_season_end = 1.0 if month in (5, 6) else 0.0

    return {
        "days_rest": round(days_rest, 2),
        "match_month": month,
        "is_season_start": is_season_start,
        "is_season_end": is_season_end,
    }
def calc_advanced_h2h(
    team_matches: List[Tuple],
    home_id: int,
    away_id: int,
    before_date: int,
) -> Dict[str, float]:
    """Derive head-to-head features against one specific opponent.

    Matches are kept when their opponent id (tuple index 4) equals
    ``away_id`` and their timestamp (index 0) is strictly before
    ``before_date``. Goal and venue figures use the last 10 such meetings;
    the trend uses all of them. ``home_id`` is accepted but not read.
    NOTE(review): ``team_matches`` is presumably the match list of the
    ``home_id`` team, laid out as
    ``(mst, is_home, team_goals, opp_goals, opp_id)`` — confirm.

    Returns:
        - ``h2h_home_goals_avg`` / ``h2h_away_goals_avg``: average goals of
          the side that played at home / away in each meeting.
        - ``h2h_venue_advantage``: win share of the team in the meetings it
          hosted, or 0.5 if it hosted none of them.
        - ``h2h_recent_trend``: points per game (win 1, draw 0.5) over the
          last 3 meetings minus the same over all meetings, rounded to 4
          decimals; 0.0 with fewer than 4 meetings.
        Fixed fallback values are returned when there are no meetings.
    """
    meetings = [
        row for row in team_matches
        if row[4] == away_id and row[0] < before_date
    ]
    if not meetings:
        return {
            "h2h_home_goals_avg": 1.3,
            "h2h_away_goals_avg": 1.1,
            "h2h_recent_trend": 0.0,
            "h2h_venue_advantage": 0.0,
        }

    def _points(row: Tuple) -> float:
        # Result of one meeting from the team's perspective.
        if row[2] > row[3]:
            return 1.0
        if row[2] == row[3]:
            return 0.5
        return 0.0

    window = meetings[-10:]
    played = len(window)

    # Attribute goals to whichever side was at home in that meeting.
    host_side_goals = sum(row[2] if row[1] else row[3] for row in window)
    guest_side_goals = sum(row[3] if row[1] else row[2] for row in window)

    hosted = [row for row in window if row[1]]
    hosted_wins = sum(1 for row in hosted if row[2] > row[3])
    if len(hosted) > 0:
        venue_advantage = hosted_wins / len(hosted)
    else:
        venue_advantage = 0.5

    features = {
        "h2h_home_goals_avg": host_side_goals / played,
        "h2h_away_goals_avg": guest_side_goals / played,
        "h2h_venue_advantage": venue_advantage,
    }

    # Recent form vs. long-run record; needs at least four meetings.
    if len(meetings) < 4:
        features["h2h_recent_trend"] = 0.0
    else:
        recent_ppg = sum(_points(row) for row in meetings[-3:]) / 3
        overall_ppg = sum(_points(row) for row in meetings) / len(meetings)
        features["h2h_recent_trend"] = round(recent_ppg - overall_ppg, 4)

    return features
def calc_strength_diff(
    home_form: Dict[str, float],
    away_form: Dict[str, float],
    home_elo: Dict[str, float],
    away_elo: Dict[str, float],
    home_momentum: float,
    away_momentum: float,
    upset_potential: float,
) -> Dict[str, float]:
    """Combine form, ELO and momentum inputs into differential features.

    Missing dict keys fall back to 1.3 (``goals_avg``), 1.2
    (``conceded_avg``), 0.75 (``scoring_rate``) and 1500 (``overall``).

    Returns:
        - ``attack_vs_defense_home`` / ``attack_vs_defense_away``: a side's
          goals average minus the opponent's conceded average.
        - ``xg_diff``: mean of (home attack, away defense) minus mean of
          (away attack, home defense).
        - ``form_momentum_interaction``: momentum gap times scoring-rate gap.
        - ``elo_form_consistency``: 1.0 when the ELO gap and the attack gap
          are both positive or both negative, otherwise 0.0.
        - ``upset_x_elo_gap``: ``upset_potential`` scaled by |ELO gap| / 400.
        All values except the consistency flag are rounded to 4 decimals.
    """
    home_attack = home_form.get("goals_avg", 1.3)
    away_defense = away_form.get("conceded_avg", 1.2)
    away_attack = away_form.get("goals_avg", 1.3)
    home_defense = home_form.get("conceded_avg", 1.2)

    # Expected-goals style proxy: each side's attack blended with the
    # opposing defense's leakiness.
    home_expected = (home_attack + away_defense) / 2
    away_expected = (away_attack + home_defense) / 2

    momentum_gap = home_momentum - away_momentum
    scoring_gap = (
        home_form.get("scoring_rate", 0.75) - away_form.get("scoring_rate", 0.75)
    )

    rating_gap = home_elo.get("overall", 1500) - away_elo.get("overall", 1500)
    attack_gap = home_attack - away_attack

    # Do rating and current scoring form point at the same favourite?
    if rating_gap > 0:
        same_direction = attack_gap > 0
    elif rating_gap < 0:
        same_direction = attack_gap < 0
    else:
        same_direction = False

    scaled_upset = upset_potential * (abs(rating_gap) / 400.0)

    return {
        "attack_vs_defense_home": round(home_attack - away_defense, 4),
        "attack_vs_defense_away": round(away_attack - home_defense, 4),
        "xg_diff": round(home_expected - away_expected, 4),
        "form_momentum_interaction": round(momentum_gap * scoring_gap, 4),
        "elo_form_consistency": 1.0 if same_direction else 0.0,
        "upset_x_elo_gap": round(scaled_upset, 4),
    }