# iddaai-be/ai-engine/features/rolling_features.py

"""
V27 Rolling Window Feature Calculator
======================================
Computes rolling averages over 5/10/20 match windows,
with home/away splits and trend detection.
"""
from __future__ import annotations
from typing import Dict, List, Tuple
import math


def calc_rolling_features(
    team_matches: List[Tuple],  # [(mst, is_home, team_goals, opp_goals, opp_id), ...]
    before_date: int,
    team_is_home: bool,
) -> Dict[str, float]:
    """Summarise a team's scoring record over its most recent matches.

    Only matches whose ``mst`` (index 0) is strictly less than
    ``before_date`` are considered. "Most recent" means the tail of the
    list: nothing is sorted here, so ``team_matches`` is presumably ordered
    oldest-to-newest (verify against the caller).

    Args:
        team_matches: Match tuples laid out as
            ``(mst, is_home, team_goals, opp_goals, opp_id)``.
        before_date: Exclusive upper bound on ``mst``.
        team_is_home: Venue flag compared against each tuple's ``is_home``
            to pick the matches used for the ``venue_*`` averages.

    Returns:
        A dict with goals/conceded averages over the last 5, 10 and 20
        matches, the share of the last 5 matches with zero goals conceded,
        goals/conceded averages over the last 10 same-venue matches, and
        ``goal_trend`` (mean goals of the last 3 matches minus mean goals of
        the 3 before them). With fewer than 3 eligible matches a dict of
        fixed default values is returned instead.
    """
    fallback = {
        "rolling5_goals_avg": 1.3, "rolling5_conceded_avg": 1.2,
        "rolling10_goals_avg": 1.3, "rolling10_conceded_avg": 1.2,
        "rolling20_goals_avg": 1.3, "rolling20_conceded_avg": 1.2,
        "rolling5_clean_sheets": 0.25,
        "venue_goals_avg": 1.3, "venue_conceded_avg": 1.2,
        "goal_trend": 0.0,
    }

    history = [row for row in team_matches if row[0] < before_date]
    if len(history) < 3:
        # Too little history to average over; hand back the fixed defaults.
        return fallback

    def mean_of(rows, column):
        # Arithmetic mean of one tuple column over a non-empty list of rows.
        return sum(row[column] for row in rows) / len(rows)

    features = {}

    # A history shorter than the window simply contributes every match.
    for span in (5, 10, 20):
        tail = history[-span:]
        features[f"rolling{span}_goals_avg"] = mean_of(tail, 2)
        features[f"rolling{span}_conceded_avg"] = mean_of(tail, 3)

    # Share of the last five matches in which no goal was conceded.
    last_five = history[-5:]
    shutouts = [row for row in last_five if row[3] == 0]
    features["rolling5_clean_sheets"] = len(shutouts) / len(last_five)

    # Same-venue form: up to ten most recent matches with a matching flag.
    same_venue = [row for row in history if row[1] == team_is_home][-10:]
    if same_venue:
        features["venue_goals_avg"] = mean_of(same_venue, 2)
        features["venue_conceded_avg"] = mean_of(same_venue, 3)
    else:
        features["venue_goals_avg"] = fallback["venue_goals_avg"]
        features["venue_conceded_avg"] = fallback["venue_conceded_avg"]

    # Trend needs two full groups of three matches; otherwise it is flat.
    trend = 0.0
    if len(history) >= 6:
        latest = sum(row[2] for row in history[-3:]) / 3
        earlier = sum(row[2] for row in history[-6:-3]) / 3
        trend = latest - earlier
    features["goal_trend"] = trend

    return features


def calc_league_quality(
    all_matches: List[Tuple],  # all FT matches in this league
) -> Dict[str, float]:
    """Derive outcome rates and a reliability score for one league.

    Each match tuple is read at indexes 2 and 3 only. Judging by the output
    key names these are presumably the home and away goal counts; confirm
    the tuple layout against the caller.

    Args:
        all_matches: Match tuples for the league.

    Returns:
        A dict holding, each rounded to 4 decimals: the share of matches
        where index 2 exceeds index 3 (``league_home_win_rate``), where they
        are equal (``league_draw_rate``), where both are positive
        (``league_btts_rate``), where their sum exceeds 2.5
        (``league_ou25_rate``), and ``league_reliability_score`` clamped to
        [0.2, 0.95]. With fewer than 20 matches fixed defaults are returned.
    """
    total = len(all_matches)
    if total < 20:
        return {
            "league_home_win_rate": 0.45,
            "league_draw_rate": 0.25,
            "league_btts_rate": 0.50,
            "league_ou25_rate": 0.50,
            "league_reliability_score": 0.50,
        }

    # Tally all four outcome types in a single pass over the matches.
    wins_count = 0
    level_count = 0
    both_scored_count = 0
    over_count = 0
    for row in all_matches:
        left, right = row[2], row[3]
        if left > right:
            wins_count += 1
        if left == right:
            level_count += 1
        if left > 0 and right > 0:
            both_scored_count += 1
        if (left + right) > 2.5:
            over_count += 1

    hw_rate = wins_count / total
    dr_rate = level_count / total

    # Score drops as the league drifts from a 0.45 win rate and a 0.27 draw
    # rate (draw deviation counts half), then is clamped to [0.2, 0.95].
    raw_score = 1.0 - abs(hw_rate - 0.45) - abs(dr_rate - 0.27) * 0.5
    clamped = min(0.95, max(0.2, raw_score))

    return {
        "league_home_win_rate": round(hw_rate, 4),
        "league_draw_rate": round(dr_rate, 4),
        "league_btts_rate": round(both_scored_count / total, 4),
        "league_ou25_rate": round(over_count / total, 4),
        "league_reliability_score": round(clamped, 4),
    }


def calc_time_features(
    team_matches: List[Tuple],
    match_mst: int,
) -> Dict[str, float]:
    """Calculate rest-day and calendar features for an upcoming match.

    Args:
        team_matches: Match tuples whose index 0 is the match timestamp in
            epoch milliseconds. Only entries strictly earlier than
            ``match_mst`` are considered; list order does not matter.
        match_mst: Timestamp of the match being described, in epoch
            milliseconds.

    Returns:
        A dict with:
        - ``days_rest``: days since the team's latest earlier match, capped
          at 60 and rounded to 2 decimals; 14.0 when there is no earlier
          match.
        - ``match_month``: UTC calendar month of ``match_mst`` (1-12), or 6
          when the timestamp cannot be converted.
        - ``is_season_start``: 1.0 when the month is July or August.
        - ``is_season_end``: 1.0 when the month is May or June.
          Both flags are 0.0 when the timestamp cannot be converted.
    """
    from datetime import datetime, timezone

    # Days since last match. Take the maximum earlier timestamp rather than
    # the last list element so the result does not depend on input ordering.
    earlier = [m[0] for m in team_matches if m[0] < match_mst]
    if earlier:
        days_rest = (match_mst - max(earlier)) / 86_400_000  # ms to days
        days_rest = min(days_rest, 60.0)  # cap at 60 days
    else:
        days_rest = 14.0

    # Month and season flags. The timezone-aware conversion replaces the
    # deprecated datetime.utcfromtimestamp and yields the same UTC month.
    try:
        month = datetime.fromtimestamp(match_mst / 1000, tz=timezone.utc).month
    except (OverflowError, OSError, ValueError, TypeError):
        # Unconvertible timestamp: neutral month, both season flags off.
        month = 6
        is_season_start = 0.0
        is_season_end = 0.0
    else:
        is_season_start = 1.0 if month in (7, 8) else 0.0
        is_season_end = 1.0 if month in (5, 6) else 0.0

    return {
        "days_rest": round(days_rest, 2),
        "match_month": month,
        "is_season_start": is_season_start,
        "is_season_end": is_season_end,
    }


def calc_advanced_h2h(
    team_matches: List[Tuple],
    home_id: int,
    away_id: int,
    before_date: int,
) -> Dict[str, float]:
    """Summarise earlier meetings between a team and one opponent.

    A meeting is any tuple in ``team_matches`` whose opponent id (index 4)
    equals ``away_id`` and whose timestamp (index 0) is strictly less than
    ``before_date``. Tuples are unpacked as
    ``(mst, is_home, team_goals, opp_goals, opp_id)``. The list is used in
    its given order, so it is presumably oldest-to-newest (verify against
    the caller).

    Args:
        team_matches: Match tuples of the team treated as the home side.
        home_id: Not read by this function; kept for interface stability.
        away_id: Opponent id used to select meetings.
        before_date: Exclusive upper bound on the match timestamp.

    Returns:
        A dict with goal averages over the last 10 meetings, attributed to
        whichever side actually hosted each meeting
        (``h2h_home_goals_avg`` / ``h2h_away_goals_avg``); the team's win
        share in the meetings it hosted (``h2h_venue_advantage``, 0.5 when
        it hosted none); and ``h2h_recent_trend``, the team's average result
        points (win 1, draw 0.5, loss 0) over the last 3 meetings minus the
        average over all meetings, rounded to 4 decimals (0.0 with fewer
        than 4 meetings). With no meetings at all, fixed defaults are
        returned.
    """
    meetings = [
        row for row in team_matches
        if row[4] == away_id and row[0] < before_date
    ]
    if not meetings:
        return {
            "h2h_home_goals_avg": 1.3,
            "h2h_away_goals_avg": 1.1,
            "h2h_recent_trend": 0.0,
            "h2h_venue_advantage": 0.0,
        }

    latest = meetings[-10:]
    host_goals = 0
    guest_goals = 0
    hosted = 0
    hosted_wins = 0

    for _, at_home, scored, conceded, _ in latest:
        if not at_home:
            # Team travelled: the opponent's goals belong to the host side.
            host_goals += conceded
            guest_goals += scored
            continue
        host_goals += scored
        guest_goals += conceded
        hosted += 1
        if scored > conceded:
            hosted_wins += 1

    def points(row):
        # Result points from the team's perspective: win 1, draw 0.5, loss 0.
        if row[2] > row[3]:
            return 1.0
        if row[2] == row[3]:
            return 0.5
        return 0.0

    # Trend compares the last three meetings with the full meeting history.
    trend = 0.0
    if len(meetings) >= 4:
        recent_avg = sum(points(row) for row in meetings[-3:]) / 3
        overall_avg = sum(points(row) for row in meetings) / len(meetings)
        trend = round(recent_avg - overall_avg, 4)

    played = len(latest)
    return {
        "h2h_home_goals_avg": host_goals / played,
        "h2h_away_goals_avg": guest_goals / played,
        "h2h_venue_advantage": hosted_wins / hosted if hosted > 0 else 0.5,
        "h2h_recent_trend": trend,
    }


def calc_strength_diff(
    home_form: Dict[str, float],
    away_form: Dict[str, float],
    home_elo: Dict[str, float],
    away_elo: Dict[str, float],
    home_momentum: float,
    away_momentum: float,
    upset_potential: float,
) -> Dict[str, float]:
    """Combine form, ELO and momentum inputs into differential features.

    Args:
        home_form: Home-side form dict; reads ``goals_avg`` (default 1.3),
            ``conceded_avg`` (default 1.2) and ``scoring_rate``
            (default 0.75).
        away_form: Away-side form dict with the same keys and defaults.
        home_elo: Home-side rating dict; reads ``overall`` (default 1500).
        away_elo: Away-side rating dict; reads ``overall`` (default 1500).
        home_momentum: Momentum value for the home side.
        away_momentum: Momentum value for the away side.
        upset_potential: Multiplier applied to the scaled ELO gap.

    Returns:
        A dict of six features. All are rounded to 4 decimals except
        ``elo_form_consistency``, which is 1.0 when the ELO difference and
        the goals-average difference are both positive or both negative,
        and 0.0 otherwise (including when either is exactly zero).
    """
    home_goals = home_form.get("goals_avg", 1.3)
    home_conceded = home_form.get("conceded_avg", 1.2)
    away_goals = away_form.get("goals_avg", 1.3)
    away_conceded = away_form.get("conceded_avg", 1.2)

    # Each side's expected output is the mean of its own scoring average and
    # the other side's conceding average; the feature is home minus away.
    home_expected = (home_goals + away_conceded) / 2
    away_expected = (away_goals + home_conceded) / 2

    # Momentum gap scaled by the gap in scoring rate.
    momentum_gap = home_momentum - away_momentum
    scoring_gap = home_form.get("scoring_rate", 0.75) - away_form.get("scoring_rate", 0.75)

    # Do the ratings and recent scoring point at the same side?
    elo_delta = home_elo.get("overall", 1500) - away_elo.get("overall", 1500)
    goals_delta = home_goals - away_goals
    if elo_delta > 0:
        agrees = goals_delta > 0
    elif elo_delta < 0:
        agrees = goals_delta < 0
    else:
        agrees = False

    # Upset potential weighted by the absolute ELO gap in units of 400.
    weighted_upset = upset_potential * (abs(elo_delta) / 400.0)

    return {
        "attack_vs_defense_home": round(home_goals - away_conceded, 4),
        "attack_vs_defense_away": round(away_goals - home_conceded, 4),
        "xg_diff": round(home_expected - away_expected, 4),
        "form_momentum_interaction": round(momentum_gap * scoring_gap, 4),
        "elo_form_consistency": 1.0 if agrees else 0.0,
        "upset_x_elo_gap": round(weighted_upset, 4),
    }