first (part 2: other directories)

2026-04-16 15:11:25 +03:00
parent 7814e0bc6b
commit 2f0b85a0c7
203 changed files with 59989 additions and 0 deletions
@@ -0,0 +1,167 @@
+"""
+Shared VQWEN feature contract
+=============================
+
+One place defines how VQWEN features are produced.
+Both training and runtime inference must use this module so the model sees
+the same feature semantics in historical data and live analysis.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import numpy as np
+
+# Canonical, ordered list of VQWEN feature names. ``row_to_array`` emits the
+# values in exactly this order, so reordering, renaming, or removing an entry
+# changes the layout of the array it returns. ``build_vqwen_feature_row``
+# produces one value for every name listed here.
+FEATURE_COLUMNS = [
+    "elo_diff",  # home Elo minus away Elo
+    "h_xg",  # home expected-goals estimate
+    "a_xg",  # away expected-goals estimate
+    "total_xg",  # h_xg + a_xg
+    "pow_diff",  # home power score minus away power score
+    "rest_diff",  # home rest days minus away rest days
+    "h_fat",  # home fatigue multiplier (0.85 / 0.95 / 1.0)
+    "a_fat",  # away fatigue multiplier (0.85 / 0.95 / 1.0)
+    "imp_h",  # implied home-win probability, clamped to [0.01, 0.98]
+    "imp_d",  # implied draw probability, clamped to [0.01, 0.98]
+    "imp_a",  # implied away-win probability, clamped to [0.01, 0.98]
+    "h_xi",  # home pre-match lineup availability, clamped to [0, 1]
+    "a_xi",  # away pre-match lineup availability, clamped to [0, 1]
+    "h2h_h_wr",  # head-to-head home win rate, clamped to [0, 1]
+    "form_diff",  # form-score difference adjusted for squad/referee/absences
+]
+
+
+@dataclass(slots=True)
+class VqwenFeatureInput:
+    """Raw per-match inputs consumed by ``build_vqwen_feature_row``.
+
+    The first fifteen fields are required; the remaining fields have defaults
+    and may be omitted. Field order is part of the generated positional
+    constructor, so new fields should be appended rather than inserted.
+    """
+
+    # Ratings; only the difference (home - away) is emitted, as "elo_diff".
+    home_elo: float
+    away_elo: float
+    # Scoring rates. Each side's "scored" is averaged with the opponent's
+    # "conceded" to form the xG features; both also feed the power score.
+    home_avg_goals_scored: float
+    away_avg_goals_scored: float
+    home_avg_goals_conceded: float
+    away_avg_goals_conceded: float
+    # Used only in the power score (weights 2.0 and 0.1 respectively).
+    home_avg_shots_on_target: float
+    away_avg_shots_on_target: float
+    home_avg_possession: float
+    away_avg_possession: float
+    # Drive the fatigue multipliers (thresholds at 3.0 and 5.0) and "rest_diff".
+    home_rest_days: float
+    away_rest_days: float
+    # Clamped to [0.01, 0.98] on output; each value is clamped independently
+    # and the three are not renormalised.
+    # NOTE(review): presumably bookmaker-implied probabilities - confirm source.
+    implied_prob_home: float
+    implied_prob_draw: float
+    implied_prob_away: float
+    # Pre-match lineup availability, clamped to [0, 1]; emitted under the
+    # legacy column names "h_xi" / "a_xi".
+    home_lineup_availability: float = 1.0
+    away_lineup_availability: float = 1.0
+    # Clamped to [0, 1] and emitted as "h2h_h_wr".
+    h2h_home_win_rate: float = 0.5
+    # Their difference (home - away) is the base of "form_diff".
+    home_form_score: float = 0.0
+    away_form_score: float = 0.0
+    # Averaged and divided by 2.6 to get the goal-environment multiplier, so
+    # the defaults yield a neutral multiplier of 1.0.
+    league_avg_goals: float = 2.6
+    referee_avg_goals: float = 2.6
+    # Clamped to [-0.25, 0.25]; positive values favour the home side.
+    referee_home_bias: float = 0.0
+    # Feed the squad multipliers and "form_diff" via their difference, and the
+    # power score via their absolute values.
+    home_squad_strength: float = 0.5
+    away_squad_strength: float = 0.5
+    home_key_players: float = 0.0
+    away_key_players: float = 0.0
+    # Clamped to [0, 1]; lowers both sides' squad multipliers and "form_diff".
+    missing_players_impact: float = 0.0
+
+
+def fatigue_multiplier(rest_days: float) -> float:
+    """Return the fatigue scaling factor for a given number of rest days.
+
+    Fewer than 3 rest days gives 0.85, fewer than 5 gives 0.95, and anything
+    else (including values that compare false against both cut-offs) gives 1.0.
+    """
+    # Ordered (exclusive upper bound, multiplier) pairs; the first match wins.
+    cutoffs = ((3.0, 0.85), (5.0, 0.95))
+    for upper_bound, factor in cutoffs:
+        if rest_days < upper_bound:
+            return factor
+    return 1.0
+
+
+def clamp(value: float, lower: float, upper: float) -> float:
+    """Coerce ``value`` to float and limit it to the range ``[lower, upper]``.
+
+    The lower bound is applied first and the upper bound second, so if
+    ``lower`` exceeds ``upper`` the result is ``upper``.
+    """
+    number = float(value)
+    # Strict comparisons keep ``number`` on ties, matching max()/min().
+    floored = lower if lower > number else number
+    return upper if upper < floored else floored
+
+
+def _blend_goal_rates(scored: float, conceded: float) -> float:
+    """Average an attack rate with the opposing defence rate, floored at 0.05."""
+    return max(0.05, (scored + conceded) / 2.0)
+
+
+def _unbiased_power(
+    scored: float,
+    conceded: float,
+    shots_on_target: float,
+    possession: float,
+    squad_strength: float,
+    key_players: float,
+) -> float:
+    """Weighted team power score before the referee-bias term is applied."""
+    return (
+        scored * 5.0
+        - conceded * 5.0
+        + shots_on_target * 2.0
+        + possession * 0.1
+        + squad_strength * 3.0
+        + key_players * 0.8
+    )
+
+
+def build_vqwen_feature_row(values: VqwenFeatureInput) -> dict[str, float]:
+    """Derive the VQWEN feature dictionary for a single match.
+
+    Args:
+        values: Raw per-match inputs for the home and away sides.
+
+    Returns:
+        A dict with one float per model feature ("elo_diff", "h_xg", "a_xg",
+        "total_xg", "pow_diff", "rest_diff", "h_fat", "a_fat", "imp_h",
+        "imp_d", "imp_a", "h_xi", "a_xi", "h2h_h_wr", "form_diff"), inserted
+        in that order.
+    """
+    fatigue_h = fatigue_multiplier(values.home_rest_days)
+    fatigue_a = fatigue_multiplier(values.away_rest_days)
+
+    # Scoring environment: mean of league and referee goal averages, expressed
+    # relative to a 2.6-goal baseline and limited to [0.85, 1.2].
+    mean_goals = (
+        float(values.league_avg_goals) + float(values.referee_avg_goals)
+    ) / 2.0
+    env_factor = clamp(mean_goals / 2.6, 0.85, 1.2)
+
+    squad_h = float(values.home_squad_strength)
+    squad_a = float(values.away_squad_strength)
+    keys_h = float(values.home_key_players)
+    keys_a = float(values.away_key_players)
+    squad_gap = squad_h - squad_a
+    keys_gap = keys_h - keys_a
+    absence_cost = clamp(float(values.missing_players_impact), 0.0, 1.0)
+    ref_tilt = clamp(float(values.referee_home_bias), -0.25, 0.25)
+
+    # The squad, key-player and referee terms push the two sides in opposite
+    # directions; the absence term lowers both multipliers.
+    squad_term = squad_gap * 0.08
+    keys_term = keys_gap * 0.025
+    absence_term = absence_cost * 0.08
+    ref_term = ref_tilt * 0.03
+    squad_factor_h = clamp(
+        1.0 + squad_term + keys_term - absence_term + ref_term, 0.82, 1.18
+    )
+    squad_factor_a = clamp(
+        1.0 - squad_term - keys_term - absence_term - ref_term, 0.82, 1.18
+    )
+
+    scored_h = float(values.home_avg_goals_scored)
+    scored_a = float(values.away_avg_goals_scored)
+    conceded_h = float(values.home_avg_goals_conceded)
+    conceded_a = float(values.away_avg_goals_conceded)
+
+    # The 0.05 floor applies to the blended rate before the multipliers, so
+    # the final estimate can still end up below 0.05.
+    xg_h = (
+        _blend_goal_rates(scored_h, conceded_a)
+        * fatigue_h
+        * env_factor
+        * squad_factor_h
+    )
+    xg_a = (
+        _blend_goal_rates(scored_a, conceded_h)
+        * fatigue_a
+        * env_factor
+        * squad_factor_a
+    )
+
+    # Referee bias is added to the home score and subtracted from the away one.
+    ref_power = ref_tilt * 6.0
+    power_h = (
+        _unbiased_power(
+            scored_h,
+            conceded_h,
+            float(values.home_avg_shots_on_target),
+            float(values.home_avg_possession),
+            squad_h,
+            keys_h,
+        )
+        + ref_power
+    )
+    power_a = (
+        _unbiased_power(
+            scored_a,
+            conceded_a,
+            float(values.away_avg_shots_on_target),
+            float(values.away_avg_possession),
+            squad_a,
+            keys_a,
+        )
+        - ref_power
+    )
+
+    form_gap = (
+        float(values.home_form_score)
+        - float(values.away_form_score)
+        + squad_gap * 1.5
+        + keys_gap * 0.35
+        + ref_tilt * 2.0
+        - absence_cost * 1.75
+    )
+
+    return {
+        "elo_diff": float(values.home_elo) - float(values.away_elo),
+        "h_xg": xg_h,
+        "a_xg": xg_a,
+        "total_xg": xg_h + xg_a,
+        "pow_diff": power_h - power_a,
+        "rest_diff": float(values.home_rest_days) - float(values.away_rest_days),
+        "h_fat": fatigue_h,
+        "a_fat": fatigue_a,
+        "imp_h": clamp(values.implied_prob_home, 0.01, 0.98),
+        "imp_d": clamp(values.implied_prob_draw, 0.01, 0.98),
+        "imp_a": clamp(values.implied_prob_away, 0.01, 0.98),
+        # The "h_xi" / "a_xi" names are kept so existing artifacts still load;
+        # they now carry pre-match lineup availability rather than the leaked
+        # post-match starting-XI counts.
+        "h_xi": clamp(values.home_lineup_availability, 0.0, 1.0),
+        "a_xi": clamp(values.away_lineup_availability, 0.0, 1.0),
+        "h2h_h_wr": clamp(values.h2h_home_win_rate, 0.0, 1.0),
+        "form_diff": form_gap,
+    }
+
+
+def row_to_array(row: dict[str, float]) -> np.ndarray:
+    """Convert a feature dict into a single-row float64 array.
+
+    Values are taken in ``FEATURE_COLUMNS`` order, giving shape
+    ``(1, len(FEATURE_COLUMNS))``. Keys of ``row`` not listed there are
+    ignored; a missing listed key raises ``KeyError``.
+    """
+    ordered_values = []
+    for name in FEATURE_COLUMNS:
+        ordered_values.append(float(row[name]))
+    # Wrap in an outer list so the result is 2-D (one row).
+    return np.array([ordered_values], dtype=np.float64)