This commit is contained in:
@@ -0,0 +1,167 @@
|
||||
"""
Shared VQWEN feature contract
=============================

This module is the single place that defines how VQWEN features are produced.
Both training and runtime inference must use it so the model sees the same
feature semantics in historical data and in live analysis.
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Ordered names of the model input columns. ``row_to_array`` emits values in
# exactly this order, so entries must not be reordered or renamed.
FEATURE_COLUMNS = [
    # Rating gap and expected-goals estimates.
    "elo_diff", "h_xg", "a_xg", "total_xg",
    # Composite power gap plus rest / fatigue signals.
    "pow_diff", "rest_diff", "h_fat", "a_fat",
    # Implied probabilities for home win / draw / away win.
    "imp_h", "imp_d", "imp_a",
    # Lineup availability (column names kept for artifact compatibility).
    "h_xi", "a_xi",
    # Head-to-head home win rate and adjusted form gap.
    "h2h_h_wr", "form_diff",
]
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class VqwenFeatureInput:
|
||||
home_elo: float
|
||||
away_elo: float
|
||||
home_avg_goals_scored: float
|
||||
away_avg_goals_scored: float
|
||||
home_avg_goals_conceded: float
|
||||
away_avg_goals_conceded: float
|
||||
home_avg_shots_on_target: float
|
||||
away_avg_shots_on_target: float
|
||||
home_avg_possession: float
|
||||
away_avg_possession: float
|
||||
home_rest_days: float
|
||||
away_rest_days: float
|
||||
implied_prob_home: float
|
||||
implied_prob_draw: float
|
||||
implied_prob_away: float
|
||||
home_lineup_availability: float = 1.0
|
||||
away_lineup_availability: float = 1.0
|
||||
h2h_home_win_rate: float = 0.5
|
||||
home_form_score: float = 0.0
|
||||
away_form_score: float = 0.0
|
||||
league_avg_goals: float = 2.6
|
||||
referee_avg_goals: float = 2.6
|
||||
referee_home_bias: float = 0.0
|
||||
home_squad_strength: float = 0.5
|
||||
away_squad_strength: float = 0.5
|
||||
home_key_players: float = 0.0
|
||||
away_key_players: float = 0.0
|
||||
missing_players_impact: float = 0.0
|
||||
|
||||
|
||||
def fatigue_multiplier(rest_days: float) -> float:
    """Return the fatigue multiplier for a team with ``rest_days`` of rest.

    Fewer than 3 days gives 0.85, fewer than 5 days gives 0.95, and
    anything else gives 1.0 (no fatigue adjustment).
    """
    # Ordered (exclusive upper bound, multiplier) pairs; first match wins.
    tiers = ((3.0, 0.85), (5.0, 0.95))
    for upper_bound, multiplier in tiers:
        if rest_days < upper_bound:
            return multiplier
    return 1.0
|
||||
|
||||
|
||||
def clamp(value: float, lower: float, upper: float) -> float:
    """Coerce ``value`` to ``float`` and restrict it to ``[lower, upper]``.

    The lower bound is applied first, then the upper bound.
    """
    number = float(value)
    # Raise to the lower bound only when the bound is strictly greater.
    floored = lower if lower > number else number
    # Cap at the upper bound only when the bound is strictly smaller.
    return upper if upper < floored else floored
|
||||
|
||||
|
||||
def build_vqwen_feature_row(values: VqwenFeatureInput) -> dict[str, float]:
    """Derive one VQWEN feature row from raw pre-match inputs.

    Args:
        values: Raw inputs for a single match.

    Returns:
        A dict with one float per feature: ``elo_diff``, ``h_xg``, ``a_xg``,
        ``total_xg``, ``pow_diff``, ``rest_diff``, ``h_fat``, ``a_fat``,
        ``imp_h``, ``imp_d``, ``imp_a``, ``h_xi``, ``a_xi``, ``h2h_h_wr``
        and ``form_diff``.
    """

    def attack_base(scored: float, opponent_conceded: float) -> float:
        # Mean of own scoring rate and opponent conceding rate, floored at 0.05.
        return max(0.05, (float(scored) + float(opponent_conceded)) / 2.0)

    def raw_power(
        scored: float,
        conceded: float,
        shots_on_target: float,
        possession: float,
        squad_strength: float,
        key_players: float,
    ) -> float:
        # Weighted team rating, before the referee adjustment is applied.
        return (
            float(scored) * 5.0
            - float(conceded) * 5.0
            + float(shots_on_target) * 2.0
            + float(possession) * 0.1
            + float(squad_strength) * 3.0
            + float(key_players) * 0.8
        )

    v = values

    h_fat = fatigue_multiplier(v.home_rest_days)
    a_fat = fatigue_multiplier(v.away_rest_days)

    # Average of league and referee goal rates, scaled by 2.6 and bounded.
    env_scale = clamp(
        (float(v.league_avg_goals) + float(v.referee_avg_goals)) / 2.0 / 2.6,
        0.85,
        1.2,
    )

    strength_gap = float(v.home_squad_strength) - float(v.away_squad_strength)
    star_gap = float(v.home_key_players) - float(v.away_key_players)
    absence_cost = clamp(float(v.missing_players_impact), 0.0, 1.0)
    ref_tilt = clamp(float(v.referee_home_bias), -0.25, 0.25)

    # Per-factor contributions to the squad multipliers. Gap and referee
    # terms flip sign for the away side; the absence term hits both sides.
    strength_term = strength_gap * 0.08
    star_term = star_gap * 0.025
    absence_term = absence_cost * 0.08
    ref_term = ref_tilt * 0.03

    home_mult = clamp(
        1.0 + strength_term + star_term - absence_term + ref_term, 0.82, 1.18
    )
    away_mult = clamp(
        1.0 - strength_term - star_term - absence_term - ref_term, 0.82, 1.18
    )

    h_xg = (
        attack_base(v.home_avg_goals_scored, v.away_avg_goals_conceded)
        * h_fat
        * env_scale
        * home_mult
    )
    a_xg = (
        attack_base(v.away_avg_goals_scored, v.home_avg_goals_conceded)
        * a_fat
        * env_scale
        * away_mult
    )

    # The referee adjustment is added for the home side and subtracted for away.
    ref_power = ref_tilt * 6.0
    h_pow = (
        raw_power(
            v.home_avg_goals_scored,
            v.home_avg_goals_conceded,
            v.home_avg_shots_on_target,
            v.home_avg_possession,
            v.home_squad_strength,
            v.home_key_players,
        )
        + ref_power
    )
    a_pow = (
        raw_power(
            v.away_avg_goals_scored,
            v.away_avg_goals_conceded,
            v.away_avg_shots_on_target,
            v.away_avg_possession,
            v.away_squad_strength,
            v.away_key_players,
        )
        - ref_power
    )

    return {
        "elo_diff": float(v.home_elo) - float(v.away_elo),
        "h_xg": h_xg,
        "a_xg": a_xg,
        "total_xg": h_xg + a_xg,
        "pow_diff": h_pow - a_pow,
        "rest_diff": float(v.home_rest_days) - float(v.away_rest_days),
        "h_fat": h_fat,
        "a_fat": a_fat,
        "imp_h": clamp(v.implied_prob_home, 0.01, 0.98),
        "imp_d": clamp(v.implied_prob_draw, 0.01, 0.98),
        "imp_a": clamp(v.implied_prob_away, 0.01, 0.98),
        # The h_xi / a_xi column names are kept for artifact compatibility.
        # They now carry pre-match lineup availability rather than leaked
        # post-match starting-XI counts.
        "h_xi": clamp(v.home_lineup_availability, 0.0, 1.0),
        "a_xi": clamp(v.away_lineup_availability, 0.0, 1.0),
        "h2h_h_wr": clamp(v.h2h_home_win_rate, 0.0, 1.0),
        "form_diff": (
            float(v.home_form_score)
            - float(v.away_form_score)
            + strength_gap * 1.5
            + star_gap * 0.35
            + ref_tilt * 2.0
            - absence_cost * 1.75
        ),
    }
|
||||
|
||||
|
||||
def row_to_array(row: dict[str, float]) -> np.ndarray:
    """Pack a feature row into a single-row float64 array.

    Values are read in ``FEATURE_COLUMNS`` order and coerced with ``float``.

    Args:
        row: Mapping from feature name to value. Keys not listed in
            ``FEATURE_COLUMNS`` are ignored.

    Returns:
        Array of shape ``(1, len(FEATURE_COLUMNS))`` with dtype ``float64``.

    Raises:
        KeyError: If ``row`` lacks any column named in ``FEATURE_COLUMNS``.
    """
    ordered_values = [float(row[name]) for name in FEATURE_COLUMNS]
    # Wrap in an outer list so the result is 2-D: one row, one column per feature.
    return np.array([ordered_values], dtype=np.float64)
|
||||
Reference in New Issue
Block a user