287 lines
11 KiB
Python
Executable File
287 lines
11 KiB
Python
Executable File
"""
|
|
Team Predictor Engine - V20 Ensemble Component
|
|
Combines ELO ratings, form stats, H2H records and team statistics.
|
|
|
|
Weight: 30% in ensemble
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
from typing import Dict, Optional, Tuple, Any
|
|
from dataclasses import dataclass, field
|
|
|
|
# Add parent to path
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
|
|
|
from features.elo_system import get_elo_system
|
|
from features.h2h_engine import get_h2h_engine
|
|
from features.momentum_engine import get_momentum_engine, MomentumData
|
|
from features.team_stats_engine import get_team_stats_engine
|
|
|
|
|
|
@dataclass
class TeamPrediction:
    """Team engine prediction output.

    Probabilities are stored as fractions; ``to_dict`` reports them as
    percentages rounded for display.
    """

    home_win_prob: float = 0.33
    draw_prob: float = 0.33
    away_win_prob: float = 0.33
    home_xg: float = 1.3
    away_xg: float = 1.1
    form_advantage: float = 0.0  # -1 to +1, positive = home advantage
    h2h_advantage: float = 0.0  # -1 to +1
    elo_diff: float = 0.0
    confidence: float = 0.0
    # Kept as the last field so positional construction order is unchanged.
    # default_factory gives every instance its own dict.
    raw_features: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the headline numbers as a rounded, display-ready dict.

        The three outcome probabilities are multiplied by 100 and rounded to
        one decimal; xG and the advantage values are rounded to two decimals;
        ``elo_diff`` is rounded to a whole number (still a float);
        ``confidence`` is rounded to one decimal. ``raw_features`` is not
        included in the result.
        """
        return {
            "home_win_prob": round(self.home_win_prob * 100, 1),
            "draw_prob": round(self.draw_prob * 100, 1),
            "away_win_prob": round(self.away_win_prob * 100, 1),
            "home_xg": round(self.home_xg, 2),
            "away_xg": round(self.away_xg, 2),
            "form_advantage": round(self.form_advantage, 2),
            "h2h_advantage": round(self.h2h_advantage, 2),
            "elo_diff": round(self.elo_diff, 0),
            "confidence": round(self.confidence, 1),
        }
|
|
|
|
|
|
class TeamPredictorEngine:
    """
    Team-based prediction engine.

    Uses:
    - ELO Rating System (venue-adjusted, league-weighted)
    - H2H Engine (head-to-head history)
    - Momentum Engine (recent form)
    - Team Stats Engine (possession, shots, corners)
    """

    def __init__(self):
        """Obtain the four feature engines from their module accessors."""
        self.elo_system = get_elo_system()
        self.h2h_engine = get_h2h_engine()
        self.momentum_engine = get_momentum_engine()
        self.team_stats_engine = get_team_stats_engine()

        print("✅ TeamPredictorEngine initialized")

    def predict(self,
                home_team_id: str,
                away_team_id: str,
                match_date_ms: int,
                home_team_name: str = "",
                away_team_name: str = "") -> "TeamPrediction":
        """
        Generate team-based prediction.

        Args:
            home_team_id: Home team ID
            away_team_id: Away team ID
            match_date_ms: Match date in milliseconds
            home_team_name: Home team name. Accepted for interface
                compatibility; the current implementation does not read it.
            away_team_name: Away team name. Accepted for interface
                compatibility; the current implementation does not read it.

        Returns:
            TeamPrediction with 1X2 probabilities and xG

        Raises:
            Whatever the ELO system or the team stats engine raise; only the
            H2H and momentum lookups have fallbacks.
        """
        # 1. ELO predictions and features (no fallback: errors propagate).
        elo_pred = self.elo_system.predict_match(home_team_id, away_team_id)
        elo_features = self.elo_system.get_match_features(home_team_id, away_team_id)

        # 2. H2H features, with neutral defaults when the lookup fails.
        try:
            h2h_features = self.h2h_engine.get_features(
                home_team_id, away_team_id, match_date_ms
            )
        except Exception as e:
            # Report the failure the same way the momentum fallback does
            # instead of swallowing it silently.
            print(f"⚠️ H2HEngine error: {e}")
            h2h_features = {
                "h2h_home_win_rate": 0.5,
                "h2h_away_win_rate": 0.5,
                "h2h_avg_goals": 2.5,
                "h2h_btts_rate": 0.5,
            }

        # 3. Momentum/form. form_score = (momentum_score + 1) / 2, which is
        #    0-1 provided momentum_score is in -1..1 (per the original note;
        #    not verifiable from this file).
        try:
            home_mom_data = self.momentum_engine.calculate_momentum(home_team_id, match_date_ms)
            away_mom_data = self.momentum_engine.calculate_momentum(away_team_id, match_date_ms)

            home_form_score = (home_mom_data.momentum_score + 1) / 2
            away_form_score = (away_mom_data.momentum_score + 1) / 2
        except Exception as e:
            print(f"⚠️ MomentumEngine error: {e}")
            home_mom_data = MomentumData()
            away_mom_data = MomentumData()
            home_form_score = 0.5
            away_form_score = 0.5

        # 4. Team stats (no fallback: errors propagate).
        home_stats = self.team_stats_engine.get_features(home_team_id, match_date_ms)
        away_stats = self.team_stats_engine.get_features(away_team_id, match_date_ms)

        # 5. Combine the signals.
        elo_home = elo_pred.get("home_win_prob", 0.33)
        elo_draw = elo_pred.get("draw_prob", 0.33)
        elo_away = elo_pred.get("away_win_prob", 0.33)

        h2h_home_rate = h2h_features.get("h2h_home_win_rate", 0.5)
        h2h_away_rate = h2h_features.get("h2h_away_win_rate", 0.5)

        # Positive means the home side is in better form.
        form_diff = home_form_score - away_form_score

        final_home, final_draw, final_away = self._blend_outcome_probs(
            elo_home, elo_away, h2h_home_rate, h2h_away_rate, form_diff
        )

        home_xg, away_xg = self._estimate_xg(
            home_stats,
            away_stats,
            form_diff,
            getattr(home_mom_data, "xg_underperformance", 0.0),
            getattr(away_mom_data, "xg_underperformance", 0.0),
            h2h_features.get("h2h_avg_goals", 2.5),
        )

        confidence = self._agreement_confidence(
            elo_home, elo_draw, elo_away,
            h2h_home_rate, h2h_away_rate,
            form_diff,
            max(final_home, final_draw, final_away),
        )

        # Raw features for XGBoost.
        raw_features = self._build_raw_features(
            elo_features, h2h_features,
            home_mom_data, away_mom_data,
            home_stats, away_stats,
        )

        return TeamPrediction(
            home_win_prob=final_home,
            draw_prob=final_draw,
            away_win_prob=final_away,
            home_xg=home_xg,
            away_xg=away_xg,
            form_advantage=form_diff,
            h2h_advantage=h2h_home_rate - h2h_away_rate,
            elo_diff=elo_features.get("elo_diff", 0),
            confidence=confidence,
            raw_features=raw_features,
        )

    @staticmethod
    def _blend_outcome_probs(elo_home: float,
                             elo_away: float,
                             h2h_home_rate: float,
                             h2h_away_rate: float,
                             form_diff: float) -> Tuple[float, float, float]:
        """Blend ELO (60%), H2H (20%) and form (20%) into (home, draw, away).

        Draw is the remainder after home and away. It is clamped at zero
        before renormalising, so inconsistent inputs (for example H2H rates
        summing to more than 1) cannot yield a negative probability.
        """
        final_home = elo_home * 0.6 + h2h_home_rate * 0.2 + (0.5 + form_diff * 0.3) * 0.2
        final_away = elo_away * 0.6 + h2h_away_rate * 0.2 + (0.5 - form_diff * 0.3) * 0.2
        final_draw = max(0.0, 1.0 - final_home - final_away)

        # Normalize so the three outcomes sum to 1.
        total = final_home + final_draw + final_away
        if total > 0:
            final_home /= total
            final_draw /= total
            final_away /= total
        return final_home, final_draw, final_away

    @staticmethod
    def _estimate_xg(home_stats: Dict[str, Any],
                     away_stats: Dict[str, Any],
                     form_diff: float,
                     home_xg_underperformance: Optional[float],
                     away_xg_underperformance: Optional[float],
                     h2h_avg_goals: float) -> Tuple[float, float]:
        """Estimate (home_xg, away_xg) from team stats, form and H2H scoring."""
        home_conversion = home_stats.get("shot_conversion_rate", 0.1)
        away_conversion = away_stats.get("shot_conversion_rate", 0.1)

        # Conservative base; the home side gets the larger constants.
        base_home_xg = 1.35 + (home_conversion * 3.0)
        base_away_xg = 1.10 + (away_conversion * 2.5)

        # Each side's xG is scaled by the opponent's own "shot_accuracy",
        # which the original code uses as a proxy for defensive weakness.
        away_def_weakness = away_stats.get("shot_accuracy", 0.35)
        home_def_weakness = home_stats.get("shot_accuracy", 0.35)

        home_xg = base_home_xg * (1 + form_diff * 0.15) * (0.8 + away_def_weakness * 0.6)
        away_xg = base_away_xg * (1 - form_diff * 0.15) * (0.8 + home_def_weakness * 0.6)

        # xG underperformance penalty: half the recorded shortfall, capped at
        # 0.5, applied only above 0.2. A missing/None value means no penalty.
        home_shortfall = home_xg_underperformance or 0.0
        away_shortfall = away_xg_underperformance or 0.0
        if home_shortfall > 0.2:
            home_xg -= min(0.5, home_shortfall * 0.5)
        if away_shortfall > 0.2:
            away_xg -= min(0.5, away_shortfall * 0.5)

        # H2H adjustment (conservative): +/-5% for high/low scoring fixtures.
        if h2h_avg_goals > 3.0:
            home_xg *= 1.05
            away_xg *= 1.05
        elif h2h_avg_goals < 2.0:
            home_xg *= 0.95
            away_xg *= 0.95

        # Clamp xG to a reasonable range.
        home_xg = max(0.5, min(3.5, home_xg))
        away_xg = max(0.3, min(3.0, away_xg))
        return home_xg, away_xg

    @staticmethod
    def _agreement_confidence(elo_home: float,
                              elo_draw: float,
                              elo_away: float,
                              h2h_home_rate: float,
                              h2h_away_rate: float,
                              form_diff: float,
                              max_prob: float) -> float:
        """Scale the top probability by how many signal pairs agree.

        Each of ELO, H2H and form votes "H", "D" or "A". Confidence is
        ``max_prob * 100 * (0.7 + 0.1 * agreeing_pairs)``.
        """
        elo_winner = "H" if elo_home > max(elo_draw, elo_away) else ("A" if elo_away > elo_draw else "D")

        # Equal H2H rates (including the 0.5/0.5 fallback) vote "D" rather
        # than defaulting to an away vote.
        if h2h_home_rate > h2h_away_rate:
            h2h_winner = "H"
        elif h2h_away_rate > h2h_home_rate:
            h2h_winner = "A"
        else:
            h2h_winner = "D"

        form_winner = "H" if form_diff > 0.1 else ("A" if form_diff < -0.1 else "D")

        agreement = sum([
            elo_winner == h2h_winner,
            elo_winner == form_winner,
            h2h_winner == form_winner,
        ])
        return max_prob * 100 * (0.7 + agreement * 0.1)

    @staticmethod
    def _build_raw_features(elo_features: Dict[str, Any],
                            h2h_features: Dict[str, Any],
                            home_mom_data: Any,
                            away_mom_data: Any,
                            home_stats: Dict[str, Any],
                            away_stats: Dict[str, Any]) -> Dict[str, Any]:
        """Assemble the flat feature dict passed on as ``raw_features``.

        Key order matters for overrides: ELO features first, then form
        proxies, then H2H features, then team stats and derived values, so a
        later key replaces an earlier one with the same name.
        """
        return {
            **elo_features,  # original note: 8 features

            # Form features (key names need to match extract_training_data.py)
            "home_goals_avg": 1.5 + home_mom_data.goals_trend,  # Proxy
            "home_conceded_avg": 1.5 - home_mom_data.conceded_trend,  # Proxy
            "away_goals_avg": 1.5 + away_mom_data.goals_trend,
            "away_conceded_avg": 1.5 - away_mom_data.conceded_trend,

            # Fixed placeholders: not available in the new MomentumData.
            "home_clean_sheet_rate": 0.2,
            "away_clean_sheet_rate": 0.2,
            "home_scoring_rate": 0.8,
            "away_scoring_rate": 0.8,

            "home_winning_streak": home_mom_data.winning_streak,
            "away_winning_streak": away_mom_data.winning_streak,
            "home_unbeaten_streak": home_mom_data.unbeaten_streak,
            "away_unbeaten_streak": away_mom_data.unbeaten_streak,

            # H2H features
            **h2h_features,

            # Team stats
            "home_avg_possession": home_stats.get("avg_possession", 0.5),
            "away_avg_possession": away_stats.get("avg_possession", 0.5),
            "home_avg_shots_on_target": home_stats.get("avg_shots_on_target", 3.5),
            "away_avg_shots_on_target": away_stats.get("avg_shots_on_target", 3.5),
            "home_shot_conversion": home_stats.get("shot_conversion_rate", 0.1),
            "away_shot_conversion": away_stats.get("shot_conversion_rate", 0.1),
            "home_avg_corners": home_stats.get("avg_corners", 4.5),
            "away_avg_corners": away_stats.get("avg_corners", 4.5),

            # Derived (conceded trend reused as an xGA proxy)
            "home_xga": 1.5 - home_mom_data.conceded_trend,
            "away_xga": 1.5 - away_mom_data.conceded_trend,
        }
|
|
|
|
|
|
# Module-level singleton slot; stays None until the first accessor call.
_engine: Optional[TeamPredictorEngine] = None


def get_team_predictor() -> TeamPredictorEngine:
    """Return the shared TeamPredictorEngine, creating it on first use."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = TeamPredictorEngine()
    return _engine
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: run one prediction with placeholder team IDs and
    # print the rounded summary.
    predictor = get_team_predictor()

    print("\n🧪 Team Predictor Engine Test")
    print("=" * 50)

    # Sample IDs and a fixed match timestamp in milliseconds.
    sample_match = {
        "home_team_id": "test_home",
        "away_team_id": "test_away",
        "match_date_ms": 1707393600000,
    }
    prediction = predictor.predict(**sample_match)

    print(f"\n📊 Prediction:")
    for k, v in prediction.to_dict().items():
        print(f" {k}: {v}")
|