This commit is contained in:
Executable
+16
@@ -0,0 +1,16 @@
|
||||
# ai-engine/core/engines/__init__.py
|
||||
"""
|
||||
V20 Ensemble Prediction Engines
|
||||
"""
|
||||
|
||||
from .team_predictor import TeamPredictorEngine, get_team_predictor
|
||||
from .player_predictor import PlayerPredictorEngine, get_player_predictor
|
||||
from .odds_predictor import OddsPredictorEngine, get_odds_predictor
|
||||
from .referee_predictor import RefereePredictorEngine, get_referee_predictor
|
||||
|
||||
__all__ = [
|
||||
"TeamPredictorEngine", "get_team_predictor",
|
||||
"PlayerPredictorEngine", "get_player_predictor",
|
||||
"OddsPredictorEngine", "get_odds_predictor",
|
||||
"RefereePredictorEngine", "get_referee_predictor"
|
||||
]
|
||||
Executable
+237
@@ -0,0 +1,237 @@
|
||||
"""
|
||||
Odds Predictor Engine - V20 Ensemble Component
|
||||
Uses market odds and Poisson mathematics for predictions.
|
||||
|
||||
Weight: 30% in ensemble
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from features.poisson_engine import get_poisson_engine
|
||||
from features.value_calculator import get_value_calculator
|
||||
|
||||
|
||||
@dataclass
class OddsPrediction:
    """Result of the odds engine: market-implied and Poisson-based figures.

    Probability fields hold fractions; ``to_dict`` reports them as
    percentages rounded to one decimal place.
    """

    # Market-implied 1X2 probabilities
    market_home_prob: float = 0.33
    market_draw_prob: float = 0.33
    market_away_prob: float = 0.33

    # Poisson expected goals
    poisson_home_xg: float = 1.3
    poisson_away_xg: float = 1.1

    # Over/Under probabilities
    over_15_prob: float = 0.75
    over_25_prob: float = 0.55
    over_35_prob: float = 0.30

    # Both teams to score
    btts_yes_prob: float = 0.50

    # Most likely scorelines
    most_likely_score: str = "1-1"
    second_likely_score: str = "1-0"
    third_likely_score: str = "2-1"

    # Value bet opportunities; None is replaced by a fresh list in __post_init__
    value_bets: Optional[list] = None

    confidence: float = 0.0

    def __post_init__(self):
        # Give every instance its own list instead of sharing a mutable default.
        if self.value_bets is None:
            self.value_bets = []

    def to_dict(self) -> dict:
        """Return a plain dict with probabilities expressed as percentages."""
        return {
            "market_home_prob": round(self.market_home_prob * 100, 1),
            "market_draw_prob": round(self.market_draw_prob * 100, 1),
            "market_away_prob": round(self.market_away_prob * 100, 1),
            "poisson_home_xg": round(self.poisson_home_xg, 2),
            "poisson_away_xg": round(self.poisson_away_xg, 2),
            "over_15_prob": round(self.over_15_prob * 100, 1),
            "over_25_prob": round(self.over_25_prob * 100, 1),
            "over_35_prob": round(self.over_35_prob * 100, 1),
            "btts_yes_prob": round(self.btts_yes_prob * 100, 1),
            "most_likely_score": self.most_likely_score,
            "second_likely_score": self.second_likely_score,
            "third_likely_score": self.third_likely_score,
            "value_bets": self.value_bets,
            "confidence": round(self.confidence, 1),
        }


class OddsPredictorEngine:
    """
    Odds-based prediction engine.

    Uses:
    - Market odds to extract implied probabilities
    - Poisson distribution for mathematical xG
    - Value calculator for EV+ opportunities
    """

    def __init__(self):
        self.poisson_engine = get_poisson_engine()
        try:
            self.value_calc = get_value_calculator()
        except Exception:
            # Best effort: predict() never touches value_calc, so the engine
            # stays usable when the calculator cannot be created.
            self.value_calc = None
        # Default 1X2 prices used when a price is missing or unusable.
        self.default_ms_h = 2.65
        self.default_ms_d = 3.20
        self.default_ms_a = 2.65
        print("✅ OddsPredictorEngine initialized")

    def _odds_to_prob(self, odds: float) -> float:
        """Convert decimal odds to probability.

        Returns 0.0 for values that are not numeric or not above 1.0.
        """
        try:
            odds = float(odds)
        except (TypeError, ValueError):
            return 0.0
        if odds <= 1.0:
            return 0.0
        return 1.0 / odds

    def _usable_price(self, odds_data: Dict[str, float], key: str, default: float):
        """Return odds_data[key] if it is a usable price, else *default*.

        A price that is present but unusable (None, non-numeric, <= 1.0) is
        treated exactly like a missing key, so it cannot turn into a zero
        market probability that skews normalisation and value detection.
        """
        price = odds_data.get(key, default)
        return price if self._odds_to_prob(price) > 0.0 else default

    def _fair_1x2(self, odds_data: Dict[str, float]):
        """Return (home, draw, away) market probabilities with the vig removed."""
        raw_probs = [
            self._odds_to_prob(self._usable_price(odds_data, key, default))
            for key, default in (
                ("ms_h", self.default_ms_h),
                ("ms_d", self.default_ms_d),
                ("ms_a", self.default_ms_a),
            )
        ]
        # `or 1` only matters if the configured defaults themselves are unusable.
        total = sum(raw_probs) or 1
        return tuple(prob / total for prob in raw_probs)

    def _find_value_bets(self, poisson_pred, market_home: float,
                         market_away: float, market_over25: float) -> list:
        """List markets where the Poisson model is clearly above the market."""
        value_bets = []

        # 1X2: each side is judged on its OWN edge. A low home probability
        # does not imply away value (the mass may sit on the draw), so the
        # away bet is only reported when the away edge itself exceeds 10 points.
        home_edge = poisson_pred.home_win_prob - market_home
        away_edge = poisson_pred.away_win_prob - market_away
        if home_edge > 0.10:
            value_bets.append({
                "market": "MS 1",
                "edge": round(home_edge * 100, 1),
                "confidence": "medium",
            })
        if away_edge > 0.10:
            value_bets.append({
                "market": "MS 2",
                "edge": round(away_edge * 100, 1),
                "confidence": "medium",
            })

        # Over/Under 2.5: the side is chosen by the sign of the disagreement.
        ou_gap = poisson_pred.over_25_prob - market_over25
        if abs(ou_gap) > 0.08:
            edge = abs(ou_gap) * 100
            value_bets.append({
                "market": "2.5 Üst" if ou_gap > 0 else "2.5 Alt",
                "edge": round(edge, 1),
                "confidence": "high" if edge > 10 else "medium",
            })

        return value_bets

    def predict(self,
                odds_data: Dict[str, float],
                home_goals_avg: float = 1.5,
                home_conceded_avg: float = 1.2,
                away_goals_avg: float = 1.2,
                away_conceded_avg: float = 1.4) -> OddsPrediction:
        """
        Generate odds-based prediction.

        Args:
            odds_data: Dict with keys like 'ms_h', 'ms_d', 'ms_a', 'ou25_o', 'btts_y'.
                Missing or unusable prices fall back to defaults; None is
                treated as an empty dict.
            home_goals_avg: Home team's average goals scored
            home_conceded_avg: Home team's average goals conceded
            away_goals_avg: Away team's average goals scored
            away_conceded_avg: Away team's average goals conceded

        Returns:
            OddsPrediction with market and Poisson analysis
        """
        odds_data = odds_data or {}

        # 1. Market probabilities with the bookmaker margin normalised away
        market_home, market_draw, market_away = self._fair_1x2(odds_data)

        # 2. Poisson prediction
        poisson_pred = self.poisson_engine.predict(
            home_goals_avg, home_conceded_avg,
            away_goals_avg, away_conceded_avg
        )

        # 3. Top three scorelines, padded with fixed fallbacks
        fallback_scores = ("1-1", "1-0", "2-1")
        ranked = poisson_pred.most_likely_scores[:3] if poisson_pred.most_likely_scores else []
        scores = [
            ranked[i]["score"] if i < len(ranked) else fallback_scores[i]
            for i in range(3)
        ]

        # 4. Value bet detection (the over price is used as-is, without de-vigging)
        market_over25 = self._odds_to_prob(self._usable_price(odds_data, "ou25_o", 1.9))
        value_bets = self._find_value_bets(
            poisson_pred, market_home, market_away, market_over25
        )

        # 5. Confidence: rises with market/Poisson agreement on the home win,
        # plus 5 points per detected value bet; capped at 99.9.
        agreement = 1.0 - abs(poisson_pred.home_win_prob - market_home)
        confidence = 50.0 + (agreement * 40) + (len(value_bets) * 5)

        return OddsPrediction(
            market_home_prob=market_home,
            market_draw_prob=market_draw,
            market_away_prob=market_away,
            poisson_home_xg=poisson_pred.home_xg,
            poisson_away_xg=poisson_pred.away_xg,
            over_15_prob=poisson_pred.over_15_prob,
            over_25_prob=poisson_pred.over_25_prob,
            over_35_prob=poisson_pred.over_35_prob,
            btts_yes_prob=poisson_pred.btts_yes_prob,
            most_likely_score=scores[0],
            second_likely_score=scores[1],
            third_likely_score=scores[2],
            value_bets=value_bets,
            confidence=min(99.9, confidence)
        )
|
||||
|
||||
|
||||
# Singleton
|
||||
_engine: Optional[OddsPredictorEngine] = None


def get_odds_predictor() -> OddsPredictorEngine:
    """Return the module-level OddsPredictorEngine, creating it on first call."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = OddsPredictorEngine()
    return _engine
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: run one prediction with sample prices and print it.
    engine = get_odds_predictor()

    print("\n🧪 Odds Predictor Engine Test")
    print("=" * 50)

    sample_odds = {
        "ms_h": 1.85,
        "ms_d": 3.40,
        "ms_a": 4.20,
        "ou25_o": 1.90,
    }
    pred = engine.predict(
        odds_data=sample_odds,
        home_goals_avg=1.8,
        home_conceded_avg=1.0,
        away_goals_avg=1.2,
        away_conceded_avg=1.5,
    )

    print("\n📊 Prediction:")
    for key, value in pred.to_dict().items():
        print(f" {key}: {value}")
|
||||
Executable
+224
@@ -0,0 +1,224 @@
|
||||
"""
|
||||
Player Predictor Engine - V20 Ensemble Component
|
||||
Analyzes squad quality, key players, and missing player impact.
|
||||
|
||||
Weight: 25% in ensemble
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, Optional, List
|
||||
from dataclasses import dataclass
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from features.squad_analysis_engine import get_squad_analysis_engine
|
||||
from features.sidelined_analyzer import get_sidelined_analyzer
|
||||
|
||||
|
||||
@dataclass
class PlayerPrediction:
    """Player engine prediction output."""

    home_squad_quality: float = 50.0  # capped at 100 by the engine
    away_squad_quality: float = 50.0
    squad_diff: float = 0.0  # home quality minus away quality
    home_key_players: int = 0
    away_key_players: int = 0
    home_missing_impact: float = 0.0  # 0-1, how much weaker due to missing players
    away_missing_impact: float = 0.0
    home_goals_form: int = 0  # Goals in last 5 matches
    away_goals_form: int = 0
    lineup_available: bool = False
    confidence: float = 0.0

    def to_dict(self) -> dict:
        """Return a plain dict with the float fields rounded for display."""
        return {
            "home_squad_quality": round(self.home_squad_quality, 1),
            "away_squad_quality": round(self.away_squad_quality, 1),
            "squad_diff": round(self.squad_diff, 1),
            "home_key_players": self.home_key_players,
            "away_key_players": self.away_key_players,
            "home_missing_impact": round(self.home_missing_impact, 2),
            "away_missing_impact": round(self.away_missing_impact, 2),
            "home_goals_form": self.home_goals_form,
            "away_goals_form": self.away_goals_form,
            "lineup_available": self.lineup_available,
            "confidence": round(self.confidence, 1),
        }


class PlayerPredictorEngine:
    """
    Player/Squad-based prediction engine.

    Analyzes:
    - Starting 11 quality
    - Key player availability (top scorers)
    - Missing player impact
    - Recent goalscoring form per player
    """

    # Size of a full starting line-up, used by the lineup-count fallback.
    EXPECTED_XI = 11

    def __init__(self):
        self.squad_engine = get_squad_analysis_engine()
        self.sidelined_analyzer = get_sidelined_analyzer()
        print("✅ PlayerPredictorEngine initialized")

    def _load_features(self, match_id, home_team_id, away_team_id,
                       home_lineup, away_lineup):
        """Return (features dict, lineup_available) from the best source available."""
        if home_lineup and away_lineup:
            # Use provided lineups (for live matches)
            home_analysis = self.squad_engine.analyze_squad_from_list(
                home_lineup, home_team_id
            )
            away_analysis = self.squad_engine.analyze_squad_from_list(
                away_lineup, away_team_id
            )
            features = {
                "home_starting_11": home_analysis.starting_count or 11,
                "home_goals_last_5": home_analysis.total_goals_last_5,
                "home_assists_last_5": home_analysis.total_assists_last_5,
                "home_key_players": home_analysis.key_players_count,
                "away_starting_11": away_analysis.starting_count or 11,
                "away_goals_last_5": away_analysis.total_goals_last_5,
                "away_assists_last_5": away_analysis.total_assists_last_5,
                "away_key_players": away_analysis.key_players_count,
            }
            return features, True

        if match_id:
            # Try the match-specific lookup first; any failure falls through
            # to the team-level lookup below.
            try:
                features = self.squad_engine.get_features(
                    match_id, home_team_id, away_team_id
                )
                lineup_available = (
                    features.get("home_starting_11", 0) >= 11 and
                    features.get("away_starting_11", 0) >= 11
                )
            except Exception:
                pass
            else:
                return features, lineup_available

        features = self.squad_engine.get_features_without_match(
            home_team_id, away_team_id
        )
        return features, False

    def _lineup_shortfall(self, starting_count) -> float:
        """Return the fraction of the expected XI that is missing.

        A count of 0/None is treated as a full XI, mirroring the
        ``starting_count or 11`` rule used for provided lineups, so an
        absent count is not reported as an entirely missing team.
        """
        count = starting_count or self.EXPECTED_XI
        if count >= self.EXPECTED_XI:
            return 0.0
        return (self.EXPECTED_XI - count) / self.EXPECTED_XI

    def predict(self,
                match_id: str,
                home_team_id: str,
                away_team_id: str,
                home_lineup: Optional[List[str]] = None,
                away_lineup: Optional[List[str]] = None,
                sidelined_data: Optional[Dict] = None) -> PlayerPrediction:
        """
        Generate player-based prediction.

        Args:
            match_id: Match ID for lineup lookup
            home_team_id: Home team ID
            away_team_id: Away team ID
            home_lineup: Optional list of home player IDs
            away_lineup: Optional list of away player IDs
            sidelined_data: Optional payload handed to the sidelined analyzer;
                when truthy it replaces the lineup-count missing-player estimate

        Returns:
            PlayerPrediction with squad analysis
        """
        features, lineup_available = self._load_features(
            match_id, home_team_id, away_team_id, home_lineup, away_lineup
        )

        # Extract features
        home_goals = features.get("home_goals_last_5", 0)
        away_goals = features.get("away_goals_last_5", 0)
        home_key = features.get("home_key_players", 0)
        away_key = features.get("away_key_players", 0)

        # Squad quality: base 50 plus weighted goals, key players and assists,
        # capped at 100.
        home_quality = min(100, 50 + (home_goals * 3) + (home_key * 5) +
                           features.get("home_assists_last_5", 0) * 2)
        away_quality = min(100, 50 + (away_goals * 3) + (away_key * 5) +
                           features.get("away_assists_last_5", 0) * 2)
        squad_diff = home_quality - away_quality

        # Missing player impact
        # Priority: sidelined data > lineup count (basic)
        if sidelined_data:
            home_impact, away_impact = self.sidelined_analyzer.analyze_match(sidelined_data)
            home_missing = home_impact.impact_score
            away_missing = away_impact.impact_score
            sidelined_available = True
        else:
            home_missing = self._lineup_shortfall(features.get("home_starting_11", 11))
            away_missing = self._lineup_shortfall(features.get("away_starting_11", 11))
            sidelined_available = False

        # Confidence: more data sources = higher confidence
        confidence = 70.0 if lineup_available else 35.0
        if home_goals + away_goals > 10:
            confidence += 15
        if sidelined_available:
            confidence += self.sidelined_analyzer.config.get("sidelined.confidence_boost", 10)
            if not lineup_available:
                confidence -= 5.0

        return PlayerPrediction(
            home_squad_quality=home_quality,
            away_squad_quality=away_quality,
            squad_diff=squad_diff,
            home_key_players=home_key,
            away_key_players=away_key,
            home_missing_impact=home_missing,
            away_missing_impact=away_missing,
            home_goals_form=home_goals,
            away_goals_form=away_goals,
            lineup_available=lineup_available,
            # Keep within 5-100: the sidelined boost comes from configuration
            # and is otherwise unbounded.
            confidence=max(5.0, min(100.0, confidence))
        )

    def get_1x2_modifier(self, prediction: PlayerPrediction) -> Dict[str, float]:
        """
        Calculate 1X2 probability modifiers based on squad analysis.

        Returns multiplicative modifiers to apply to base probabilities.
        """
        # Nominally -1..+1. With quality scores starting at a base of 50 and
        # capped at 100 the realistic range is about -0.5..+0.5 (assuming
        # non-negative input counts), i.e. roughly +/-15% on home/away.
        diff = prediction.squad_diff / 100

        return {
            "home_modifier": 1.0 + (diff * 0.3),
            "away_modifier": 1.0 - (diff * 0.3),
            "draw_modifier": 1.0 - abs(diff) * 0.2  # Less draw if big diff
        }
|
||||
|
||||
|
||||
# Singleton
|
||||
_engine: Optional[PlayerPredictorEngine] = None


def get_player_predictor() -> PlayerPredictorEngine:
    """Return the module-level PlayerPredictorEngine, creating it on first call."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = PlayerPredictorEngine()
    return _engine
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: predict without a match id or lineups and print the result.
    engine = get_player_predictor()

    print("\n🧪 Player Predictor Engine Test")
    print("=" * 50)

    pred = engine.predict(
        match_id=None,
        home_team_id="test_home",
        away_team_id="test_away",
    )

    print("\n📊 Prediction:")
    for key, value in pred.to_dict().items():
        print(f" {key}: {value}")
|
||||
Executable
+188
@@ -0,0 +1,188 @@
|
||||
"""
|
||||
Referee Predictor Engine - V20 Ensemble Component
|
||||
Analyzes referee patterns for cards, goals, and home bias.
|
||||
|
||||
Weight: 15% in ensemble
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from features.referee_engine import get_referee_engine
|
||||
|
||||
|
||||
@dataclass
class RefereePrediction:
    """Referee engine prediction output."""

    referee_name: str = ""
    matches_officiated: int = 0

    # Card tendencies
    avg_yellow_cards: float = 4.0
    avg_red_cards: float = 0.2
    is_card_heavy: bool = False  # Above average cards

    # Goal tendencies
    avg_goals_per_match: float = 2.5
    over_25_rate: float = 0.50
    is_high_scoring: bool = False  # Above average goals

    # Home bias
    home_win_rate: float = 0.45
    home_bias: float = 0.0  # -1 to +1, positive = favors home

    # Penalty tendency
    penalty_rate: float = 0.15

    confidence: float = 0.0

    def to_dict(self) -> dict:
        """Return a plain dict; rate fields are reported as percentages."""
        return {
            "referee_name": self.referee_name,
            "matches_officiated": self.matches_officiated,
            "avg_yellow_cards": round(self.avg_yellow_cards, 1),
            "avg_red_cards": round(self.avg_red_cards, 2),
            "is_card_heavy": self.is_card_heavy,
            "avg_goals_per_match": round(self.avg_goals_per_match, 2),
            "over_25_rate": round(self.over_25_rate * 100, 1),
            "is_high_scoring": self.is_high_scoring,
            "home_win_rate": round(self.home_win_rate * 100, 1),
            "home_bias": round(self.home_bias, 2),
            "penalty_rate": round(self.penalty_rate * 100, 1),
            "confidence": round(self.confidence, 1),
        }


class RefereePredictorEngine:
    """
    Referee-based prediction engine.

    Analyzes:
    - Card tendency (yellow/red card averages)
    - Goal tendency (goals per match, over-2.5 rate)
    - Home bias (rate of decisions favouring the home side)
    - Penalty tendency (rate of penalties awarded)
    """

    # League average benchmarks
    LEAGUE_AVG_GOALS = 2.65
    LEAGUE_AVG_YELLOW = 4.0
    LEAGUE_HOME_WIN_RATE = 0.45

    # Below this many officiated matches only a low-confidence default is returned.
    MIN_MATCHES = 5

    def __init__(self):
        self.referee_engine = get_referee_engine()
        print("✅ RefereePredictorEngine initialized")

    def predict(self,
                match_id: Optional[str] = None,
                referee_name: Optional[str] = None,
                league_id: Optional[str] = None) -> RefereePrediction:
        """
        Generate referee-based prediction.

        Args:
            match_id: Match ID to find referee
            referee_name: Or provide referee name directly
            league_id: League ID to scope stats (prevents name collisions)

        Returns:
            RefereePrediction with referee analysis. Confidence is 10.0 when
            neither identifier is given and 20.0 when the referee has fewer
            than five recorded matches.
        """
        # Get referee features
        if match_id:
            features = self.referee_engine.get_features(match_id, league_id=league_id)
            # Live flows may already have referee_name while match_officials table is sparse.
            # Prefer the richer profile if direct-name lookup has more history.
            if referee_name:
                by_name = self.referee_engine.get_features_by_name(referee_name, league_id=league_id)
                if (by_name.get("referee_matches", 0) or 0) > (features.get("referee_matches", 0) or 0):
                    features = by_name
        elif referee_name:
            features = self.referee_engine.get_features_by_name(referee_name, league_id=league_id)
        else:
            # Nothing to look up: return defaults
            return RefereePrediction(confidence=10.0)

        ref_name = features.get("referee_name", "Unknown")
        # Same None guard as the comparison above: a None match count would
        # otherwise raise TypeError in the threshold check below.
        matches = features.get("referee_matches", 0) or 0

        if matches < self.MIN_MATCHES:
            # Not enough data
            return RefereePrediction(
                referee_name=ref_name,
                matches_officiated=matches,
                confidence=20.0
            )

        # Extract features
        avg_yellow = features.get("referee_avg_yellow", self.LEAGUE_AVG_YELLOW)
        avg_red = features.get("referee_avg_red", 0.2)
        avg_goals = features.get("referee_avg_goals", 2.5)
        over25_rate = features.get("referee_over25_rate", 0.5)
        home_win_rate = features.get("referee_home_win_rate", self.LEAGUE_HOME_WIN_RATE)
        home_bias = features.get("referee_home_bias", 0.0)
        penalty_rate = features.get("referee_penalty_rate", 0.15)

        # Determine tendencies: a red card weighs as four yellows, and the
        # referee must exceed the league yellow average by more than one.
        is_card_heavy = (avg_yellow + avg_red * 4) > (self.LEAGUE_AVG_YELLOW + 1)
        is_high_scoring = avg_goals > self.LEAGUE_AVG_GOALS

        # Confidence grows by 2 points per officiated match, capped at 90
        confidence = min(90.0, 30.0 + matches * 2)

        return RefereePrediction(
            referee_name=ref_name,
            matches_officiated=matches,
            avg_yellow_cards=avg_yellow,
            avg_red_cards=avg_red,
            is_card_heavy=is_card_heavy,
            avg_goals_per_match=avg_goals,
            over_25_rate=over25_rate,
            is_high_scoring=is_high_scoring,
            home_win_rate=home_win_rate,
            home_bias=home_bias,
            penalty_rate=penalty_rate,
            confidence=confidence
        )

    def get_modifiers(self, prediction: RefereePrediction) -> Dict[str, float]:
        """
        Get modifiers to apply to other predictions based on referee profile.
        """
        return {
            # Home team gets slight boost if referee has home bias
            "home_modifier": 1.0 + (prediction.home_bias * 0.05),
            # O/U modifier
            "over_25_modifier": 1.0 + (prediction.avg_goals_per_match - self.LEAGUE_AVG_GOALS) * 0.1,
            # Card modifier for card markets
            "cards_modifier": 1.0 + (prediction.avg_yellow_cards - self.LEAGUE_AVG_YELLOW) * 0.05
        }
|
||||
|
||||
|
||||
# Singleton
|
||||
_engine: Optional[RefereePredictorEngine] = None


def get_referee_predictor() -> RefereePredictorEngine:
    """Return the module-level RefereePredictorEngine, creating it on first call."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = RefereePredictorEngine()
    return _engine
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: look a referee up by name and print the result.
    engine = get_referee_predictor()

    print("\n🧪 Referee Predictor Engine Test")
    print("=" * 50)

    pred = engine.predict(referee_name="Cüneyt Çakır")

    print("\n📊 Prediction:")
    for key, value in pred.to_dict().items():
        print(f" {key}: {value}")
|
||||
Executable
+286
@@ -0,0 +1,286 @@
|
||||
"""
|
||||
Team Predictor Engine - V20 Ensemble Component
|
||||
Combines ELO ratings, form stats, H2H records and team statistics.
|
||||
|
||||
Weight: 30% in ensemble
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, Optional, Tuple, Any
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
# Add parent to path
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from features.elo_system import get_elo_system
|
||||
from features.h2h_engine import get_h2h_engine
|
||||
from features.momentum_engine import get_momentum_engine, MomentumData
|
||||
from features.team_stats_engine import get_team_stats_engine
|
||||
|
||||
|
||||
@dataclass
class TeamPrediction:
    """Team engine prediction output."""

    home_win_prob: float = 0.33
    draw_prob: float = 0.33
    away_win_prob: float = 0.33
    home_xg: float = 1.3
    away_xg: float = 1.1
    form_advantage: float = 0.0  # -1 to +1, positive = home advantage
    h2h_advantage: float = 0.0  # -1 to +1
    elo_diff: float = 0.0
    confidence: float = 0.0
    # Flat feature dict assembled by the engine; not included in to_dict().
    raw_features: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return a plain dict with probabilities expressed as percentages."""
        return {
            "home_win_prob": round(self.home_win_prob * 100, 1),
            "draw_prob": round(self.draw_prob * 100, 1),
            "away_win_prob": round(self.away_win_prob * 100, 1),
            "home_xg": round(self.home_xg, 2),
            "away_xg": round(self.away_xg, 2),
            "form_advantage": round(self.form_advantage, 2),
            "h2h_advantage": round(self.h2h_advantage, 2),
            "elo_diff": round(self.elo_diff, 0),
            "confidence": round(self.confidence, 1),
        }


class TeamPredictorEngine:
    """
    Team-based prediction engine.

    Uses:
    - ELO Rating System (venue-adjusted, league-weighted)
    - H2H Engine (head-to-head history)
    - Momentum Engine (recent form)
    - Team Stats Engine (possession, shots, corners)
    """

    def __init__(self):
        self.elo_system = get_elo_system()
        self.h2h_engine = get_h2h_engine()
        self.momentum_engine = get_momentum_engine()
        self.team_stats_engine = get_team_stats_engine()

        print("✅ TeamPredictorEngine initialized")

    @staticmethod
    def _value_or(value, default):
        """Return *value*, or *default* when *value* is None."""
        return default if value is None else value

    @staticmethod
    def _build_raw_features(elo_features, h2h_features, home_mom, away_mom,
                            home_stats, away_stats) -> Dict[str, Any]:
        """Assemble the flat raw-feature dict returned alongside the prediction.

        Insertion order matters: later groups override earlier keys.
        """
        return {
            **elo_features,

            # Form features: averages are proxies built from momentum trends.
            "home_goals_avg": 1.5 + home_mom.goals_trend,
            "home_conceded_avg": 1.5 - home_mom.conceded_trend,
            "away_goals_avg": 1.5 + away_mom.goals_trend,
            "away_conceded_avg": 1.5 - away_mom.conceded_trend,

            # Fixed placeholders: not taken from the momentum data.
            "home_clean_sheet_rate": 0.2,
            "away_clean_sheet_rate": 0.2,
            "home_scoring_rate": 0.8,
            "away_scoring_rate": 0.8,

            "home_winning_streak": home_mom.winning_streak,
            "away_winning_streak": away_mom.winning_streak,
            "home_unbeaten_streak": home_mom.unbeaten_streak,
            "away_unbeaten_streak": away_mom.unbeaten_streak,

            # H2H features
            **h2h_features,

            # Team stats
            "home_avg_possession": home_stats.get("avg_possession", 0.5),
            "away_avg_possession": away_stats.get("avg_possession", 0.5),
            "home_avg_shots_on_target": home_stats.get("avg_shots_on_target", 3.5),
            "away_avg_shots_on_target": away_stats.get("avg_shots_on_target", 3.5),
            "home_shot_conversion": home_stats.get("shot_conversion_rate", 0.1),
            "away_shot_conversion": away_stats.get("shot_conversion_rate", 0.1),
            "home_avg_corners": home_stats.get("avg_corners", 4.5),
            "away_avg_corners": away_stats.get("avg_corners", 4.5),

            # Derived: conceded trend reused as an xGA proxy
            "home_xga": 1.5 - home_mom.conceded_trend,
            "away_xga": 1.5 - away_mom.conceded_trend,
        }

    def predict(self,
                home_team_id: str,
                away_team_id: str,
                match_date_ms: int,
                home_team_name: str = "",
                away_team_name: str = "") -> TeamPrediction:
        """
        Generate team-based prediction.

        Args:
            home_team_id: Home team ID
            away_team_id: Away team ID
            match_date_ms: Match date in milliseconds
            home_team_name: Home team name (accepted for compatibility; unused here)
            away_team_name: Away team name (accepted for compatibility; unused here)

        Returns:
            TeamPrediction with 1X2 probabilities and xG
        """
        # 1. Get ELO predictions
        elo_pred = self.elo_system.predict_match(home_team_id, away_team_id)
        elo_features = self.elo_system.get_match_features(home_team_id, away_team_id)

        # 2. Get H2H features (neutral defaults if the lookup fails)
        try:
            h2h_features = self.h2h_engine.get_features(
                home_team_id, away_team_id, match_date_ms
            )
        except Exception as e:
            print(f"⚠️ H2HEngine error: {e}")
            h2h_features = {
                "h2h_home_win_rate": 0.5,
                "h2h_away_win_rate": 0.5,
                "h2h_avg_goals": 2.5,
                "h2h_btts_rate": 0.5
            }

        # 3. Get Momentum/Form features
        try:
            home_mom_data = self.momentum_engine.calculate_momentum(home_team_id, match_date_ms)
            away_mom_data = self.momentum_engine.calculate_momentum(away_team_id, match_date_ms)

            # Map momentum_score (-1..1) onto a 0..1 form score
            home_form_score = (home_mom_data.momentum_score + 1) / 2
            away_form_score = (away_mom_data.momentum_score + 1) / 2
        except Exception as e:
            print(f"⚠️ MomentumEngine error: {e}")
            home_mom_data = MomentumData()
            away_mom_data = MomentumData()
            home_form_score = 0.5
            away_form_score = 0.5

        # 4. Get Team Stats
        home_stats = self.team_stats_engine.get_features(home_team_id, match_date_ms)
        away_stats = self.team_stats_engine.get_features(away_team_id, match_date_ms)

        # 5. Combine predictions
        # ELO-based 1X2 (60% weight)
        elo_home = elo_pred.get("home_win_prob", 0.33)
        elo_draw = elo_pred.get("draw_prob", 0.33)
        elo_away = elo_pred.get("away_win_prob", 0.33)

        # H2H (20% weight); None rates are replaced by the neutral 0.5 so the
        # weighted sum below cannot fail on a None value.
        h2h_home_rate = self._value_or(h2h_features.get("h2h_home_win_rate", 0.5), 0.5)
        h2h_away_rate = self._value_or(h2h_features.get("h2h_away_win_rate", 0.5), 0.5)

        # Form (20% weight)
        form_diff = home_form_score - away_form_score  # -1 to +1

        # Weighted combination
        final_home = elo_home * 0.6 + h2h_home_rate * 0.2 + (0.5 + form_diff * 0.3) * 0.2
        final_away = elo_away * 0.6 + h2h_away_rate * 0.2 + (0.5 - form_diff * 0.3) * 0.2
        # Draw takes the remainder; clamp so inflated inputs cannot push it
        # negative (without the clamp the normalisation below is a no-op).
        final_draw = max(0.0, 1.0 - final_home - final_away)

        # Normalize
        total = final_home + final_draw + final_away
        if total > 0:
            final_home /= total
            final_draw /= total
            final_away /= total

        # Calculate xG based on stats and form (conservative base)
        home_conversion = home_stats.get("shot_conversion_rate", 0.1)
        away_conversion = away_stats.get("shot_conversion_rate", 0.1)

        base_home_xg = 1.35 + (home_conversion * 3.0)
        base_away_xg = 1.10 + (away_conversion * 2.5)

        # Defense weakness factor: the opponent's own shot accuracy is used
        # as a proxy for its defensive quality.
        away_def_weakness = away_stats.get("shot_accuracy", 0.35)
        home_def_weakness = home_stats.get("shot_accuracy", 0.35)

        home_xg = base_home_xg * (1 + form_diff * 0.15) * (0.8 + away_def_weakness * 0.6)
        away_xg = base_away_xg * (1 - form_diff * 0.15) * (0.8 + home_def_weakness * 0.6)

        # xG underperformance penalty: subtract half of the recorded
        # underperformance (at most 0.5) once it exceeds 0.2.
        if hasattr(home_mom_data, 'xg_underperformance') and home_mom_data.xg_underperformance > 0.2:
            home_xg -= min(0.5, home_mom_data.xg_underperformance * 0.5)

        if hasattr(away_mom_data, 'xg_underperformance') and away_mom_data.xg_underperformance > 0.2:
            away_xg -= min(0.5, away_mom_data.xg_underperformance * 0.5)

        # H2H adjustment (more conservative)
        h2h_avg_goals = self._value_or(h2h_features.get("h2h_avg_goals", 2.5), 2.5)
        if h2h_avg_goals > 3.0:
            home_xg *= 1.05
            away_xg *= 1.05
        elif h2h_avg_goals < 2.0:
            home_xg *= 0.95
            away_xg *= 0.95

        # Clamp xG to reasonable range
        home_xg = max(0.5, min(3.5, home_xg))
        away_xg = max(0.3, min(3.0, away_xg))

        # Calculate confidence
        # Higher when ELO, H2H, and Form all agree
        elo_winner = "H" if elo_home > max(elo_draw, elo_away) else ("A" if elo_away > elo_draw else "D")
        # Equal H2H rates carry no directional signal, so they count as
        # neutral ("D") instead of an away edge.
        if h2h_home_rate > h2h_away_rate:
            h2h_winner = "H"
        elif h2h_away_rate > h2h_home_rate:
            h2h_winner = "A"
        else:
            h2h_winner = "D"
        form_winner = "H" if form_diff > 0.1 else ("A" if form_diff < -0.1 else "D")

        agreement = sum([
            elo_winner == h2h_winner,
            elo_winner == form_winner,
            h2h_winner == form_winner
        ])

        max_prob = max(final_home, final_draw, final_away)
        confidence = max_prob * 100 * (0.7 + agreement * 0.1)

        raw_features = self._build_raw_features(
            elo_features, h2h_features,
            home_mom_data, away_mom_data,
            home_stats, away_stats,
        )

        return TeamPrediction(
            home_win_prob=final_home,
            draw_prob=final_draw,
            away_win_prob=final_away,
            home_xg=home_xg,
            away_xg=away_xg,
            form_advantage=form_diff,
            h2h_advantage=h2h_home_rate - h2h_away_rate,
            elo_diff=elo_features.get("elo_diff", 0),
            confidence=confidence,
            raw_features=raw_features
        )
|
||||
|
||||
|
||||
# Singleton
|
||||
_engine: Optional[TeamPredictorEngine] = None


def get_team_predictor() -> TeamPredictorEngine:
    """Return the module-level TeamPredictorEngine, creating it on first call."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = TeamPredictorEngine()
    return _engine
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: predict for placeholder team ids and print the result.
    engine = get_team_predictor()

    print("\n🧪 Team Predictor Engine Test")
    print("=" * 50)

    # Test with sample IDs
    pred = engine.predict(
        home_team_id="test_home",
        away_team_id="test_away",
        match_date_ms=1707393600000,
    )

    print("\n📊 Prediction:")
    for key, value in pred.to_dict().items():
        print(f" {key}: {value}")
|
||||
Reference in New Issue
Block a user