@@ -1,3 +1,14 @@
|
||||
model_ensemble:
|
||||
xgb_weight: 0.50
|
||||
lgb_weight: 0.50
|
||||
temperature: 1.5
|
||||
default_ms_odds:
|
||||
home: 2.65
|
||||
draw: 3.20
|
||||
away: 2.65
|
||||
elo_staleness_days: 14
|
||||
odds_staleness_hours: 48
|
||||
|
||||
engine_weights:
|
||||
team: 0.30
|
||||
player: 0.25
|
||||
|
||||
@@ -1,16 +1,10 @@
|
||||
# ai-engine/core/engines/__init__.py
|
||||
"""
|
||||
V20 Ensemble Prediction Engines
|
||||
Prediction Engines
|
||||
"""
|
||||
|
||||
from .team_predictor import TeamPredictorEngine, get_team_predictor
|
||||
from .player_predictor import PlayerPredictorEngine, get_player_predictor
|
||||
from .odds_predictor import OddsPredictorEngine, get_odds_predictor
|
||||
from .referee_predictor import RefereePredictorEngine, get_referee_predictor
|
||||
|
||||
__all__ = [
|
||||
"TeamPredictorEngine", "get_team_predictor",
|
||||
"PlayerPredictorEngine", "get_player_predictor",
|
||||
"OddsPredictorEngine", "get_odds_predictor",
|
||||
"RefereePredictorEngine", "get_referee_predictor"
|
||||
]
|
||||
|
||||
@@ -1,237 +0,0 @@
|
||||
"""
|
||||
Odds Predictor Engine - V20 Ensemble Component
|
||||
Uses market odds and Poisson mathematics for predictions.
|
||||
|
||||
Weight: 30% in ensemble
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from features.poisson_engine import get_poisson_engine
|
||||
from features.value_calculator import get_value_calculator
|
||||
|
||||
|
||||
@dataclass
class OddsPrediction:
    """Result container returned by the odds engine.

    Probability fields hold fractions; ``to_dict`` reports them as
    percentages rounded to one decimal place.
    """

    # 1X2 probabilities implied by the market odds
    market_home_prob: float = 0.33
    market_draw_prob: float = 0.33
    market_away_prob: float = 0.33

    # Expected goals from the Poisson model
    poisson_home_xg: float = 1.3
    poisson_away_xg: float = 1.1

    # Over/Under goal-line probabilities
    over_15_prob: float = 0.75
    over_25_prob: float = 0.55
    over_35_prob: float = 0.30

    # Both teams to score
    btts_yes_prob: float = 0.50

    # Top three scorelines, most likely first
    most_likely_score: str = "1-1"
    second_likely_score: str = "1-0"
    third_likely_score: str = "2-1"

    # Value bet opportunities; None is replaced by a fresh list after init
    value_bets: Optional[list] = None

    confidence: float = 0.0

    def __post_init__(self):
        # Each instance gets its own list instead of sharing a default.
        if self.value_bets is None:
            self.value_bets = []

    def to_dict(self) -> dict:
        """Return a plain dict with probabilities as rounded percentages."""

        def as_percent(fraction):
            return round(fraction * 100, 1)

        payload = {
            "market_home_prob": as_percent(self.market_home_prob),
            "market_draw_prob": as_percent(self.market_draw_prob),
            "market_away_prob": as_percent(self.market_away_prob),
        }
        payload["poisson_home_xg"] = round(self.poisson_home_xg, 2)
        payload["poisson_away_xg"] = round(self.poisson_away_xg, 2)
        payload["over_15_prob"] = as_percent(self.over_15_prob)
        payload["over_25_prob"] = as_percent(self.over_25_prob)
        payload["over_35_prob"] = as_percent(self.over_35_prob)
        payload["btts_yes_prob"] = as_percent(self.btts_yes_prob)
        payload["most_likely_score"] = self.most_likely_score
        payload["second_likely_score"] = self.second_likely_score
        payload["third_likely_score"] = self.third_likely_score
        payload["value_bets"] = self.value_bets
        payload["confidence"] = round(self.confidence, 1)
        return payload
|
||||
|
||||
|
||||
class OddsPredictorEngine:
    """
    Odds-based prediction engine.

    Combines:
    - Market odds, converted to implied 1X2 probabilities and normalised
      so the three outcomes sum to 1
    - The Poisson engine's goal model (xG, over/under, BTTS, scorelines)
    - A model-vs-market comparison that flags value bets
    """

    # Minimum model-vs-market probability gap before a bet is flagged
    EDGE_THRESHOLD_1X2 = 0.10
    EDGE_THRESHOLD_OU25 = 0.08

    def __init__(self):
        self.poisson_engine = get_poisson_engine()
        # The value calculator is optional: predict() does not call it, so a
        # failure to build it must not stop the engine from starting.
        try:
            self.value_calc = get_value_calculator()
        except Exception:
            self.value_calc = None  # type: ignore[assignment]
        # Fallback 1X2 odds used when a key is absent from odds_data
        self.default_ms_h = 2.65
        self.default_ms_d = 3.20
        self.default_ms_a = 2.65
        print("✅ OddsPredictorEngine initialized")

    def _odds_to_prob(self, odds: float) -> float:
        """Convert decimal odds to an implied probability.

        Returns 0.0 for anything that is not a usable price: values that
        cannot be converted to float, NaN, and odds of 1.0 or lower.
        """
        try:
            odds = float(odds)
        except (TypeError, ValueError, OverflowError):
            return 0.0
        # "not >" instead of "<=" so that NaN is rejected as well.
        if not odds > 1.0:
            return 0.0
        return 1.0 / odds

    def _market_1x2(self, odds_data) -> tuple:
        """Return ``((home, draw, away), complete)`` for the 1X2 market.

        The probabilities are the implied probabilities divided by their sum
        (removing the bookmaker margin). ``complete`` is False when at least
        one of the three prices was unusable, in which case the normalised
        values are not a trustworthy market view.
        """
        raw = (
            self._odds_to_prob(odds_data.get("ms_h", self.default_ms_h)),
            self._odds_to_prob(odds_data.get("ms_d", self.default_ms_d)),
            self._odds_to_prob(odds_data.get("ms_a", self.default_ms_a)),
        )
        total = sum(raw) or 1  # avoid dividing by zero when nothing parsed
        fair = tuple(p / total for p in raw)
        return fair, all(p > 0 for p in raw)

    def _detect_value_bets(self, poisson_pred, market_home, market_away,
                           market_over25) -> list:
        """Compare Poisson probabilities with the market and list value bets.

        Args:
            poisson_pred: Object exposing ``home_win_prob``, ``away_win_prob``
                and ``over_25_prob``.
            market_home: Fair market home-win probability, or None to skip
                the 1X2 check.
            market_away: Fair market away-win probability, or None to skip
                the 1X2 check.
            market_over25: Implied over-2.5 probability, or None to skip the
                over/under check.

        Returns:
            List of dicts with ``market``, ``edge`` (percentage points) and
            ``confidence`` keys.
        """
        bets = []

        if market_home is not None and market_away is not None:
            home_edge = poisson_pred.home_win_prob - market_home
            away_edge = poisson_pred.away_win_prob - market_away
            if home_edge > self.EDGE_THRESHOLD_1X2:
                bets.append({
                    "market": "MS 1",
                    "edge": round(home_edge * 100, 1),
                    "confidence": "medium"
                })
            elif home_edge < -self.EDGE_THRESHOLD_1X2 and away_edge > 0:
                # The market overrates the home side. Only back the away side
                # when the model actually gives it a positive edge; the
                # surplus may belong to the draw instead.
                bets.append({
                    "market": "MS 2",
                    "edge": round(away_edge * 100, 1),
                    "confidence": "medium"
                })

        if market_over25 is not None:
            gap = poisson_pred.over_25_prob - market_over25
            if abs(gap) > self.EDGE_THRESHOLD_OU25:
                edge = abs(gap) * 100
                bets.append({
                    "market": "2.5 Üst" if gap > 0 else "2.5 Alt",
                    "edge": round(edge, 1),
                    "confidence": "high" if edge > 10 else "medium"
                })

        return bets

    def predict(self,
                odds_data: Dict[str, float],
                home_goals_avg: float = 1.5,
                home_conceded_avg: float = 1.2,
                away_goals_avg: float = 1.2,
                away_conceded_avg: float = 1.4) -> "OddsPrediction":
        """
        Generate odds-based prediction.

        Args:
            odds_data: Dict with keys like 'ms_h', 'ms_d', 'ms_a', 'ou25_o'.
                None is treated as an empty dict.
            home_goals_avg: Home team's average goals scored
            home_conceded_avg: Home team's average goals conceded
            away_goals_avg: Away team's average goals scored
            away_conceded_avg: Away team's average goals conceded

        Returns:
            OddsPrediction with market and Poisson analysis
        """
        odds_data = odds_data or {}

        # 1. Market probabilities with the bookmaker margin removed
        (market_home, market_draw, market_away), market_complete = (
            self._market_1x2(odds_data)
        )

        # 2. Poisson prediction
        poisson_pred = self.poisson_engine.predict(
            home_goals_avg, home_conceded_avg,
            away_goals_avg, away_conceded_avg
        )

        # 3. Most likely scores, padded with fixed fallbacks
        likely_scores = (
            poisson_pred.most_likely_scores[:3]
            if poisson_pred.most_likely_scores else []
        )
        fallbacks = ("1-1", "1-0", "2-1")
        score_1, score_2, score_3 = (
            likely_scores[i]["score"] if i < len(likely_scores) else fallback
            for i, fallback in enumerate(fallbacks)
        )

        # 4. Value bet detection. A market whose price could not be parsed
        # has probability 0.0 and would otherwise look like a huge edge, so
        # such markets are excluded from the comparison.
        market_over25 = self._odds_to_prob(odds_data.get("ou25_o", 1.9))
        value_bets = self._detect_value_bets(
            poisson_pred,
            market_home if market_complete else None,
            market_away if market_complete else None,
            market_over25 if market_over25 > 0 else None,
        )

        # 5. Confidence: grows with market/Poisson agreement on the home
        # win, plus a bonus per flagged value bet
        agreement = 1.0 - abs(poisson_pred.home_win_prob - market_home)
        confidence = 50.0 + (agreement * 40) + (len(value_bets) * 5)

        return OddsPrediction(
            market_home_prob=market_home,
            market_draw_prob=market_draw,
            market_away_prob=market_away,
            poisson_home_xg=poisson_pred.home_xg,
            poisson_away_xg=poisson_pred.away_xg,
            over_15_prob=poisson_pred.over_15_prob,
            over_25_prob=poisson_pred.over_25_prob,
            over_35_prob=poisson_pred.over_35_prob,
            btts_yes_prob=poisson_pred.btts_yes_prob,
            most_likely_score=score_1,
            second_likely_score=score_2,
            third_likely_score=score_3,
            value_bets=value_bets,
            confidence=min(99.9, confidence)
        )
|
||||
|
||||
|
||||
# Singleton
|
||||
_engine: Optional[OddsPredictorEngine] = None
|
||||
|
||||
|
||||
def get_odds_predictor() -> OddsPredictorEngine:
    """Return the module-wide OddsPredictorEngine, creating it on first use."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = OddsPredictorEngine()
    return _engine
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: run one prediction on sample odds and print it.
    engine = get_odds_predictor()

    print("\n🧪 Odds Predictor Engine Test")
    print("=" * 50)

    sample_odds = {
        "ms_h": 1.85,
        "ms_d": 3.40,
        "ms_a": 4.20,
        "ou25_o": 1.90,
    }
    sample_averages = {
        "home_goals_avg": 1.8,
        "home_conceded_avg": 1.0,
        "away_goals_avg": 1.2,
        "away_conceded_avg": 1.5,
    }
    pred = engine.predict(odds_data=sample_odds, **sample_averages)

    print(f"\n📊 Prediction:")
    for field_name, field_value in pred.to_dict().items():
        print(f" {field_name}: {field_value}")
|
||||
@@ -24,32 +24,29 @@ class PlayerPrediction:
|
||||
extract_training_data.py so that inference values match the
|
||||
distribution the model was trained on (~3-36 range).
|
||||
"""
|
||||
home_squad_quality: float = 12.0 # training-scale composite (~3-36)
|
||||
home_squad_quality: float = 12.0
|
||||
away_squad_quality: float = 12.0
|
||||
squad_diff: float = 0.0 # home - away (training scale)
|
||||
squad_diff: float = 0.0
|
||||
home_key_players: int = 0
|
||||
away_key_players: int = 0
|
||||
home_missing_impact: float = 0.0 # 0-1, how much weaker due to missing players
|
||||
home_missing_impact: float = 0.0
|
||||
away_missing_impact: float = 0.0
|
||||
home_goals_form: int = 0 # Goals in last 5 matches
|
||||
home_goals_form: int = 0
|
||||
away_goals_form: int = 0
|
||||
home_lineup_goals_per90: float = 0.0
|
||||
away_lineup_goals_per90: float = 0.0
|
||||
home_lineup_assists_per90: float = 0.0
|
||||
away_lineup_assists_per90: float = 0.0
|
||||
home_squad_continuity: float = 0.5
|
||||
away_squad_continuity: float = 0.5
|
||||
home_top_scorer_form: int = 0
|
||||
away_top_scorer_form: int = 0
|
||||
home_avg_player_exp: float = 0.0
|
||||
away_avg_player_exp: float = 0.0
|
||||
home_goals_diversity: float = 0.0
|
||||
away_goals_diversity: float = 0.0
|
||||
lineup_available: bool = False
|
||||
confidence: float = 0.0
|
||||
|
||||
def to_dict(self) -> dict:
    """Return the headline squad metrics as a plain dict of rounded values."""
    summary = {}
    # Squad quality values are reported to one decimal place
    for quality_field in ("home_squad_quality", "away_squad_quality", "squad_diff"):
        summary[quality_field] = round(getattr(self, quality_field), 1)
    summary["home_key_players"] = self.home_key_players
    summary["away_key_players"] = self.away_key_players
    # Missing-player impact is reported to two decimal places
    summary["home_missing_impact"] = round(self.home_missing_impact, 2)
    summary["away_missing_impact"] = round(self.away_missing_impact, 2)
    summary["home_goals_form"] = self.home_goals_form
    summary["away_goals_form"] = self.away_goals_form
    summary["lineup_available"] = self.lineup_available
    summary["confidence"] = round(self.confidence, 1)
    return summary
|
||||
|
||||
|
||||
class PlayerPredictorEngine:
|
||||
@@ -90,8 +87,9 @@ class PlayerPredictorEngine:
|
||||
"""
|
||||
|
||||
# Get squad features
|
||||
home_analysis = None
|
||||
away_analysis = None
|
||||
if home_lineup and away_lineup:
|
||||
# Use provided lineups (for live matches)
|
||||
home_analysis = self.squad_engine.analyze_squad_from_list(
|
||||
home_lineup, home_team_id
|
||||
)
|
||||
@@ -99,7 +97,6 @@ class PlayerPredictorEngine:
|
||||
away_lineup, away_team_id
|
||||
)
|
||||
lineup_available = True
|
||||
# Build features dict from analysis objects
|
||||
features = {
|
||||
"home_starting_11": home_analysis.starting_count or 11,
|
||||
"home_goals_last_5": home_analysis.total_goals_last_5,
|
||||
@@ -113,7 +110,6 @@ class PlayerPredictorEngine:
|
||||
"away_forwards": away_analysis.forward_count or 2,
|
||||
}
|
||||
elif match_id:
|
||||
# Try to get from database
|
||||
try:
|
||||
features = self.squad_engine.get_features(
|
||||
match_id, home_team_id, away_team_id
|
||||
@@ -132,58 +128,42 @@ class PlayerPredictorEngine:
|
||||
home_team_id, away_team_id
|
||||
)
|
||||
lineup_available = False
|
||||
|
||||
# Extract features
|
||||
|
||||
home_goals = int(features.get("home_goals_last_5", 0))
|
||||
away_goals = int(features.get("away_goals_last_5", 0))
|
||||
home_key = int(features.get("home_key_players", 0))
|
||||
away_key = int(features.get("away_key_players", 0))
|
||||
home_assists = features.get("home_assists_last_5", 0)
|
||||
away_assists = features.get("away_assists_last_5", 0)
|
||||
home_starting = features.get("home_starting_11", 11)
|
||||
away_starting = features.get("away_starting_11", 11)
|
||||
home_fwd = features.get("home_forwards", 2)
|
||||
away_fwd = features.get("away_forwards", 2)
|
||||
|
||||
# Calculate squad quality — MUST match extract_training_data.py formula
|
||||
# Formula: starting_count * 0.3 + goals * 2.0 + assists * 1.0
|
||||
# + key_players * 3.0 + fwd_count * 1.5
|
||||
# Typical range: ~3 – 36 (model trained on this distribution)
|
||||
home_quality = (
|
||||
home_starting * 0.3 +
|
||||
home_goals * 2.0 +
|
||||
home_assists * 1.0 +
|
||||
home_key * 3.0 +
|
||||
home_fwd * 1.5
|
||||
)
|
||||
away_quality = (
|
||||
away_starting * 0.3 +
|
||||
away_goals * 2.0 +
|
||||
away_assists * 1.0 +
|
||||
away_key * 3.0 +
|
||||
away_fwd * 1.5
|
||||
)
|
||||
|
||||
# Squad difference
|
||||
|
||||
# Squad quality — matches V25 extract_training_data.py:579
|
||||
home_quality = home_starting * 0.3 + home_key * 3.0 + home_fwd * 1.5
|
||||
away_quality = away_starting * 0.3 + away_key * 3.0 + away_fwd * 1.5
|
||||
squad_diff = home_quality - away_quality
|
||||
|
||||
|
||||
# Missing player impact
|
||||
# Priority: sidelined data (position-weighted) > lineup count (basic)
|
||||
if sidelined_data:
|
||||
home_impact, away_impact = self.sidelined_analyzer.analyze_match(sidelined_data)
|
||||
home_missing = min(1.0, max(0.0, home_impact.impact_score))
|
||||
away_missing = min(1.0, max(0.0, away_impact.impact_score))
|
||||
sidelined_available = True
|
||||
else:
|
||||
# Fallback: basic lineup count method
|
||||
expected_xi = 11
|
||||
actual_home_xi = features.get("home_starting_11", 11)
|
||||
actual_away_xi = features.get("away_starting_11", 11)
|
||||
home_missing = (expected_xi - actual_home_xi) / expected_xi if actual_home_xi < expected_xi else 0
|
||||
away_missing = (expected_xi - actual_away_xi) / expected_xi if actual_away_xi < expected_xi else 0
|
||||
sidelined_available = False
|
||||
|
||||
# Confidence: more data sources = higher confidence
|
||||
|
||||
# Player-level features (matches extract_training_data.py:594-650)
|
||||
player_feats = self._compute_player_level_features(
|
||||
home_lineup or [], away_lineup or [],
|
||||
home_team_id, away_team_id,
|
||||
home_analysis, away_analysis,
|
||||
)
|
||||
|
||||
confidence = 70.0 if lineup_available else 35.0
|
||||
if home_goals + away_goals > 10:
|
||||
confidence += 15
|
||||
@@ -191,7 +171,7 @@ class PlayerPredictorEngine:
|
||||
confidence += self.sidelined_analyzer.config.get("sidelined.confidence_boost", 10)
|
||||
if not lineup_available:
|
||||
confidence -= 5.0
|
||||
|
||||
|
||||
return PlayerPrediction(
|
||||
home_squad_quality=home_quality,
|
||||
away_squad_quality=away_quality,
|
||||
@@ -202,9 +182,137 @@ class PlayerPredictorEngine:
|
||||
away_missing_impact=away_missing,
|
||||
home_goals_form=home_goals,
|
||||
away_goals_form=away_goals,
|
||||
home_lineup_goals_per90=player_feats['home_lineup_goals_per90'],
|
||||
away_lineup_goals_per90=player_feats['away_lineup_goals_per90'],
|
||||
home_lineup_assists_per90=player_feats['home_lineup_assists_per90'],
|
||||
away_lineup_assists_per90=player_feats['away_lineup_assists_per90'],
|
||||
home_squad_continuity=player_feats['home_squad_continuity'],
|
||||
away_squad_continuity=player_feats['away_squad_continuity'],
|
||||
home_top_scorer_form=player_feats['home_top_scorer_form'],
|
||||
away_top_scorer_form=player_feats['away_top_scorer_form'],
|
||||
home_avg_player_exp=player_feats['home_avg_player_exp'],
|
||||
away_avg_player_exp=player_feats['away_avg_player_exp'],
|
||||
home_goals_diversity=player_feats['home_goals_diversity'],
|
||||
away_goals_diversity=player_feats['away_goals_diversity'],
|
||||
lineup_available=lineup_available,
|
||||
confidence=max(5.0, confidence)
|
||||
)
|
||||
|
||||
def _compute_player_level_features(
    self,
    home_lineup: List[str],
    away_lineup: List[str],
    home_team_id: str,
    away_team_id: str,
    home_analysis,
    away_analysis,
) -> Dict[str, float]:
    """Compute player-level lineup features for both sides from the database.

    For each side with a non-empty lineup, one query per player collects
    career starts, goals and assists; these are folded into goals/assists
    per start, average starts per player and the share of the lineup that
    has scored. Two further queries add the best scorer's goals in the
    five most recent starts (ordered by match id) and the overlap with the
    team's latest finished starting lineup.

    Args:
        home_lineup: Player ids of the home lineup (may be empty).
        away_lineup: Player ids of the away lineup (may be empty).
        home_team_id: Home team id, used for the continuity query.
        away_team_id: Away team id, used for the continuity query.
        home_analysis: Unused here; kept for the caller's signature.
        away_analysis: Unused here; kept for the caller's signature.

    Returns:
        Dict with ``home_*`` / ``away_*`` keys. The neutral defaults are
        returned unchanged when no connection is available or when any step
        fails; a side with an empty lineup gets its defaults.
    """
    defaults = {
        'home_lineup_goals_per90': 0.0, 'away_lineup_goals_per90': 0.0,
        'home_lineup_assists_per90': 0.0, 'away_lineup_assists_per90': 0.0,
        'home_squad_continuity': 0.5, 'away_squad_continuity': 0.5,
        'home_top_scorer_form': 0, 'away_top_scorer_form': 0,
        'home_avg_player_exp': 0.0, 'away_avg_player_exp': 0.0,
        'home_goals_diversity': 0.0, 'away_goals_diversity': 0.0,
    }
    feature_names = (
        'lineup_goals_per90', 'lineup_assists_per90',
        'squad_continuity', 'top_scorer_form',
        'avg_player_exp', 'goals_diversity',
    )

    # SQL is defined once instead of being rebuilt for every player.
    # NOTE(review): COUNT(*) runs over the LEFT JOIN, so a start with
    # several event rows is counted more than once — confirm this matches
    # the training extraction before changing it.
    player_totals_sql = """
        SELECT
            COUNT(*) as starts,
            COALESCE(SUM(CASE WHEN e.event_type = 'goal'
                AND (e.event_subtype IS NULL OR e.event_subtype NOT ILIKE '%%penaltı kaçırma%%')
                THEN 1 ELSE 0 END), 0) as goals,
            COALESCE((SELECT COUNT(*) FROM match_player_events
                WHERE assist_player_id = %s), 0) as assists
        FROM match_player_participation mpp
        LEFT JOIN match_player_events e
            ON e.match_id = mpp.match_id AND e.player_id = mpp.player_id
        WHERE mpp.player_id = %s AND mpp.is_starting = true
    """
    # NOTE(review): "last 5 starts" is ordered by match_id, not kickoff
    # time — presumably ids are chronological; verify.
    top_scorer_sql = """
        SELECT COUNT(*) as goals
        FROM match_player_events mpe
        WHERE mpe.player_id = %s AND mpe.event_type = 'goal'
            AND mpe.match_id IN (
                SELECT match_id FROM match_player_participation
                WHERE player_id = %s AND is_starting = true
                ORDER BY match_id DESC LIMIT 5
            )
    """
    prev_starters_sql = """
        SELECT mpp.player_id
        FROM match_player_participation mpp
        JOIN matches m ON mpp.match_id = m.id
        WHERE mpp.team_id = %s AND mpp.is_starting = true
            AND m.status = 'FT'
        ORDER BY m.mst_utc DESC
        LIMIT 11
    """

    conn = self.squad_engine.get_conn()
    if conn is None:
        return defaults

    try:
        from psycopg2.extras import RealDictCursor
        result = {}
        sides = (
            ('home', home_lineup, home_team_id),
            ('away', away_lineup, away_team_id),
        )
        for prefix, lineup, team_id in sides:
            if not lineup:
                # No lineup for this side: keep its neutral defaults.
                result.update(
                    {f'{prefix}_{k}': defaults[f'{prefix}_{k}'] for k in feature_names}
                )
                continue

            n_st = len(lineup) or 1
            g90, a90, total_exp = 0.0, 0.0, 0
            best_scorer_total, best_scorer_id = 0, None
            scorers_in_lineup = 0
            top_scorer_form = 0
            squad_continuity = 0.5

            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                for pid in lineup:
                    cur.execute(player_totals_sql, (pid, pid))
                    row = cur.fetchone()
                    if not row or not row['starts']:
                        continue  # player has no recorded starts
                    starts = row['starts']
                    goals = row['goals'] or 0
                    assists = row['assists'] or 0
                    # Despite the "per90" key names these are per-start rates.
                    g90 += goals / starts
                    a90 += assists / starts
                    total_exp += starts
                    if goals > 0:
                        scorers_in_lineup += 1
                    if goals > best_scorer_total:
                        best_scorer_total = goals
                        best_scorer_id = pid

                # Top scorer recent form (goals in last 5 starts)
                if best_scorer_id:
                    cur.execute(top_scorer_sql, (best_scorer_id, best_scorer_id))
                    tsf_row = cur.fetchone()
                    if tsf_row:
                        top_scorer_form = tsf_row['goals'] or 0

                # Squad continuity (overlap with previous match lineup)
                cur.execute(prev_starters_sql, (team_id,))
                prev_starters = {r['player_id'] for r in cur.fetchall()}
                if prev_starters:
                    overlap = len(set(lineup) & prev_starters)
                    squad_continuity = overlap / n_st

            result[f'{prefix}_lineup_goals_per90'] = round(g90, 3)
            result[f'{prefix}_lineup_assists_per90'] = round(a90, 3)
            result[f'{prefix}_squad_continuity'] = round(squad_continuity, 3)
            result[f'{prefix}_top_scorer_form'] = top_scorer_form
            result[f'{prefix}_avg_player_exp'] = round(total_exp / n_st, 1)
            result[f'{prefix}_goals_diversity'] = round(scorers_in_lineup / n_st, 3)

        return result
    except Exception as e:
        print(f"[PlayerPredictor] Player-level features failed: {e}")
        # A failed statement leaves a psycopg2 connection in an aborted
        # transaction until rollback() is called; without this, the next
        # query on the same connection would fail as well.
        try:
            conn.rollback()
        except Exception as rollback_err:
            print(f"[PlayerPredictor] Rollback after failure failed: {rollback_err}")
        return defaults
|
||||
|
||||
def get_1x2_modifier(self, prediction: PlayerPrediction) -> Dict[str, float]:
|
||||
"""
|
||||
|
||||
@@ -1,188 +0,0 @@
|
||||
"""
|
||||
Referee Predictor Engine - V20 Ensemble Component
|
||||
Analyzes referee patterns for cards, goals, and home bias.
|
||||
|
||||
Weight: 15% in ensemble
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from features.referee_engine import get_referee_engine
|
||||
|
||||
|
||||
@dataclass
class RefereePrediction:
    """Result container returned by the referee engine.

    Rate fields hold fractions; ``to_dict`` reports them as percentages.
    """

    referee_name: str = ""
    matches_officiated: int = 0

    # Card tendencies
    avg_yellow_cards: float = 4.0
    avg_red_cards: float = 0.2
    is_card_heavy: bool = False  # above-average card count

    # Goal tendencies
    avg_goals_per_match: float = 2.5
    over_25_rate: float = 0.50
    is_high_scoring: bool = False  # above-average goals

    # Home bias
    home_win_rate: float = 0.45
    home_bias: float = 0.0  # -1 to +1, positive = favors home

    # Penalty tendency
    penalty_rate: float = 0.15

    confidence: float = 0.0

    def to_dict(self) -> dict:
        """Return a plain dict with rounded values and rates as percentages."""

        def as_percent(rate):
            return round(rate * 100, 1)

        report = {}
        report["referee_name"] = self.referee_name
        report["matches_officiated"] = self.matches_officiated
        report["avg_yellow_cards"] = round(self.avg_yellow_cards, 1)
        report["avg_red_cards"] = round(self.avg_red_cards, 2)
        report["is_card_heavy"] = self.is_card_heavy
        report["avg_goals_per_match"] = round(self.avg_goals_per_match, 2)
        report["over_25_rate"] = as_percent(self.over_25_rate)
        report["is_high_scoring"] = self.is_high_scoring
        report["home_win_rate"] = as_percent(self.home_win_rate)
        report["home_bias"] = round(self.home_bias, 2)
        report["penalty_rate"] = as_percent(self.penalty_rate)
        report["confidence"] = round(self.confidence, 1)
        return report
|
||||
|
||||
|
||||
class RefereePredictorEngine:
    """
    Referee-based prediction engine.

    Analyzes:
    - Card tendency (average yellow/red cards)
    - Goal tendency (goals per match, over-2.5 rate)
    - Home bias (share of decisions favoring the home side)
    - Penalty tendency (rate of penalties awarded)
    """

    # League average benchmarks
    LEAGUE_AVG_GOALS = 2.65
    LEAGUE_AVG_YELLOW = 4.0
    LEAGUE_HOME_WIN_RATE = 0.45

    def __init__(self):
        self.referee_engine = get_referee_engine()
        print("✅ RefereePredictorEngine initialized")

    @staticmethod
    def _as_int(value, default: int = 0) -> int:
        """Convert ``value`` to int, returning ``default`` for None/garbage."""
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return default

    @staticmethod
    def _pick(features, key: str, default):
        """Return ``features[key]``, or ``default`` when missing or None."""
        value = features.get(key)
        return default if value is None else value

    def _lookup_features(self, match_id, referee_name, league_id):
        """Fetch the referee feature dict, or None when nothing identifies one.

        With a match id the match-based lookup is used; if a referee name is
        also given and the name-based profile reports more matches, that
        richer profile wins. With only a name, the name-based lookup is used.
        """
        league = league_id or ""
        if match_id:
            features = self.referee_engine.get_features(match_id, league_id=league) or {}
            # Live flows may already have referee_name while the
            # match-based data is sparse; prefer the richer profile.
            if referee_name:
                by_name = self.referee_engine.get_features_by_name(
                    referee_name, league_id=league
                ) or {}
                if (by_name.get("referee_matches", 0) or 0) > (features.get("referee_matches", 0) or 0):
                    features = by_name
            return features
        if referee_name:
            return self.referee_engine.get_features_by_name(
                referee_name, league_id=league
            ) or {}
        return None

    def predict(self,
                match_id: Optional[str] = None,
                referee_name: Optional[str] = None,
                league_id: Optional[str] = None) -> "RefereePrediction":
        """
        Generate referee-based prediction.

        Args:
            match_id: Match ID to find referee
            referee_name: Or provide referee name directly
            league_id: League ID to scope stats (prevents name collisions)

        Returns:
            RefereePrediction with referee analysis. Without a match id or
            name a default prediction (confidence 10) is returned; with
            fewer than 5 recorded matches only the name and match count are
            filled in (confidence 20).
        """
        features = self._lookup_features(match_id, referee_name, league_id)
        if features is None:
            return RefereePrediction(confidence=10.0)

        # A None/blank name from the lookup falls back to the caller's name
        # instead of being rendered as the string "None".
        ref_name = str(features.get("referee_name") or referee_name or "Unknown")
        # The match count may be None; treat it as 0 instead of crashing.
        matches = self._as_int(features.get("referee_matches"))

        if matches < 5:
            # Not enough data
            return RefereePrediction(
                referee_name=ref_name,
                matches_officiated=matches,
                confidence=20.0
            )

        # Extract features; None values fall back to the same defaults as
        # missing keys so the arithmetic below cannot fail on None.
        avg_yellow = self._pick(features, "referee_avg_yellow", 4.0)
        avg_red = self._pick(features, "referee_avg_red", 0.2)
        avg_goals = self._pick(features, "referee_avg_goals", 2.5)
        over25_rate = self._pick(features, "referee_over25_rate", 0.5)
        home_win_rate = self._pick(features, "referee_home_win_rate", self.LEAGUE_HOME_WIN_RATE)
        home_bias = self._pick(features, "referee_home_bias", 0.0)
        penalty_rate = self._pick(features, "referee_penalty_rate", 0.15)

        # Determine tendencies: a red card is weighted as four yellows
        is_card_heavy = (avg_yellow + avg_red * 4) > (self.LEAGUE_AVG_YELLOW + 1)
        is_high_scoring = avg_goals > self.LEAGUE_AVG_GOALS

        # Confidence grows with matches officiated, capped at 90
        confidence = min(90.0, 30.0 + matches * 2)

        return RefereePrediction(
            referee_name=ref_name,
            matches_officiated=matches,
            avg_yellow_cards=avg_yellow,
            avg_red_cards=avg_red,
            is_card_heavy=is_card_heavy,
            avg_goals_per_match=avg_goals,
            over_25_rate=over25_rate,
            is_high_scoring=is_high_scoring,
            home_win_rate=home_win_rate,
            home_bias=home_bias,
            penalty_rate=penalty_rate,
            confidence=confidence
        )

    def get_modifiers(self, prediction: "RefereePrediction") -> Dict[str, float]:
        """
        Get modifiers to apply to other predictions based on referee profile.

        Each modifier is a multiplier centred on 1.0.
        """
        return {
            # Home team gets slight boost if referee has home bias
            "home_modifier": 1.0 + (prediction.home_bias * 0.05),
            # O/U modifier
            "over_25_modifier": 1.0 + (prediction.avg_goals_per_match - self.LEAGUE_AVG_GOALS) * 0.1,
            # Card modifier for card markets
            "cards_modifier": 1.0 + (prediction.avg_yellow_cards - self.LEAGUE_AVG_YELLOW) * 0.05
        }
|
||||
|
||||
|
||||
# Singleton
|
||||
_engine: Optional[RefereePredictorEngine] = None
|
||||
|
||||
|
||||
def get_referee_predictor() -> RefereePredictorEngine:
    """Return the module-wide RefereePredictorEngine, creating it on first use."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = RefereePredictorEngine()
    return _engine
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: look one referee up by name and print the result.
    engine = get_referee_predictor()

    print("\n🧪 Referee Predictor Engine Test")
    print("=" * 50)

    sample_referee = "Cüneyt Çakır"
    pred = engine.predict(referee_name=sample_referee)

    print(f"\n📊 Prediction:")
    for field_name, field_value in pred.to_dict().items():
        print(f" {field_name}: {field_value}")
|
||||
@@ -1,286 +0,0 @@
|
||||
"""
|
||||
Team Predictor Engine - V20 Ensemble Component
|
||||
Combines ELO ratings, form stats, H2H records and team statistics.
|
||||
|
||||
Weight: 30% in ensemble
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, Optional, Tuple, Any
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
# Add parent to path
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from features.elo_system import get_elo_system
|
||||
from features.h2h_engine import get_h2h_engine
|
||||
from features.momentum_engine import get_momentum_engine, MomentumData
|
||||
from features.team_stats_engine import get_team_stats_engine
|
||||
|
||||
|
||||
@dataclass
class TeamPrediction:
    """Result container returned by the team engine.

    The 1X2 probabilities hold fractions; ``to_dict`` reports them as
    percentages. ``raw_features`` is not included in ``to_dict``.
    """

    home_win_prob: float = 0.33
    draw_prob: float = 0.33
    away_win_prob: float = 0.33
    home_xg: float = 1.3
    away_xg: float = 1.1
    form_advantage: float = 0.0  # -1 to +1, positive = home advantage
    h2h_advantage: float = 0.0  # -1 to +1
    elo_diff: float = 0.0
    confidence: float = 0.0
    # Underlying feature values; each instance gets its own dict
    raw_features: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return a plain dict with rounded values and 1X2 as percentages."""
        report = {
            outcome: round(getattr(self, outcome) * 100, 1)
            for outcome in ("home_win_prob", "draw_prob", "away_win_prob")
        }
        report["home_xg"] = round(self.home_xg, 2)
        report["away_xg"] = round(self.away_xg, 2)
        report["form_advantage"] = round(self.form_advantage, 2)
        report["h2h_advantage"] = round(self.h2h_advantage, 2)
        report["elo_diff"] = round(self.elo_diff, 0)
        report["confidence"] = round(self.confidence, 1)
        return report
|
||||
|
||||
|
||||
class TeamPredictorEngine:
    """
    Team-based prediction engine.

    Uses:
    - ELO Rating System (venue-adjusted, league-weighted)
    - H2H Engine (head-to-head history)
    - Momentum Engine (recent form)
    - Team Stats Engine (possession, shots, corners)
    """

    def __init__(self):
        """Fetch the four feature engines through their module-level accessors."""
        self.elo_system = get_elo_system()
        self.h2h_engine = get_h2h_engine()
        self.momentum_engine = get_momentum_engine()
        self.team_stats_engine = get_team_stats_engine()

        print("✅ TeamPredictorEngine initialized")

    def predict(self,
                home_team_id: str,
                away_team_id: str,
                match_date_ms: int,
                home_team_name: str = "",
                away_team_name: str = "") -> TeamPrediction:
        """
        Generate team-based prediction.

        Args:
            home_team_id: Home team ID
            away_team_id: Away team ID
            match_date_ms: Match date in milliseconds
            home_team_name: Home team name. Accepted for interface
                compatibility; this implementation does not read it.
            away_team_name: Away team name. Accepted for interface
                compatibility; this implementation does not read it.

        Returns:
            TeamPrediction with 1X2 probabilities, xG, a confidence score and
            the raw feature dictionary.
        """

        # 1. Get ELO predictions
        elo_pred = self.elo_system.predict_match(home_team_id, away_team_id)
        elo_features = self.elo_system.get_match_features(home_team_id, away_team_id)

        # 2. Get H2H features; neutral values are used when the lookup fails.
        try:
            h2h_features = self.h2h_engine.get_features(
                home_team_id, away_team_id, match_date_ms
            )
        except Exception as e:
            # Best-effort fallback, but report it like the momentum fallback
            # below instead of hiding the failure.
            print(f"⚠️ H2HEngine error: {e}")
            h2h_features = {
                "h2h_home_win_rate": 0.5,
                "h2h_away_win_rate": 0.5,
                "h2h_avg_goals": 2.5,
                "h2h_btts_rate": 0.5
            }

        # 3. Get Momentum/Form features
        try:
            home_mom_data = self.momentum_engine.calculate_momentum(home_team_id, match_date_ms)
            away_mom_data = self.momentum_engine.calculate_momentum(away_team_id, match_date_ms)

            # momentum_score is mapped from (-1 to 1) onto a 0-1 form score.
            home_form_score = (home_mom_data.momentum_score + 1) / 2
            away_form_score = (away_mom_data.momentum_score + 1) / 2
        except Exception as e:
            print(f"⚠️ MomentumEngine error: {e}")
            home_mom_data = MomentumData()
            away_mom_data = MomentumData()
            home_form_score = 0.5
            away_form_score = 0.5

        # 4. Get Team Stats
        home_stats = self.team_stats_engine.get_features(home_team_id, match_date_ms)
        away_stats = self.team_stats_engine.get_features(away_team_id, match_date_ms)

        # 5. Combine predictions
        # ELO-based 1X2 (60% weight)
        elo_home = elo_pred.get("home_win_prob", 0.33)
        elo_draw = elo_pred.get("draw_prob", 0.33)
        elo_away = elo_pred.get("away_win_prob", 0.33)

        # H2H win rates (20% weight)
        h2h_home_rate = h2h_features.get("h2h_home_win_rate", 0.5)
        h2h_away_rate = h2h_features.get("h2h_away_win_rate", 0.5)

        # Form difference (20% weight), positive = home side in better form
        form_diff = home_form_score - away_form_score  # -1 to +1

        # Weighted combination
        final_home = elo_home * 0.6 + h2h_home_rate * 0.2 + (0.5 + form_diff * 0.3) * 0.2
        final_away = elo_away * 0.6 + h2h_away_rate * 0.2 + (0.5 - form_diff * 0.3) * 0.2
        # The draw takes the remaining mass. If upstream rates are inconsistent
        # (home + away above 1) the remainder would be negative, so floor it at
        # zero and let the normalisation below rescale home/away.
        final_draw = max(0.0, 1.0 - final_home - final_away)

        # Normalize
        total = final_home + final_draw + final_away
        if total > 0:
            final_home /= total
            final_draw /= total
            final_away /= total

        # Calculate xG based on stats and form (conservative base)
        home_conversion = home_stats.get("shot_conversion_rate", 0.1)
        away_conversion = away_stats.get("shot_conversion_rate", 0.1)

        base_home_xg = 1.35 + (home_conversion * 3.0)
        base_away_xg = 1.10 + (away_conversion * 2.5)

        # Defense weakness factor. NOTE(review): each side's own
        # "shot_accuracy" is read here and applied to the opponent's xG as a
        # proxy for defensive weakness - confirm this is the intended stat.
        away_def_weakness = away_stats.get("shot_accuracy", 0.35)
        home_def_weakness = home_stats.get("shot_accuracy", 0.35)

        home_xg = base_home_xg * (1 + form_diff * 0.15) * (0.8 + away_def_weakness * 0.6)
        away_xg = base_away_xg * (1 - form_diff * 0.15) * (0.8 + home_def_weakness * 0.6)

        # xG underperformance penalty: above the 0.2 threshold, subtract half
        # of the recorded underperformance, capped at 0.5 goals.
        if getattr(home_mom_data, 'xg_underperformance', 0.0) > 0.2:
            home_xg -= min(0.5, home_mom_data.xg_underperformance * 0.5)

        if getattr(away_mom_data, 'xg_underperformance', 0.0) > 0.2:
            away_xg -= min(0.5, away_mom_data.xg_underperformance * 0.5)

        # H2H adjustment (more conservative): +/-5% for high/low scoring pairs
        h2h_avg_goals = h2h_features.get("h2h_avg_goals", 2.5)
        if h2h_avg_goals > 3.0:
            home_xg *= 1.05
            away_xg *= 1.05
        elif h2h_avg_goals < 2.0:
            home_xg *= 0.95
            away_xg *= 0.95

        # Clamp xG to reasonable range
        home_xg = max(0.5, min(3.5, home_xg))
        away_xg = max(0.3, min(3.0, away_xg))

        # Calculate confidence
        # Higher when ELO, H2H, and Form all agree
        elo_winner = "H" if elo_home > max(elo_draw, elo_away) else ("A" if elo_away > elo_draw else "D")
        # H2H only ever votes home or away; ties go to away.
        h2h_winner = "H" if h2h_home_rate > h2h_away_rate else "A"
        form_winner = "H" if form_diff > 0.1 else ("A" if form_diff < -0.1 else "D")

        agreement = sum([
            elo_winner == h2h_winner,
            elo_winner == form_winner,
            h2h_winner == form_winner
        ])

        max_prob = max(final_home, final_draw, final_away)
        confidence = max_prob * 100 * (0.7 + agreement * 0.1)

        # Collect raw features for the downstream model.
        raw_features = {
            **elo_features,

            # Form features: trends applied to a fixed 1.5 baseline as proxies.
            "home_goals_avg": 1.5 + home_mom_data.goals_trend,  # Proxy
            "home_conceded_avg": 1.5 - home_mom_data.conceded_trend,  # Proxy
            "away_goals_avg": 1.5 + away_mom_data.goals_trend,
            "away_conceded_avg": 1.5 - away_mom_data.conceded_trend,

            # Constant placeholders: these values are not read from the
            # momentum data here.
            "home_clean_sheet_rate": 0.2,
            "away_clean_sheet_rate": 0.2,
            "home_scoring_rate": 0.8,
            "away_scoring_rate": 0.8,

            "home_winning_streak": home_mom_data.winning_streak,
            "away_winning_streak": away_mom_data.winning_streak,
            "home_unbeaten_streak": home_mom_data.unbeaten_streak,
            "away_unbeaten_streak": away_mom_data.unbeaten_streak,

            # H2H Features
            **h2h_features,

            # Team Stats
            "home_avg_possession": home_stats.get("avg_possession", 0.5),
            "away_avg_possession": away_stats.get("avg_possession", 0.5),
            "home_avg_shots_on_target": home_stats.get("avg_shots_on_target", 3.5),
            "away_avg_shots_on_target": away_stats.get("avg_shots_on_target", 3.5),
            "home_shot_conversion": home_stats.get("shot_conversion_rate", 0.1),
            "away_shot_conversion": away_stats.get("shot_conversion_rate", 0.1),
            "home_avg_corners": home_stats.get("avg_corners", 4.5),
            "away_avg_corners": away_stats.get("avg_corners", 4.5),

            # Derived (same proxy as the conceded averages above)
            "home_xga": 1.5 - home_mom_data.conceded_trend,
            "away_xga": 1.5 - away_mom_data.conceded_trend
        }

        return TeamPrediction(
            home_win_prob=final_home,
            draw_prob=final_draw,
            away_win_prob=final_away,
            home_xg=home_xg,
            away_xg=away_xg,
            form_advantage=form_diff,
            h2h_advantage=h2h_home_rate - h2h_away_rate,
            elo_diff=elo_features.get("elo_diff", 0),
            confidence=confidence,
            raw_features=raw_features
        )
|
||||
|
||||
|
||||
# Singleton
|
||||
_engine: Optional[TeamPredictorEngine] = None
|
||||
|
||||
|
||||
def get_team_predictor() -> TeamPredictorEngine:
    """Return the module-wide TeamPredictorEngine, building it on first call."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = TeamPredictorEngine()
    return _engine
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: run one prediction with placeholder IDs and print it.
    engine = get_team_predictor()

    print("\n🧪 Team Predictor Engine Test")
    print("=" * 50)

    # Test with sample IDs
    sample_match = {
        "home_team_id": "test_home",
        "away_team_id": "test_away",
        "match_date_ms": 1707393600000,
    }
    pred = engine.predict(**sample_match)

    print(f"\n📊 Prediction:")
    for k, v in pred.to_dict().items():
        print(f" {k}: {v}")
|
||||
@@ -15,13 +15,9 @@ Orijinal Faktörler:
|
||||
- Tarihsel upset pattern
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, Any, Optional, Tuple, List
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
try:
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
+46
-18
@@ -21,6 +21,7 @@ except ImportError:
|
||||
HAS_BASKETBALL = False
|
||||
from services.single_match_orchestrator import get_single_match_orchestrator
|
||||
from services.v26_shadow_engine import get_v26_shadow_engine
|
||||
from models.league_model import get_league_model_loader
|
||||
|
||||
load_dotenv()
|
||||
|
||||
@@ -123,7 +124,15 @@ def health_check() -> dict[str, Any]:
|
||||
try:
|
||||
orchestrator = get_single_match_orchestrator()
|
||||
shadow_engine = get_v26_shadow_engine()
|
||||
|
||||
|
||||
# Per-market V25 model status
|
||||
v25_readiness: dict[str, Any] = {"fully_loaded": False}
|
||||
try:
|
||||
v25_predictor = orchestrator._get_v25_predictor()
|
||||
v25_readiness = v25_predictor.readiness_summary()
|
||||
except Exception as v25_err:
|
||||
v25_readiness = {"fully_loaded": False, "error": str(v25_err)}
|
||||
|
||||
if HAS_BASKETBALL:
|
||||
basketball_predictor = get_basketball_v25_predictor()
|
||||
basketball_readiness = basketball_predictor.readiness_summary()
|
||||
@@ -131,35 +140,52 @@ def health_check() -> dict[str, Any]:
|
||||
else:
|
||||
basketball_readiness = {"fully_loaded": False, "error": "Basketball module not found"}
|
||||
ready = True
|
||||
|
||||
|
||||
league_readiness = get_league_model_loader().readiness_summary()
|
||||
overall_ready = ready and v25_readiness.get("fully_loaded", False)
|
||||
return {
|
||||
"status": "healthy" if ready else "degraded",
|
||||
"status": "healthy" if overall_ready else "degraded",
|
||||
"engine": "v28.main",
|
||||
"mode": os.getenv("AI_ENGINE_MODE", "v28"),
|
||||
"ready": ready,
|
||||
"ready": overall_ready,
|
||||
"v25_football": v25_readiness,
|
||||
"league_specific": league_readiness,
|
||||
"basketball_v25": basketball_readiness,
|
||||
"v26_shadow": shadow_engine.readiness_summary(),
|
||||
"prediction_service_ready": True,
|
||||
"model_loaded": ready,
|
||||
"model_loaded": overall_ready,
|
||||
"orchestrator_mode": getattr(orchestrator, "engine_mode", "v28"),
|
||||
}
|
||||
except Exception as error:
|
||||
return {"status": "unhealthy", "ready": False, "error": str(error)}
|
||||
|
||||
|
||||
_REQUIRED_RESPONSE_FIELDS = ("match_info", "market_board", "main_pick", "bet_summary", "data_quality")
|
||||
|
||||
|
||||
@app.post("/v20plus/analyze/{match_id}")
async def analyze_match_v20plus(match_id: str) -> dict[str, Any]:
    """Run the single-match analysis and return it with a timing field.

    The orchestrator call is synchronous, so it is executed exactly once in a
    worker thread to keep the event loop free.

    Raises:
        HTTPException: 404 when the orchestrator returns an empty result.
    """
    # perf_counter is monotonic, so the elapsed time is not affected by
    # wall-clock adjustments.
    started_at = time.perf_counter()
    orchestrator = get_single_match_orchestrator()
    result = await asyncio.to_thread(orchestrator.analyze_match, match_id)
    elapsed_ms = int((time.perf_counter() - started_at) * 1000)

    if not result:
        raise HTTPException(status_code=404, detail=f"Match not found: {match_id}")

    # Response validation: log missing required fields (non-fatal)
    missing_fields = [f for f in _REQUIRED_RESPONSE_FIELDS if f not in result]
    if missing_fields:
        print(f"⚠️ [API] analyze/{match_id} response missing fields: {missing_fields} ({elapsed_ms}ms)")

    result["timing_ms"] = elapsed_ms
    return result
|
||||
|
||||
|
||||
@app.get("/v20plus/analyze-htms/{match_id}")
async def analyze_match_htms_v20plus(match_id: str) -> dict[str, Any]:
    """Return the orchestrator's HT/MS analysis for one match.

    The synchronous orchestrator call runs exactly once, in a worker thread,
    so it does not block the event loop.

    Raises:
        HTTPException: 404 when the orchestrator returns an empty result.
    """
    orchestrator = get_single_match_orchestrator()
    result = await asyncio.to_thread(orchestrator.analyze_match_htms, match_id)
    if not result:
        raise HTTPException(status_code=404, detail=f"Match not found: {match_id}")
    return result
|
||||
@@ -230,11 +256,12 @@ async def analyze_match_htft_v20plus(match_id: str, timeout_sec: int = 30) -> di
|
||||
@app.post("/v20plus/coupon")
async def generate_coupon_v20plus(request: CouponRequest) -> dict[str, Any]:
    """Build a coupon from the requested matches in a worker thread.

    The strategy falls back to "BALANCED" when the request leaves it empty.
    """
    orchestrator = get_single_match_orchestrator()
    # asyncio.to_thread forwards keyword arguments, so the options are passed
    # by name and do not depend on build_coupon's parameter order.
    return await asyncio.to_thread(
        orchestrator.build_coupon,
        match_ids=request.match_ids,
        strategy=request.strategy or "BALANCED",
        max_matches=request.max_matches,
        min_confidence=request.min_confidence,
    )
|
||||
|
||||
|
||||
@@ -244,7 +271,7 @@ async def get_daily_banker_v20plus(count: int = 3) -> dict[str, Any]:
|
||||
raise HTTPException(status_code=400, detail="count must be >= 1")
|
||||
|
||||
orchestrator = get_single_match_orchestrator()
|
||||
bankers = orchestrator.get_daily_bankers(count=count)
|
||||
bankers = await asyncio.to_thread(orchestrator.get_daily_bankers, count)
|
||||
return {"count": len(bankers), "bankers": bankers}
|
||||
|
||||
@app.get("/v20plus/reversal-watchlist")
|
||||
@@ -262,11 +289,12 @@ async def get_reversal_watchlist_v20plus(
|
||||
raise HTTPException(status_code=400, detail="min_score must be between 0 and 100")
|
||||
|
||||
orchestrator = get_single_match_orchestrator()
|
||||
return orchestrator.get_reversal_watchlist(
|
||||
count=count,
|
||||
horizon_hours=horizon_hours,
|
||||
min_score=min_score,
|
||||
top_leagues_only=top_leagues_only,
|
||||
return await asyncio.to_thread(
|
||||
orchestrator.get_reversal_watchlist,
|
||||
count,
|
||||
horizon_hours,
|
||||
min_score,
|
||||
top_leagues_only,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -46,6 +46,9 @@ SUPPORTED_MARKETS = [
|
||||
"ht_ft", # Half-Time/Full-Time
|
||||
"dc", # Double Chance
|
||||
"ht", # Half-Time Result
|
||||
"ht_home", # Half-Time Home win
|
||||
"ht_draw", # Half-Time Draw
|
||||
"ht_away", # Half-Time Away win
|
||||
]
|
||||
|
||||
|
||||
@@ -111,6 +114,9 @@ class Calibrator:
|
||||
"ht_ft": 0.92,
|
||||
"dc": 0.97,
|
||||
"ht": 0.92,
|
||||
"ht_home": 0.92,
|
||||
"ht_draw": 0.92,
|
||||
"ht_away": 0.92,
|
||||
}
|
||||
self._load_calibrators()
|
||||
|
||||
|
||||
@@ -0,0 +1,191 @@
|
||||
"""
|
||||
League-Specific Model Loader
|
||||
=============================
|
||||
Loads per-league XGBoost models + isotonic calibrators trained by
|
||||
scripts/train_league_models.py and provides a unified prediction interface.
|
||||
|
||||
Falls back to general V25 for any market/league without a dedicated model.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import pickle
|
||||
from functools import lru_cache
|
||||
from typing import Dict, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import xgboost as xgb
|
||||
|
||||
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
LEAGUE_MODEL_DIR = os.path.join(AI_ENGINE_DIR, "models", "league_specific")
|
||||
|
||||
# Market file name → (num_class, label_list).
# The class count always equals the number of labels, so it is derived here.
MARKET_META: Dict[str, Tuple[int, list]] = {
    market: (len(labels), list(labels))
    for market, labels in (
        ("ms", ("1", "X", "2")),
        ("ou15", ("Over", "Under")),
        ("ou25", ("Over", "Under")),
        ("ou35", ("Over", "Under")),
        ("btts", ("Yes", "No")),
        ("ht", ("1", "X", "2")),
        ("ht_ou05", ("Over", "Under")),
        ("ht_ou15", ("Over", "Under")),
        ("htft", ("1/1", "1/X", "1/2", "X/1", "X/X", "X/2", "2/1", "2/X", "2/2")),
        ("oe", ("Odd", "Even")),
        ("cards", ("Over", "Under")),
        ("handicap", ("1", "X", "2")),
    )
}

# Signal key map (file key → uppercase signal key used in _get_v25_signal)
FILE_TO_SIGNAL = dict(
    ms="MS",
    ou15="OU15",
    ou25="OU25",
    ou35="OU35",
    btts="BTTS",
    ht="HT",
    ht_ou05="HT_OU05",
    ht_ou15="HT_OU15",
    htft="HTFT",
    oe="OE",
    cards="CARDS",
    handicap="HCAP",
)
|
||||
|
||||
|
||||
class LeagueModel:
    """Holds XGBoost models + isotonic calibrators for one league."""

    def __init__(self, league_id: str):
        """Record the league's model directory; nothing is read until load()."""
        self.league_id = league_id
        self.league_dir = os.path.join(LEAGUE_MODEL_DIR, league_id)
        self.models: Dict[str, xgb.Booster] = {}    # market_key → booster
        self.calibrators: Dict[str, object] = {}    # cal_key → isotonic
        self.feature_cols: Optional[list] = None
        self._loaded = False

    def load(self) -> bool:
        """Load feature columns, boosters and calibrators from the league dir.

        Everything is read into local containers first and published onto the
        instance only after the whole directory was read, so a failure midway
        never leaves a half-loaded model visible to has_market/predict_market.

        Returns:
            True when at least one booster or calibrator is available,
            False when the directory is missing or loading failed.
        """
        if not os.path.isdir(self.league_dir):
            return False
        try:
            feature_cols = self.feature_cols
            fc_path = os.path.join(self.league_dir, "feature_cols.json")
            if os.path.exists(fc_path):
                with open(fc_path, encoding="utf-8") as f:
                    feature_cols = json.load(f)

            models = dict(self.models)
            for mkey in MARKET_META:
                xgb_path = os.path.join(self.league_dir, f"xgb_{mkey}.json")
                # Files of 100 bytes or less are skipped as not being a model.
                if os.path.exists(xgb_path) and os.path.getsize(xgb_path) > 100:
                    b = xgb.Booster()
                    b.load_model(xgb_path)
                    models[mkey] = b

            calibrators = dict(self.calibrators)
            for fname in os.listdir(self.league_dir):
                if fname.startswith("cal_") and fname.endswith(".pkl"):
                    cal_key = fname[4:-4]  # strip cal_ and .pkl
                    with open(os.path.join(self.league_dir, fname), "rb") as f:
                        # NOTE(security): unpickling executes arbitrary code.
                        # These files must only come from the trusted training
                        # pipeline, never from user-supplied locations.
                        calibrators[cal_key] = pickle.load(f)
        except Exception as e:
            print(f"[LeagueModel] Load failed for {self.league_id}: {e}")
            return False

        self.feature_cols = feature_cols
        self.models = models
        self.calibrators = calibrators
        self._loaded = bool(self.models or self.calibrators)
        return self._loaded

    def has_market(self, mkey: str) -> bool:
        """Return True when a booster is loaded for this market key."""
        return mkey in self.models

    def predict_market(
        self,
        mkey: str,
        feature_row: Dict[str, float],
    ) -> Optional[Dict[str, float]]:
        """
        Predict one market using league-specific XGBoost + isotonic calibration.

        Features missing from ``feature_row`` are filled with 0.0.

        Returns {label: prob} dict or None if no model is available or the
        prediction fails.
        """
        if mkey not in self.models:
            return None

        num_class, labels = MARKET_META[mkey]
        fc = self.feature_cols
        if fc is None:
            # Fallback to whatever the booster expects (it knows its feature names)
            fc = list(self.models[mkey].feature_names or [])

        try:
            X = pd.DataFrame([{col: feature_row.get(col, 0.0) for col in fc}])
            dmat = xgb.DMatrix(X)
            raw = self.models[mkey].predict(dmat)

            if num_class > 2:
                probs_arr = raw.reshape(-1, num_class)[0]
                probs = {labels[i]: float(probs_arr[i]) for i in range(num_class)}
                # Calibrate each class that has its own calibrator
                # ("<market>_<class index>"), then renormalise so the classes
                # sum to one again.
                cal_total = 0.0
                for i, label in enumerate(labels):
                    cal_key = f"{mkey}_{i}"
                    if cal_key in self.calibrators:
                        p_cal = float(self.calibrators[cal_key].predict([probs_arr[i]])[0])
                        probs[label] = max(0.01, min(0.99, p_cal))
                    cal_total += probs[label]
                if cal_total > 0:
                    probs = {k: v / cal_total for k, v in probs.items()}
            else:
                # Binary market: the booster output is the probability of the
                # first label; the second label is its complement.
                p = float(raw[0])
                cal_key = mkey
                if cal_key in self.calibrators:
                    p = float(self.calibrators[cal_key].predict([p])[0])
                p = max(0.01, min(0.99, p))
                probs = {labels[0]: p, labels[1]: 1.0 - p}

            return probs
        except Exception as e:
            print(f"[LeagueModel] predict_market({mkey}) failed for {self.league_id}: {e}")
            return None
|
||||
|
||||
|
||||
class LeagueModelLoader:
    """
    In-memory cache for league-specific models.

    Leagues without a loadable model are cached as None, so the directory is
    not re-scanned on every request. Cache misses (load, insert, eviction) are
    serialized by a lock, which lets the loader be shared by worker threads
    such as those started with asyncio.to_thread.
    """

    def __init__(self, max_cached: int = 80):
        """Create an empty cache holding at most ``max_cached`` leagues."""
        import threading  # local import: only needed for the cache lock

        self._cache: Dict[str, Optional[LeagueModel]] = {}
        self._max_cached = max_cached
        self._lock = threading.Lock()

    def get(self, league_id: str) -> Optional[LeagueModel]:
        """Return loaded LeagueModel for this league, or None if unavailable."""
        # Fast path: cache hits (including cached None) need no lock.
        try:
            return self._cache[league_id]
        except KeyError:
            pass

        with self._lock:
            # Another thread may have loaded this league while we waited.
            if league_id in self._cache:
                return self._cache[league_id]

            # Evict the oldest inserted entry if the cache is full. The
            # emptiness check avoids StopIteration when max_cached <= 0.
            if self._cache and len(self._cache) >= self._max_cached:
                oldest = next(iter(self._cache))
                del self._cache[oldest]

            model = LeagueModel(league_id)
            loaded = model.load()
            entry = model if loaded else None
            self._cache[league_id] = entry

        if loaded:
            n_models = len(model.models)
            n_cals = len(model.calibrators)
            print(f"[LeagueModel] Loaded {league_id}: {n_models} XGB models, {n_cals} calibrators")
        # Return the local reference: the cache slot may already have been
        # evicted by another thread.
        return entry

    def available_leagues(self) -> list:
        """List the league sub-directories present under LEAGUE_MODEL_DIR."""
        if not os.path.isdir(LEAGUE_MODEL_DIR):
            return []
        return [d for d in os.listdir(LEAGUE_MODEL_DIR)
                if os.path.isdir(os.path.join(LEAGUE_MODEL_DIR, d))]

    def readiness_summary(self) -> dict:
        """Summarise the on-disk league count and the successfully cached models."""
        leagues = self.available_leagues()
        # Snapshot the values so a concurrent insert cannot break iteration.
        cached_entries = list(self._cache.values())
        return {
            "league_specific_dir": LEAGUE_MODEL_DIR,
            "available_leagues": len(leagues),
            "cached": sum(1 for v in cached_entries if v is not None),
        }
|
||||
|
||||
|
||||
# ── Singleton ──────────────────────────────────────────────────────
|
||||
_loader: Optional[LeagueModelLoader] = None
|
||||
|
||||
|
||||
def get_league_model_loader() -> LeagueModelLoader:
    """Return the module-wide LeagueModelLoader, creating it on first call."""
    global _loader
    if _loader is not None:
        return _loader
    _loader = LeagueModelLoader()
    return _loader
|
||||
File diff suppressed because it is too large
Load Diff
@@ -20,6 +20,13 @@ from dataclasses import dataclass, field
|
||||
import xgboost as xgb
|
||||
import lightgbm as lgb
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
try:
|
||||
from config.config_loader import get_config as _get_cfg
|
||||
except ImportError:
|
||||
_get_cfg = None # type: ignore[assignment]
|
||||
|
||||
# CatBoost is optional
|
||||
try:
|
||||
from catboost import CatBoostClassifier
|
||||
@@ -228,7 +235,7 @@ class V25Predictor:
|
||||
print(f"[V25] Using fallback feature columns ({len(V25Predictor._FALLBACK_FEATURE_COLS)} features)")
|
||||
return V25Predictor._FALLBACK_FEATURE_COLS
|
||||
|
||||
# Model weights for ensemble
|
||||
# Model weights for ensemble (overridden from config in __init__)
|
||||
DEFAULT_WEIGHTS = {
|
||||
'xgb': 0.50,
|
||||
'lgb': 0.50,
|
||||
@@ -245,6 +252,16 @@ class V25Predictor:
|
||||
self.models = {} # market -> {'xgb': model, 'lgb': model}
|
||||
self._loaded = False
|
||||
self.FEATURE_COLS = self._load_feature_cols()
|
||||
# Load weights from config (falls back to class default 0.50/0.50)
|
||||
if _get_cfg is not None:
|
||||
try:
|
||||
cfg = _get_cfg()
|
||||
self.DEFAULT_WEIGHTS = {
|
||||
'xgb': float(cfg.get('model_ensemble.xgb_weight', 0.50)),
|
||||
'lgb': float(cfg.get('model_ensemble.lgb_weight', 0.50)),
|
||||
}
|
||||
except Exception:
|
||||
pass # keep class-level defaults
|
||||
|
||||
# All trained market models available in V25
|
||||
ALL_MARKETS = [
|
||||
@@ -275,21 +292,34 @@ class V25Predictor:
|
||||
xgb_content = f.read()
|
||||
booster = xgb.Booster()
|
||||
booster.load_model(bytearray(xgb_content, 'utf-8'))
|
||||
self.models[market]['xgb'] = booster
|
||||
loaded_count += 1
|
||||
|
||||
# Corruption detection: verify model can run a dummy prediction
|
||||
try:
|
||||
_dummy = pd.DataFrame([{col: 0.0 for col in self.FEATURE_COLS}])
|
||||
booster.predict(xgb.DMatrix(_dummy))
|
||||
self.models[market]['xgb'] = booster
|
||||
loaded_count += 1
|
||||
except Exception as _ce:
|
||||
print(f"[V25] ⚠️ XGB model for {market} failed integrity check: {_ce} — skipping")
|
||||
|
||||
# Load LightGBM (read content in Python to avoid non-ASCII path issues)
|
||||
lgb_path = os.path.join(self.models_dir, f'lgb_v25_{market}.txt')
|
||||
if os.path.exists(lgb_path) and os.path.getsize(lgb_path) > 0:
|
||||
with open(lgb_path, 'r', encoding='utf-8') as f:
|
||||
model_str = f.read()
|
||||
self.models[market]['lgb'] = lgb.Booster(model_str=model_str)
|
||||
loaded_count += 1
|
||||
|
||||
lgb_model = lgb.Booster(model_str=model_str)
|
||||
# Corruption detection: verify model can run a dummy prediction
|
||||
try:
|
||||
_dummy = pd.DataFrame([{col: 0.0 for col in self.FEATURE_COLS}])
|
||||
lgb_model.predict(_dummy)
|
||||
self.models[market]['lgb'] = lgb_model
|
||||
loaded_count += 1
|
||||
except Exception as _ce:
|
||||
print(f"[V25] ⚠️ LGB model for {market} failed integrity check: {_ce} — skipping")
|
||||
|
||||
# Remove empty entries
|
||||
if not self.models[market]:
|
||||
del self.models[market]
|
||||
|
||||
|
||||
print(f"[V25] Loaded {loaded_count} model files across {len(self.models)} markets: {list(self.models.keys())}")
|
||||
self._loaded = loaded_count > 0
|
||||
return self._loaded
|
||||
@@ -305,7 +335,27 @@ class V25Predictor:
|
||||
if not self._loaded:
|
||||
if not self.load_models():
|
||||
raise RuntimeError("Failed to load V25 models")
|
||||
|
||||
|
||||
def readiness_summary(self) -> Dict[str, Any]:
    """Return per-market model status for health check endpoint.

    Triggers ``load_models()`` when nothing has been loaded yet.

    Returns:
        Dict with ``fully_loaded`` (every market in ALL_MARKETS has at least
        one model), ``loaded_markets``, ``missing_markets``, ``markets``
        (per-market xgb/lgb/ready flags) and a copy of the ensemble
        ``weights``.
    """
    if not self._loaded:
        self.load_models()

    market_status = {}
    for market in self.ALL_MARKETS:
        m = self.models.get(market, {})
        market_status[market] = {
            "xgb": "xgb" in m,
            "lgb": "lgb" in m,
            # A market counts as ready when either model is available.
            "ready": bool(m),
        }

    loaded_markets = [k for k, v in market_status.items() if v["ready"]]
    return {
        "fully_loaded": len(loaded_markets) == len(self.ALL_MARKETS),
        "loaded_markets": loaded_markets,
        "missing_markets": [m for m in self.ALL_MARKETS if m not in loaded_markets],
        # Per-market detail was computed but never exposed before; added as a
        # new key so existing consumers are unaffected.
        "markets": market_status,
        # Copy so callers cannot mutate the predictor's weights.
        "weights": dict(self.DEFAULT_WEIGHTS),
    }
|
||||
|
||||
def _prepare_features(self, features: Dict[str, float]) -> pd.DataFrame:
|
||||
"""Prepare feature vector for prediction."""
|
||||
X = pd.DataFrame([{col: features.get(col, 0.0) for col in self.FEATURE_COLS}])
|
||||
@@ -563,13 +613,23 @@ class V25Predictor:
|
||||
) -> List[ValueBet]:
|
||||
"""Detect value bets based on model vs market odds."""
|
||||
value_bets = []
|
||||
min_edge = 0.05 # 5% minimum edge
|
||||
|
||||
# Market-specific minimum edge thresholds
|
||||
# MS: higher variance → require more edge
|
||||
# OU/BTTS: binary markets → tighter edge acceptable
|
||||
EDGE_THRESHOLDS = {
|
||||
'MS': 0.06,
|
||||
'OU25': 0.04,
|
||||
'BTTS': 0.04,
|
||||
}
|
||||
ms_edge = EDGE_THRESHOLDS['MS']
|
||||
ou_edge = EDGE_THRESHOLDS['OU25']
|
||||
btts_edge = EDGE_THRESHOLDS['BTTS']
|
||||
|
||||
# MS value bets
|
||||
if 'ms_h' in odds and odds['ms_h'] > 0:
|
||||
implied = 1 / odds['ms_h']
|
||||
edge = home_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > ms_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='MS',
|
||||
pick='1',
|
||||
@@ -582,7 +642,7 @@ class V25Predictor:
|
||||
if 'ms_d' in odds and odds['ms_d'] > 0:
|
||||
implied = 1 / odds['ms_d']
|
||||
edge = draw_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > ms_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='MS',
|
||||
pick='X',
|
||||
@@ -595,7 +655,7 @@ class V25Predictor:
|
||||
if 'ms_a' in odds and odds['ms_a'] > 0:
|
||||
implied = 1 / odds['ms_a']
|
||||
edge = away_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > ms_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='MS',
|
||||
pick='2',
|
||||
@@ -609,7 +669,7 @@ class V25Predictor:
|
||||
if 'ou25_o' in odds and odds['ou25_o'] > 0:
|
||||
implied = 1 / odds['ou25_o']
|
||||
edge = over_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > ou_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='OU25',
|
||||
pick='Over',
|
||||
@@ -622,7 +682,7 @@ class V25Predictor:
|
||||
if 'ou25_u' in odds and odds['ou25_u'] > 0:
|
||||
implied = 1 / odds['ou25_u']
|
||||
edge = under_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > ou_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='OU25',
|
||||
pick='Under',
|
||||
@@ -636,7 +696,7 @@ class V25Predictor:
|
||||
if 'btts_y' in odds and odds['btts_y'] > 0:
|
||||
implied = 1 / odds['btts_y']
|
||||
edge = btts_yes_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > btts_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='BTTS',
|
||||
pick='Yes',
|
||||
@@ -649,7 +709,7 @@ class V25Predictor:
|
||||
if 'btts_n' in odds and odds['btts_n'] > 0:
|
||||
implied = 1 / odds['btts_n']
|
||||
edge = btts_no_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > btts_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='BTTS',
|
||||
pick='No',
|
||||
|
||||
@@ -0,0 +1,160 @@
|
||||
{
|
||||
"total_test": 23039,
|
||||
"thresholds": {
|
||||
"0.0": {
|
||||
"n_matches": 22227,
|
||||
"pct": 96.5,
|
||||
"markets": {
|
||||
"ms": {
|
||||
"hit_rate": 0.5363,
|
||||
"avg_roi": -0.0046,
|
||||
"total_roi": -103.02
|
||||
},
|
||||
"ou15": {
|
||||
"hit_rate": 0.7463,
|
||||
"avg_roi": 0.0144,
|
||||
"total_roi": 319.02
|
||||
},
|
||||
"ou25": {
|
||||
"hit_rate": 0.6111,
|
||||
"avg_roi": -0.006,
|
||||
"total_roi": -134.41
|
||||
},
|
||||
"ou35": {
|
||||
"hit_rate": 0.7302,
|
||||
"avg_roi": -0.014,
|
||||
"total_roi": -310.51
|
||||
},
|
||||
"btts": {
|
||||
"hit_rate": 0.5848,
|
||||
"avg_roi": 0.0031,
|
||||
"total_roi": 69.5
|
||||
}
|
||||
}
|
||||
},
|
||||
"0.1": {
|
||||
"n_matches": 23033,
|
||||
"pct": 100.0,
|
||||
"markets": {
|
||||
"ms": {
|
||||
"hit_rate": 0.546,
|
||||
"avg_roi": -0.0045,
|
||||
"total_roi": -104.38
|
||||
},
|
||||
"ou15": {
|
||||
"hit_rate": 0.7533,
|
||||
"avg_roi": 0.0145,
|
||||
"total_roi": 335.02
|
||||
},
|
||||
"ou25": {
|
||||
"hit_rate": 0.6193,
|
||||
"avg_roi": -0.0042,
|
||||
"total_roi": -96.97
|
||||
},
|
||||
"ou35": {
|
||||
"hit_rate": 0.7277,
|
||||
"avg_roi": -0.0147,
|
||||
"total_roi": -338.57
|
||||
},
|
||||
"btts": {
|
||||
"hit_rate": 0.5886,
|
||||
"avg_roi": 0.0025,
|
||||
"total_roi": 57.21
|
||||
}
|
||||
}
|
||||
},
|
||||
"0.2": {
|
||||
"n_matches": 23034,
|
||||
"pct": 100.0,
|
||||
"markets": {
|
||||
"ms": {
|
||||
"hit_rate": 0.5459,
|
||||
"avg_roi": -0.0046,
|
||||
"total_roi": -105.38
|
||||
},
|
||||
"ou15": {
|
||||
"hit_rate": 0.7533,
|
||||
"avg_roi": 0.0146,
|
||||
"total_roi": 335.26
|
||||
},
|
||||
"ou25": {
|
||||
"hit_rate": 0.6193,
|
||||
"avg_roi": -0.0043,
|
||||
"total_roi": -97.97
|
||||
},
|
||||
"ou35": {
|
||||
"hit_rate": 0.7276,
|
||||
"avg_roi": -0.0147,
|
||||
"total_roi": -339.57
|
||||
},
|
||||
"btts": {
|
||||
"hit_rate": 0.5887,
|
||||
"avg_roi": 0.0025,
|
||||
"total_roi": 57.62
|
||||
}
|
||||
}
|
||||
},
|
||||
"0.3": {
|
||||
"n_matches": 23039,
|
||||
"pct": 100.0,
|
||||
"markets": {
|
||||
"ms": {
|
||||
"hit_rate": 0.546,
|
||||
"avg_roi": -0.0045,
|
||||
"total_roi": -103.45
|
||||
},
|
||||
"ou15": {
|
||||
"hit_rate": 0.7534,
|
||||
"avg_roi": 0.0146,
|
||||
"total_roi": 335.6
|
||||
},
|
||||
"ou25": {
|
||||
"hit_rate": 0.6194,
|
||||
"avg_roi": -0.0042,
|
||||
"total_roi": -97.44
|
||||
},
|
||||
"ou35": {
|
||||
"hit_rate": 0.7277,
|
||||
"avg_roi": -0.0147,
|
||||
"total_roi": -339.26
|
||||
},
|
||||
"btts": {
|
||||
"hit_rate": 0.5887,
|
||||
"avg_roi": 0.0025,
|
||||
"total_roi": 58.61
|
||||
}
|
||||
}
|
||||
},
|
||||
"0.5": {
|
||||
"n_matches": 23039,
|
||||
"pct": 100.0,
|
||||
"markets": {
|
||||
"ms": {
|
||||
"hit_rate": 0.546,
|
||||
"avg_roi": -0.0045,
|
||||
"total_roi": -103.45
|
||||
},
|
||||
"ou15": {
|
||||
"hit_rate": 0.7534,
|
||||
"avg_roi": 0.0146,
|
||||
"total_roi": 335.6
|
||||
},
|
||||
"ou25": {
|
||||
"hit_rate": 0.6194,
|
||||
"avg_roi": -0.0042,
|
||||
"total_roi": -97.44
|
||||
},
|
||||
"ou35": {
|
||||
"hit_rate": 0.7277,
|
||||
"avg_roi": -0.0147,
|
||||
"total_roi": -339.26
|
||||
},
|
||||
"btts": {
|
||||
"hit_rate": 0.5887,
|
||||
"avg_roi": 0.0025,
|
||||
"total_roi": 58.61
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,5 @@
|
||||
[
|
||||
{
|
||||
"market": "MS-Ev",
|
||||
"min_edge": 0.02,
|
||||
"n":
|
||||
@@ -0,0 +1,267 @@
|
||||
{
|
||||
"generated_at": "2026-05-15T21:40:57.995899",
|
||||
"matches_processed": 3000,
|
||||
"matches_skipped": 0,
|
||||
"markets": {
|
||||
"MS": {
|
||||
"overall_accuracy": 54.97,
|
||||
"total_matches": 3000,
|
||||
"by_confidence_band": {
|
||||
"<50%": {
|
||||
"accuracy": 38.87,
|
||||
"count": 759,
|
||||
"mean_confidence": 45.58
|
||||
},
|
||||
"50-65%": {
|
||||
"accuracy": 52.62,
|
||||
"count": 1300,
|
||||
"mean_confidence": 57.19
|
||||
},
|
||||
"65-75%": {
|
||||
"accuracy": 66.99,
|
||||
"count": 624,
|
||||
"mean_confidence": 69.49
|
||||
},
|
||||
"75%+": {
|
||||
"accuracy": 79.5,
|
||||
"count": 317,
|
||||
"mean_confidence": 80.69
|
||||
}
|
||||
},
|
||||
"by_league": {
|
||||
"Bundesliga": {
|
||||
"accuracy": 46.77,
|
||||
"count": 62
|
||||
},
|
||||
"Ligue 1": {
|
||||
"accuracy": 58.73,
|
||||
"count": 63
|
||||
},
|
||||
"Serie A": {
|
||||
"accuracy": 56.25,
|
||||
"count": 64
|
||||
},
|
||||
"Other": {
|
||||
"accuracy": 55.03,
|
||||
"count": 2811
|
||||
}
|
||||
},
|
||||
"by_pick_direction": {
|
||||
"1": {
|
||||
"accuracy": 58.38,
|
||||
"count": 1946,
|
||||
"mean_confidence": 60.84
|
||||
},
|
||||
"2": {
|
||||
"accuracy": 48.72,
|
||||
"count": 1053,
|
||||
"mean_confidence": 56.44
|
||||
},
|
||||
"X": {
|
||||
"accuracy": 0.0,
|
||||
"count": 1,
|
||||
"mean_confidence": 56.07
|
||||
}
|
||||
}
|
||||
},
|
||||
"OU15": {
|
||||
"overall_accuracy": 74.4,
|
||||
"total_matches": 3000,
|
||||
"by_confidence_band": {
|
||||
"50-65%": {
|
||||
"accuracy": 70.97,
|
||||
"count": 62,
|
||||
"mean_confidence": 59.63
|
||||
},
|
||||
"65-75%": {
|
||||
"accuracy": 68.0,
|
||||
"count": 275,
|
||||
"mean_confidence": 71.1
|
||||
},
|
||||
"75%+": {
|
||||
"accuracy": 75.14,
|
||||
"count": 2663,
|
||||
"mean_confidence": 89.44
|
||||
}
|
||||
},
|
||||
"by_league": {
|
||||
"Bundesliga": {
|
||||
"accuracy": 67.74,
|
||||
"count": 62
|
||||
},
|
||||
"Ligue 1": {
|
||||
"accuracy": 76.19,
|
||||
"count": 63
|
||||
},
|
||||
"Serie A": {
|
||||
"accuracy": 70.31,
|
||||
"count": 64
|
||||
},
|
||||
"Other": {
|
||||
"accuracy": 74.6,
|
||||
"count": 2811
|
||||
}
|
||||
},
|
||||
"by_pick_direction": {
|
||||
"Over": {
|
||||
"accuracy": 74.4,
|
||||
"count": 3000,
|
||||
"mean_confidence": 87.14
|
||||
}
|
||||
}
|
||||
},
|
||||
"OU25": {
|
||||
"overall_accuracy": 51.77,
|
||||
"total_matches": 3000,
|
||||
"by_confidence_band": {
|
||||
"50-65%": {
|
||||
"accuracy": 49.33,
|
||||
"count": 1267,
|
||||
"mean_confidence": 57.13
|
||||
},
|
||||
"65-75%": {
|
||||
"accuracy": 54.53,
|
||||
"count": 453,
|
||||
"mean_confidence": 69.42
|
||||
},
|
||||
"75%+": {
|
||||
"accuracy": 53.2,
|
||||
"count": 1280,
|
||||
"mean_confidence": 90.2
|
||||
}
|
||||
},
|
||||
"by_league": {
|
||||
"Bundesliga": {
|
||||
"accuracy": 41.94,
|
||||
"count": 62
|
||||
},
|
||||
"Ligue 1": {
|
||||
"accuracy": 50.79,
|
||||
"count": 63
|
||||
},
|
||||
"Serie A": {
|
||||
"accuracy": 43.75,
|
||||
"count": 64
|
||||
},
|
||||
"Other": {
|
||||
"accuracy": 52.19,
|
||||
"count": 2811
|
||||
}
|
||||
},
|
||||
"by_pick_direction": {
|
||||
"Over": {
|
||||
"accuracy": 51.03,
|
||||
"count": 2432,
|
||||
"mean_confidence": 76.11
|
||||
},
|
||||
"Under": {
|
||||
"accuracy": 54.93,
|
||||
"count": 568,
|
||||
"mean_confidence": 60.17
|
||||
}
|
||||
}
|
||||
},
|
||||
"BTTS": {
|
||||
"overall_accuracy": 51.83,
|
||||
"total_matches": 3000,
|
||||
"by_confidence_band": {
|
||||
"50-65%": {
|
||||
"accuracy": 48.74,
|
||||
"count": 2214,
|
||||
"mean_confidence": 58.66
|
||||
},
|
||||
"65-75%": {
|
||||
"accuracy": 60.42,
|
||||
"count": 758,
|
||||
"mean_confidence": 68.19
|
||||
},
|
||||
"75%+": {
|
||||
"accuracy": 64.29,
|
||||
"count": 28,
|
||||
"mean_confidence": 77.44
|
||||
}
|
||||
},
|
||||
"by_league": {
|
||||
"Bundesliga": {
|
||||
"accuracy": 54.84,
|
||||
"count": 62
|
||||
},
|
||||
"Ligue 1": {
|
||||
"accuracy": 50.79,
|
||||
"count": 63
|
||||
},
|
||||
"Serie A": {
|
||||
"accuracy": 57.81,
|
||||
"count": 64
|
||||
},
|
||||
"Other": {
|
||||
"accuracy": 51.65,
|
||||
"count": 2811
|
||||
}
|
||||
},
|
||||
"by_pick_direction": {
|
||||
"No": {
|
||||
"accuracy": 50.26,
|
||||
"count": 2099,
|
||||
"mean_confidence": 61.56
|
||||
},
|
||||
"Yes": {
|
||||
"accuracy": 55.49,
|
||||
"count": 901,
|
||||
"mean_confidence": 60.51
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"calibration": {
|
||||
"ms_home": {
|
||||
"brier_score": 0.2054,
|
||||
"calibration_error": 0.0,
|
||||
"sample_count": 3000,
|
||||
"last_trained": "2026-05-15T21:40:58.026574",
|
||||
"mean_predicted": 0.4942,
|
||||
"mean_actual": 0.46
|
||||
},
|
||||
"ms_draw": {
|
||||
"brier_score": 0.1846,
|
||||
"calibration_error": 0.0,
|
||||
"sample_count": 3000,
|
||||
"last_trained": "2026-05-15T21:40:58.030886",
|
||||
"mean_predicted": 0.149,
|
||||
"mean_actual": 0.2493
|
||||
},
|
||||
"ms_away": {
|
||||
"brier_score": 0.1726,
|
||||
"calibration_error": 0.0,
|
||||
"sample_count": 3000,
|
||||
"last_trained": "2026-05-15T21:40:58.033980",
|
||||
"mean_predicted": 0.3567,
|
||||
"mean_actual": 0.2907
|
||||
},
|
||||
"ou15": {
|
||||
"brier_score": 0.1884,
|
||||
"calibration_error": 0.0,
|
||||
"sample_count": 3000,
|
||||
"last_trained": "2026-05-15T21:40:58.037204",
|
||||
"mean_predicted": 0.8714,
|
||||
"mean_actual": 0.744
|
||||
},
|
||||
"ou25": {
|
||||
"brier_score": 0.247,
|
||||
"calibration_error": 0.0,
|
||||
"sample_count": 3000,
|
||||
"last_trained": "2026-05-15T21:40:58.041152",
|
||||
"mean_predicted": 0.6924,
|
||||
"mean_actual": 0.499
|
||||
},
|
||||
"btts": {
|
||||
"brier_score": 0.2453,
|
||||
"calibration_error": 0.0,
|
||||
"sample_count": 3000,
|
||||
"last_trained": "2026-05-15T21:40:58.044344",
|
||||
"mean_predicted": 0.4506,
|
||||
"mean_actual": 0.5147
|
||||
}
|
||||
},
|
||||
"runtime_seconds": 94.1
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,40 @@
|
||||
"""
|
||||
MatchData dataclass — core data transfer object used throughout the engine.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
@dataclass
class MatchData:
    """Flat record describing a single match as it is handed between engine parts.

    Fields without a default are required and, when constructing positionally,
    must be supplied in the order declared here. The trailing fields carry
    defaults and may be omitted.
    """

    # --- identity -----------------------------------------------------------
    match_id: str
    home_team_id: str
    away_team_id: str
    home_team_name: str
    away_team_name: str
    match_date_ms: int  # presumably epoch milliseconds — TODO confirm against the loader
    sport: str
    league_id: Optional[str]
    league_name: str
    referee_name: Optional[str]

    # --- market / squad inputs ----------------------------------------------
    odds_data: Dict[str, float]  # odds keyed by a market/selection code
    home_lineup: Optional[List[str]]
    away_lineup: Optional[List[str]]
    sidelined_data: Optional[Dict[str, Any]]

    # --- form / table inputs ------------------------------------------------
    home_goals_avg: float
    home_conceded_avg: float
    away_goals_avg: float
    away_conceded_avg: float
    home_position: int
    away_position: int
    lineup_source: str

    # --- optional fields (defaults provided) --------------------------------
    status: str = ""
    state: Optional[str] = None
    substate: Optional[str] = None
    current_score_home: Optional[int] = None
    current_score_away: Optional[int] = None
    lineup_confidence: float = 0.0
    source_table: str = "matches"
|
||||
@@ -0,0 +1,292 @@
|
||||
"""
|
||||
Shared prediction dataclasses used across the AI engine.
|
||||
|
||||
These were originally defined in models/v20_ensemble.py and are extracted here
|
||||
so they can be used without importing the full V20 ensemble.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from core.calculators.score_calculator import ScorePrediction
|
||||
|
||||
|
||||
@dataclass
class MarketPrediction:
    """One pick on one betting market, with its probability and flags."""

    market_type: str
    pick: str
    probability: float
    confidence: float
    odds: float = 0.0
    is_recommended: bool = False
    is_value_bet: bool = False
    edge: float = 0.0  # Expected edge over market

    def to_dict(self) -> dict:
        """Return a plain-dict view of this prediction.

        ``probability`` is multiplied by 100 and rounded to one decimal;
        ``confidence`` and ``edge`` are rounded to one decimal; every other
        field is passed through unchanged.
        """
        payload = dict(
            market_type=self.market_type,
            pick=self.pick,
            probability=round(self.probability * 100, 1),
            confidence=round(self.confidence, 1),
            odds=self.odds,
            is_recommended=self.is_recommended,
            is_value_bet=self.is_value_bet,
            edge=round(self.edge, 1),
        )
        return payload
|
||||
|
||||
|
||||
@dataclass
class FullMatchPrediction:
    """Every per-market prediction for one match, plus risk and recommendations."""

    match_id: str
    home_team: str
    away_team: str
    match_date: str = ""

    # === MATCH RESULT (1X2) ===
    ms_home_prob: float = 0.33
    ms_draw_prob: float = 0.33
    ms_away_prob: float = 0.33
    ms_pick: str = ""
    ms_confidence: float = 0.0

    # === DOUBLE CHANCE ===
    dc_1x_prob: float = 0.66
    dc_x2_prob: float = 0.66
    dc_12_prob: float = 0.66
    dc_pick: str = ""
    dc_confidence: float = 0.0

    # === OVER/UNDER GOALS ===
    # 1.5
    over_15_prob: float = 0.70
    under_15_prob: float = 0.30
    ou15_pick: str = ""
    ou15_confidence: float = 0.0

    # 2.5
    over_25_prob: float = 0.50
    under_25_prob: float = 0.50
    ou25_pick: str = ""
    ou25_confidence: float = 0.0

    # 3.5
    over_35_prob: float = 0.30
    under_35_prob: float = 0.70
    ou35_pick: str = ""
    ou35_confidence: float = 0.0

    # === BOTH TEAMS TO SCORE (BTTS) ===
    btts_yes_prob: float = 0.50
    btts_no_prob: float = 0.50
    btts_pick: str = ""
    btts_confidence: float = 0.0

    # === FIRST-HALF RESULT ===
    ht_home_prob: float = 0.30
    ht_draw_prob: float = 0.40
    ht_away_prob: float = 0.30
    ht_pick: str = ""
    ht_confidence: float = 0.0

    # === SCORE PREDICTIONS ===
    score: Optional[ScorePrediction] = None
    predicted_ft_score: str = "1-1"
    predicted_ht_score: str = "0-0"
    ft_scores_top5: List[Dict] = field(default_factory=list)

    # === xG (Expected Goals) ===
    home_xg: float = 1.3
    away_xg: float = 1.1
    total_xg: float = 2.4

    # === RISK ASSESSMENT ===
    risk_level: str = "MEDIUM"  # LOW, MEDIUM, HIGH, EXTREME
    risk_score: float = 0.0
    is_surprise_risk: bool = False
    surprise_type: str = ""
    risk_warnings: List[str] = field(default_factory=list)
    ht_ft_probs: Dict[str, float] = field(default_factory=dict)

    # === GLM-5 UPSET SCORE ===
    upset_score: int = 0  # upset score in the 0-100 range
    upset_level: str = "LOW"  # LOW, MEDIUM, HIGH, EXTREME
    upset_reasons: List[str] = field(default_factory=list)

    # === SURPRISE PROFILE ===
    surprise_score: float = 0.0  # 0-100 overall surprise risk score
    surprise_comment: str = ""  # Human-readable surprise commentary
    surprise_reasons: List[str] = field(default_factory=list)  # Flagged risk reasons
    surprise_breakdown: List[Dict[str, Any]] = field(default_factory=list)  # Per-factor {code, points, label}

    # === ENGINE CONTRIBUTIONS ===
    team_confidence: float = 0.0
    player_confidence: float = 0.0
    odds_confidence: float = 0.0
    referee_confidence: float = 0.0

    # === CORNERS, CARDS & OTHER MARKETS ===
    total_corners_pred: float = 9.5
    corner_pick: str = "9.5 Üst"

    total_cards_pred: float = 4.5
    card_pick: str = "4.5 Alt"
    cards_over_prob: float = 0.50
    cards_under_prob: float = 0.50
    cards_confidence: float = 0.0

    handicap_pick: str = ""
    handicap_home_prob: float = 0.33
    handicap_draw_prob: float = 0.34
    handicap_away_prob: float = 0.33
    handicap_confidence: float = 0.0

    ht_over_05_prob: float = 0.65
    ht_under_05_prob: float = 0.35
    ht_over_15_prob: float = 0.30
    ht_under_15_prob: float = 0.70
    ht_ou_pick: str = "İY 0.5 Üst"
    ht_ou15_pick: str = "İY 1.5 Alt"

    odd_even_pick: str = "Çift"
    odd_prob: float = 0.50  # probability of an odd goal total
    even_prob: float = 0.50  # probability of an even goal total

    # === RECOMMENDATIONS ===
    best_bet: Optional[MarketPrediction] = None
    recommended_bets: List[MarketPrediction] = field(default_factory=list)
    alternative_bet: Optional[MarketPrediction] = None
    expert_recommendation: Dict[str, Any] = field(default_factory=dict)

    # === DETAILED ANALYSIS ===
    analysis_details: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialize the prediction into the nested dict layout used downstream.

        Probabilities are emitted as percentages rounded to one decimal,
        confidences/scores are rounded to one decimal and xG values to two.
        """

        def pct(prob):
            # probability (0-1) -> percentage with one decimal
            return round(prob * 100, 1)

        def one(value):
            # plain one-decimal rounding
            return round(value, 1)

        match_info = {
            "match_id": self.match_id,
            "home_team": self.home_team,
            "away_team": self.away_team,
            "match_date": self.match_date
        }

        match_result = {
            "1": pct(self.ms_home_prob),
            "X": pct(self.ms_draw_prob),
            "2": pct(self.ms_away_prob),
            "pick": self.ms_pick,
            "confidence": one(self.ms_confidence)
        }

        double_chance = {
            "1X": pct(self.dc_1x_prob),
            "X2": pct(self.dc_x2_prob),
            "12": pct(self.dc_12_prob),
            "pick": self.dc_pick,
            "confidence": one(self.dc_confidence)
        }

        over_under = {
            "1.5": {
                "over": pct(self.over_15_prob),
                "under": pct(self.under_15_prob),
                "pick": self.ou15_pick,
                "confidence": one(self.ou15_confidence)
            },
            "2.5": {
                "over": pct(self.over_25_prob),
                "under": pct(self.under_25_prob),
                "pick": self.ou25_pick,
                "confidence": one(self.ou25_confidence)
            },
            "3.5": {
                "over": pct(self.over_35_prob),
                "under": pct(self.under_35_prob),
                "pick": self.ou35_pick,
                "confidence": one(self.ou35_confidence)
            }
        }

        btts = {
            "yes": pct(self.btts_yes_prob),
            "no": pct(self.btts_no_prob),
            "pick": self.btts_pick,
            "confidence": one(self.btts_confidence)
        }

        first_half = {
            "1": pct(self.ht_home_prob),
            "X": pct(self.ht_draw_prob),
            "2": pct(self.ht_away_prob),
            "pick": self.ht_pick,
            "confidence": one(self.ht_confidence),
            "over_under_05": {
                "over": pct(self.ht_over_05_prob),
                "under": pct(self.ht_under_05_prob),
                "pick": self.ht_ou_pick
            },
            "over_under_15": {
                "over": pct(self.ht_over_15_prob),
                "under": pct(self.ht_under_15_prob),
                "pick": self.ht_ou15_pick
            }
        }

        scores = {
            "predicted_ft": self.predicted_ft_score,
            "predicted_ht": self.predicted_ht_score,
            "top_5_ft_scores": self.ft_scores_top5
        }

        others = {
            "handicap": {
                "pick": self.handicap_pick,
                "confidence": one(self.handicap_confidence),
                "home": pct(self.handicap_home_prob),
                "draw": pct(self.handicap_draw_prob),
                "away": pct(self.handicap_away_prob)
            },
            "corners": {
                "total": one(self.total_corners_pred),
                "pick": self.corner_pick
            },
            "cards": {
                "total": one(self.total_cards_pred),
                "pick": self.card_pick,
                "confidence": one(self.cards_confidence),
                "over": pct(self.cards_over_prob),
                "under": pct(self.cards_under_prob)
            },
            "odd_even": {
                "pick": self.odd_even_pick,
                "tek": pct(self.odd_prob),
                "cift": pct(self.even_prob)
            }
        }

        xg = {
            "home": round(self.home_xg, 2),
            "away": round(self.away_xg, 2),
            "total": round(self.total_xg, 2)
        }

        # Falsy containers (None or empty) serialize as empty, as before.
        if self.ht_ft_probs:
            ht_ft = {k: pct(v) for k, v in self.ht_ft_probs.items()}
        else:
            ht_ft = {}

        if self.recommended_bets:
            all_recommended = [b.to_dict() for b in self.recommended_bets]
        else:
            all_recommended = []

        return {
            "match_info": match_info,
            "predictions": {
                "match_result": match_result,
                "double_chance": double_chance,
                "over_under": over_under,
                "btts": btts,
                "first_half": first_half,
                "scores": scores,
                "others": others,
                "xg": xg
            },
            "risk": {
                "level": self.risk_level,
                "score": one(self.risk_score),
                "is_surprise_risk": self.is_surprise_risk,
                "surprise_type": self.surprise_type,
                "ht_ft_probs": ht_ft,
                "warnings": self.risk_warnings
            },
            "upset_analysis": {
                "score": self.upset_score,
                "level": self.upset_level,
                "reasons": self.upset_reasons
            },
            "engine_breakdown": {
                "team_engine": one(self.team_confidence),
                "player_engine": one(self.player_confidence),
                "odds_engine": one(self.odds_confidence),
                "referee_engine": one(self.referee_confidence)
            },
            "recommendations": {
                "best_bet": self.best_bet.to_dict() if self.best_bet else None,
                "all_recommended": all_recommended,
                "alternative_bet": self.alternative_bet.to_dict() if self.alternative_bet else None
            },
            "analysis_details": self.analysis_details
        }
|
||||
@@ -0,0 +1,510 @@
|
||||
"""
|
||||
Calibration Backfill Script
|
||||
============================
|
||||
Runs V25 model against historical matches (using pre-computed ai_features + odds)
|
||||
to generate calibration training data, then trains isotonic calibration models.
|
||||
|
||||
Usage:
|
||||
python ai-engine/scripts/backfill_calibration.py
|
||||
python ai-engine/scripts/backfill_calibration.py --limit 5000
|
||||
python ai-engine/scripts/backfill_calibration.py --min-samples 50
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
from dotenv import load_dotenv
|
||||
|
||||
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.insert(0, AI_ENGINE_DIR)
|
||||
|
||||
from models.v25_ensemble import V25Predictor
|
||||
from models.calibration import get_calibrator
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
def _normalize_pick(pick) -> str:
|
||||
return str(pick or "").strip().casefold()
|
||||
|
||||
|
||||
def _resolve_1x2(p, home, away):
    """Score a normalized 1/X/2 pick against a (home, away) goal pair.

    Returns 1 if the pick won, 0 if it lost, or None when *p* is not one of
    "1", "x"/"0" (draw) or "2".
    """
    if p == "1":
        return int(home > away)
    if p in {"x", "0"}:
        return int(home == away)
    if p == "2":
        return int(away > home)
    return None


def resolve_actual(market, pick, score_home, score_away, ht_home, ht_away):
    """Decide whether *pick* on *market* won, given the final and half-time scores.

    Args:
        market: Market code (case-insensitive): MS, OU15, OU25, OU35, BTTS,
            HT, HTFT or DC.
        pick: Selection label; normalized with ``_normalize_pick``.
        score_home, score_away: Full-time goals.
        ht_home, ht_away: Half-time goals, or None when unknown.

    Returns:
        1 if the pick won, 0 if it lost, or None when the outcome cannot be
        resolved (missing score, unknown market, unrecognized or malformed
        pick).
    """
    if score_home is None or score_away is None:
        return None
    market = (market or "").upper()
    p = _normalize_pick(pick)
    total = score_home + score_away

    if market == "MS":
        return _resolve_1x2(p, score_home, score_away)

    if market in {"OU15", "OU25", "OU35"}:
        line = {"OU15": 1.5, "OU25": 2.5, "OU35": 3.5}[market]
        # "over" is tested first, so a pick naming both sides resolves as over.
        if "over" in p or "üst" in p or "ust" in p:
            return int(total > line)
        if "under" in p or "alt" in p:
            return int(total < line)
        return None

    if market == "BTTS":
        both = score_home > 0 and score_away > 0
        if "yes" in p or "var" in p:
            return int(both)
        if "no" in p or "yok" in p:
            return int(not both)
        return None

    if market == "HT":
        if ht_home is None or ht_away is None:
            return None
        return _resolve_1x2(p, ht_home, ht_away)

    if market == "HTFT":
        if ht_home is None or ht_away is None:
            return None
        sides = [side.strip() for side in p.split("/")]
        # A pick without exactly one "/" is malformed: unresolved, not an error.
        if len(sides) != 2:
            return None
        # Accept "0" as the draw symbol, as the MS and HT markets do.
        ht_p, ft_p = ("x" if side == "0" else side for side in sides)
        ht_actual = "1" if ht_home > ht_away else "2" if ht_away > ht_home else "x"
        ft_actual = "1" if score_home > score_away else "2" if score_away > score_home else "x"
        return int(ht_p == ht_actual and ft_p == ft_actual)

    if market == "DC":
        norm = p.replace("-", "").upper()
        if norm == "1X":
            return int(score_home >= score_away)
        if norm == "X2":
            return int(score_away >= score_home)
        if norm == "12":
            return int(score_home != score_away)
        return None

    return None
|
||||
|
||||
|
||||
def calibrator_key(market, pick):
    """Map a (market, pick) pair to the name of the calibration model it feeds.

    Three-way markets (MS, HT) get one key per outcome. DC and HTFT each share
    a single key regardless of pick. The over/under and BTTS markets only have
    a key for the "over" / "yes" side. Returns None when no key applies.
    """
    m = (market or "").upper()
    p = _normalize_pick(pick)

    if m in {"MS", "HT"}:
        prefix = m.lower()
        if p == "1":
            return f"{prefix}_home"
        if p in {"x", "0"}:
            return f"{prefix}_draw"
        if p == "2":
            return f"{prefix}_away"
        return None

    if m == "DC":
        return "dc"
    if m == "HTFT":
        return "ht_ft"

    if m in {"OU15", "OU25", "OU35"}:
        # Accept the ASCII spelling "ust" too, matching resolve_actual, so a
        # pick that can be resolved is not dropped for lack of a key.
        if "over" in p or "üst" in p or "ust" in p:
            return m.lower()
        return None

    if m == "BTTS" and ("yes" in p or "var" in p):
        return "btts"

    return None
|
||||
|
||||
|
||||
def get_conn():
    """Open a PostgreSQL connection from the ``DATABASE_URL`` environment variable.

    Everything from the first ``?schema=`` onward is cut off the URL before
    connecting. Rows come back as dicts (``RealDictCursor``).

    Raises:
        ValueError: if the URL is unset or empty after trimming.
    """
    raw_url = os.getenv("DATABASE_URL", "")
    # partition() leaves the string untouched when the marker is absent.
    dsn = raw_url.partition("?schema=")[0]
    if dsn:
        return psycopg2.connect(dsn, cursor_factory=RealDictCursor)
    raise ValueError("DATABASE_URL not set")
|
||||
|
||||
|
||||
# Categories matched by exact name:
#   lower-cased category name -> {lower-cased selection name -> odds key}.
# Both "0" and "x" are accepted as the draw selection.
ODD_CAT_MAP = {
    "maç sonucu": {"1": "ms_h", "0": "ms_d", "x": "ms_d", "2": "ms_a"},
    "1. yarı sonucu": {"1": "ht_ms_h", "0": "ht_ms_d", "x": "ht_ms_d", "2": "ht_ms_a"},
}

# Categories whose selections are matched by keyword:
#   lower-cased category name -> {substring searched for in the lower-cased
#   selection name -> odds key}.
# Keywords are tried in the order written and the first hit wins.
ODD_CAT_KEYWORD_MAP = {
    "karşılıklı gol": {"var": "btts_y", "yok": "btts_n"},
    "0,5 alt/üst": {"alt": "ou05_u", "üst": "ou05_o"},
    "1,5 alt/üst": {"alt": "ou15_u", "üst": "ou15_o"},
    "2,5 alt/üst": {"alt": "ou25_u", "üst": "ou25_o"},
    "3,5 alt/üst": {"alt": "ou35_u", "üst": "ou35_o"},
    "ilk yarı 0,5 alt/üst": {"alt": "ht_ou05_u", "üst": "ht_ou05_o"},
    "ilk yarı 1,5 alt/üst": {"alt": "ht_ou15_u", "üst": "ht_ou15_o"},
}
|
||||
|
||||
|
||||
def load_matches(cur, limit: int) -> List[Dict]:
    """Fetch up to *limit* finished football matches that have an ai_features row.

    Rows are ordered newest first (``mst_utc DESC``) and carry the match id
    plus full-time and half-time scores. Whatever ``cur.fetchall()`` yields is
    returned as-is.
    """
    query = """
        SELECT m.id, m.score_home, m.score_away,
               m.ht_score_home, m.ht_score_away
        FROM matches m
        JOIN football_ai_features f ON f.match_id = m.id
        WHERE m.status = 'FT'
          AND m.sport = 'football'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
        ORDER BY m.mst_utc DESC
        LIMIT %s
    """
    params = (limit,)
    cur.execute(query, params)
    rows = cur.fetchall()
    return rows
|
||||
|
||||
|
||||
def load_ai_features_batch(cur, match_ids: List[str]) -> Dict[str, Dict]:
    """Load pre-computed feature rows for *match_ids*, renamed for the model.

    The query aliases ``football_ai_features`` columns to other names
    (presumably the ones the V25 model expects — TODO confirm against
    ``V25Predictor.FEATURE_COLS``). Several output columns are not read from
    the table at all and are filled with constant ``0`` / ``0.0`` placeholders.

    Returns:
        ``{str(match_id): row_dict}``; an empty dict when *match_ids* is empty.
    """
    if not match_ids:
        return {}
    # One "%s" placeholder per id; the ids themselves are passed as parameters.
    ph = ",".join(["%s"] * len(match_ids))
    # NOTE(review): h2h_draw_rate is derived as 1.0 - h2h_home_win_rate - 0.33,
    # which goes negative once the home win rate exceeds 0.67 — confirm intent.
    # NOTE(review): missing_players_impact is mapped to the home side only;
    # away_missing_impact is a constant 0.0.
    cur.execute(f"""
        SELECT match_id,
               home_elo AS home_overall_elo,
               away_elo AS away_overall_elo,
               elo_diff,
               home_home_elo, away_away_elo,
               home_form_elo, away_form_elo,
               (home_form_elo - away_form_elo) AS form_elo_diff,
               home_goals_avg_5 AS home_goals_avg,
               home_conceded_avg_5 AS home_conceded_avg,
               away_goals_avg_5 AS away_goals_avg,
               away_conceded_avg_5 AS away_conceded_avg,
               home_clean_sheet_rate, away_clean_sheet_rate,
               home_scoring_rate, away_scoring_rate,
               home_win_streak AS home_winning_streak,
               away_win_streak AS away_winning_streak,
               0 AS home_unbeaten_streak,
               0 AS away_unbeaten_streak,
               h2h_total AS h2h_total_matches,
               h2h_home_win_rate,
               (1.0 - h2h_home_win_rate - 0.33) AS h2h_draw_rate,
               h2h_avg_goals,
               h2h_btts_rate, h2h_over25_rate,
               home_avg_possession, away_avg_possession,
               home_avg_shots_on_target, away_avg_shots_on_target,
               home_shot_conversion, away_shot_conversion,
               0.0 AS home_avg_corners, 0.0 AS away_avg_corners,
               implied_home, implied_draw, implied_away,
               league_avg_goals,
               0.0 AS league_zero_goal_rate,
               0.0 AS home_xga, 0.0 AS away_xga,
               0.0 AS upset_atmosphere, 0.0 AS upset_motivation,
               0.0 AS upset_fatigue, 0.0 AS upset_potential,
               referee_home_bias, referee_avg_goals,
               referee_avg_cards AS referee_cards_total,
               0.0 AS referee_avg_yellow,
               0.0 AS referee_experience,
               0.0 AS home_momentum_score, 0.0 AS away_momentum_score,
               0.0 AS momentum_diff,
               0.0 AS home_squad_quality, 0.0 AS away_squad_quality,
               0.0 AS squad_diff,
               0 AS home_key_players, 0 AS away_key_players,
               missing_players_impact AS home_missing_impact,
               0.0 AS away_missing_impact,
               home_goals_avg_5 AS home_goals_form,
               away_goals_avg_5 AS away_goals_form
        FROM football_ai_features
        WHERE match_id IN ({ph})
    """, match_ids)
    # Key by the string form of the id so lookups match the str ids used by callers.
    return {str(row["match_id"]): dict(row) for row in cur.fetchall()}
|
||||
|
||||
|
||||
def load_odds_batch(cur, match_ids: List[str]) -> Dict[str, Dict[str, float]]:
    """Load bookmaker odds for *match_ids* and flatten them to short odds keys.

    Category names are matched against ``ODD_CAT_MAP`` (exact selection name)
    and ``ODD_CAT_KEYWORD_MAP`` (keyword contained in the selection name).
    Rows whose odd value is missing, zero or negative are ignored.

    Returns:
        ``{str(match_id): {odds_key: odd_value}}``. A match gets an entry as
        soon as it has one positive odd, even if no category matched. An empty
        dict is returned when *match_ids* is empty.
    """
    if not match_ids:
        return {}

    def _category_forms(name: str):
        # str.lower() turns "İ" into "i" + U+0307 (combining dot above), so
        # "İlk Yarı ..." would never equal the "ilk yarı ..." map keys. Drop
        # the combining dot, and also try Turkish casing ("I" -> "ı") as a
        # fallback for upper-case input.
        plain = name.lower().replace("\u0307", "").strip()
        turkish = name.replace("İ", "i").replace("I", "ı").lower().strip()
        return (plain,) if turkish == plain else (plain, turkish)

    ph = ",".join(["%s"] * len(match_ids))
    cur.execute(f"""
        SELECT oc.match_id, oc.name AS cat_name,
               os.name AS sel_name, os.odd_value
        FROM odd_selections os
        JOIN odd_categories oc ON os.odd_category_db_id = oc.db_id
        WHERE oc.match_id IN ({ph})
    """, match_ids)

    odds: Dict[str, Dict[str, float]] = {}
    for row in cur.fetchall():
        val = float(row["odd_value"]) if row["odd_value"] else 0
        if val <= 0:
            continue

        mid = str(row["match_id"])
        match_odds = odds.setdefault(mid, {})
        sel = (row["sel_name"] or "").strip().lower()

        for cat in _category_forms(row["cat_name"] or ""):
            if cat in ODD_CAT_MAP:
                key = ODD_CAT_MAP[cat].get(sel)
                if key:
                    match_odds[key] = val
                break
            kw_map = ODD_CAT_KEYWORD_MAP.get(cat)
            if kw_map is not None:
                # First keyword contained in the selection name wins.
                for keyword, key in kw_map.items():
                    if keyword in sel:
                        match_odds[key] = val
                        break
                break
    return odds
|
||||
|
||||
|
||||
# (market code, pick label, extractor) triples. Each extractor takes an
# indexable of probabilities and returns the entry for that pick: index 0/1/2
# for the three-way markets, index 0 for the over / yes side of the others.
# NOTE(review): not referenced by run_backfill in this script, which uses its
# own inline market list — confirm whether this table is still needed.
MARKETS_TO_PREDICT = [
    ("MS", "1", lambda p: p[0]),
    ("MS", "X", lambda p: p[1]),
    ("MS", "2", lambda p: p[2]),
    ("OU25", "Over 2.5", lambda p: p[0]),
    ("BTTS", "Yes", lambda p: p[0]),
    ("OU15", "Over 1.5", lambda p: p[0]),
    ("OU35", "Over 3.5", lambda p: p[0]),
    ("HT", "1", lambda p: p[0]),
    ("HT", "X", lambda p: p[1]),
    ("HT", "2", lambda p: p[2]),
]
|
||||
|
||||
|
||||
# (model key, market code, picks). Three-way markets list their picks in the
# order of the probability vector; binary markets only calibrate index 0.
_BACKFILL_MARKETS = (
    ("ms", "MS", ("1", "X", "2")),
    ("ou25", "OU25", ("Over",)),
    ("btts", "BTTS", ("Yes",)),
    ("ou15", "OU15", ("Over",)),
    ("ou35", "OU35", ("Over",)),
    ("ht_result", "HT", ("1", "X", "2")),
)


def _build_feature_dict(feature_cols, feat_row, odds_row):
    """Assemble one match's model input from its feature row and its odds."""
    feat_dict = {}
    for col in feature_cols:
        if col in feat_row and feat_row[col] is not None:
            feat_dict[col] = float(feat_row[col])
        elif col.startswith("odds_") and not col.endswith("_present"):
            feat_dict[col] = float(odds_row.get(col.replace("odds_", ""), 0))
        elif col.endswith("_present"):
            odds_key = col.replace("_present", "").replace("odds_", "")
            feat_dict[col] = 1.0 if odds_row.get(odds_key, 0) > 0 else 0.0
        else:
            feat_dict[col] = 0.0

    # Real 1X2 odds, when present, are always set on the odds_ms_* features.
    for side in ("ms_h", "ms_d", "ms_a"):
        if odds_row.get(side, 0) > 0:
            feat_dict[f"odds_{side}"] = odds_row[side]

    ms_h = feat_dict.get("odds_ms_h", 0)
    ms_d = feat_dict.get("odds_ms_d", 0)
    ms_a = feat_dict.get("odds_ms_a", 0)
    if ms_h > 0 and ms_d > 0 and ms_a > 0:
        # Implied probabilities normalized so the three sum to 1.
        raw_sum = 1 / ms_h + 1 / ms_d + 1 / ms_a
        feat_dict["implied_home"] = (1 / ms_h) / raw_sum
        feat_dict["implied_draw"] = (1 / ms_d) / raw_sum
        feat_dict["implied_away"] = (1 / ms_a) / raw_sum
    return feat_dict


def _iter_match_samples(predictor, feat_dict, mid, match_row):
    """Yield one calibration sample per (market, pick) that can be resolved."""
    sh = match_row["score_home"]
    sa = match_row["score_away"]
    ht_h = match_row.get("ht_score_home")
    ht_a = match_row.get("ht_score_away")

    for model_key, market, picks in _BACKFILL_MARKETS:
        if model_key not in predictor.models:
            continue
        probs = predictor.predict_market(model_key, feat_dict)
        if probs is None:
            continue
        if market == "HT" and (ht_h is None or ht_a is None):
            continue

        if len(picks) == 1:
            # Binary market: index 0 is used; 0.5 when the vector is empty.
            pairs = [(picks[0], float(probs[0]) if len(probs) > 0 else 0.5)]
        else:
            pairs = [(pick, probs[idx]) for idx, pick in enumerate(picks)]

        for pick, prob in pairs:
            actual = resolve_actual(market, pick, sh, sa, ht_h, ht_a)
            key = calibrator_key(market, pick)
            if actual is not None and key:
                yield {
                    "match_id": mid,
                    "market": market,
                    "pick": pick,
                    "key": key,
                    "raw_prob": float(prob),
                    "actual": int(actual),
                }


def _train_calibrators(df, min_samples):
    """Train and save one calibration model per key; return a per-key summary."""
    calibrator = get_calibrator()
    results: Dict[str, Any] = {}
    for key in sorted(df["key"].unique()):
        sub = df[df["key"] == key].copy()
        sub = sub.drop_duplicates(subset=["match_id", "key"], keep="first")
        sub = sub.dropna(subset=["raw_prob", "actual"])
        # Keep only probabilities strictly inside (0, 1).
        sub = sub[(sub["raw_prob"] > 0.0) & (sub["raw_prob"] < 1.0)]

        n = len(sub)
        if n < min_samples:
            results[key] = {"status": "skipped", "samples": n}
            continue

        metrics = calibrator.train_calibration(
            df=sub,
            market=key,
            prob_col="raw_prob",
            actual_col="actual",
            min_samples=min_samples,
            save=True,
        )
        results[key] = {
            "status": "trained",
            "samples": metrics.sample_count,
            "brier": round(metrics.brier_score, 4),
            "ece": round(metrics.calibration_error, 4),
            "mean_predicted": round(metrics.mean_predicted, 4),
            "mean_actual": round(metrics.mean_actual, 4),
        }
    return results


def _print_calibration_results(results):
    """Print the per-key training summary as a fixed-width table."""
    print("\n" + "=" * 70)
    print("CALIBRATION RESULTS")
    print("=" * 70)
    print(f"{'market':<14} {'status':<10} {'n':<8} {'brier':<9} {'ece':<8} {'pred_avg':<9} {'actual_avg'}")
    print("-" * 70)
    for key, info in sorted(results.items()):
        if info["status"] == "trained":
            print(
                f"{key:<14} {'OK':<10} {info['samples']:<8} "
                f"{info['brier']:<9.4f} {info['ece']:<8.4f} "
                f"{info['mean_predicted']:<9.4f} {info['mean_actual']}"
            )
        else:
            print(f"{key:<14} {'SKIP':<10} {info['samples']:<8}")
    print("=" * 70)


def run_backfill(args):
    """Replay the V25 model over finished matches and train calibration models.

    Loads matches, features and odds from the database, predicts each
    supported market, pairs every raw probability with the actual outcome and
    trains one calibration model per key through the shared calibrator.

    Args:
        args: Namespace with ``limit`` (max matches to load) and
            ``min_samples`` (minimum samples needed to train a key).
    """
    print("=" * 70)
    print("CALIBRATION BACKFILL")
    print("=" * 70)

    # The database is only needed while loading, so close the cursor and the
    # connection as soon as that is done — also when a query fails.
    conn = get_conn()
    try:
        cur = conn.cursor(cursor_factory=RealDictCursor)
        try:
            t0 = time.time()
            print(f"Loading matches (limit={args.limit})...")
            matches = load_matches(cur, args.limit)
            print(f" Found {len(matches)} finished matches with ai_features")

            match_ids = [str(m["id"]) for m in matches]
            match_map = {str(m["id"]): m for m in matches}

            print("Loading ai_features...")
            features_map = load_ai_features_batch(cur, match_ids)
            print(f" Loaded features for {len(features_map)} matches")

            print("Loading odds...")
            odds_map = load_odds_batch(cur, match_ids)
            print(f" Loaded odds for {len(odds_map)} matches")
        finally:
            cur.close()
    finally:
        conn.close()

    print(f"Data loading: {time.time() - t0:.1f}s")

    print("\nLoading V25 model...")
    predictor = V25Predictor()
    predictor.load_models()
    feature_cols = predictor.FEATURE_COLS

    samples: List[Dict[str, Any]] = []
    skipped = 0
    errors = 0
    processed = 0

    print(f"\nRunning predictions on {len(match_ids)} matches...")
    t1 = time.time()

    for i, mid in enumerate(match_ids):
        if mid not in features_map:
            skipped += 1
            continue

        try:
            feat_dict = _build_feature_dict(
                feature_cols, features_map[mid], odds_map.get(mid, {})
            )
            # Append one by one so samples produced before a failure are kept.
            for sample in _iter_match_samples(predictor, feat_dict, mid, match_map[mid]):
                samples.append(sample)
            processed += 1
        except Exception as e:
            # Best effort: a failing match is skipped, not fatal. Errors have
            # their own counter so the first five are always reported.
            skipped += 1
            errors += 1
            if errors <= 5:
                print(f" Error on {mid}: {e}")

        if (i + 1) % 5000 == 0:
            elapsed = time.time() - t1
            rate = (i + 1) / elapsed if elapsed > 0 else 0.0
            print(f" Processed {i+1}/{len(match_ids)} ({rate:.0f} matches/s)")

    elapsed = time.time() - t1
    print(f"\nPrediction complete: {processed} matches, {skipped} skipped, {elapsed:.1f}s")

    if not samples:
        print("No calibration samples generated!")
        return

    df = pd.DataFrame(samples)
    print(f"\nTotal calibration samples: {len(df)}")
    print(f"Unique matches: {df['match_id'].nunique()}")
    print("\nPer-key counts:")
    for key, count in df["key"].value_counts().items():
        print(f" {key:<14} {count}")

    print(f"\nTraining isotonic calibration models (min_samples={args.min_samples})...")
    results = _train_calibrators(df, args.min_samples)
    _print_calibration_results(results)

    total_time = time.time() - t0
    print(f"\nTotal time: {total_time:.1f}s")
    print(f"Calibration models saved to: {os.path.join(AI_ENGINE_DIR, 'models', 'calibration')}/")
|
||||
|
||||
|
||||
def main():
    """Parse the command-line options and hand them to ``run_backfill``.

    Options:
        --limit        maximum number of matches to process (default 50000)
        --min-samples  minimum samples a market needs before it is calibrated
                       (default 100)
    """
    cli = argparse.ArgumentParser(
        description="Backfill calibration from historical matches",
    )
    # (flag, default, help text) -- every option is an integer.
    int_options = (
        ("--limit", 50000, "Max matches to process (default: 50000)"),
        ("--min-samples", 100,
         "Min samples per market for calibration (default: 100)"),
    )
    for flag, default, help_text in int_options:
        cli.add_argument(flag, type=int, default=default, help=help_text)

    run_backfill(cli.parse_args())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,352 @@
|
||||
"""
|
||||
Tutarsızlık Bazlı Backtest
|
||||
============================
|
||||
Modeller arası tutarsızlığı ölçer, tutarlı maçlarda bahis açılsaydı
|
||||
ROI ne olurdu hesaplar.
|
||||
|
||||
Mantık:
|
||||
- Her maç için market'ler arası çelişkileri tespit et
|
||||
- Tutarsız maçları filtrele
|
||||
- Tutarlı maçlarda hit rate ve ROI hesapla
|
||||
|
||||
Usage:
|
||||
python scripts/backtest_consistency.py
|
||||
"""
|
||||
|
||||
import os, sys, json
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import xgboost as xgb
|
||||
from sklearn.metrics import accuracy_score
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# Directory that contains this script's parent folder; data and models are
# resolved relative to it.
_AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

DATA_PATH = os.path.join(_AI_ENGINE_DIR, 'data', 'training_data.csv')
MODELS_DIR = os.path.join(_AI_ENGINE_DIR, 'models', 'v25')

# Columns of the training CSV that are never used as model features:
# identifiers, raw scores and every label column.
SKIP_COLS = {
    # identifiers / timestamps
    'match_id',
    'home_team_id',
    'away_team_id',
    'league_id',
    'mst_utc',
    # raw scores
    'score_home',
    'score_away',
    'total_goals',
    'ht_score_home',
    'ht_score_away',
    'ht_total_goals',
    # full-time labels
    'label_ms',
    'label_ou05',
    'label_ou15',
    'label_ou25',
    'label_ou35',
    'label_btts',
    # half-time labels
    'label_ht_result',
    'label_ht_ou05',
    'label_ht_ou15',
    'label_ht_ft',
    # miscellaneous labels
    'label_odd_even',
    'label_yellow_cards',
    'label_cards_ou45',
    'label_handicap_ms',
}
|
||||
|
||||
|
||||
def load_model(market: str):
    """Load the V25 XGBoost booster for ``market``.

    The model is read from ``MODELS_DIR/xgb_v25_<market>.json``.

    Returns:
        The loaded ``xgb.Booster``, or ``None`` when the model file does not
        exist.
    """
    model_file = os.path.join(MODELS_DIR, f'xgb_v25_{market}.json')
    if os.path.exists(model_file):
        booster = xgb.Booster()
        booster.load_model(model_file)
        return booster
    return None
|
||||
|
||||
|
||||
def predict_proba(model, X: np.ndarray, feature_cols: list, n_class: int):
    """Return per-class scores for every row of ``X``.

    Args:
        model: object whose ``predict`` accepts an ``xgb.DMatrix``.
        X: feature matrix, one row per match, columns ordered as
            ``feature_cols``.
        feature_cols: column names attached to ``X`` before prediction.
        n_class: number of classes of the market.

    Returns:
        A 2-D array with one column per class. For ``n_class > 2`` the raw
        prediction is reshaped to ``(-1, n_class)``; otherwise the raw output
        ``p`` is expanded to the two columns ``[1 - p, p]``.
    """
    frame = pd.DataFrame(X, columns=feature_cols)
    scores = model.predict(xgb.DMatrix(frame))
    if n_class <= 2:
        return np.column_stack([1 - scores, scores])
    return scores.reshape(-1, n_class)
|
||||
|
||||
|
||||
def consistency_score(probs: dict) -> tuple[float, list]:
|
||||
"""
|
||||
Market'ler arası tutarsızlığı hesapla.
|
||||
0 = tamamen tutarlı, 1 = tamamen çelişkili.
|
||||
|
||||
Kontrol edilen çelişkiler:
|
||||
1. OU15 üst yüksek ama OU25 üst de yüksek → ok
|
||||
OU15 üst yüksek ama OU25 alt yüksek → ÇELISKI (1 gol bekleniyor ama 2.5+ da bekleniyor?)
|
||||
|
||||
2. HT_OU05 üst yüksek ama HT sonucu draw yüksek → ÇELISKI
|
||||
|
||||
3. OU35 üst yüksek ama BTTS düşük → şüpheli
|
||||
|
||||
4. MS home yüksek ama HT away yüksek → çelişkili
|
||||
"""
|
||||
conflicts = []
|
||||
total_weight = 0
|
||||
total_conflict = 0
|
||||
|
||||
# OU tutarlılığı: P(OU25>0.5) <= P(OU15>0.5) matematiksel zorunluluk
|
||||
ou15_over = probs.get('ou15_over', 0.5)
|
||||
ou25_over = probs.get('ou25_over', 0.5)
|
||||
ou35_over = probs.get('ou35_over', 0.5)
|
||||
|
||||
# OU hiyerarşisi: ou35 <= ou25 <= ou15 olmalı
|
||||
if ou25_over > ou15_over + 0.05:
|
||||
gap = ou25_over - ou15_over
|
||||
conflicts.append(f'OU25>{ou25_over:.0%} > OU15>{ou15_over:.0%} (imkansız)')
|
||||
total_conflict += gap * 2
|
||||
total_weight += 1
|
||||
|
||||
if ou35_over > ou25_over + 0.05:
|
||||
gap = ou35_over - ou25_over
|
||||
conflicts.append(f'OU35>{ou35_over:.0%} > OU25>{ou25_over:.0%} (imkansız)')
|
||||
total_conflict += gap * 2
|
||||
total_weight += 1
|
||||
|
||||
# HT_OU05 ve HT sonuç tutarlılığı
|
||||
ht_ou05_over = probs.get('ht_ou05_over', 0.5)
|
||||
ht_draw_prob = probs.get('ht_draw', 0.34)
|
||||
|
||||
# İlk yarıda gol bekleniyor ama beraberlik de bekleniyor (0-0 draw?)
|
||||
# HT_OU05 >%70 ama HT draw >%50 → çelişkili (0-0 berabere çok?)
|
||||
if ht_ou05_over > 0.70 and ht_draw_prob > 0.50:
|
||||
conflict = min(ht_ou05_over - 0.5, ht_draw_prob - 0.4)
|
||||
conflicts.append(f'HT_OU05>{ht_ou05_over:.0%} ama HT_Draw>{ht_draw_prob:.0%}')
|
||||
total_conflict += conflict
|
||||
total_weight += 1
|
||||
|
||||
# HT_OU05 ve HT_OU15 tutarlılığı
|
||||
ht_ou15_over = probs.get('ht_ou15_over', 0.3)
|
||||
if ht_ou15_over > ht_ou05_over + 0.05:
|
||||
gap = ht_ou15_over - ht_ou05_over
|
||||
conflicts.append(f'HT_OU15>{ht_ou15_over:.0%} > HT_OU05>{ht_ou05_over:.0%} (imkansız)')
|
||||
total_conflict += gap * 2
|
||||
total_weight += 1
|
||||
|
||||
# MS ve OU tutarlılığı
|
||||
ms_home = probs.get('ms_home', 0.33)
|
||||
ms_away = probs.get('ms_away', 0.33)
|
||||
btts_yes = probs.get('btts_yes', 0.5)
|
||||
|
||||
# Tek takım galibiyeti kuvvetli ama BTTS yüksek → şüpheli
|
||||
dominant = max(ms_home, ms_away)
|
||||
if dominant > 0.65 and btts_yes > 0.65:
|
||||
conflict = (dominant - 0.5) * (btts_yes - 0.5)
|
||||
conflicts.append(f'MS dominant>{dominant:.0%} ama BTTS_Yes>{btts_yes:.0%}')
|
||||
total_conflict += conflict * 0.5
|
||||
total_weight += 1
|
||||
|
||||
# OU25 ve BTTS tutarlılığı
|
||||
# BTTS yüksekse en az 2 gol → OU25 üst de yüksek olmalı
|
||||
if btts_yes > 0.65 and ou25_over < 0.45:
|
||||
conflict = btts_yes - ou25_over
|
||||
conflicts.append(f'BTTS_Yes>{btts_yes:.0%} ama OU25>{ou25_over:.0%} düşük')
|
||||
total_conflict += conflict
|
||||
total_weight += 1
|
||||
|
||||
# OU35 üst yüksek ama BTTS düşük → şüpheli (3+ gol ama tek takım mı?)
|
||||
if ou35_over > 0.45 and btts_yes < 0.40:
|
||||
conflict = (ou35_over - 0.35) * (0.5 - btts_yes)
|
||||
conflicts.append(f'OU35>{ou35_over:.0%} ama BTTS_Yes<{btts_yes:.0%}')
|
||||
total_conflict += conflict
|
||||
total_weight += 1
|
||||
|
||||
score = min(1.0, total_conflict / max(total_weight * 0.3, 0.1))
|
||||
return score, conflicts
|
||||
|
||||
|
||||
def main():
    """Backtest the V25 models grouped by cross-market consistency.

    Steps:
      1. Load the training CSV and keep the chronologically last 20% of the
         matches as the test set.
      2. Predict every available market in one batch.
      3. For each match compute ``consistency_score`` and, per scored market,
         whether the arg-max prediction was correct plus a unit-stake ROI.
      4. Print hit rate / ROI per consistency threshold, the most frequent
         conflict messages and the score distribution.
      5. Write the per-threshold summary to ``reports/backtest_consistency.json``.

    Matches whose label for a market is missing (NaN / non-numeric) are
    skipped for that market instead of aborting the run.
    """
    print('Loading data...')
    df = pd.read_csv(DATA_PATH, low_memory=False)

    # Chronological split: the most recent 20% of matches form the test set.
    df = df.sort_values('mst_utc')
    n_test = int(len(df) * 0.20)
    df_test = df.tail(n_test).copy()
    print(f'Test seti: {len(df_test):,} maç')
    if df_test.empty:
        # Nothing to score; the analysis below would fail on an empty frame.
        print('No test matches available; nothing to backtest.')
        return

    feature_cols = [c for c in df.columns if c not in SKIP_COLS]

    # Load the models; markets without a model file are left out.
    print('Modeller yükleniyor...')
    class_counts = {
        'ms': 3,
        'ou15': 2,
        'ou25': 2,
        'ou35': 2,
        'btts': 2,
        'ht_result': 3,
        'ht_ou05': 2,
        'ht_ou15': 2,
    }
    models = {}
    for mkey, n_class in class_counts.items():
        model = load_model(mkey)
        if model is not None:
            models[mkey] = (model, n_class)
    print(f'Yüklenen model: {list(models.keys())}')

    X = df_test[feature_cols].fillna(0).values

    # Batch prediction for every loaded market.
    print('Tahminler yapılıyor...')
    preds = {
        mkey: predict_proba(model, X, feature_cols, n_class)
        for mkey, (model, n_class) in models.items()
    }

    markets = ['ms', 'ou15', 'ou25', 'ou35', 'btts']

    # Extract the labels once. Missing or non-numeric labels become -1 so the
    # per-match loop can skip them (int(NaN) would raise ValueError).
    labels = {}
    for mkt in markets:
        col = f'label_{mkt}'
        if col in df_test.columns:
            numeric = pd.to_numeric(df_test[col], errors='coerce')
            labels[mkt] = numeric.fillna(-1).astype(int).to_numpy()
        else:
            labels[mkt] = np.full(len(df_test), -1, dtype=int)

    # model key -> ((probability name, class column), ...)
    prob_fields = {
        'ms': (('ms_home', 0), ('ms_draw', 1), ('ms_away', 2)),
        'ou15': (('ou15_over', 1),),
        'ou25': (('ou25_over', 1),),
        'ou35': (('ou35_over', 1),),
        'btts': (('btts_yes', 1),),
        'ht_result': (('ht_home', 0), ('ht_draw', 1), ('ht_away', 2)),
        'ht_ou05': (('ht_ou05_over', 1),),
        'ht_ou15': (('ht_ou15_over', 1),),
    }

    # Per-match consistency score and per-market outcome.
    results = []
    for i in range(len(df_test)):
        probs = {}
        for mkey, fields in prob_fields.items():
            if mkey not in preds:
                continue
            for prob_name, class_idx in fields:
                probs[prob_name] = preds[mkey][i][class_idx]

        c_score, conflicts = consistency_score(probs)

        market_results = {}
        for mkt in markets:
            actual = int(labels[mkt][i])
            if mkt not in preds or actual < 0:
                continue
            pred_class = int(np.argmax(preds[mkt][i]))
            correct = int(pred_class == actual)

            # NOTE(review): the odds are derived from the model's own
            # probability (1 / p), not from bookmaker prices, so this ROI
            # is relative to the model's own fair odds.
            pred_prob = float(preds[mkt][i][pred_class])
            implied_odds = 1 / pred_prob if pred_prob > 0.01 else 10.0
            # Unit stake: win (odds - 1) when correct, lose 1 otherwise.
            roi = (implied_odds - 1) * correct - (1 - correct)

            market_results[mkt] = {
                'pred': pred_class,
                'actual': actual,
                'correct': correct,
                'prob': pred_prob,
                'roi': roi,
            }

        results.append({
            'idx': i,
            'consistency_score': c_score,
            'conflicts': conflicts,
            'probs': probs,
            'market_results': market_results,
        })

    # NaN (not None) for missing entries keeps every column numeric even when
    # a market has no model at all.
    df_results = pd.DataFrame([{
        'consistency_score': r['consistency_score'],
        'n_conflicts': len(r['conflicts']),
        **{f'{m}_correct': r['market_results'].get(m, {}).get('correct', np.nan)
           for m in markets},
        **{f'{m}_roi': r['market_results'].get(m, {}).get('roi', np.nan)
           for m in markets},
    } for r in results])

    # ── Analysis ────────────────────────────────────────────────────────
    print(f'\n{"="*70}')
    print('TUTARSIZLIK ANALİZİ')
    print(f'{"="*70}')

    thresholds = [0.0, 0.1, 0.2, 0.3, 0.5]

    for t in thresholds:
        mask = df_results['consistency_score'] <= t
        n = mask.sum()
        if n < 50:
            continue
        selected = df_results[mask]

        print(f'\n[Tutarsızlık <= {t:.1f}] → {n:,} maç ({n/len(df_results)*100:.0f}%)')
        print(f' {"Market":<8} {"HitRate":>8} {"ROI/bahis":>10} {"Toplam ROI":>12}')
        print(f' {"-"*42}')
        for m in markets:
            col_c = f'{m}_correct'
            col_r = f'{m}_roi'
            if col_c not in df_results.columns:
                continue
            sub = selected[col_c].dropna()
            roi_sub = selected[col_r].dropna()
            if len(sub) < 20:
                continue
            hit = sub.mean()
            avg_roi = roi_sub.mean()
            total_roi = roi_sub.sum()
            print(f' {m:<8} {hit:>7.1%} {avg_roi:>+9.3f} {total_roi:>+11.1f}')

    # Breakdown by conflict message.
    print(f'\n{"="*70}')
    print('EN SIK ÇELIŞKILER')
    print(f'{"="*70}')
    all_conflicts = [c for r in results for c in r['conflicts']]
    from collections import Counter
    for conflict, cnt in Counter(all_conflicts).most_common(10):
        print(f' {cnt:>5}x {conflict}')

    # Distribution of the consistency score.
    print(f'\n{"="*70}')
    print('TUTARSIZLIK DAĞILIMI')
    print(f'{"="*70}')
    for label, lo, hi in [
        ('Tamamen tutarlı', 0.0, 0.05),
        ('Çok tutarlı', 0.05, 0.15),
        ('Orta', 0.15, 0.30),
        ('Tutarsız', 0.30, 0.50),
        ('Çok tutarsız', 0.50, 1.01),
    ]:
        mask = (df_results['consistency_score'] >= lo) & (df_results['consistency_score'] < hi)
        n = mask.sum()
        ou25_hit = df_results[mask]['ou25_correct'].mean()
        ms_hit = df_results[mask]['ms_correct'].mean()
        print(f' {label:<20} {n:>6,} maç ({n/len(df_results)*100:>4.0f}%) | '
              f'MS={ms_hit:.0%} OU25={ou25_hit:.0%}')

    # Build and save the report.
    report = {
        'total_test': len(df_results),
        'thresholds': {},
    }
    for t in thresholds:
        mask = df_results['consistency_score'] <= t
        n = mask.sum()
        selected = df_results[mask]
        entry = {
            'n_matches': int(n),
            'pct': round(float(n) / len(df_results) * 100, 1),
            'markets': {},
        }
        for m in markets:
            col_c = f'{m}_correct'
            col_r = f'{m}_roi'
            if col_c not in df_results.columns:
                continue
            sub_c = selected[col_c].dropna()
            sub_r = selected[col_r].dropna()
            if len(sub_c) > 0:
                entry['markets'][m] = {
                    'hit_rate': round(float(sub_c.mean()), 4),
                    'avg_roi': round(float(sub_r.mean()), 4),
                    'total_roi': round(float(sub_r.sum()), 2),
                }
        report['thresholds'][str(t)] = entry

    out_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                            'reports', 'backtest_consistency.json')
    # A fresh checkout may not have the reports directory yet.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, 'w') as f:
        json.dump(report, f, indent=2)
    print(f'\nRapor: {out_path}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -0,0 +1,310 @@
|
||||
"""
|
||||
League Model Backtest — Son 100+ Maç
|
||||
======================================
|
||||
Her lig için en son 100-200 maçı (eğitim datasından bağımsız, test seti)
|
||||
lig bazlı modelle tahmin eder ve gerçek sonuçla karşılaştırır.
|
||||
|
||||
Usage:
|
||||
python scripts/backtest_league_models.py
|
||||
python scripts/backtest_league_models.py --min-matches 150
|
||||
"""
|
||||
|
||||
import os, sys, json, warnings, argparse
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import xgboost as xgb
|
||||
from sklearn.metrics import accuracy_score
|
||||
|
||||
warnings.filterwarnings("ignore")
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from models.league_model import get_league_model_loader, MARKET_META, FILE_TO_SIGNAL
|
||||
|
||||
# Filesystem layout, resolved relative to this script's parent folder.
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_PATH = os.path.join(AI_ENGINE_DIR, "data", "training_data.csv")
REPORTS_DIR = os.path.join(AI_ENGINE_DIR, "reports")
QL_PATH = os.path.join(os.path.dirname(AI_ENGINE_DIR), "qualified_leagues.json")

# Signal key -> name of the ground-truth label column in the CSV.
LABEL_COLS = {
    "MS": "label_ms",
    "OU15": "label_ou15",
    "OU25": "label_ou25",
    "OU35": "label_ou35",
    "BTTS": "label_btts",
    "HT": "label_ht_result",
    "HT_OU05": "label_ht_ou05",
    "HT_OU15": "label_ht_ou15",
    "HTFT": "label_ht_ft",
    "OE": "label_odd_even",
    "CARDS": "label_cards_ou45",
    "HCAP": "label_handicap_ms",
}

# Inverse of FILE_TO_SIGNAL: signal key -> model file key.
SIGNAL_TO_FILE = dict(
    (signal, file_key) for file_key, signal in FILE_TO_SIGNAL.items()
)

# CSV columns that are never fed to the models as features:
# identifiers, raw scores and every label column.
SKIP_COLS = {
    # identifiers / timestamps
    "match_id",
    "home_team_id",
    "away_team_id",
    "league_id",
    "mst_utc",
    # raw scores
    "score_home",
    "score_away",
    "total_goals",
    "ht_score_home",
    "ht_score_away",
    "ht_total_goals",
    # full-time labels
    "label_ms",
    "label_ou05",
    "label_ou15",
    "label_ou25",
    "label_ou35",
    "label_btts",
    # half-time labels
    "label_ht_result",
    "label_ht_ou05",
    "label_ht_ou15",
    "label_ht_ft",
    # miscellaneous labels
    "label_odd_even",
    "label_yellow_cards",
    "label_cards_ou45",
    "label_handicap_ms",
}
|
||||
|
||||
|
||||
def backtest_league(
    league_id: str,
    df_league: pd.DataFrame,
    feature_cols: list,
    league_model,
    n_test: int,
) -> dict:
    """Backtest the league-specific model on the last ``n_test`` matches.

    Args:
        league_id: identifier of the league (kept for the caller's
            convenience; not used in the computation).
        df_league: rows of the training data belonging to one league.
        feature_cols: names of the feature columns passed to the model.
        league_model: object exposing ``has_market(file_key)`` and
            ``predict_market(file_key, features_dict)``; the latter is
            expected to return a mapping of class label -> probability
            (falsy when no prediction is available).
        n_test: number of most recent matches (by ``mst_utc``) to evaluate.

    Returns:
        ``{signal_key: {"accuracy", "n", "source"}}`` for every market that
        has a label column, at least 30 labelled matches and a league model.

    Each prediction is scored against the label of the *same* match: rows
    without a label are excluded before predicting, and rows for which the
    model returns no prediction are dropped together with their label.
    """
    df_test = df_league.sort_values("mst_utc").tail(n_test)
    results = {}

    for sig_key, mfile_key in SIGNAL_TO_FILE.items():
        label_col = LABEL_COLS.get(sig_key)
        if not label_col or label_col not in df_test.columns:
            continue

        # Only matches with a known outcome can be scored.
        labelled = df_test[df_test[label_col].notna()]
        if len(labelled) < 30:
            continue

        # Markets without a league-specific model are left out.
        if not (league_model and league_model.has_market(mfile_key)):
            continue

        class_labels = MARKET_META[mfile_key][1]
        preds = []
        y_valid = []
        for _, row in labelled.iterrows():
            feat = row[feature_cols].fillna(0).to_dict()
            probs = league_model.predict_market(mfile_key, feat)
            if not probs:
                continue
            best = max(probs, key=probs.__getitem__)
            preds.append(class_labels.index(best))
            # Keep the label paired with the prediction of the same match.
            y_valid.append(row[label_col])

        if not preds:
            continue

        acc = accuracy_score(y_valid, preds)
        results[sig_key] = {
            "accuracy": round(float(acc), 4),
            "n": len(preds),
            "source": "league_specific",
        }

    return results
|
||||
|
||||
|
||||
def backtest_with_general_v25(
    df_test: pd.DataFrame,
    feature_cols: list,
) -> dict:
    """Backtest the general V25 XGBoost models on ``df_test``.

    Args:
        df_test: matches to evaluate (features and label columns).
        feature_cols: names of the feature columns passed to the models.

    Returns:
        ``{signal_key: {"accuracy", "n", "source"}}`` for the MS, OU15, OU25,
        OU35 and BTTS markets that have a label column, at least 30 labelled
        matches and an ``"xgb"`` model. An empty dict when the V25 predictor
        cannot be loaded.

    Predictions are made for every row and then restricted to the rows that
    carry a label, so predictions and labels always refer to the same
    matches. Failures are reported on stderr instead of being dropped
    silently.
    """
    try:
        from models.v25_ensemble import get_v25_predictor
        v25 = get_v25_predictor()
        if not v25._loaded:
            v25.load_models()
    except Exception as e:
        # Best effort: the caller treats an empty result as "no fallback".
        print(f"[backtest] general V25 predictor unavailable: {e}", file=sys.stderr)
        return {}

    X = df_test[feature_cols].fillna(0)
    results = {}

    # Signal key -> V25 market key.
    market_keys = {
        "MS": "ms",
        "OU15": "ou15",
        "OU25": "ou25",
        "OU35": "ou35",
        "BTTS": "btts",
    }

    dmat = None  # built on first use, then shared by all markets

    for sig_key, mkey in market_keys.items():
        label_col = LABEL_COLS.get(sig_key)
        if not label_col or label_col not in df_test.columns:
            continue

        label_values = df_test[label_col]
        has_label = label_values.notna().to_numpy()
        y_true = label_values.to_numpy()[has_label]
        if len(y_true) < 30 or not v25.has_market(mkey):
            continue

        try:
            models_v25 = v25.models.get(mkey, {})
            if "xgb" not in models_v25:
                continue
            if dmat is None:
                dmat = xgb.DMatrix(X.values, feature_names=feature_cols)
            raw = models_v25["xgb"].predict(dmat)
            num_class = list(MARKET_META.get(mkey, (2,)))[0]

            if num_class > 2:
                preds = np.argmax(raw.reshape(-1, num_class), axis=1)
            else:
                preds = (raw >= 0.5).astype(int)

            # Score only the matches that actually have a label.
            preds = preds[has_label]

            acc = accuracy_score(y_true, preds)
            results[sig_key] = {
                "accuracy": round(float(acc), 4),
                "n": len(preds),
                "source": "general_v25",
            }
        except Exception as e:
            print(f"[backtest] general V25 failed for {sig_key}: {e}", file=sys.stderr)
            continue

    return results
|
||||
|
||||
|
||||
def main():
    """Backtest league-specific models on each qualified league's latest matches.

    For every qualified league with at least ``n_test`` matches
    (``n_test = max(--min-matches, --test-size)``), the most recent
    ``n_test`` matches are predicted with the league's own XGBoost models;
    markets the league model does not cover are filled from the general V25
    models. Leagues without a league model use the general V25 models only.
    A table is printed per league, followed by averages and the best / worst
    MS leagues, and the full result is written to
    ``reports/backtest_league_results.json``.

    Accuracy is computed only over matches that have a label for the market,
    with each prediction paired to the label of the same match.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--min-matches", type=int, default=100)
    parser.add_argument("--test-size", type=int, default=150,
                        help="Son kaç maçı test için kullan (min 100)")
    args = parser.parse_args()
    n_test = max(args.min_matches, args.test_size)

    print("Loading training data ...")
    df = pd.read_csv(DATA_PATH, low_memory=False)
    feature_cols = [c for c in df.columns if c not in SKIP_COLS]
    print(f" {len(df):,} maç | {len(feature_cols)} feature")

    qualified = []
    if os.path.exists(QL_PATH):
        with open(QL_PATH) as fh:
            qualified = json.load(fh)
    loader = get_league_model_loader()

    # League names are cosmetic; fall back to ids when the DB is unreachable.
    league_names = {}
    try:
        import psycopg2
        from data.db import get_clean_dsn
        conn = psycopg2.connect(get_clean_dsn())
        try:
            with conn.cursor() as cur:
                cur.execute("SELECT id, name FROM leagues WHERE id = ANY(%s)", (qualified,))
                league_names = {r[0]: r[1] for r in cur.fetchall()}
        finally:
            conn.close()
    except Exception:
        league_names = {}

    counts = df[df["league_id"].isin(qualified)].groupby("league_id").size()
    leagues_to_test = counts[counts >= n_test].index.tolist()
    print(f"\nBacktest: {len(leagues_to_test)} lig (>={n_test} maç) | son {n_test} maç kullanılacak\n")

    all_results = []
    markets_order = ["MS", "OU15", "OU25", "OU35", "BTTS", "HT", "HT_OU05", "HT_OU15", "HTFT", "OE", "CARDS", "HCAP"]

    header = f"{'Liga':<35} {'Maç':>5} | " + " | ".join(f"{m:>7}" for m in markets_order)
    print(header)
    print("-" * len(header))

    for league_id in leagues_to_test:
        df_league = df[df["league_id"] == league_id]
        # str() keeps the fallback working for non-string league ids.
        name = league_names.get(league_id) or str(league_id)[:20]
        df_test = df_league.sort_values("mst_utc").tail(n_test)

        league_model = loader.get(league_id)

        if league_model and league_model.models:
            # Batch predict from CSV features (fast)
            X = df_test[feature_cols].fillna(0)
            dmat = None  # built on first use, shared by all markets
            mkt_results = {}

            for mfile_key in list(league_model.models.keys()):
                sig_key = FILE_TO_SIGNAL.get(mfile_key)
                if not sig_key:
                    continue
                label_col = LABEL_COLS.get(sig_key)
                if not label_col or label_col not in df_test.columns:
                    continue

                label_values = df_test[label_col]
                has_label = label_values.notna().to_numpy()
                y_true = label_values.to_numpy()[has_label]
                if len(y_true) < 30:
                    continue

                try:
                    if dmat is None:
                        dmat = xgb.DMatrix(X.values, feature_names=feature_cols)
                    raw = league_model.models[mfile_key].predict(dmat)
                    nc = MARKET_META[mfile_key][0]
                    if nc > 2:
                        preds = np.argmax(raw.reshape(-1, nc), axis=1)
                    else:
                        preds = (raw >= 0.5).astype(int)

                    # Score only labelled matches, prediction and label
                    # taken from the same row.
                    preds = preds[has_label]

                    acc = accuracy_score(y_true, preds)
                    mkt_results[sig_key] = {"accuracy": round(float(acc), 4), "n": len(preds), "source": "league_xgb"}
                except Exception as e:
                    mkt_results[sig_key] = {"error": str(e)}

            # Fill missing markets with general V25
            gen_results = backtest_with_general_v25(df_test, feature_cols)
            for k, v in gen_results.items():
                if k not in mkt_results:
                    mkt_results[k] = {**v, "source": "general_v25_fallback"}
        else:
            # No league model — use general V25
            mkt_results = backtest_with_general_v25(df_test, feature_cols)
            for k in mkt_results:
                mkt_results[k]["source"] = "general_v25"

        n_used = min(n_test, len(df_league))

        # Print row
        accs = []
        for m in markets_order:
            r = mkt_results.get(m, {})
            if "accuracy" in r:
                accs.append(f"{r['accuracy']*100:>6.1f}%")
            else:
                accs.append(f"{'—':>7}")
        print(f"{name:<35} {n_used:>5} | " + " | ".join(accs))

        all_results.append({
            "league_id": league_id,
            "league_name": name,
            "n_tested": n_used,
            "markets": mkt_results,
        })

    # ── Summary ───────────────────────────────────────────────────
    print("\n" + "=" * len(header))
    print("ORTALAMA DOĞRULUK (tüm ligler):")
    for m in markets_order:
        accs = [r["markets"][m]["accuracy"] for r in all_results if m in r["markets"] and "accuracy" in r["markets"][m]]
        if accs:
            print(f" {m:<10}: {np.mean(accs)*100:.1f}% (min={min(accs)*100:.1f}% max={max(accs)*100:.1f}% n_leagues={len(accs)})")

    # Best / worst leagues for the MS market
    ms_sorted = sorted(
        [(r["league_name"], r["markets"].get("MS", {}).get("accuracy", 0), r["n_tested"])
         for r in all_results if "MS" in r["markets"] and "accuracy" in r["markets"]["MS"]],
        key=lambda x: x[1], reverse=True
    )
    print("\nEN İYİ MS (Top 10):")
    for name, acc, n in ms_sorted[:10]:
        print(f" {name:<35} {acc*100:.1f}% ({n} maç)")
    print("\nEN KÖTÜ MS (Bottom 10):")
    for name, acc, n in ms_sorted[-10:]:
        print(f" {name:<35} {acc*100:.1f}% ({n} maç)")

    # Save
    report = {"generated_at": pd.Timestamp.now().isoformat(), "n_test_per_league": n_test, "results": all_results}
    # A fresh checkout may not have the reports directory yet.
    os.makedirs(REPORTS_DIR, exist_ok=True)
    out_path = os.path.join(REPORTS_DIR, "backtest_league_results.json")
    with open(out_path, "w") as f:
        json.dump(report, f, indent=2)
    print(f"\nRapor: {out_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
+113
-200
@@ -1,223 +1,136 @@
|
||||
"""
|
||||
Real AI Engine Backtest Script
|
||||
==============================
|
||||
Uses the ACTUAL models (V20/V25 Ensemble) to predict historical matches.
|
||||
|
||||
Usage:
|
||||
python ai-engine/scripts/backtest_real.py
|
||||
Gerçek Odds Bazlı Backtest
|
||||
============================
|
||||
Model olasılığı vs gerçek bookmaker odds karşılaştırır.
|
||||
Edge varsa bahis açıldığı varsayılır, gerçek ROI hesaplanır.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
from datetime import datetime
|
||||
import os, sys, json
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import xgboost as xgb
|
||||
|
||||
# Add paths
|
||||
AI_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
ROOT_DIR = os.path.dirname(AI_DIR)
|
||||
sys.path.insert(0, ROOT_DIR)
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# Fix for Windows path issues in scripts
|
||||
if "scripts" in os.path.basename(AI_DIR):
|
||||
ROOT_DIR = os.path.dirname(ROOT_DIR) # One level up if inside scripts folder
|
||||
DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data', 'training_data.csv')
|
||||
MODELS_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'models', 'v25')
|
||||
REPORT_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'reports')
|
||||
|
||||
from services.single_match_orchestrator import get_single_match_orchestrator, MatchData
|
||||
SKIP_COLS = {
|
||||
'match_id','home_team_id','away_team_id','league_id','mst_utc',
|
||||
'score_home','score_away','total_goals','ht_score_home','ht_score_away','ht_total_goals',
|
||||
'label_ms','label_ou05','label_ou15','label_ou25','label_ou35','label_btts',
|
||||
'label_ht_result','label_ht_ou05','label_ht_ou15','label_ht_ft',
|
||||
'label_odd_even','label_yellow_cards','label_cards_ou45','label_handicap_ms',
|
||||
}
|
||||
|
||||
def get_clean_dsn() -> str:
    """Return the PostgreSQL DSN used by the backtest.

    A non-empty ``BACKTEST_DB_DSN`` environment variable takes precedence,
    so the connection can be changed without editing the script. Otherwise
    the historical built-in DSN is returned.
    """
    # NOTE(review): the built-in fallback embeds real-looking credentials in
    # source control. Rotate them and supply the DSN via the environment;
    # the literal is kept only so existing invocations keep working.
    return (
        os.environ.get("BACKTEST_DB_DSN")
        or "postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db"
    )
|
||||
# (model_key, n_class, pred_class, label_col, odds_col, isim)
|
||||
MARKETS = [
|
||||
('ms', 3, 0, 'label_ms', 'odds_ms_h', 'MS-Ev'),
|
||||
('ms', 3, 1, 'label_ms', 'odds_ms_d', 'MS-Ber'),
|
||||
('ms', 3, 2, 'label_ms', 'odds_ms_a', 'MS-Dep'),
|
||||
('ou15', 2, 1, 'label_ou15', 'odds_ou15_o', 'OU15-Ust'),
|
||||
('ou15', 2, 0, 'label_ou15', 'odds_ou15_u', 'OU15-Alt'),
|
||||
('ou25', 2, 1, 'label_ou25', 'odds_ou25_o', 'OU25-Ust'),
|
||||
('ou25', 2, 0, 'label_ou25', 'odds_ou25_u', 'OU25-Alt'),
|
||||
('ou35', 2, 1, 'label_ou35', 'odds_ou35_o', 'OU35-Ust'),
|
||||
('ou35', 2, 0, 'label_ou35', 'odds_ou35_u', 'OU35-Alt'),
|
||||
('btts', 2, 1, 'label_btts', 'odds_btts_y', 'BTTS-Var'),
|
||||
('btts', 2, 0, 'label_btts', 'odds_btts_n', 'BTTS-Yok'),
|
||||
]
|
||||
|
||||
def run_backtest():
|
||||
print("🚀 REAL AI BACKTEST: Sept 13, 2024 - Top Leagues")
|
||||
print("🧠 Engine: V30 Ensemble (V20+V25)")
|
||||
print("="*60)
|
||||
MIN_ODDS = 1.10
|
||||
MAX_ODDS = 10.0
|
||||
|
||||
# Load Top Leagues
|
||||
leagues_path = os.path.join(ROOT_DIR, "top_leagues.json")
|
||||
try:
|
||||
with open(leagues_path, 'r') as f:
|
||||
top_leagues = json.load(f)
|
||||
league_ids = tuple(str(lid) for lid in top_leagues)
|
||||
print(f"📋 Loaded {len(top_leagues)} top leagues.")
|
||||
except Exception as e:
|
||||
print(f"❌ Error loading top_leagues.json: {e}")
|
||||
return
|
||||
|
||||
# Date Range (Sept 13, 2024)
|
||||
start_dt = datetime(2024, 9, 13, 0, 0, 0)
|
||||
end_dt = datetime(2024, 9, 13, 23, 59, 59)
|
||||
start_ts = int(start_dt.timestamp() * 1000)
|
||||
end_ts = int(end_dt.timestamp() * 1000)
|
||||
def load_model(market):
    """Load the V25 XGBoost booster stored for ``market``.

    Looks for ``MODELS_DIR/xgb_v25_<market>.json`` and returns the loaded
    ``xgb.Booster``, or ``None`` when that file does not exist.
    """
    model_file = os.path.join(MODELS_DIR, f'xgb_v25_{market}.json')
    if os.path.exists(model_file):
        booster = xgb.Booster()
        booster.load_model(model_file)
        return booster
    return None
|
||||
|
||||
dsn = get_clean_dsn()
|
||||
conn = psycopg2.connect(dsn)
|
||||
cur = conn.cursor(cursor_factory=RealDictCursor)
|
||||
|
||||
# Fetch Matches
|
||||
cur.execute("""
|
||||
SELECT m.id, m.match_name, m.home_team_id, m.away_team_id,
|
||||
m.mst_utc, m.league_id, m.status, m.score_home, m.score_away,
|
||||
t1.name as home_team, t2.name as away_team,
|
||||
l.name as league_name
|
||||
FROM matches m
|
||||
LEFT JOIN teams t1 ON m.home_team_id = t1.id
|
||||
LEFT JOIN teams t2 ON m.away_team_id = t2.id
|
||||
LEFT JOIN leagues l ON m.league_id = l.id
|
||||
WHERE m.mst_utc BETWEEN %s AND %s
|
||||
AND m.league_id IN %s
|
||||
AND m.status = 'FT'
|
||||
ORDER BY m.mst_utc ASC
|
||||
LIMIT 20 -- Limit to 20 matches to avoid running for hours on a single backtest
|
||||
""", (start_ts, end_ts, league_ids))
|
||||
|
||||
rows = cur.fetchall()
|
||||
print(f"📊 Found {len(rows)} finished matches. Starting AI Analysis...")
|
||||
def main():
|
||||
print('Veri yukleniyor...')
|
||||
df = pd.read_csv(DATA_PATH, low_memory=False)
|
||||
df = df.sort_values('mst_utc')
|
||||
n_test = int(len(df) * 0.20)
|
||||
df_test = df.tail(n_test).copy().reset_index(drop=True)
|
||||
print(f'Test seti: {len(df_test):,} mac')
|
||||
|
||||
if not rows:
|
||||
print("⚠️ No matches found for this date.")
|
||||
cur.close()
|
||||
conn.close()
|
||||
return
|
||||
feature_cols = [c for c in df.columns if c not in SKIP_COLS]
|
||||
X = df_test[feature_cols].fillna(0).values
|
||||
|
||||
# Initialize AI Engine
|
||||
try:
|
||||
orchestrator = get_single_match_orchestrator()
|
||||
print("✅ AI Engine (SingleMatchOrchestrator) Loaded.")
|
||||
except Exception as e:
|
||||
print(f"❌ Failed to load AI Engine: {e}")
|
||||
print("💡 Make sure models are trained/present in ai-engine/models/")
|
||||
cur.close()
|
||||
conn.close()
|
||||
return
|
||||
# Modelleri yukle
|
||||
loaded = {}
|
||||
for mkey, n_class, *_ in MARKETS:
|
||||
if mkey not in loaded:
|
||||
m = load_model(mkey)
|
||||
if m:
|
||||
loaded[mkey] = (m, n_class)
|
||||
print(f'Modeller: {list(loaded.keys())}')
|
||||
|
||||
# ─── Backtest Loop ───
|
||||
total_matches_analyzed = 0
|
||||
bets_skipped = 0
|
||||
bets_played = 0
|
||||
bets_won = 0
|
||||
total_profit = 0.0
|
||||
|
||||
# Thresholds matching the NEW Skip Logic
|
||||
MIN_CONF = 45.0
|
||||
# Toplu tahmin
|
||||
raw_preds = {}
|
||||
for mkey, (model, n_class) in loaded.items():
|
||||
dmat = xgb.DMatrix(pd.DataFrame(X, columns=feature_cols))
|
||||
raw = model.predict(dmat)
|
||||
raw_preds[mkey] = raw.reshape(-1, n_class) if n_class > 2 else np.column_stack([1-raw, raw])
|
||||
|
||||
start_time = time.time()
|
||||
# Backtest
|
||||
all_results = []
|
||||
print(f'\n{"Market":<12} {"Edge>=":>7} {"Bahis":>7} {"Hit%":>7} {"AvgOdds":>9} {"ROI/b":>8} {"Toplam":>10}')
|
||||
print('-' * 65)
|
||||
|
||||
for i, row in enumerate(rows):
|
||||
match_id = str(row['id'])
|
||||
home_team = row['home_team']
|
||||
away_team = row['away_team']
|
||||
home_score = row['score_home']
|
||||
away_score = row['score_away']
|
||||
|
||||
print(f"\n[{i+1}/{len(rows)}] Analyzing: {home_team} vs {away_team} ...")
|
||||
for mkey, n_class, pred_cls, label_col, odds_col, isim in MARKETS:
|
||||
if mkey not in raw_preds or label_col not in df_test.columns or odds_col not in df_test.columns:
|
||||
continue
|
||||
|
||||
try:
|
||||
# 1. AI PREDICTION (Actual Model Call)
|
||||
prediction = orchestrator.analyze_match(match_id)
|
||||
|
||||
if not prediction:
|
||||
print(f" ⚠️ AI returned no prediction.")
|
||||
mp = raw_preds[mkey][:, pred_cls]
|
||||
act = pd.to_numeric(df_test[label_col], errors='coerce').values
|
||||
bko = pd.to_numeric(df_test[odds_col], errors='coerce').values
|
||||
|
||||
valid = (~np.isnan(act) & ~np.isnan(bko) &
|
||||
(bko >= MIN_ODDS) & (bko <= MAX_ODDS))
|
||||
mp, act, bko = mp[valid], act[valid].astype(int), bko[valid]
|
||||
implied = 1.0 / bko
|
||||
edge = mp - implied
|
||||
|
||||
print(f'\n{isim}:')
|
||||
for min_e in [0.02, 0.03, 0.05, 0.07, 0.10]:
|
||||
mask = edge >= min_e
|
||||
n = mask.sum()
|
||||
if n < 20:
|
||||
continue
|
||||
won = (act[mask] == pred_cls).astype(int)
|
||||
roi = (bko[mask] - 1) * won - (1 - won)
|
||||
hit = won.mean()
|
||||
avg_roi = roi.mean()
|
||||
total = roi.sum()
|
||||
avg_odds = bko[mask].mean()
|
||||
sign = '+' if total > 0 else ''
|
||||
print(f' edge>={min_e:+.0%} n={n:>5,} hit={hit:.1%} odds={avg_odds:.2f} roi/b={avg_roi:+.3f} toplam={sign}{total:.1f}')
|
||||
all_results.append({'market': isim, 'min_edge': min_e, 'n': n,
|
||||
'hit': round(hit, 4), 'avg_odds': round(avg_odds, 3),
|
||||
'avg_roi': round(avg_roi, 4), 'total_roi': round(total, 2)})
|
||||
|
||||
total_matches_analyzed += 1
|
||||
|
||||
# 2. Extract Main Pick
|
||||
main_pick = prediction.get("main_pick") or {}
|
||||
pick_name = main_pick.get("pick")
|
||||
confidence = main_pick.get("confidence", 0)
|
||||
odds = main_pick.get("odds", 0)
|
||||
# En iyi
|
||||
winners = sorted([r for r in all_results if r['total_roi'] > 0],
|
||||
key=lambda x: x['avg_roi'], reverse=True)
|
||||
print(f'\n{"="*65}')
|
||||
print('KAZANCLI KOMBINASYONLAR (total_roi > 0):')
|
||||
print(f'{"="*65}')
|
||||
for r in winners[:20]:
|
||||
print(f' {r["market"]:<12} edge>={r["min_edge"]:+.0%} | n={r["n"]:>5,} | '
|
||||
f'hit={r["hit"]:.0%} | roi/b={r["avg_roi"]:+.3f} | toplam={r["total_roi"]:+.1f}')
|
||||
|
||||
if not pick_name or not confidence:
|
||||
print(f" ⚠️ No main pick found in prediction.")
|
||||
continue
|
||||
os.makedirs(REPORT_DIR, exist_ok=True)
|
||||
with open(os.path.join(REPORT_DIR, 'backtest_real_odds.json'), 'w') as f:
|
||||
json.dump(all_results, f, indent=2)
|
||||
print(f'\nRapor kaydedildi.')
|
||||
|
||||
print(f" 🤖 Pick: {pick_name} | Conf: {confidence}% | Odds: {odds}")
|
||||
|
||||
# 3. Apply Skip Logic (New Backtest Logic)
|
||||
if confidence < MIN_CONF:
|
||||
print(f" 🚫 SKIPPED (Confidence {confidence}% < {MIN_CONF}%)")
|
||||
bets_skipped += 1
|
||||
continue
|
||||
|
||||
if odds > 0:
|
||||
implied_prob = 1.0 / odds
|
||||
my_prob = confidence / 100.0
|
||||
if my_prob - implied_prob < -0.03: # Negative edge
|
||||
print(f" 🚫 SKIPPED (Negative Edge)")
|
||||
bets_skipped += 1
|
||||
continue
|
||||
|
||||
# 4. Bet Played
|
||||
bets_played += 1
|
||||
print(f" 🎲 BET PLAYED: {pick_name} @ {odds}")
|
||||
|
||||
# 5. Resolve Bet
|
||||
won = False
|
||||
# Basic resolution logic (Need to parse pick_name like "1", "X", "2", "2.5 Üst", etc.)
|
||||
pick_clean = str(pick_name).upper()
|
||||
|
||||
# MS
|
||||
if pick_clean in ["1", "MS 1"] and home_score > away_score: won = True
|
||||
elif pick_clean in ["X", "MS X"] and home_score == away_score: won = True
|
||||
elif pick_clean in ["2", "MS 2"] and away_score > home_score: won = True
|
||||
|
||||
# OU25
|
||||
elif "ÜST" in pick_clean or "OVER" in pick_clean:
|
||||
if (home_score + away_score) > 2.5: won = True
|
||||
elif "ALT" in pick_clean or "UNDER" in pick_clean:
|
||||
if (home_score + away_score) < 2.5: won = True
|
||||
|
||||
# BTTS
|
||||
elif "VAR" in pick_clean and home_score > 0 and away_score > 0: won = True
|
||||
elif "YOK" in pick_clean and (home_score == 0 or away_score == 0): won = True
|
||||
|
||||
if won:
|
||||
bets_won += 1
|
||||
profit = odds - 1.0
|
||||
print(f" ✅ WON! (+{profit:.2f} units)")
|
||||
else:
|
||||
profit = -1.0
|
||||
print(f" ❌ LOST! (-1.00 units)")
|
||||
|
||||
total_profit += profit
|
||||
|
||||
except Exception as e:
|
||||
print(f" 💥 Error during analysis: {e}")
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
|
||||
# ─── FINAL REPORT ───
|
||||
print("\n" + "="*60)
|
||||
print("📈 REAL AI BACKTEST RESULTS")
|
||||
print(f"🕒 Time taken: {elapsed:.1f} seconds")
|
||||
print("="*60)
|
||||
print(f"📊 Matches Analyzed: {total_matches_analyzed}")
|
||||
print(f"🚫 Bets SKIPPED: {bets_skipped}")
|
||||
print(f"✅ Bets PLAYED: {bets_played}")
|
||||
|
||||
if bets_played > 0:
|
||||
win_rate = (bets_won / bets_played) * 100
|
||||
roi = (total_profit / bets_played) * 100
|
||||
yield_val = total_profit # Net Units
|
||||
|
||||
print(f"🏆 Bets Won: {bets_won}")
|
||||
print(f"💀 Bets Lost: {bets_played - bets_won}")
|
||||
print("-" * 40)
|
||||
print(f" Win Rate: {win_rate:.2f}%")
|
||||
print(f"💰 Total Profit (Units): {total_profit:.2f}")
|
||||
print(f"📊 ROI: {roi:.2f}%")
|
||||
|
||||
if roi > 0:
|
||||
print("🟢 STRATEGY IS PROFITABLE!")
|
||||
else:
|
||||
print("🔴 STRATEGY IS LOSING")
|
||||
else:
|
||||
print("⚠️ No bets were played. All were skipped or failed.")
|
||||
|
||||
cur.close()
|
||||
conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
run_backtest()
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
@@ -128,7 +128,40 @@ FEATURE_COLS = [
|
||||
"home_top_scorer_form", "away_top_scorer_form",
|
||||
"home_avg_player_exp", "away_avg_player_exp",
|
||||
"home_goals_diversity", "away_goals_diversity",
|
||||
|
||||
|
||||
# V27 H2H Expanded (4)
|
||||
"h2h_home_goals_avg", "h2h_away_goals_avg",
|
||||
"h2h_recent_trend", "h2h_venue_advantage",
|
||||
|
||||
    # V27 Rolling Stats (12)
|
||||
"home_rolling5_goals", "home_rolling5_conceded",
|
||||
"home_rolling10_goals", "home_rolling10_conceded",
|
||||
"home_rolling20_goals", "home_rolling20_conceded",
|
||||
"away_rolling5_goals", "away_rolling5_conceded",
|
||||
"away_rolling10_goals", "away_rolling10_conceded",
|
||||
"home_rolling5_cs", "away_rolling5_cs",
|
||||
|
||||
# V27 Venue Stats (4)
|
||||
"home_venue_goals", "home_venue_conceded",
|
||||
"away_venue_goals", "away_venue_conceded",
|
||||
|
||||
# V27 Goal Trend (2)
|
||||
"home_goal_trend", "away_goal_trend",
|
||||
|
||||
# V27 Calendar (5)
|
||||
"home_days_rest", "away_days_rest",
|
||||
"match_month", "is_season_start", "is_season_end",
|
||||
|
||||
# V27 Interaction (6)
|
||||
"attack_vs_defense_home", "attack_vs_defense_away",
|
||||
"xg_diff", "form_momentum_interaction",
|
||||
"elo_form_consistency", "upset_x_elo_gap",
|
||||
|
||||
# V27 League Expanded (5)
|
||||
"league_home_win_rate", "league_draw_rate",
|
||||
"league_btts_rate", "league_ou25_rate",
|
||||
"league_reliability_score",
|
||||
|
||||
# Labels
|
||||
"score_home", "score_away", "total_goals",
|
||||
"ht_score_home", "ht_score_away", "ht_total_goals",
|
||||
@@ -296,6 +329,10 @@ class BatchDataLoader:
|
||||
SELECT league_id,
|
||||
AVG(score_home + score_away) as avg_goals,
|
||||
AVG(CASE WHEN score_home = 0 AND score_away = 0 THEN 1.0 ELSE 0.0 END) as zero_rate,
|
||||
AVG(CASE WHEN score_home > score_away THEN 1.0 ELSE 0.0 END) as home_win_rate,
|
||||
AVG(CASE WHEN score_home = score_away THEN 1.0 ELSE 0.0 END) as draw_rate,
|
||||
AVG(CASE WHEN score_home > 0 AND score_away > 0 THEN 1.0 ELSE 0.0 END) as btts_rate,
|
||||
AVG(CASE WHEN score_home + score_away > 2.5 THEN 1.0 ELSE 0.0 END) as ou25_rate,
|
||||
COUNT(*) as match_count
|
||||
FROM matches
|
||||
WHERE status = 'FT'
|
||||
@@ -304,12 +341,17 @@ class BatchDataLoader:
|
||||
AND league_id IN ({ph})
|
||||
GROUP BY league_id
|
||||
""", self.top_league_ids)
|
||||
|
||||
for league_id, avg_goals, zero_rate, cnt in self.cur.fetchall():
|
||||
|
||||
for row in self.cur.fetchall():
|
||||
league_id, avg_goals, zero_rate, home_win_rate, draw_rate, btts_rate, ou25_rate, cnt = row
|
||||
self.league_stats_cache[league_id] = {
|
||||
"avg_goals": float(avg_goals) if avg_goals else 2.5,
|
||||
"zero_rate": float(zero_rate) if zero_rate else 0.07,
|
||||
"match_count": cnt
|
||||
"home_win_rate": float(home_win_rate) if home_win_rate else 0.45,
|
||||
"draw_rate": float(draw_rate) if draw_rate else 0.25,
|
||||
"btts_rate": float(btts_rate) if btts_rate else 0.50,
|
||||
"ou25_rate": float(ou25_rate) if ou25_rate else 0.50,
|
||||
"match_count": cnt,
|
||||
}
|
||||
|
||||
def _load_team_history(self):
|
||||
@@ -666,6 +708,9 @@ class FeatureExtractor:
|
||||
|
||||
print(f"\n🔄 Extracting features for {total} matches...", flush=True)
|
||||
|
||||
_last_print = t_start
|
||||
_PRINT_INTERVAL = 60 # her dakika bir ilerleme
|
||||
|
||||
# Process chronologically — ELO grows as we go
|
||||
for i, m in enumerate(matches):
|
||||
(
|
||||
@@ -683,17 +728,25 @@ class FeatureExtractor:
|
||||
league_name,
|
||||
) = m
|
||||
|
||||
if i % 100 == 0 and i > 0:
|
||||
elapsed = time.time() - t_start
|
||||
rate = i / elapsed # matches per second
|
||||
now = time.time()
|
||||
if now - _last_print >= _PRINT_INTERVAL and i > 0:
|
||||
elapsed = now - t_start
|
||||
rate = i / elapsed
|
||||
remaining = (total - i) / rate if rate > 0 else 0
|
||||
pct = i / total * 100
|
||||
pct = i / total * 100
|
||||
eta_h = int(remaining // 3600)
|
||||
eta_m = int((remaining % 3600) // 60)
|
||||
eta_s = int(remaining % 60)
|
||||
eta_str = (f"{eta_h}s {eta_m}dk" if eta_h else f"{eta_m}dk {eta_s}s")
|
||||
print(
|
||||
f" [{i}/{total}] ({pct:.0f}%) | {rate:.1f} maç/s | "
|
||||
f"ETA: {remaining/60:.1f} dk | skipped: {skipped} | "
|
||||
f"dq_rejected: {dq_rejected}",
|
||||
f" ⏱ [{i:>6}/{total}] %{pct:>4.1f} | "
|
||||
f"{rate:.1f} maç/s | "
|
||||
f"bitti: {len(rows):,} | "
|
||||
f"atlanan: {skipped+dq_rejected} | "
|
||||
f"ETA: {eta_str}",
|
||||
flush=True,
|
||||
)
|
||||
_last_print = now
|
||||
|
||||
row = self._extract_one(
|
||||
mid, hid, aid, sh, sa, hth, hta, mst, lid,
|
||||
@@ -882,7 +935,10 @@ class FeatureExtractor:
|
||||
}
|
||||
|
||||
# === LEAGUE FEATURES ===
|
||||
league = self.loader.league_stats_cache.get(lid, {"avg_goals": 2.5, "zero_rate": 0.07})
|
||||
league = self.loader.league_stats_cache.get(lid, {
|
||||
"avg_goals": 2.5, "zero_rate": 0.07, "home_win_rate": 0.45,
|
||||
"draw_rate": 0.25, "btts_rate": 0.50, "ou25_rate": 0.50, "match_count": 0,
|
||||
})
|
||||
league_features = {
|
||||
"league_avg_goals": league["avg_goals"],
|
||||
"league_zero_goal_rate": league["zero_rate"],
|
||||
@@ -953,6 +1009,11 @@ class FeatureExtractor:
|
||||
home_goals_form = home_sq.get('goals_form', 0)
|
||||
away_goals_form = away_sq.get('goals_form', 0)
|
||||
|
||||
# === V27 ROLLING / VENUE / CALENDAR FEATURES ===
|
||||
v27 = self._compute_v27_features(hid, aid, mst, elo_features, form_features,
|
||||
home_momentum_score, away_momentum_score,
|
||||
upset_feats, h2h_features, league)
|
||||
|
||||
# === ASSEMBLE ROW ===
|
||||
row = {
|
||||
"match_id": mid,
|
||||
@@ -960,13 +1021,13 @@ class FeatureExtractor:
|
||||
"away_team_id": aid,
|
||||
"league_id": lid,
|
||||
"mst_utc": mst,
|
||||
|
||||
|
||||
**elo_features,
|
||||
**form_features,
|
||||
**h2h_features,
|
||||
**stats_features,
|
||||
**odds_features,
|
||||
|
||||
|
||||
"home_xga": form_features["home_conceded_avg"],
|
||||
"away_xga": form_features["away_conceded_avg"],
|
||||
**league_features,
|
||||
@@ -1007,7 +1068,10 @@ class FeatureExtractor:
|
||||
"away_avg_player_exp": away_sq.get('avg_player_exp', 0.0),
|
||||
"home_goals_diversity": home_sq.get('goals_diversity', 0.0),
|
||||
"away_goals_diversity": away_sq.get('goals_diversity', 0.0),
|
||||
|
||||
|
||||
# V27 Features
|
||||
**v27,
|
||||
|
||||
# Labels
|
||||
"score_home": sh,
|
||||
"score_away": sa,
|
||||
@@ -1033,6 +1097,103 @@ class FeatureExtractor:
|
||||
|
||||
return row
|
||||
|
||||
def _compute_v27_features(self, hid, aid, mst, elo_features, form_features,
|
||||
home_momentum, away_momentum, upset_feats, h2h_features, league):
|
||||
"""Compute V27 rolling, venue, calendar, interaction features from pre-loaded data."""
|
||||
home_history = self.loader.team_matches.get(hid, [])
|
||||
away_history = self.loader.team_matches.get(aid, [])
|
||||
|
||||
def _rolling(history, n):
|
||||
recent = [m for m in history if m[0] < mst][-n:]
|
||||
if not recent:
|
||||
return 1.3, 1.1, 0.0
|
||||
goals = sum(m[2] for m in recent) / len(recent)
|
||||
conceded = sum(m[3] for m in recent) / len(recent)
|
||||
cs = sum(1 for m in recent if m[3] == 0) / len(recent)
|
||||
return round(goals, 3), round(conceded, 3), round(cs, 3)
|
||||
|
||||
def _venue(history, is_home):
|
||||
recent = [m for m in history if m[0] < mst and m[1] == is_home][-10:]
|
||||
if not recent:
|
||||
return 1.3, 1.1
|
||||
goals = sum(m[2] for m in recent) / len(recent)
|
||||
conceded = sum(m[3] for m in recent) / len(recent)
|
||||
return round(goals, 3), round(conceded, 3)
|
||||
|
||||
def _days_rest(history):
|
||||
prior = [m[0] for m in history if m[0] < mst]
|
||||
if not prior:
|
||||
return 7.0
|
||||
last = prior[-1]
|
||||
return round(min((mst - last) / 86400000.0, 30.0), 1)
|
||||
|
||||
h5g, h5c, h5cs = _rolling(home_history, 5)
|
||||
h10g, h10c, _ = _rolling(home_history, 10)
|
||||
h20g, h20c, _ = _rolling(home_history, 20)
|
||||
a5g, a5c, a5cs = _rolling(away_history, 5)
|
||||
a10g, a10c, _ = _rolling(away_history, 10)
|
||||
|
||||
hvg, hvc = _venue(home_history, True)
|
||||
avg, avc = _venue(away_history, False)
|
||||
|
||||
home_rest = _days_rest(home_history)
|
||||
away_rest = _days_rest(away_history)
|
||||
|
||||
import datetime
|
||||
match_dt = datetime.datetime.utcfromtimestamp(mst / 1000)
|
||||
match_month = match_dt.month
|
||||
|
||||
elo_diff = elo_features["elo_diff"]
|
||||
form_elo_diff = elo_features["form_elo_diff"]
|
||||
mom_diff = home_momentum - away_momentum
|
||||
home_conceded = form_features["home_conceded_avg"]
|
||||
away_conceded = form_features["away_conceded_avg"]
|
||||
home_goals = form_features["home_goals_avg"]
|
||||
away_goals = form_features["away_goals_avg"]
|
||||
upset_potential = upset_feats.get("upset_potential", 0.0)
|
||||
|
||||
h2h_prior = [m for m in home_history if m[0] < mst and m[4] == aid]
|
||||
h2h_home_goals_avg = sum(m[2] for m in h2h_prior) / len(h2h_prior) if h2h_prior else 1.3
|
||||
h2h_away_goals_avg = sum(m[3] for m in h2h_prior) / len(h2h_prior) if h2h_prior else 1.1
|
||||
recent_h2h = h2h_prior[-3:]
|
||||
h2h_recent_trend = sum(1 if m[2] > m[3] else -1 if m[2] < m[3] else 0 for m in recent_h2h) / max(len(recent_h2h), 1)
|
||||
venue_h2h = [m for m in h2h_prior if m[1]]
|
||||
h2h_venue_advantage = sum(1 if m[2] > m[3] else 0 for m in venue_h2h) / max(len(venue_h2h), 1) if venue_h2h else 0.5
|
||||
|
||||
league_count = league.get("match_count", 0)
|
||||
|
||||
return {
|
||||
"h2h_home_goals_avg": round(h2h_home_goals_avg, 3),
|
||||
"h2h_away_goals_avg": round(h2h_away_goals_avg, 3),
|
||||
"h2h_recent_trend": round(h2h_recent_trend, 3),
|
||||
"h2h_venue_advantage": round(h2h_venue_advantage, 3),
|
||||
"home_rolling5_goals": h5g, "home_rolling5_conceded": h5c,
|
||||
"home_rolling10_goals": h10g, "home_rolling10_conceded": h10c,
|
||||
"home_rolling20_goals": h20g, "home_rolling20_conceded": h20c,
|
||||
"away_rolling5_goals": a5g, "away_rolling5_conceded": a5c,
|
||||
"away_rolling10_goals": a10g, "away_rolling10_conceded": a10c,
|
||||
"home_rolling5_cs": h5cs, "away_rolling5_cs": a5cs,
|
||||
"home_venue_goals": hvg, "home_venue_conceded": hvc,
|
||||
"away_venue_goals": avg, "away_venue_conceded": avc,
|
||||
"home_goal_trend": round(h5g - h10g, 3),
|
||||
"away_goal_trend": round(a5g - a10g, 3),
|
||||
"home_days_rest": home_rest, "away_days_rest": away_rest,
|
||||
"match_month": float(match_month),
|
||||
"is_season_start": 1.0 if match_month in (7, 8, 9) else 0.0,
|
||||
"is_season_end": 1.0 if match_month in (5, 6) else 0.0,
|
||||
"attack_vs_defense_home": round(home_goals - away_conceded, 3),
|
||||
"attack_vs_defense_away": round(away_goals - home_conceded, 3),
|
||||
"xg_diff": round(home_conceded - away_conceded, 3),
|
||||
"form_momentum_interaction": round(mom_diff * form_elo_diff / 1000.0, 4),
|
||||
"elo_form_consistency": round(1.0 - abs(elo_diff - form_elo_diff) / max(abs(elo_diff), 100.0), 4),
|
||||
"upset_x_elo_gap": round(upset_potential * abs(elo_diff) / 500.0, 4),
|
||||
"league_home_win_rate": league.get("home_win_rate", 0.45),
|
||||
"league_draw_rate": league.get("draw_rate", 0.25),
|
||||
"league_btts_rate": league.get("btts_rate", 0.50),
|
||||
"league_ou25_rate": league.get("ou25_rate", 0.50),
|
||||
"league_reliability_score": min(1.0, league_count / 500.0) if league_count else 0.3,
|
||||
}
|
||||
|
||||
def _validate_row_quality(
|
||||
self,
|
||||
row: dict,
|
||||
|
||||
@@ -0,0 +1,166 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": ["# Training Data Extraction — Google Colab\n", "SSH tunnel ile sunucuya bağlanır, DB'den 270K+ maç çeker, Drive'a kaydeder.\n"]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 1. Gerekli paketler\n",
|
||||
"!pip install sshtunnel psycopg2-binary pandas numpy -q\n",
|
||||
"print('Paketler hazır')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 2. Drive bağla\n",
|
||||
"from google.colab import drive\n",
|
||||
"drive.mount('/content/drive')\n",
|
||||
"import os\n",
|
||||
"DRIVE_DIR = '/content/drive/MyDrive/iddaai'\n",
|
||||
"os.makedirs(DRIVE_DIR, exist_ok=True)\n",
|
||||
"print('Drive hazır:', DRIVE_DIR)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 3. SSH private key upload\n",
|
||||
"# Mac'te terminalde şunu çalıştır, çıktıyı kopyala:\n",
|
||||
"# cat ~/.ssh/id_ed25519\n",
|
||||
"# Aşağıya yapıştır (BEGIN ve END satırları dahil)\n",
|
||||
"\n",
|
||||
"SSH_PRIVATE_KEY = \"\"\"-----BEGIN OPENSSH PRIVATE KEY-----\n",
|
||||
"BURAYA_KEY_ICERIGINI_YAPISTIR\n",
|
||||
"-----END OPENSSH PRIVATE KEY-----\"\"\"\n",
|
||||
"\n",
|
||||
"# Key dosyasına yaz\n",
|
||||
"key_path = '/root/.ssh/id_ed25519'\n",
|
||||
"os.makedirs('/root/.ssh', exist_ok=True)\n",
|
||||
"with open(key_path, 'w') as f:\n",
|
||||
" f.write(SSH_PRIVATE_KEY.strip() + '\\n')\n",
|
||||
"os.chmod(key_path, 0o600)\n",
|
||||
"print('SSH key hazır')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 4. SSH Tunnel aç + DB bağlantısını test et\n",
|
||||
"from sshtunnel import SSHTunnelForwarder\n",
|
||||
"import psycopg2\n",
|
||||
"\n",
|
||||
"tunnel = SSHTunnelForwarder(\n",
|
||||
" ('95.70.252.214', 2222),\n",
|
||||
" ssh_username='haruncan',\n",
|
||||
" ssh_pkey=key_path,\n",
|
||||
" remote_bind_address=('localhost', 5432),\n",
|
||||
" local_bind_address=('localhost', 15432),\n",
|
||||
")\n",
|
||||
"tunnel.start()\n",
|
||||
"print(f'Tunnel açık: localhost:{tunnel.local_bind_port}')\n",
|
||||
"\n",
|
||||
"conn = psycopg2.connect(\n",
|
||||
" host='localhost',\n",
|
||||
" port=15432,\n",
|
||||
" dbname='iddaai_db',\n",
|
||||
" user='iddaai_user',\n",
|
||||
" password='IddaA1_S4crET!',\n",
|
||||
")\n",
|
||||
"cur = conn.cursor()\n",
|
||||
"cur.execute(\"SELECT COUNT(*) FROM matches WHERE status='FT' AND score_home IS NOT NULL\")\n",
|
||||
"print(f'DB bağlantısı OK — FT maç sayısı: {cur.fetchone()[0]:,}')\n",
|
||||
"conn.close()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 5. extract_training_data.py kodunu Drive'dan veya doğrudan çalıştır\n",
|
||||
"# Önce repo'yu Drive'a kopyala (yoksa)\n",
|
||||
"import subprocess\n",
|
||||
"\n",
|
||||
"REPO_DIR = f'{DRIVE_DIR}/ai-engine'\n",
|
||||
"SCRIPT = f'{REPO_DIR}/scripts/extract_training_data.py'\n",
|
||||
"\n",
|
||||
"if not os.path.exists(SCRIPT):\n",
|
||||
" print('Script bulunamadı — ai-engine klasörünü Drive a yükle:')\n",
|
||||
" print(' Yerel makinede: cp -r /Users/piton/Documents/GitHub/iddaai/iddaai-be/ai-engine ~/Google\\ Drive/MyDrive/iddaai/')\n",
|
||||
"else:\n",
|
||||
" print('Script hazır:', SCRIPT)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 6. Extraction'ı çalıştır\n",
|
||||
"import sys, os\n",
|
||||
"sys.path.insert(0, REPO_DIR)\n",
|
||||
"\n",
|
||||
"# DB URL'i tunnel üzerinden ayarla\n",
|
||||
"os.environ['DATABASE_URL'] = 'postgresql://iddaai_user:IddaA1_S4crET!@localhost:15432/iddaai_db'\n",
|
||||
"\n",
|
||||
"# Output CSV'yi Drive'a kaydet\n",
|
||||
"OUTPUT_CSV = f'{DRIVE_DIR}/training_data_full.csv'\n",
|
||||
"\n",
|
||||
"# Script'i import et ve main'i çalıştır\n",
|
||||
"import importlib.util\n",
|
||||
"spec = importlib.util.spec_from_file_location('extract', SCRIPT)\n",
|
||||
    "mod = importlib.util.module_from_spec(spec)\n",
|
||||
"spec.loader.exec_module(mod)\n",
|
||||
"\n",
|
||||
"# OUTPUT_CSV'yi override et\n",
|
||||
"mod.OUTPUT_CSV = OUTPUT_CSV\n",
|
||||
"mod.TOP_LEAGUES_PATH = f'{DRIVE_DIR}/qualified_leagues.json'\n",
|
||||
"\n",
|
||||
"mod.main()\n",
|
||||
"print(f'\\nKaydedildi: {OUTPUT_CSV}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 7. Tunnel kapat\n",
|
||||
"tunnel.stop()\n",
|
||||
"print('Tunnel kapatıldı')\n",
|
||||
"\n",
|
||||
"# Dosya boyutunu kontrol et\n",
|
||||
"size_mb = os.path.getsize(OUTPUT_CSV) / 1024 / 1024\n",
|
||||
"import pandas as pd\n",
|
||||
"df = pd.read_csv(OUTPUT_CSV, nrows=5)\n",
|
||||
"print(f'CSV: {size_mb:.1f} MB')\n",
|
||||
"print(f'Kolonlar: {len(df.columns)}')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
|
||||
"language_info": {"name": "python", "version": "3.10.0"}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,806 @@
|
||||
"""
|
||||
V25 Backtest + Calibration Training Script
|
||||
==========================================
|
||||
Runs a full backtest on historical football matches, measures model accuracy
|
||||
by market / confidence band / league, and trains isotonic calibration models
|
||||
for MS, OU15, OU25, and BTTS markets.
|
||||
|
||||
Usage:
|
||||
venv/bin/python scripts/run_backtest_and_calibrate.py
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import pickle
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Path setup — works whether executed from ai-engine/ or project root
|
||||
# ---------------------------------------------------------------------------
|
||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
AI_ENGINE_DIR = os.path.dirname(SCRIPT_DIR)
|
||||
sys.path.insert(0, AI_ENGINE_DIR)
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from models.calibration import Calibrator
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
QUALIFIED_LEAGUES_PATH = os.path.join(AI_ENGINE_DIR, "..", "qualified_leagues.json")
|
||||
CALIBRATION_DIR = os.path.join(AI_ENGINE_DIR, "models", "calibration")
|
||||
REPORTS_DIR = os.path.join(AI_ENGINE_DIR, "reports")
|
||||
MAX_MATCHES = 3000 # target upper bound
|
||||
PROGRESS_INTERVAL = 100 # print every N matches
|
||||
|
||||
os.makedirs(CALIBRATION_DIR, exist_ok=True)
|
||||
os.makedirs(REPORTS_DIR, exist_ok=True)
|
||||
|
||||
# Mapping: Turkish category name -> internal feature key
|
||||
ODDS_CATEGORY_MAP = {
|
||||
"Maç Sonucu": {
|
||||
"1": "odds_ms_h",
|
||||
"X": "odds_ms_d",
|
||||
"2": "odds_ms_a",
|
||||
},
|
||||
"1,5 Alt/Üst": {
|
||||
"Üst": "odds_ou15_o",
|
||||
"Alt": "odds_ou15_u",
|
||||
},
|
||||
"2,5 Alt/Üst": {
|
||||
"Üst": "odds_ou25_o",
|
||||
"Alt": "odds_ou25_u",
|
||||
},
|
||||
"3,5 Alt/Üst": {
|
||||
"Üst": "odds_ou35_o",
|
||||
"Alt": "odds_ou35_u",
|
||||
},
|
||||
"0,5 Alt/Üst": {
|
||||
"Üst": "odds_ou05_o",
|
||||
"Alt": "odds_ou05_u",
|
||||
},
|
||||
"Karşılıklı Gol": {
|
||||
"Var": "odds_btts_y",
|
||||
"Yok": "odds_btts_n",
|
||||
},
|
||||
"1. Yarı Sonucu": {
|
||||
"1": "odds_ht_ms_h",
|
||||
"X": "odds_ht_ms_d",
|
||||
"2": "odds_ht_ms_a",
|
||||
},
|
||||
"1. Yarı 0,5 Alt/Üst": {
|
||||
"Üst": "odds_ht_ou05_o",
|
||||
"Alt": "odds_ht_ou05_u",
|
||||
},
|
||||
"1. Yarı 1,5 Alt/Üst": {
|
||||
"Üst": "odds_ht_ou15_o",
|
||||
"Alt": "odds_ht_ou15_u",
|
||||
},
|
||||
}
|
||||
|
||||
# Top 5 leagues by name for individual breakdown (will be matched by league_id)
|
||||
TOP5_LEAGUE_NAMES = {
|
||||
"Premier League",
|
||||
"La Liga",
|
||||
"Bundesliga",
|
||||
"Serie A",
|
||||
"Ligue 1",
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# STEP 1 — Load qualified league IDs
|
||||
# ============================================================================
|
||||
|
||||
def load_qualified_leagues() -> List[str]:
    """Load the qualified league IDs from ``QUALIFIED_LEAGUES_PATH``.

    The JSON document is iterated directly, so it is expected to be a
    collection of league identifiers; each one is converted to ``str``.

    Returns:
        The league IDs as strings, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    path = os.path.abspath(QUALIFIED_LEAGUES_PATH)
    # JSON is UTF-8 by specification; do not depend on the platform's
    # default text encoding.
    with open(path, "r", encoding="utf-8") as f:
        leagues = json.load(f)
    print(f"[Step 1] Loaded {len(leagues)} qualified league IDs.")
    return [str(lid) for lid in leagues]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# STEP 1b — Fetch matches + pre-computed features in batch
|
||||
# ============================================================================
|
||||
|
||||
def fetch_matches(conn, league_ids: List[str]) -> pd.DataFrame:
    """Fetch finished matches with pre-computed features in a single query.

    Joins ``matches`` with ``football_ai_features`` and the league name, and
    inner-joins on ``odd_categories`` so only matches with football odds are
    returned. Results are limited to the ``MAX_MATCHES`` most recent matches
    (ordered by ``mst_utc`` descending).

    Args:
        conn: Open database connection; must support
            ``cursor(cursor_factory=...)``.
        league_ids: League IDs to include (bound to ``m.league_id = ANY(%s)``).

    Returns:
        A DataFrame with one row per match. When nothing matches, the frame
        is empty but still carries the query's column names.
    """
    print("[Step 1b] Fetching matches with pre-computed features and odds ...")

    query = """
        SELECT
            m.id                        AS match_id,
            m.league_id,
            l.name                      AS league_name,
            m.score_home,
            m.score_away,
            m.mst_utc,
            -- From football_ai_features
            f.home_elo                  AS home_overall_elo,
            f.away_elo                  AS away_overall_elo,
            f.elo_diff,
            f.home_home_elo,
            f.away_away_elo,
            f.home_form_elo,
            f.away_form_elo,
            f.home_goals_avg_5          AS home_goals_avg,
            f.away_goals_avg_5          AS away_goals_avg,
            f.home_conceded_avg_5       AS home_conceded_avg,
            f.away_conceded_avg_5       AS away_conceded_avg,
            f.home_clean_sheet_rate,
            f.away_clean_sheet_rate,
            f.home_scoring_rate,
            f.away_scoring_rate,
            f.home_win_streak           AS home_winning_streak,
            f.away_win_streak           AS away_winning_streak,
            f.home_avg_possession,
            f.away_avg_possession,
            f.home_avg_shots_on_target,
            f.away_avg_shots_on_target,
            f.home_shot_conversion,
            f.away_shot_conversion,
            f.home_avg_corners,
            f.away_avg_corners,
            f.h2h_total                 AS h2h_total_matches,
            f.h2h_home_win_rate,
            f.h2h_avg_goals,
            f.h2h_over25_rate,
            f.h2h_btts_rate,
            f.league_avg_goals,
            f.league_home_win_pct       AS league_home_win_rate,
            f.league_over25_pct         AS league_ou25_rate,
            f.referee_avg_cards         AS referee_cards_total,
            f.referee_home_bias,
            f.referee_avg_goals,
            f.missing_players_impact    AS home_missing_impact,
            f.implied_home,
            f.implied_draw,
            f.implied_away
        FROM matches m
        JOIN football_ai_features f ON f.match_id = m.id
        -- Only matches that have odds data
        JOIN (SELECT DISTINCT match_id FROM odd_categories WHERE sport = 'football') oc
            ON oc.match_id = m.id
        LEFT JOIN leagues l ON l.id = m.league_id
        WHERE m.status = 'FT'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
          AND m.league_id = ANY(%s)
        ORDER BY m.mst_utc DESC
        LIMIT %s
        """

    # The context manager closes the cursor even when execute/fetchall raises.
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(query, (league_ids, MAX_MATCHES))
        rows = cur.fetchall()
        # Column names from the result description keep the schema stable
        # even for an empty result set.
        columns = [col[0] for col in cur.description]

    df = pd.DataFrame([dict(r) for r in rows], columns=columns)
    print(f"[Step 1b] Fetched {len(df)} matches with features + odds coverage.")
    return df
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# STEP 1c — Fetch all odds for the matched match IDs in one query
|
||||
# ============================================================================
|
||||
|
||||
def fetch_odds_bulk(conn, match_ids: List[str]) -> Dict[str, Dict[str, float]]:
    """
    Fetch the odds of all requested matches with a single query.

    Args:
        conn: Open psycopg2 connection.
        match_ids: IDs of the matches whose odds should be loaded.

    Returns:
        {match_id: {feature_key: odd_value, ...}} limited to the categories and
        selections listed in ODDS_CATEGORY_MAP. Only odds strictly greater than
        1.0 are kept; matches without any usable odd are absent from the result.
    """
    print(f"[Step 1c] Fetching odds for {len(match_ids)} matches ...")

    known_cats = list(ODDS_CATEGORY_MAP.keys())

    # The context manager closes the cursor even when execute()/fetchall() raises.
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(
            # Raw string: the regex below contains "\." which is not a valid
            # Python escape sequence in a normal string literal.
            r"""
            SELECT oc.match_id, oc.name AS cat_name, os.name AS sel_name, os.odd_value
            FROM odd_categories oc
            JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
            WHERE oc.match_id = ANY(%s)
            AND oc.name = ANY(%s)
            AND oc.sport = 'football'
            AND os.odd_value IS NOT NULL
            AND os.odd_value ~ '^[0-9]+(\.[0-9]+)?$'
            """,
            (match_ids, known_cats),
        )
        rows = cur.fetchall()

    # Build nested dict: match_id -> {feature_key -> value}
    odds_map: Dict[str, Dict[str, float]] = defaultdict(dict)
    for r in rows:
        selections = ODDS_CATEGORY_MAP.get(r["cat_name"])
        if not selections or r["sel_name"] not in selections:
            continue
        feat_key = selections[r["sel_name"]]

        try:
            val = float(r["odd_value"])
        except (TypeError, ValueError):
            # The SQL regex already filters non-numeric text; this is a safety net.
            continue

        if val > 1.0:
            # Keep the first value encountered for a key. NOTE(review): the
            # query has no ORDER BY, so which duplicate comes first is decided
            # by the database, not by recency or bookmaker priority.
            odds_map[r["match_id"]].setdefault(feat_key, val)

    print(f"[Step 1c] Odds loaded for {len(odds_map)} matches.")
    return dict(odds_map)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# STEP 2 — Build 114-feature vector per match
|
||||
# ============================================================================
|
||||
|
||||
def load_feature_cols() -> List[str]:
    """Return the feature-column list stored in models/v25/feature_cols.json."""
    cols_file = os.path.join(AI_ENGINE_DIR, "models", "v25", "feature_cols.json")
    with open(cols_file, "r") as handle:
        feature_cols = json.load(handle)
    return feature_cols
|
||||
|
||||
|
||||
def build_feature_vector(
    match_row: pd.Series,
    odds: Dict[str, float],
    feature_cols: List[str],
) -> Dict[str, float]:
    """
    Construct the full feature dict for one match.

    Every name in feature_cols starts at 0.0 and is then overwritten from the
    match row, the odds dict and a few derived values, so any feature that
    cannot be filled falls back to 0.0.

    Args:
        match_row: One row of the matches DataFrame (feature columns plus
            ``mst_utc`` as epoch milliseconds).
        odds: {feature_key: decimal odd} for this match; may be empty.
        feature_cols: Feature names expected by the model.

    Returns:
        Feature dict. Besides the names in feature_cols it may also carry
        ``form_elo_diff``, the odds keys and ``implied_*`` keys, which are
        written without checking feature_cols.
    """
    # Local import: the module only imports `datetime` from the datetime package.
    from datetime import timezone

    feat: Dict[str, float] = {col: 0.0 for col in feature_cols}

    # ---- Direct columns from match row (row column name == feature name) ----
    direct_cols = (
        "home_overall_elo", "away_overall_elo", "elo_diff",
        "home_home_elo", "away_away_elo",
        "home_form_elo", "away_form_elo",
        "home_goals_avg", "away_goals_avg",
        "home_conceded_avg", "away_conceded_avg",
        "home_clean_sheet_rate", "away_clean_sheet_rate",
        "home_scoring_rate", "away_scoring_rate",
        "home_winning_streak", "away_winning_streak",
        "home_avg_possession", "away_avg_possession",
        "home_avg_shots_on_target", "away_avg_shots_on_target",
        "home_shot_conversion", "away_shot_conversion",
        "home_avg_corners", "away_avg_corners",
        "h2h_total_matches", "h2h_home_win_rate", "h2h_avg_goals",
        "h2h_over25_rate", "h2h_btts_rate",
        "league_avg_goals", "league_home_win_rate", "league_ou25_rate",
        "referee_cards_total", "referee_home_bias", "referee_avg_goals",
        "home_missing_impact",
        "implied_home", "implied_draw", "implied_away",
    )
    for col in direct_cols:
        if col in feat and col in match_row.index:
            val = match_row.get(col)
            # None and NaN both mean "missing": keep the 0.0 default.
            if val is not None and not (isinstance(val, float) and np.isnan(val)):
                feat[col] = float(val)

    # ---- Derived elo features ----
    # Only computed when both form ratings are non-zero (0.0 means missing).
    if feat.get("home_form_elo", 0) and feat.get("away_form_elo", 0):
        feat["form_elo_diff"] = feat["home_form_elo"] - feat["away_form_elo"]

    # ---- Odds features from relational tables ----
    odds_features = (
        "odds_ms_h", "odds_ms_d", "odds_ms_a",
        "odds_ht_ms_h", "odds_ht_ms_d", "odds_ht_ms_a",
        "odds_ou05_o", "odds_ou05_u",
        "odds_ou15_o", "odds_ou15_u",
        "odds_ou25_o", "odds_ou25_u",
        "odds_ou35_o", "odds_ou35_u",
        "odds_ht_ou05_o", "odds_ht_ou05_u",
        "odds_ht_ou15_o", "odds_ht_ou15_u",
        "odds_btts_y", "odds_btts_n",
    )
    for ok in odds_features:
        if ok in odds:
            feat[ok] = odds[ok]
            presence_key = f"{ok}_present"
            if presence_key in feat:
                feat[presence_key] = 1.0

    # Whenever all three match-result odds are available, the implied
    # probabilities are recomputed from them (overround removed by
    # normalisation) and replace any value copied from the match row.
    if feat.get("odds_ms_h", 0) > 1 and feat.get("odds_ms_d", 0) > 1 and feat.get("odds_ms_a", 0) > 1:
        raw_h = 1.0 / feat["odds_ms_h"]
        raw_d = 1.0 / feat["odds_ms_d"]
        raw_a = 1.0 / feat["odds_ms_a"]
        total = raw_h + raw_d + raw_a
        if total > 0:
            feat["implied_home"] = raw_h / total
            feat["implied_draw"] = raw_d / total
            feat["implied_away"] = raw_a / total

    # ---- Derived match metadata ----
    mst = match_row.get("mst_utc")
    if mst is not None:
        try:
            # Stored as epoch milliseconds; only the UTC month is used.
            dt = datetime.fromtimestamp(int(mst) / 1000, tz=timezone.utc)
        except (TypeError, ValueError, OverflowError, OSError):
            # Unparseable or out-of-range timestamp: leave the date features at 0.0.
            dt = None
        if dt is not None:
            if "match_month" in feat:
                feat["match_month"] = float(dt.month)
            # Season markers: Aug-Oct = start, Apr-May = end
            if "is_season_start" in feat:
                feat["is_season_start"] = 1.0 if dt.month in (8, 9, 10) else 0.0
            if "is_season_end" in feat:
                feat["is_season_end"] = 1.0 if dt.month in (4, 5) else 0.0

    # ---- Interaction features ----
    if "attack_vs_defense_home" in feat:
        feat["attack_vs_defense_home"] = feat.get("home_goals_avg", 0) - feat.get("away_conceded_avg", 0)
    if "attack_vs_defense_away" in feat:
        feat["attack_vs_defense_away"] = feat.get("away_goals_avg", 0) - feat.get("home_conceded_avg", 0)
    if "form_momentum_interaction" in feat:
        feat["form_momentum_interaction"] = (
            feat.get("home_momentum_score", 0) * feat.get("home_goals_avg", 0)
            - feat.get("away_momentum_score", 0) * feat.get("away_goals_avg", 0)
        )
    if "elo_form_consistency" in feat:
        feat["elo_form_consistency"] = feat.get("elo_diff", 0) * feat.get("home_goals_avg", 0)

    return feat
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# STEP 3 — Run V25 predictions
|
||||
# ============================================================================
|
||||
|
||||
def load_predictor():
    """Import the V25 ensemble module on demand and return its predictor."""
    from models.v25_ensemble import get_v25_predictor

    print("[Step 3] Loading V25 predictor ...")
    predictor = get_v25_predictor()
    print("[Step 3] V25 predictor ready.")
    return predictor
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# STEP 4 — Compute actual outcomes from scores
|
||||
# ============================================================================
|
||||
|
||||
def compute_actuals(score_home: int, score_away: int) -> Dict[str, Any]:
    """
    Derive the settled outcome label of each backtested market from the final score.

    Returns a dict with ``ms_actual`` ("1" / "X" / "2"), ``ou15_actual`` and
    ``ou25_actual`` ("Over" / "Under") and ``btts_actual`` ("Yes" / "No").
    """
    if score_home > score_away:
        ms_label = "1"
    elif score_home == score_away:
        ms_label = "X"
    else:
        ms_label = "2"

    goals = score_home + score_away
    both_scored = score_home > 0 and score_away > 0

    return {
        "ms_actual": ms_label,
        "ou15_actual": "Over" if goals >= 2 else "Under",
        "ou25_actual": "Over" if goals >= 3 else "Under",
        "btts_actual": "Yes" if both_scored else "No",
    }
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# STEP 5 — Accuracy helpers
|
||||
# ============================================================================
|
||||
|
||||
def confidence_band(prob: float) -> str:
    """Return the reporting bucket label for a pick probability in [0, 1]."""
    # Exclusive upper bounds, checked in ascending order.
    for upper_bound, label in ((0.50, "<50%"), (0.65, "50-65%"), (0.75, "65-75%")):
        if prob < upper_bound:
            return label
    return "75%+"
|
||||
|
||||
|
||||
def pick_from_ms(home_prob: float, draw_prob: float, away_prob: float) -> Tuple[str, float]:
    """
    Return the most likely match-result label and its probability.

    Ties go to the earlier outcome in the order "1", "X", "2".
    """
    candidates = (("1", home_prob), ("X", draw_prob), ("2", away_prob))
    label, prob = max(candidates, key=lambda pair: pair[1])
    return label, prob
|
||||
|
||||
|
||||
def pick_from_binary(yes_prob: float, no_prob: float, yes_label: str, no_label: str) -> Tuple[str, float]:
    """Return the label and probability of the likelier side; a tie picks the "yes" side."""
    return (yes_label, yes_prob) if yes_prob >= no_prob else (no_label, no_prob)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# MAIN
|
||||
# ============================================================================
|
||||
|
||||
def main():
    """
    Run the V25 backtest end to end and retrain the calibration models.

    Steps: load the qualified leagues, fetch finished matches and their odds,
    build a feature vector per match, predict MS / OU15 / OU25 / BTTS, print an
    accuracy report (overall, by confidence band, by league group, by pick
    direction), train one calibrator per market and write the combined report
    to ``backtest_results.json`` in REPORTS_DIR.
    """
    t_start = time.time()
    print("=" * 70)
    print(" V25 Backtest + Calibration Training")
    print(f" Run at: {datetime.utcnow().isoformat()} UTC")
    print("=" * 70)

    # ------------------------------------------------------------------
    # Step 1 — Load qualified leagues
    # ------------------------------------------------------------------
    league_ids = load_qualified_leagues()

    # ------------------------------------------------------------------
    # Step 1b — Fetch matches with features
    # ------------------------------------------------------------------
    conn = psycopg2.connect(get_clean_dsn())
    try:
        matches_df = fetch_matches(conn, league_ids)

        if matches_df.empty:
            print("[ERROR] No matches found. Check DB connection and league IDs.")
            return

        match_ids = matches_df["match_id"].tolist()

        # ------------------------------------------------------------------
        # Step 1c — Fetch odds in bulk
        # ------------------------------------------------------------------
        odds_map = fetch_odds_bulk(conn, match_ids)
    finally:
        # The connection is only needed for the two fetches above.
        conn.close()

    # ------------------------------------------------------------------
    # Step 2 — Build feature vectors
    # ------------------------------------------------------------------
    print(f"\n[Step 2] Building feature vectors for {len(matches_df)} matches ...")
    feature_cols = load_feature_cols()

    # ------------------------------------------------------------------
    # Step 3 — Load V25 predictor
    # ------------------------------------------------------------------
    predictor = load_predictor()

    # ------------------------------------------------------------------
    # Main loop — predict each match, collect results
    # ------------------------------------------------------------------
    print("\n[Loop] Running predictions ...")

    # Storage for calibration training: (predicted prob, 1 if outcome occurred)
    calib_data: Dict[str, List[Tuple[float, int]]] = {
        "ms_home": [],
        "ms_draw": [],
        "ms_away": [],
        "ou15": [],
        "ou25": [],
        "btts": [],
    }

    # Storage for accuracy reporting
    records = []

    skipped = 0
    processed = 0
    prediction_errors = 0
    max_logged_errors = 5  # cap per-match warnings so a broken model cannot flood stdout

    for _, row in matches_df.iterrows():
        match_id = row["match_id"]

        # Validate scores
        try:
            score_home = int(row.get("score_home"))
            score_away = int(row.get("score_away"))
        except (TypeError, ValueError):
            skipped += 1
            continue

        # Build features
        match_odds = odds_map.get(match_id, {})
        feat = build_feature_vector(row, match_odds, feature_cols)

        # Run predictions
        try:
            home_prob, draw_prob, away_prob = predictor.predict_ms(feat)
            over25_prob, under25_prob = predictor.predict_ou25(feat)
            btts_yes_prob, btts_no_prob = predictor.predict_btts(feat)

            # ou15 is loaded via predict_market (returns np.ndarray for binary)
            ou15_arr = predictor.predict_market("ou15", feat)
            if ou15_arr is not None and len(ou15_arr) > 0:
                over15_prob = float(ou15_arr[0])
                under15_prob = 1.0 - over15_prob
            else:
                over15_prob = 0.5
                under15_prob = 0.5
        except Exception as exc:
            # Best effort: one bad match must not abort the backtest, but the
            # first few failures are reported so a broken model is diagnosable.
            skipped += 1
            prediction_errors += 1
            if prediction_errors <= max_logged_errors:
                print(f"  [WARN] Prediction failed for match {match_id}: {exc!r}")
            continue

        # Compute actuals
        actuals = compute_actuals(score_home, score_away)

        # MS picks
        ms_pick, ms_conf = pick_from_ms(home_prob, draw_prob, away_prob)
        ms_correct = int(ms_pick == actuals["ms_actual"])

        # OU15
        ou15_pick, ou15_conf = pick_from_binary(over15_prob, under15_prob, "Over", "Under")
        ou15_correct = int(ou15_pick == actuals["ou15_actual"])

        # OU25
        ou25_pick, ou25_conf = pick_from_binary(over25_prob, under25_prob, "Over", "Under")
        ou25_correct = int(ou25_pick == actuals["ou25_actual"])

        # BTTS
        btts_pick, btts_conf = pick_from_binary(btts_yes_prob, btts_no_prob, "Yes", "No")
        btts_correct = int(btts_pick == actuals["btts_actual"])

        # Collect calibration data
        calib_data["ms_home"].append((home_prob, int(actuals["ms_actual"] == "1")))
        calib_data["ms_draw"].append((draw_prob, int(actuals["ms_actual"] == "X")))
        calib_data["ms_away"].append((away_prob, int(actuals["ms_actual"] == "2")))
        calib_data["ou15"].append((over15_prob, int(actuals["ou15_actual"] == "Over")))
        calib_data["ou25"].append((over25_prob, int(actuals["ou25_actual"] == "Over")))
        calib_data["btts"].append((btts_yes_prob, int(actuals["btts_actual"] == "Yes")))

        # Determine league group
        league_name = str(row.get("league_name", "Other") or "Other")
        league_group = league_name if league_name in TOP5_LEAGUE_NAMES else "Other"

        records.append({
            "match_id": match_id,
            "league_name": league_name,
            "league_group": league_group,
            "score_home": score_home,
            "score_away": score_away,
            # MS
            "ms_pick": ms_pick,
            "ms_actual": actuals["ms_actual"],
            "ms_conf": ms_conf,
            "ms_conf_band": confidence_band(ms_conf),
            "ms_correct": ms_correct,
            "ms_home_prob": home_prob,
            "ms_draw_prob": draw_prob,
            "ms_away_prob": away_prob,
            # OU15
            "ou15_pick": ou15_pick,
            "ou15_actual": actuals["ou15_actual"],
            "ou15_conf": ou15_conf,
            "ou15_conf_band": confidence_band(ou15_conf),
            "ou15_correct": ou15_correct,
            "ou15_over_prob": over15_prob,
            # OU25
            "ou25_pick": ou25_pick,
            "ou25_actual": actuals["ou25_actual"],
            "ou25_conf": ou25_conf,
            "ou25_conf_band": confidence_band(ou25_conf),
            "ou25_correct": ou25_correct,
            "ou25_over_prob": over25_prob,
            # BTTS
            "btts_pick": btts_pick,
            "btts_actual": actuals["btts_actual"],
            "btts_conf": btts_conf,
            "btts_conf_band": confidence_band(btts_conf),
            "btts_correct": btts_correct,
            "btts_yes_prob": btts_yes_prob,
        })

        processed += 1
        if processed % PROGRESS_INTERVAL == 0:
            elapsed = time.time() - t_start
            print(f" [Progress] {processed}/{len(matches_df)} matches | "
                  f"skipped={skipped} | elapsed={elapsed:.1f}s")

    print(f"\n[Loop] Done. Processed={processed}, Skipped={skipped}")
    if prediction_errors:
        print(f"[Loop] {prediction_errors} of the skipped matches failed during prediction.")

    if not records:
        print("[ERROR] No records to analyze. Exiting.")
        return

    results_df = pd.DataFrame(records)

    # ------------------------------------------------------------------
    # Step 5 — Accuracy report
    # ------------------------------------------------------------------
    print("\n" + "=" * 70)
    print(" ACCURACY REPORT")
    print("=" * 70)

    markets = [
        ("MS", "ms_correct", "ms_conf", "ms_conf_band", "ms_pick"),
        ("OU15", "ou15_correct", "ou15_conf", "ou15_conf_band", "ou15_pick"),
        ("OU25", "ou25_correct", "ou25_conf", "ou25_conf_band", "ou25_pick"),
        ("BTTS", "btts_correct", "btts_conf", "btts_conf_band", "btts_pick"),
    ]

    summary: Dict[str, Any] = {
        "generated_at": datetime.utcnow().isoformat(),
        "matches_processed": processed,
        "matches_skipped": skipped,
        "markets": {},
    }

    bands = ["<50%", "50-65%", "65-75%", "75%+"]

    # League ordering is the same for every market: named leagues first, then Other.
    league_groups = list(results_df["league_group"].unique())
    named = sorted([g for g in league_groups if g != "Other"])
    ordered = named + (["Other"] if "Other" in league_groups else [])

    for market_label, correct_col, conf_col, band_col, pick_col in markets:
        print(f"\n--- {market_label} ---")
        sub = results_df[[correct_col, conf_col, band_col, pick_col, "league_group"]].copy()
        total = len(sub)
        overall_acc = sub[correct_col].mean() * 100
        print(f" Overall accuracy: {overall_acc:.1f}% ({sub[correct_col].sum()}/{total})")

        market_summary = {
            "overall_accuracy": round(overall_acc, 2),
            "total_matches": total,
            "by_confidence_band": {},
            "by_league": {},
            "by_pick_direction": {},
        }

        # By confidence band
        print(" By confidence band:")
        for band in bands:
            mask = sub[band_col] == band
            n = mask.sum()
            if n > 0:
                acc = sub.loc[mask, correct_col].mean() * 100
                mean_conf = sub.loc[mask, conf_col].mean() * 100
                print(f" {band:8s}: {acc:5.1f}% acc | {n:4d} matches | "
                      f"mean_conf={mean_conf:.1f}%")
                market_summary["by_confidence_band"][band] = {
                    "accuracy": round(acc, 2),
                    "count": int(n),
                    "mean_confidence": round(mean_conf, 2),
                }

        # By league group
        print(" By league:")
        for lg in ordered:
            mask = sub["league_group"] == lg
            n = mask.sum()
            if n > 0:
                acc = sub.loc[mask, correct_col].mean() * 100
                print(f" {lg[:20]:20s}: {acc:5.1f}% ({n} matches)")
                market_summary["by_league"][lg] = {
                    "accuracy": round(acc, 2),
                    "count": int(n),
                }

        # By pick direction
        print(" By pick direction:")
        for pick_val in sorted(sub[pick_col].unique()):
            mask = sub[pick_col] == pick_val
            n = mask.sum()
            if n > 0:
                acc = sub.loc[mask, correct_col].mean() * 100
                mean_conf = sub.loc[mask, conf_col].mean() * 100
                print(f" {pick_val:8s}: {acc:5.1f}% acc | {n:4d} matches | "
                      f"mean_conf={mean_conf:.1f}%")
                market_summary["by_pick_direction"][pick_val] = {
                    "accuracy": round(acc, 2),
                    "count": int(n),
                    "mean_confidence": round(mean_conf, 2),
                }

        summary["markets"][market_label] = market_summary

    # ------------------------------------------------------------------
    # Step 6 — Train calibration models
    # ------------------------------------------------------------------
    print("\n" + "=" * 70)
    print(" CALIBRATION TRAINING")
    print("=" * 70)

    calibrator = Calibrator()
    calibration_results: Dict[str, Dict] = {}

    # One calibrator per collected series; the dict key doubles as the market name.
    for market_key, pairs in calib_data.items():
        if len(pairs) < 100:
            print(f"[Calib] {market_key}: only {len(pairs)} samples — skipping.")
            continue

        probs = np.array([p for p, _ in pairs])
        actuals_bin = np.array([a for _, a in pairs])

        # Build a tiny DataFrame to use Calibrator.train_calibration
        calib_df = pd.DataFrame({
            "prob": probs,
            "actual": actuals_bin,
        })

        metrics = calibrator.train_calibration(
            df=calib_df,
            market=market_key,
            prob_col="prob",
            actual_col="actual",
            min_samples=100,
            save=True,
        )
        calibration_results[market_key] = metrics.to_dict()
        print(f" [Calib] {market_key}: Brier={metrics.brier_score:.4f} | "
              f"ECE={metrics.calibration_error:.4f} | n={metrics.sample_count}")

    # ------------------------------------------------------------------
    # Step 7 — Save results
    # ------------------------------------------------------------------
    output_path = os.path.join(REPORTS_DIR, "backtest_results.json")
    full_report = {
        **summary,
        "calibration": calibration_results,
        "runtime_seconds": round(time.time() - t_start, 1),
    }

    with open(output_path, "w") as f:
        json.dump(full_report, f, indent=2)
    print(f"\n[Step 7] Report saved to {output_path}")

    # ------------------------------------------------------------------
    # Final summary table
    # ------------------------------------------------------------------
    print("\n" + "=" * 70)
    print(" FINAL SUMMARY TABLE")
    print("=" * 70)
    print(f"{'Market':<8} {'Overall Acc':>12} {'Matches':>8} "
          f"{'Best Band (acc)':>18}")
    print("-" * 70)
    for market_label, _, _, _, _ in markets:
        ms = summary["markets"].get(market_label, {})
        overall = ms.get("overall_accuracy", 0)
        total_m = ms.get("total_matches", 0)
        bands_d = ms.get("by_confidence_band", {})
        # Find best accuracy band with >= 50 matches
        best_band = "-"
        best_acc = 0.0
        for band, bdata in bands_d.items():
            if bdata["count"] >= 50 and bdata["accuracy"] > best_acc:
                best_acc = bdata["accuracy"]
                best_band = f"{band} ({best_acc:.1f}%)"
        print(f"{market_label:<8} {overall:>11.1f}% {total_m:>8d} {best_band:>18s}")

    elapsed_total = time.time() - t_start
    print(f"\nTotal runtime: {elapsed_total:.1f}s")
    print("=" * 70)


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,459 @@
|
||||
"""
|
||||
League-Specific Model Trainer
|
||||
==============================
|
||||
Trains dedicated XGBoost models + isotonic calibration for each qualified league.
|
||||
|
||||
Tiers:
|
||||
- >=500 FT matches → full XGBoost (12 markets) + calibration
|
||||
- 100-499 matches → isotonic calibration only (over general V25 predictions)
|
||||
- <100 matches → skipped
|
||||
|
||||
Usage:
|
||||
python scripts/train_league_models.py
|
||||
python scripts/train_league_models.py --min-samples 300 # stricter threshold
|
||||
python scripts/train_league_models.py --colab # Colab-friendly output
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import pickle
|
||||
import argparse
|
||||
import time
|
||||
import warnings
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import xgboost as xgb
|
||||
from sklearn.isotonic import IsotonicRegression
|
||||
from sklearn.metrics import accuracy_score, log_loss
|
||||
|
||||
warnings.filterwarnings("ignore")

# Optuna is optional: record whether it is importable and quieten its logger.
try:
    import optuna
except ImportError:
    optuna_available = False
else:
    optuna.logging.set_verbosity(optuna.logging.WARNING)
    optuna_available = True

# Two levels up from this file; put on sys.path so project packages resolve.
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, AI_ENGINE_DIR)

DATA_PATH = os.path.join(AI_ENGINE_DIR, "data", "training_data.csv")
MODELS_DIR = os.path.join(AI_ENGINE_DIR, "models", "league_specific")
REPORTS_DIR = os.path.join(AI_ENGINE_DIR, "reports", "league_models")
# The league whitelist sits one directory above AI_ENGINE_DIR.
QUALIFIED_LEAGUES_PATH = os.path.join(os.path.dirname(AI_ENGINE_DIR), "qualified_leagues.json")

# Output directories are created at import time.
os.makedirs(MODELS_DIR, exist_ok=True)
os.makedirs(REPORTS_DIR, exist_ok=True)
|
||||
|
||||
# ─── Markets ────────────────────────────────────────────────────────
|
||||
# market name -> label column in the training CSV, number of classes, and the
# minimum number of training rows required before a model is fitted.
MARKETS = {
    name: {"label": label, "num_class": num_class, "min_samples": min_samples}
    for name, label, num_class, min_samples in (
        ("MS", "label_ms", 3, 200),
        ("OU15", "label_ou15", 2, 150),
        ("OU25", "label_ou25", 2, 150),
        ("OU35", "label_ou35", 2, 150),
        ("BTTS", "label_btts", 2, 150),
        ("HT", "label_ht_result", 3, 150),
        ("HT_OU05", "label_ht_ou05", 2, 150),
        ("HT_OU15", "label_ht_ou15", 2, 150),
        ("HTFT", "label_ht_ft", 9, 300),
        ("OE", "label_odd_even", 2, 150),
        ("CARDS", "label_cards_ou45", 2, 150),
        ("HANDICAP", "label_handicap_ms", 3, 200),
    )
}
|
||||
|
||||
# Feature columns (from training_data.csv, excluding metadata + labels)
|
||||
SKIP_COLS = {
    # identifiers and kickoff time
    "match_id",
    "home_team_id",
    "away_team_id",
    "league_id",
    "mst_utc",
    # full-time and half-time results
    "score_home",
    "score_away",
    "total_goals",
    "ht_score_home",
    "ht_score_away",
    "ht_total_goals",
    # training labels
    "label_ms",
    "label_ou05",
    "label_ou15",
    "label_ou25",
    "label_ou35",
    "label_btts",
    "label_ht_result",
    "label_ht_ou05",
    "label_ht_ou15",
    "label_ht_ft",
    "label_odd_even",
    "label_yellow_cards",
    "label_cards_ou45",
    "label_handicap_ms",
}
|
||||
|
||||
# XGBoost defaults — fast, no Optuna
|
||||
# Fixed booster parameters shared by every binary market.
XGB_PARAMS_BINARY = dict(
    objective="binary:logistic",
    eval_metric="logloss",
    max_depth=4,
    eta=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=5,
    gamma=0.1,
    reg_lambda=1.0,
    verbosity=0,
    seed=42,
    nthread=-1,
)

# Multi-class variant: same settings, only objective and metric differ.
XGB_PARAMS_MULTI = dict(
    XGB_PARAMS_BINARY,
    objective="multi:softprob",
    eval_metric="mlogloss",
)
|
||||
|
||||
|
||||
def load_data() -> pd.DataFrame:
    """Read the training CSV at DATA_PATH into a DataFrame and print its size."""
    print(f"Loading training data from {DATA_PATH} ...")
    frame = pd.read_csv(DATA_PATH, low_memory=False)
    n_rows, n_cols = len(frame), len(frame.columns)
    print(f" {n_rows:,} rows, {n_cols} columns")
    return frame
|
||||
|
||||
|
||||
def get_feature_cols(df: pd.DataFrame) -> list:
    """Return df's column names, in their original order, minus those in SKIP_COLS."""
    return [name for name in df.columns if name not in SKIP_COLS]
|
||||
|
||||
|
||||
def load_qualified_leagues() -> list:
    """Return the parsed content of QUALIFIED_LEAGUES_PATH, or [] when the file is missing."""
    if not os.path.exists(QUALIFIED_LEAGUES_PATH):
        # Fallback. NOTE(review): callers presumably treat an empty list as
        # "use every league present in the CSV" — confirm at the call site.
        return []
    with open(QUALIFIED_LEAGUES_PATH) as handle:
        return json.load(handle)
|
||||
|
||||
|
||||
def train_xgb_market(
    X_train: np.ndarray,
    y_train: np.ndarray,
    X_test: np.ndarray,
    y_test: np.ndarray,
    num_class: int,
    feature_cols: list,
) -> tuple:
    """
    Train XGBoost for one market.

    Args:
        X_train, y_train: Training features and integer class labels.
        X_test, y_test: Hold-out features and labels; also used as the
            early-stopping validation set.
        num_class: Number of classes; values above 2 select the multi-class
            objective.
        feature_cols: Feature names attached to both DMatrix objects.

    Returns:
        (model, accuracy, logloss) with both metrics measured on the test split.
    """
    is_multi = num_class > 2
    params = dict(XGB_PARAMS_MULTI if is_multi else XGB_PARAMS_BINARY)
    if is_multi:
        params["num_class"] = num_class

    dtrain = xgb.DMatrix(X_train, label=y_train, feature_names=feature_cols)
    dtest = xgb.DMatrix(X_test, label=y_test, feature_names=feature_cols)

    # NOTE(review): the test split doubles as the early-stopping set, so the
    # metrics returned below are measured on data that influenced the stop.
    model = xgb.train(
        params,
        dtrain,
        num_boost_round=300,
        evals=[(dtest, "val")],
        early_stopping_rounds=30,
        verbose_eval=False,
    )

    raw = model.predict(dtest)

    # Pass the full label set explicitly: a small test fold may not contain
    # every class, and log_loss raises when it has to infer labels from y_test.
    class_labels = list(range(num_class)) if is_multi else [0, 1]
    if is_multi:
        probs = raw.reshape(-1, num_class)
        preds = np.argmax(probs, axis=1)
        ll = log_loss(y_test, probs, labels=class_labels)
    else:
        preds = (raw >= 0.5).astype(int)
        ll = log_loss(y_test, raw, labels=class_labels)

    acc = accuracy_score(y_test, preds)
    return model, acc, ll
|
||||
|
||||
|
||||
def train_isotonic(raw_probs: np.ndarray, y_true: np.ndarray) -> IsotonicRegression:
    """Fit an isotonic regressor on (raw probability, outcome) pairs and return it."""
    # out_of_bounds="clip": inputs outside the fitted range map to the nearest end.
    calibrator = IsotonicRegression(out_of_bounds="clip")
    return calibrator.fit(raw_probs, y_true)
|
||||
|
||||
|
||||
def get_general_v25_probs(df_league: pd.DataFrame, feature_cols: list, market: str, num_class: int):
    """
    Use the general V25 model to get predictions on this league's matches
    (for calibration-only leagues).

    Only the ensemble's XGBoost model for the market is queried.

    Args:
        df_league: Rows of one league, holding feature_cols and the market's label column.
        feature_cols: Feature names, in the order used to build the DMatrix.
        market: Key of MARKETS (e.g. "MS", "OU25").
        num_class: Number of classes; values above 2 reshape the output to (n, num_class).

    Returns:
        (probs, y) for the rows with no missing feature or label, or
        (None, None) when fewer than 50 such rows exist, the market or its
        XGBoost model is unavailable, or any step raises.
    """
    try:
        from models.v25_ensemble import get_v25_predictor

        v25 = get_v25_predictor()
        if not v25._loaded:
            v25.load_models()

        label_col = MARKETS[market]["label"]
        valid = df_league[feature_cols + [label_col]].dropna()
        if len(valid) < 50:
            return None, None

        market_key_map = {
            "MS": "ms", "OU15": "ou15", "OU25": "ou25", "OU35": "ou35",
            "BTTS": "btts", "HT": "ht_result", "HT_OU05": "ht_ou05",
            "HT_OU15": "ht_ou15", "HTFT": "htft", "OE": "odd_even",
            "CARDS": "cards_ou45", "HANDICAP": "handicap_ms",
        }
        mkey = market_key_map.get(market)
        if not mkey or not v25.has_market(mkey):
            return None, None

        # Looked up once: the model does not change between batches.
        xgb_model = v25.models.get(mkey, {}).get("xgb")
        if xgb_model is None:
            return None, None

        # dropna() above already removed every row with a missing value.
        X = valid[feature_cols].values
        y = valid[label_col].values

        # Predict in batches of 500 rows.
        all_probs = []
        for start in range(0, len(X), 500):
            df_batch = pd.DataFrame(X[start:start + 500], columns=feature_cols)
            p = xgb_model.predict(xgb.DMatrix(df_batch))
            if num_class > 2:
                p = p.reshape(-1, num_class)
            all_probs.append(p)

        if not all_probs:
            return None, None

        probs = np.vstack(all_probs) if num_class > 2 else np.concatenate(all_probs)
        return probs, y
    except Exception as exc:
        # Best-effort helper: callers treat (None, None) as "no general-model
        # predictions", but the cause is reported instead of vanishing.
        print(f"  [WARN] General V25 probabilities unavailable for {market}: {exc!r}")
        return None, None
|
||||
|
||||
|
||||
# Translates this script's market names into the keys of the general V25 ensemble.
_V25_MARKET_KEYS = {
    "MS": "ms", "OU15": "ou15", "OU25": "ou25", "OU35": "ou35",
    "BTTS": "btts", "HT": "ht_result", "HT_OU05": "ht_ou05",
    "HT_OU15": "ht_ou15", "HTFT": "htft", "OE": "odd_even",
    "CARDS": "cards_ou45", "HANDICAP": "handicap_ms",
}


def _save_isotonic_calibrators(raw, y_true, num_class, out_dir, market):
    """Fit isotonic calibrators on raw model scores and pickle them to out_dir.

    Multi-class markets get one one-vs-rest calibrator per class
    (``cal_<market>_<class>.pkl``); binary markets get a single
    ``cal_<market>.pkl``.
    """
    prefix = os.path.join(out_dir, f"cal_{market.lower()}")
    if num_class > 2:
        raw = raw.reshape(-1, num_class)
        for cls_idx in range(num_class):
            iso = train_isotonic(raw[:, cls_idx], (y_true == cls_idx).astype(int))
            with open(f"{prefix}_{cls_idx}.pkl", "wb") as f:
                pickle.dump(iso, f)
    else:
        iso = train_isotonic(raw, y_true)
        with open(f"{prefix}.pkl", "wb") as f:
            pickle.dump(iso, f)


def process_league(
    league_id: str,
    df_league: pd.DataFrame,
    feature_cols: list,
    full_model: bool,
    league_name: str,
) -> dict:
    """Train (or only calibrate) every configured market for one league.

    With ``full_model=True`` an XGBoost model is trained per market on the
    oldest 80% of matches, saved as ``xgb_<market>.json`` and calibrated on
    the newest 20%. With ``full_model=False`` no model is trained; isotonic
    calibrators are fitted on the general V25 model's predictions over all
    complete rows of the league.

    Artifacts are written under ``MODELS_DIR/<league_id>``, together with
    ``feature_cols.json`` (full models only) and ``metrics.json``.

    Args:
        league_id: League identifier; also used as the output directory name.
        df_league: Matches of this league, including ``mst_utc`` and labels.
        feature_cols: Feature column names fed to the models.
        full_model: Train a league-specific model (True) or calibrate only.
        league_name: Human-readable name stored in the metrics.

    Returns:
        Metrics dict with ``league_id``, ``league_name``, ``n_matches`` and a
        ``markets`` mapping. Per-market failures are recorded under an
        ``"error"`` key instead of aborting the league.
    """
    n = len(df_league)
    out_dir = os.path.join(MODELS_DIR, league_id)
    os.makedirs(out_dir, exist_ok=True)

    metrics = {"league_id": league_id, "league_name": league_name, "n_matches": n, "markets": {}}

    # Time-based split: the most recent 20% of matches form the test set.
    split_idx = int(n * 0.80)
    df_sorted = df_league.sort_values("mst_utc")
    df_train = df_sorted.iloc[:split_idx]
    df_test = df_sorted.iloc[split_idx:]

    saved_feature_cols = False
    v25 = None  # general predictor, fetched lazily and reused across markets

    for market, cfg in MARKETS.items():
        label_col = cfg["label"]
        num_class = cfg["num_class"]
        min_samp = cfg["min_samples"]

        if label_col not in df_league.columns:
            continue

        columns = feature_cols + [label_col]
        valid_train = df_train[columns].dropna()
        valid_test = df_test[columns].dropna()
        mkt_metrics = {"n_train": len(valid_train), "n_test": len(valid_test)}

        if full_model:
            # The train/test size gate only makes sense when a model is
            # trained. It previously also ran for calibration-only leagues,
            # which made their own `len(all_valid) < min_samp` check dead and
            # skipped small leagues that had enough rows in total.
            if len(valid_train) < min_samp or len(valid_test) < 30:
                continue

            X_train = valid_train[feature_cols].fillna(0).values
            y_train = valid_train[label_col].values.astype(int)
            X_test = valid_test[feature_cols].fillna(0).values
            y_test = valid_test[label_col].values.astype(int)

            try:
                model, acc, ll = train_xgb_market(X_train, y_train, X_test, y_test, num_class, feature_cols)
                model.save_model(os.path.join(out_dir, f"xgb_{market.lower()}.json"))
                mkt_metrics.update({"accuracy": round(acc, 4), "logloss": round(ll, 4), "model": "xgb"})

                if not saved_feature_cols:
                    with open(os.path.join(out_dir, "feature_cols.json"), "w") as f:
                        json.dump(feature_cols, f)
                    saved_feature_cols = True

                # Isotonic calibration from the league model's own predictions.
                dtest_xgb = xgb.DMatrix(X_test, feature_names=feature_cols)
                raw = model.predict(dtest_xgb)
                _save_isotonic_calibrators(raw, y_test, num_class, out_dir, market)
            except Exception as e:
                mkt_metrics["error"] = str(e)
        else:
            # Calibration only: score the league with the general V25 model.
            try:
                all_valid = df_league[columns].dropna()
                if len(all_valid) < min_samp:
                    continue

                X_all = all_valid[feature_cols].fillna(0).values
                y_all = all_valid[label_col].values.astype(int)

                if v25 is None:
                    from models.v25_ensemble import get_v25_predictor
                    v25 = get_v25_predictor()
                if not v25._loaded:
                    v25.load_models()

                mkey = _V25_MARKET_KEYS.get(market)
                if not mkey or not v25.has_market(mkey):
                    continue

                models_v25 = v25.models.get(mkey, {})
                if "xgb" not in models_v25:
                    continue

                dmat = xgb.DMatrix(pd.DataFrame(X_all, columns=feature_cols))
                raw = models_v25["xgb"].predict(dmat)
                _save_isotonic_calibrators(raw, y_all, num_class, out_dir, market)

                mkt_metrics.update({"n_train": len(X_all), "model": "cal_only"})
            except Exception as e:
                mkt_metrics["error"] = str(e)

        metrics["markets"][market] = mkt_metrics

    # Save metrics
    with open(os.path.join(out_dir, "metrics.json"), "w") as f:
        json.dump(metrics, f, indent=2)

    return metrics
|
||||
|
||||
|
||||
def main():
    """Train league-specific models for every qualified league and report.

    Leagues with at least ``--min-samples`` matches get full models; leagues
    with at least ``--cal-min`` matches get calibration only; the rest are
    skipped. Progress is printed per league, a top-30 table by MS accuracy is
    printed at the end, and a JSON report is written to ``REPORTS_DIR``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--min-samples", type=int, default=500, help="Min matches for full model")
    parser.add_argument("--cal-min", type=int, default=100, help="Min matches for calibration")
    parser.add_argument("--colab", action="store_true", help="Colab-friendly verbose output")
    args = parser.parse_args()

    start_total = time.time()

    df = load_data()
    feature_cols = get_feature_cols(df)
    print(f"Feature columns: {len(feature_cols)}")

    qualified = load_qualified_leagues()
    if not qualified:
        qualified = df["league_id"].unique().tolist()
    print(f"Qualified leagues: {len(qualified)}")

    # League names are cosmetic: if the database is unreachable we fall back
    # to truncated ids, but we say why and never leak the connection.
    league_names = {}
    try:
        import psycopg2
        from data.db import get_clean_dsn

        conn = psycopg2.connect(get_clean_dsn())
        try:
            with conn.cursor() as cur:
                cur.execute("SELECT id, name FROM leagues WHERE id = ANY(%s)", (qualified,))
                league_names = {r[0]: r[1] for r in cur.fetchall()}
        finally:
            conn.close()
    except Exception as exc:
        print(f"[WARN] League names unavailable, using ids instead: {exc}")

    # Filter to qualified leagues with enough data
    counts = df[df["league_id"].isin(qualified)].groupby("league_id").size()
    full_model_ids = counts[counts >= args.min_samples].index.tolist()
    cal_only_ids = counts[(counts >= args.cal_min) & (counts < args.min_samples)].index.tolist()

    # Set membership keeps the skipped-league count linear.
    selected = set(full_model_ids) | set(cal_only_ids)
    skipped = sum(1 for lid in qualified if lid not in selected)

    print(f"\nTam model ({args.min_samples}+ maç): {len(full_model_ids)} lig")
    print(f"Kalibrasyon ({args.cal_min}-{args.min_samples-1} maç): {len(cal_only_ids)} lig")
    print(f"Atlandı (<{args.cal_min} maç): {skipped} lig")
    print()

    all_results = []
    total = len(full_model_ids) + len(cal_only_ids)
    done = 0

    jobs = [(lid, True) for lid in full_model_ids] + [(lid, False) for lid in cal_only_ids]
    for league_id, full_model in jobs:
        t0 = time.time()
        df_league = df[df["league_id"] == league_id].copy()
        n = len(df_league)
        # str() keeps the fallback label working for non-string ids too.
        name = league_names.get(league_id, str(league_id)[:12])
        tier = "FULL" if full_model else "CAL"

        try:
            result = process_league(league_id, df_league, feature_cols, full_model, name)
            done += 1
            elapsed = time.time() - t0

            # Build accuracy string for key markets
            acc_parts = []
            for mkt in ["MS", "OU15", "OU25", "BTTS"]:
                m = result["markets"].get(mkt, {})
                if "accuracy" in m:
                    acc_parts.append(f"{mkt}={m['accuracy']*100:.1f}%")
            acc_str = " | ".join(acc_parts) if acc_parts else "(cal only)"

            print(f"[{done:>3}/{total}] [{tier}] {name:<35} {n:>6,} maç | {acc_str} | {elapsed:.1f}s")
            all_results.append(result)

        except Exception as e:
            done += 1
            print(f"[{done:>3}/{total}] [{tier}] {name:<35} ERROR: {e}")

        # Every 10 leagues, print elapsed time and a linear ETA.
        if done % 10 == 0:
            elapsed_total = time.time() - start_total
            remaining = (elapsed_total / done) * (total - done)
            print(f" ── {done}/{total} tamamlandı | geçen: {elapsed_total/60:.1f}dk | kalan tahmini: {remaining/60:.1f}dk ──")

    # Final report
    total_elapsed = time.time() - start_total
    print(f"\n{'='*70}")
    print(f"TAMAMLANDI: {len(all_results)}/{total} lig | Süre: {total_elapsed/60:.1f} dakika")
    print(f"{'='*70}")

    # Top 30 leagues by MS accuracy (only leagues that produced an MS model)
    printable = [(r["league_name"], r["n_matches"], r["markets"]) for r in all_results
                 if "MS" in r["markets"] and "accuracy" in r["markets"]["MS"]]
    printable.sort(key=lambda x: x[2]["MS"].get("accuracy", 0), reverse=True)

    print(f"\n{'Liga':<35} {'Maç':>6} {'MS':>7} {'OU15':>7} {'OU25':>7} {'BTTS':>7}")
    print("-" * 70)
    for name, n, mkts in printable[:30]:
        ms = mkts.get("MS", {}).get("accuracy", 0) * 100
        ou15 = mkts.get("OU15", {}).get("accuracy", 0) * 100
        ou25 = mkts.get("OU25", {}).get("accuracy", 0) * 100
        btts = mkts.get("BTTS", {}).get("accuracy", 0) * 100
        print(f"{name:<35} {n:>6,} {ms:>6.1f}% {ou15:>6.1f}% {ou25:>6.1f}% {btts:>6.1f}%")

    # Save master report
    report = {
        "generated_at": datetime.now().isoformat(),
        "total_leagues": len(all_results),
        "elapsed_minutes": round(total_elapsed / 60, 1),
        "results": all_results,
    }
    # Make sure the target directory exists so a long run is not lost at the end.
    os.makedirs(REPORTS_DIR, exist_ok=True)
    report_path = os.path.join(REPORTS_DIR, "league_models_report.json")
    with open(report_path, "w") as f:
        json.dump(report, f, indent=2)
    print(f"\nRapor kaydedildi: {report_path}")
|
||||
|
||||
|
||||
# Script entry point: run the trainer only when executed directly, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,259 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# League-Specific Model Trainer \u2014 Google Colab\n",
|
||||
"164 lig i\u00e7in XGBoost + isotonic kalibrasyon. 12 market.\n",
|
||||
"Modeller Drive'a kaydedilir, `models/league_specific/` klas\u00f6r\u00fcne kopyalan\u0131r.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Mount Drive\n",
|
||||
"from google.colab import drive\n",
|
||||
"drive.mount('/content/drive')\n",
|
||||
"\n",
|
||||
"DRIVE_DIR = '/content/drive/MyDrive/iddaai'\n",
|
||||
"import os\n",
|
||||
"os.makedirs(DRIVE_DIR, exist_ok=True)\n",
|
||||
"print('Drive mounted:', DRIVE_DIR)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# training_data.csv zaten Drive da: /content/drive/MyDrive/iddaai/training_data.csv\n",
|
||||
"# Sadece qualified_leagues.json upload et (iddaai-be/ klas\u00f6r\u00fcnden)\n",
|
||||
"from google.colab import files\n",
|
||||
"import shutil\n",
|
||||
"print(\"qualified_leagues.json dosyasini upload edin\")\n",
|
||||
"uploaded = files.upload()\n",
|
||||
"for fname in uploaded:\n",
|
||||
" shutil.copy(fname, f\"{DRIVE_DIR}/{fname}\")\n",
|
||||
" print(f\"Kaydedildi: {DRIVE_DIR}/{fname}\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Upload training_data.csv and qualified_leagues.json from local machine\n",
|
||||
"from google.colab import files\n",
|
||||
"print('training_data.csv upload edin (ai-engine/data/training_data.csv)')\n",
|
||||
"uploaded = files.upload()\n",
|
||||
"import shutil\n",
|
||||
"for fname in uploaded:\n",
|
||||
" shutil.copy(fname, f'{DRIVE_DIR}/{fname}')\n",
|
||||
" print(f'Saved: {DRIVE_DIR}/{fname}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os, json, pickle, time, warnings\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import xgboost as xgb\n",
|
||||
"from sklearn.isotonic import IsotonicRegression\n",
|
||||
"from sklearn.metrics import accuracy_score, log_loss\n",
|
||||
"warnings.filterwarnings('ignore')\n",
|
||||
"\n",
|
||||
"DRIVE_DIR = '/content/drive/MyDrive/iddaai'\n",
|
||||
"DATA_PATH = f'{DRIVE_DIR}/training_data.csv'\n",
|
||||
"QL_PATH = f'{DRIVE_DIR}/qualified_leagues.json'\n",
|
||||
"MODELS_DIR = f'{DRIVE_DIR}/league_specific'\n",
|
||||
"os.makedirs(MODELS_DIR, exist_ok=True)\n",
|
||||
"\n",
|
||||
"MARKETS = {\n",
|
||||
" 'MS': {'label': 'label_ms', 'num_class': 3, 'min_samples': 200},\n",
|
||||
" 'OU15': {'label': 'label_ou15', 'num_class': 2, 'min_samples': 150},\n",
|
||||
" 'OU25': {'label': 'label_ou25', 'num_class': 2, 'min_samples': 150},\n",
|
||||
" 'OU35': {'label': 'label_ou35', 'num_class': 2, 'min_samples': 150},\n",
|
||||
" 'BTTS': {'label': 'label_btts', 'num_class': 2, 'min_samples': 150},\n",
|
||||
" 'HT': {'label': 'label_ht_result', 'num_class': 3, 'min_samples': 150},\n",
|
||||
" 'HT_OU05': {'label': 'label_ht_ou05', 'num_class': 2, 'min_samples': 150},\n",
|
||||
" 'HT_OU15': {'label': 'label_ht_ou15', 'num_class': 2, 'min_samples': 150},\n",
|
||||
" 'HTFT': {'label': 'label_ht_ft', 'num_class': 9, 'min_samples': 300},\n",
|
||||
" 'OE': {'label': 'label_odd_even', 'num_class': 2, 'min_samples': 150},\n",
|
||||
" 'CARDS': {'label': 'label_cards_ou45', 'num_class': 2, 'min_samples': 150},\n",
|
||||
" 'HANDICAP': {'label': 'label_handicap_ms', 'num_class': 3, 'min_samples': 200},\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"SKIP_COLS = {\n",
|
||||
" 'match_id','home_team_id','away_team_id','league_id','mst_utc',\n",
|
||||
" 'score_home','score_away','total_goals','ht_score_home','ht_score_away','ht_total_goals',\n",
|
||||
" 'label_ms','label_ou05','label_ou15','label_ou25','label_ou35','label_btts',\n",
|
||||
" 'label_ht_result','label_ht_ou05','label_ht_ou15','label_ht_ft',\n",
|
||||
" 'label_odd_even','label_yellow_cards','label_cards_ou45','label_handicap_ms',\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"XGB_BASE = {\n",
|
||||
" 'max_depth': 4, 'eta': 0.05, 'subsample': 0.8,\n",
|
||||
" 'colsample_bytree': 0.8, 'min_child_weight': 5,\n",
|
||||
" 'gamma': 0.1, 'reg_lambda': 1.0, 'verbosity': 0, 'seed': 42,\n",
|
||||
" 'nthread': -1,\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"df = pd.read_csv(DATA_PATH, low_memory=False)\n",
|
||||
"feature_cols = [c for c in df.columns if c not in SKIP_COLS]\n",
|
||||
"print(f'Y\u00fcklendi: {len(df):,} sat\u0131r | {len(feature_cols)} feature')\n",
|
||||
"\n",
|
||||
"qualified = json.load(open(QL_PATH)) if os.path.exists(QL_PATH) else df['league_id'].unique().tolist()\n",
|
||||
"counts = df[df['league_id'].isin(qualified)].groupby('league_id').size()\n",
|
||||
"full_ids = counts[counts >= 500].index.tolist()\n",
|
||||
"cal_ids = counts[(counts >= 100) & (counts < 500)].index.tolist()\n",
|
||||
"print(f'Tam model: {len(full_ids)} | Kalibrasyon: {len(cal_ids)} | Toplam: {len(full_ids)+len(cal_ids)}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def train_one_league(league_id, df_league, feature_cols, full_model):\n",
|
||||
" n = len(df_league)\n",
|
||||
" out_dir = f'{MODELS_DIR}/{league_id}'\n",
|
||||
" os.makedirs(out_dir, exist_ok=True)\n",
|
||||
" metrics = {}\n",
|
||||
"\n",
|
||||
" df_sorted = df_league.sort_values('mst_utc')\n",
|
||||
" split = int(n * 0.80)\n",
|
||||
" df_tr, df_te = df_sorted.iloc[:split], df_sorted.iloc[split:]\n",
|
||||
"\n",
|
||||
" saved_fc = False\n",
|
||||
"\n",
|
||||
" for market, cfg in MARKETS.items():\n",
|
||||
" lbl, nc, ms = cfg['label'], cfg['num_class'], cfg['min_samples']\n",
|
||||
" if lbl not in df_league.columns:\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" if full_model:\n",
|
||||
" vtr = df_tr[feature_cols + [lbl]].dropna()\n",
|
||||
" vte = df_te[feature_cols + [lbl]].dropna()\n",
|
||||
" if len(vtr) < ms or len(vte) < 30:\n",
|
||||
" continue\n",
|
||||
" Xtr, ytr = vtr[feature_cols].fillna(0).values, vtr[lbl].values.astype(int)\n",
|
||||
" Xte, yte = vte[feature_cols].fillna(0).values, vte[lbl].values.astype(int)\n",
|
||||
"\n",
|
||||
" params = {**XGB_BASE, 'objective': 'multi:softprob' if nc > 2 else 'binary:logistic',\n",
|
||||
" 'eval_metric': 'mlogloss' if nc > 2 else 'logloss'}\n",
|
||||
" if nc > 2: params['num_class'] = nc\n",
|
||||
"\n",
|
||||
" dtr = xgb.DMatrix(Xtr, label=ytr, feature_names=feature_cols)\n",
|
||||
" dte = xgb.DMatrix(Xte, label=yte, feature_names=feature_cols)\n",
|
||||
" model = xgb.train(params, dtr, 300, [(dte,'v')], early_stopping_rounds=30, verbose_eval=False)\n",
|
||||
" model.save_model(f'{out_dir}/xgb_{market.lower()}.json')\n",
|
||||
"\n",
|
||||
" if not saved_fc:\n",
|
||||
" json.dump(feature_cols, open(f'{out_dir}/feature_cols.json','w'))\n",
|
||||
" saved_fc = True\n",
|
||||
"\n",
|
||||
" raw = model.predict(dte)\n",
|
||||
" if nc > 2:\n",
|
||||
" raw = raw.reshape(-1, nc)\n",
|
||||
" acc = accuracy_score(yte, np.argmax(raw, axis=1))\n",
|
||||
" for ci in range(nc):\n",
|
||||
" iso = IsotonicRegression(out_of_bounds='clip').fit(raw[:,ci], (yte==ci).astype(int))\n",
|
||||
" pickle.dump(iso, open(f'{out_dir}/cal_{market.lower()}_{ci}.pkl','wb'))\n",
|
||||
" else:\n",
|
||||
" acc = accuracy_score(yte, (raw>=0.5).astype(int))\n",
|
||||
" iso = IsotonicRegression(out_of_bounds='clip').fit(raw, yte)\n",
|
||||
" pickle.dump(iso, open(f'{out_dir}/cal_{market.lower()}.pkl','wb'))\n",
|
||||
"\n",
|
||||
" metrics[market] = {'accuracy': round(float(acc),4), 'n_train': len(Xtr)}\n",
|
||||
" else:\n",
|
||||
" # Cal only \u2014 store empty placeholder so prediction knows to use general V25\n",
|
||||
" metrics[market] = {'model': 'cal_only', 'n': n}\n",
|
||||
"\n",
|
||||
" json.dump({'league_id': league_id, 'n': n, 'markets': metrics},\n",
|
||||
" open(f'{out_dir}/metrics.json','w'), indent=2)\n",
|
||||
" return metrics\n",
|
||||
"\n",
|
||||
"start = time.time()\n",
|
||||
"all_ids = [(lid, True) for lid in full_ids] + [(lid, False) for lid in cal_ids]\n",
|
||||
"results = []\n",
|
||||
"\n",
|
||||
"for i, (lid, full) in enumerate(all_ids, 1):\n",
|
||||
" dfl = df[df['league_id'] == lid].copy()\n",
|
||||
" t0 = time.time()\n",
|
||||
" try:\n",
|
||||
" mkt_res = train_one_league(lid, dfl, feature_cols, full)\n",
|
||||
" ms_acc = mkt_res.get('MS', {}).get('accuracy', '-')\n",
|
||||
" results.append((lid, len(dfl), mkt_res))\n",
|
||||
" print(f'[{i:>3}/{len(all_ids)}] {lid[:20]:<20} n={len(dfl):>5,} MS={ms_acc} {time.time()-t0:.1f}s')\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f'[{i:>3}/{len(all_ids)}] {lid[:20]:<20} ERROR: {e}')\n",
|
||||
"\n",
|
||||
" if i % 20 == 0:\n",
|
||||
" el = time.time()-start\n",
|
||||
" print(f' \u2500\u2500 {i}/{len(all_ids)} done | {el/60:.1f}min elapsed | ~{el/i*(len(all_ids)-i)/60:.1f}min left \u2500\u2500')\n",
|
||||
"\n",
|
||||
"print(f'\\nBitti! {len(results)} lig | {(time.time()-start)/60:.1f} dakika')\n",
|
||||
"print(f'Modeller: {MODELS_DIR}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sonu\u00e7lar\u0131 g\u00f6ster \u2014 MS accuracy s\u0131ralamas\u0131\n",
|
||||
"printable = [(lid, n, m) for lid, n, m in results if 'MS' in m and 'accuracy' in m['MS']]\n",
|
||||
"printable.sort(key=lambda x: x[2]['MS']['accuracy'], reverse=True)\n",
|
||||
"print(f'{\"Liga ID\":<30} {\"Ma\u00e7\":>6} {\"MS\":>7} {\"OU15\":>7} {\"OU25\":>7} {\"BTTS\":>7}')\n",
|
||||
"print('-'*70)\n",
|
||||
"for lid, n, m in printable[:30]:\n",
|
||||
" ms = m.get('MS', {}).get('accuracy', 0)*100\n",
|
||||
" ou15 = m.get('OU15',{}).get('accuracy', 0)*100\n",
|
||||
" ou25 = m.get('OU25',{}).get('accuracy', 0)*100\n",
|
||||
" btts = m.get('BTTS',{}).get('accuracy', 0)*100\n",
|
||||
" print(f'{lid:<30} {n:>6,} {ms:>6.1f}% {ou15:>6.1f}% {ou25:>6.1f}% {btts:>6.1f}%')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Zip ve indir\n",
|
||||
"import shutil\n",
|
||||
"zip_path = f'{DRIVE_DIR}/league_specific_models.zip'\n",
|
||||
"shutil.make_archive(zip_path.replace('.zip',''), 'zip', MODELS_DIR)\n",
|
||||
"print(f'Zip: {zip_path}')\n",
|
||||
"# \u0130ndirmek i\u00e7in:\n",
|
||||
"# from google.colab import files\n",
|
||||
"# files.download(zip_path)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"version": "3.10.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,108 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# HÜCRE 1 — Paketler\n",
|
||||
"!pip install xgboost lightgbm optuna scikit-learn pandas numpy -q\n",
|
||||
"print('Hazır')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# HÜCRE 2 — Drive bağla + CSV çek\n",
|
||||
"from google.colab import drive\n",
|
||||
"import os, shutil\n",
|
||||
"drive.mount('/content/drive')\n",
|
||||
"\n",
|
||||
"# training_data.csv'yi Drive'ın iddaai klasöründen kopyala\n",
|
||||
"shutil.copy('/content/drive/MyDrive/iddaai/training_data.csv', '/content/training_data.csv')\n",
|
||||
"print('CSV hazır:', os.path.getsize('/content/training_data.csv') // 1024 // 1024, 'MB')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# HÜCRE 3 — iddaai_colab3.zip upload et (ai-engine kodları)\n",
|
||||
"from google.colab import files\n",
|
||||
"import zipfile\n",
|
||||
"print('iddaai_colab3.zip dosyasını seç:')\n",
|
||||
"uploaded = files.upload()\n",
|
||||
"with zipfile.ZipFile('iddaai_colab3.zip') as z:\n",
|
||||
" z.extractall('/content')\n",
|
||||
"print('Kod hazır')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# HÜCRE 4 — training_data.csv'yi script'in beklediği yere koy\n",
|
||||
"import os, shutil\n",
|
||||
"os.makedirs('/content/ai-engine/data', exist_ok=True)\n",
|
||||
"shutil.copy('/content/training_data.csv', '/content/ai-engine/data/training_data.csv')\n",
|
||||
"print('Yerleştirildi')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# HÜCRE 5 — Eğitimi başlat (her 5 trial'da bir ilerleme gösterir)\n",
|
||||
"import subprocess, os\n",
|
||||
"\n",
|
||||
"proc = subprocess.Popen(\n",
|
||||
" ['python', 'scripts/train_v25_pro.py'],\n",
|
||||
" stdout=subprocess.PIPE,\n",
|
||||
" stderr=subprocess.STDOUT,\n",
|
||||
" text=True,\n",
|
||||
" cwd='/content/ai-engine',\n",
|
||||
" env={**os.environ, 'PYTHONPATH': '/content/ai-engine'}\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"for line in proc.stdout:\n",
|
||||
" print(line, end='', flush=True)\n",
|
||||
"\n",
|
||||
"proc.wait()\n",
|
||||
"print('\\nEĞİTİM BİTTİ!')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# HÜCRE 6 — Modelleri Drive'a kaydet\n",
|
||||
"import shutil, os\n",
|
||||
"os.makedirs('/content/drive/MyDrive/iddaai/models_v25', exist_ok=True)\n",
|
||||
"shutil.copytree(\n",
|
||||
" '/content/ai-engine/models/v25',\n",
|
||||
" '/content/drive/MyDrive/iddaai/models_v25',\n",
|
||||
" dirs_exist_ok=True\n",
|
||||
")\n",
|
||||
"print('Modeller Drive a kaydedildi: MyDrive/iddaai/models_v25/')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
|
||||
"language_info": {"name": "python", "version": "3.10.0"}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -101,6 +101,32 @@ FEATURES = [
|
||||
"home_top_scorer_form", "away_top_scorer_form",
|
||||
"home_avg_player_exp", "away_avg_player_exp",
|
||||
"home_goals_diversity", "away_goals_diversity",
|
||||
# V27 H2H Expanded (4)
|
||||
"h2h_home_goals_avg", "h2h_away_goals_avg",
|
||||
"h2h_recent_trend", "h2h_venue_advantage",
|
||||
# V27 Rolling Stats (12 listed; header previously said 13 — verify no feature is missing)
|
||||
"home_rolling5_goals", "home_rolling5_conceded",
|
||||
"home_rolling10_goals", "home_rolling10_conceded",
|
||||
"home_rolling20_goals", "home_rolling20_conceded",
|
||||
"away_rolling5_goals", "away_rolling5_conceded",
|
||||
"away_rolling10_goals", "away_rolling10_conceded",
|
||||
"home_rolling5_cs", "away_rolling5_cs",
|
||||
# V27 Venue Stats (4)
|
||||
"home_venue_goals", "home_venue_conceded",
|
||||
"away_venue_goals", "away_venue_conceded",
|
||||
# V27 Goal Trend (2)
|
||||
"home_goal_trend", "away_goal_trend",
|
||||
# V27 Calendar (5)
|
||||
"home_days_rest", "away_days_rest",
|
||||
"match_month", "is_season_start", "is_season_end",
|
||||
# V27 Interaction (6)
|
||||
"attack_vs_defense_home", "attack_vs_defense_away",
|
||||
"xg_diff", "form_momentum_interaction",
|
||||
"elo_form_consistency", "upset_x_elo_gap",
|
||||
# V27 League Expanded (5)
|
||||
"league_home_win_rate", "league_draw_rate",
|
||||
"league_btts_rate", "league_ou25_rate",
|
||||
"league_reliability_score",
|
||||
]
|
||||
|
||||
MARKET_CONFIGS = [
|
||||
@@ -295,12 +321,18 @@ def train_market(df, target_col, market_name, num_class, n_trials):
|
||||
|
||||
print(f"[INFO] Split: train={len(X_train)} val={len(X_val)} cal={len(X_cal)} test={len(X_test)}")
|
||||
|
||||
def _cb(study, trial):
|
||||
if trial.number % 5 == 0 or trial.number == n_trials - 1:
|
||||
best = study.best_value if study.best_trial else float('inf')
|
||||
print(f" [{trial.number+1:>3}/{n_trials}] loss={trial.value:.4f} | best={best:.4f}", flush=True)
|
||||
|
||||
# ── Phase 1: Optuna XGBoost ──────────────────────────────────
|
||||
print(f"\n[OPTUNA] XGBoost tuning ({n_trials} trials)...")
|
||||
xgb_study = optuna.create_study(direction="minimize", sampler=TPESampler(seed=42))
|
||||
xgb_study.optimize(
|
||||
lambda trial: xgb_objective(trial, X_train, y_train, X_val, y_val, num_class),
|
||||
n_trials=n_trials,
|
||||
callbacks=[_cb],
|
||||
)
|
||||
xgb_best = xgb_study.best_params
|
||||
print(f"[OK] XGB best logloss: {xgb_study.best_value:.4f}")
|
||||
@@ -311,6 +343,7 @@ def train_market(df, target_col, market_name, num_class, n_trials):
|
||||
lgb_study.optimize(
|
||||
lambda trial: lgb_objective(trial, X_train, y_train, X_val, y_val, num_class),
|
||||
n_trials=n_trials,
|
||||
callbacks=[_cb],
|
||||
)
|
||||
lgb_best = lgb_study.best_params
|
||||
print(f"[OK] LGB best logloss: {lgb_study.best_value:.4f}")
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -19,11 +19,26 @@ class BettingBrain:
|
||||
SOFT_DIVERGENCE = 0.14
|
||||
EXTREME_MODEL_PROB = 0.85
|
||||
EXTREME_GAP = 0.30
|
||||
# Vetoes that is_value_sniper bypasses (does NOT bypass odds_below_minimum)
|
||||
SNIPER_BYPASSABLE_VETOES = {"calibrated_confidence_too_low", "play_score_too_low"}
|
||||
# Trap market: market implied probability massively exceeds historical band hit rate
|
||||
SNIPER_BYPASSABLE_VETOES = {"play_score_too_low"}
|
||||
TRAP_MARKET_GAP = 0.10
|
||||
|
||||
MARKET_MIN_CONFIDENCE = {
|
||||
"MS": 45.0,
|
||||
"DC": 55.0,
|
||||
"OU25": 48.0,
|
||||
"OU15": 55.0,
|
||||
"OU35": 42.0,
|
||||
"BTTS": 48.0,
|
||||
"HT": 55.0,
|
||||
"HTFT": 65.0,
|
||||
"OE": 55.0,
|
||||
"CARDS": 50.0,
|
||||
"HT_OU05": 55.0,
|
||||
"HT_OU15": 50.0,
|
||||
}
|
||||
|
||||
SNIPER_BLOCKED_MARKETS = {"HT", "HTFT", "OE", "CARDS", "HT_OU05", "HT_OU15"}
|
||||
|
||||
MARKET_PRIORS = {
|
||||
"DC": 4.0,
|
||||
"OU15": 3.0,
|
||||
@@ -31,10 +46,10 @@ class BettingBrain:
|
||||
"BTTS": 0.0,
|
||||
"MS": -2.0,
|
||||
"OU35": -2.0,
|
||||
"HT": -6.0,
|
||||
"HTFT": -12.0,
|
||||
"CARDS": -5.0,
|
||||
"OE": -8.0,
|
||||
"HT": -10.0,
|
||||
"HTFT": -18.0,
|
||||
"CARDS": -8.0,
|
||||
"OE": -12.0,
|
||||
}
|
||||
|
||||
def judge(self, package: Dict[str, Any]) -> Dict[str, Any]:
|
||||
@@ -182,8 +197,10 @@ class BettingBrain:
|
||||
issues.append("base_model_not_playable")
|
||||
|
||||
is_value_sniper = bool(row.get("is_value_sniper"))
|
||||
if market in self.SNIPER_BLOCKED_MARKETS:
|
||||
is_value_sniper = False
|
||||
if is_value_sniper:
|
||||
score += 35.0
|
||||
score += 20.0
|
||||
positives.append("value_sniper_override")
|
||||
|
||||
score += max(0.0, min(20.0, calibrated_conf * 0.22))
|
||||
@@ -197,9 +214,31 @@ class BettingBrain:
|
||||
risk = str((package.get("risk") or {}).get("level") or "MEDIUM").upper()
|
||||
score += {"LOW": 5.0, "MEDIUM": 0.0, "HIGH": -12.0, "EXTREME": -22.0}.get(risk, -4.0)
|
||||
|
||||
# League reliability penalty: weak leagues produce unreliable raw probabilities.
|
||||
# odds_reliability is pre-computed per-league from historical Brier score analysis.
|
||||
odds_rel = self._safe_float(row.get("odds_reliability"), 0.35) or 0.35
|
||||
if odds_rel < 0.30:
|
||||
score -= 22.0
|
||||
issues.append("very_low_reliability_league")
|
||||
if market in {"MS", "DC", "OU25", "BTTS"} and not is_value_sniper:
|
||||
vetoes.append("low_reliability_league_hard_block")
|
||||
elif odds_rel < 0.45:
|
||||
score -= 12.0
|
||||
issues.append("low_reliability_league")
|
||||
elif odds_rel < 0.55:
|
||||
score -= 5.0
|
||||
|
||||
# Inferred features penalty: when ELO/form/H2H come from live enrichment
|
||||
# (not pre-computed table), statistical quality is unknown — penalise hard.
|
||||
dq_flags = list(data_quality.get("flags") or [])
|
||||
if "ai_features_inferred_from_history" in dq_flags:
|
||||
score -= 18.0
|
||||
issues.append("inferred_statistical_features")
|
||||
|
||||
if odds < self.MIN_ODDS:
|
||||
vetoes.append("odds_below_minimum")
|
||||
if calibrated_conf < 38.0 and not is_value_sniper:
|
||||
min_conf = self.MARKET_MIN_CONFIDENCE.get(market, 45.0)
|
||||
if calibrated_conf < min_conf:
|
||||
vetoes.append("calibrated_confidence_too_low")
|
||||
if play_score < 50.0 and not is_value_sniper:
|
||||
vetoes.append("play_score_too_low")
|
||||
@@ -270,7 +309,7 @@ class BettingBrain:
|
||||
score -= 24.0
|
||||
vetoes.append("extreme_probability_without_evidence")
|
||||
|
||||
if market in {"HT", "HTFT", "OE"} and score < 86.0 and not is_value_sniper:
|
||||
if market in {"HT", "HTFT", "OE"} and score < 86.0:
|
||||
vetoes.append("volatile_market_requires_exceptional_evidence")
|
||||
|
||||
# Sniper override: bypass eligible vetoes when value sniper triggered
|
||||
|
||||
@@ -62,7 +62,7 @@ def generate_match_commentary(package: Dict[str, Any]) -> Dict[str, Any]:
|
||||
)
|
||||
|
||||
# ── Quick notes ───────────────────────────────────────────────
|
||||
notes = _build_notes(market_board, v27_engine, score_pred, risk, home, away)
|
||||
notes = _build_notes(market_board, v27_engine, score_pred, risk, home, away, league_name=match_info.get("league", ""))
|
||||
|
||||
# ── Contradiction detection ───────────────────────────────────
|
||||
contradictions = _detect_contradictions(market_board, v27_engine, package)
|
||||
@@ -206,11 +206,17 @@ def _build_notes(
|
||||
risk: Dict[str, Any],
|
||||
home: str,
|
||||
away: str,
|
||||
league_name: str = "",
|
||||
) -> List[str]:
|
||||
notes: List[str] = []
|
||||
triple_value = v27_engine.get("triple_value") or {}
|
||||
odds_band = v27_engine.get("odds_band") or {}
|
||||
|
||||
# Cup game note — model uses league statistics; cup dynamics differ
|
||||
_cup_kws = ("kupa", "cup", "coupe", "copa", "pokal", "ziraat", "trophy", "shield", "super cup", "süper kupa")
|
||||
if any(kw in (league_name or "").lower() for kw in _cup_kws):
|
||||
notes.append("⚠️ Kupa maçı: ev avantajı zayıf, rotasyon ve düşük motivasyon riski var")
|
||||
|
||||
# MS note
|
||||
ms = market_board.get("MS") or {}
|
||||
ms_conf = float(ms.get("confidence", 0) or 0)
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
"""Orchestrator package — mixin modules split from the original 5786-line
|
||||
monolithic SingleMatchOrchestrator. Behaviour is identical to the pre-refactor
|
||||
version; only file layout has changed.
|
||||
"""
|
||||
|
||||
from services.orchestrator.data_loader import DataLoaderMixin
|
||||
from services.orchestrator.feature_builder import FeatureBuilderMixin
|
||||
from services.orchestrator.prediction import PredictionMixin
|
||||
from services.orchestrator.basketball import BasketballMixin
|
||||
from services.orchestrator.upper_brain import UpperBrainMixin
|
||||
from services.orchestrator.htms import HtmsMixin
|
||||
from services.orchestrator.coupon import CouponMixin
|
||||
from services.orchestrator.reversal import ReversalMixin
|
||||
from services.orchestrator.market_board import MarketBoardMixin
|
||||
from services.orchestrator.utils import UtilsMixin
|
||||
|
||||
__all__ = [
|
||||
"DataLoaderMixin",
|
||||
"FeatureBuilderMixin",
|
||||
"PredictionMixin",
|
||||
"BasketballMixin",
|
||||
"UpperBrainMixin",
|
||||
"HtmsMixin",
|
||||
"CouponMixin",
|
||||
"ReversalMixin",
|
||||
"MarketBoardMixin",
|
||||
"UtilsMixin",
|
||||
]
|
||||
@@ -0,0 +1,538 @@
|
||||
"""Basketball Mixin — basketball-specific market construction.
|
||||
|
||||
Auto-extracted mixin module — split from services/single_match_orchestrator.py.
|
||||
All methods here are composed into SingleMatchOrchestrator via inheritance.
|
||||
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
|
||||
initialised in the main __init__.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, overload
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from schemas.prediction import FullMatchPrediction
|
||||
from schemas.match_data import MatchData
|
||||
from models.v25_ensemble import V25Predictor, get_v25_predictor
|
||||
try:
|
||||
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
|
||||
except ImportError:
|
||||
class V27Predictor: # type: ignore[no-redef]
|
||||
def __init__(self): self.models = {}
|
||||
def load_models(self): return False
|
||||
def predict_all(self, features): return {}
|
||||
def compute_divergence(*args, **kwargs):
|
||||
return {}
|
||||
def compute_value_edge(*args, **kwargs):
|
||||
return {}
|
||||
from features.odds_band_analyzer import OddsBandAnalyzer
|
||||
try:
|
||||
from models.basketball_v25 import (
|
||||
BasketballMatchPrediction,
|
||||
get_basketball_v25_predictor,
|
||||
)
|
||||
except ImportError:
|
||||
BasketballMatchPrediction = Any # type: ignore[misc]
|
||||
def get_basketball_v25_predictor() -> Any:
|
||||
raise ImportError("Basketball predictor is not available")
|
||||
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
|
||||
from services.feature_enrichment import FeatureEnrichmentService
|
||||
from services.betting_brain import BettingBrain
|
||||
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
|
||||
from services.match_commentary import generate_match_commentary
|
||||
from utils.top_leagues import load_top_league_ids
|
||||
from utils.league_reliability import load_league_reliability
|
||||
from config.config_loader import build_threshold_dict, get_threshold_default
|
||||
from models.calibration import get_calibrator
|
||||
|
||||
|
||||
class BasketballMixin:
|
||||
def _build_basketball_prediction_package(
|
||||
self,
|
||||
data: MatchData,
|
||||
prediction: Dict[str, Any],
|
||||
) -> Dict[str, Any]:
|
||||
quality = self._compute_data_quality(data)
|
||||
|
||||
raw_market_rows = self._build_basketball_market_rows(data, prediction)
|
||||
market_rows = [
|
||||
self._decorate_basketball_market_row(data, prediction, quality, row)
|
||||
for row in raw_market_rows
|
||||
]
|
||||
market_rows.sort(
|
||||
key=lambda row: (
|
||||
1 if row.get("playable") else 0,
|
||||
float(row.get("play_score", 0.0)),
|
||||
),
|
||||
reverse=True,
|
||||
)
|
||||
|
||||
playable_rows = [row for row in market_rows if row.get("playable")]
|
||||
|
||||
MIN_ODDS = 1.30
|
||||
playable_with_odds = [
|
||||
row for row in playable_rows
|
||||
if float(row.get("odds", 0.0)) >= MIN_ODDS
|
||||
]
|
||||
|
||||
if playable_with_odds:
|
||||
playable_with_odds.sort(
|
||||
key=lambda r: (
|
||||
float(r.get("ev_edge", 0.0)),
|
||||
float(r.get("play_score", 0.0)),
|
||||
),
|
||||
reverse=True,
|
||||
)
|
||||
main_pick = playable_with_odds[0]
|
||||
main_pick["is_guaranteed"] = False
|
||||
main_pick["pick_reason"] = "positive_ev_pick"
|
||||
else:
|
||||
fallback_with_odds = [r for r in market_rows if float(r.get("odds", 0.0)) > 1.0]
|
||||
fallback_with_odds.sort(key=lambda r: float(r.get("play_score", 0.0)), reverse=True)
|
||||
main_pick = fallback_with_odds[0] if fallback_with_odds else (market_rows[0] if market_rows else None)
|
||||
if main_pick:
|
||||
main_pick["is_guaranteed"] = False
|
||||
main_pick["playable"] = False
|
||||
main_pick["stake_units"] = 0.0
|
||||
main_pick["bet_grade"] = "PASS"
|
||||
main_pick["pick_reason"] = "no_playable_value_found"
|
||||
|
||||
supporting: List[Dict[str, Any]] = []
|
||||
for row in market_rows:
|
||||
if main_pick and row["market"] == main_pick["market"] and row["pick"] == main_pick["pick"]:
|
||||
continue
|
||||
supporting.append(row)
|
||||
supporting = supporting[:5]
|
||||
|
||||
bet_summary = [self._to_bet_summary_item(row) for row in market_rows]
|
||||
scenarios = self._build_basketball_scenarios(prediction)
|
||||
reasons = self._build_basketball_reasoning_factors(data, prediction, quality)
|
||||
|
||||
aggressive_pick: Optional[Dict[str, Any]] = None
|
||||
risk_level = prediction.get("risk_level", "MEDIUM")
|
||||
risk_score = float(prediction.get("risk_score", 50.0) or 50.0)
|
||||
|
||||
# Build aggressive pick if available from Spreak in market_board
|
||||
board = prediction.get("market_board", {})
|
||||
if risk_level in ("LOW", "MEDIUM") and "Spread" in board:
|
||||
spr_data = board["Spread"]
|
||||
probs = list(spr_data.values())
|
||||
keys = list(spr_data.keys())
|
||||
if len(probs) >= 2:
|
||||
prob_a = float(str(probs[0]).replace('%', '')) / 100.0
|
||||
prob_h = float(str(probs[1]).replace('%', '')) / 100.0
|
||||
max_prob = max(prob_a, prob_h)
|
||||
|
||||
spr_pick = "Home" if prob_h >= prob_a else "Away"
|
||||
|
||||
conf = 50.0
|
||||
line_str = "Spread"
|
||||
for b in prediction.get("bet_summary", []):
|
||||
if b["market"] == "Spread":
|
||||
conf = float(b["confidence"])
|
||||
line_str = b["pick"]
|
||||
|
||||
aggressive_pick = {
|
||||
"market": "SPREAD",
|
||||
"pick": line_str,
|
||||
"probability": round(max_prob, 4),
|
||||
"confidence": round(conf, 1),
|
||||
"odds": round(
|
||||
float(
|
||||
data.odds_data.get(
|
||||
"spread_h" if spr_pick == "Home" else "spread_a", 0.0
|
||||
)
|
||||
),
|
||||
2,
|
||||
),
|
||||
}
|
||||
|
||||
scores = prediction.get("score_prediction", {})
|
||||
home_score = scores.get("home_expected", 80.0)
|
||||
away_score = scores.get("away_expected", 80.0)
|
||||
total_score = scores.get("total_expected", 160.0)
|
||||
|
||||
mb_out = {
|
||||
"PLAYER_TOP": board.get("PLAYER_TOP", []),
|
||||
}
|
||||
|
||||
if "ML" in board:
|
||||
ml_data = board["ML"]
|
||||
keys = list(ml_data.keys())
|
||||
if len(keys) >= 2:
|
||||
mb_out["ML"] = {
|
||||
"pick": prediction.get("main_pick", ""),
|
||||
"confidence": 60.0,
|
||||
"probs": {
|
||||
"1": round(float(str(ml_data[keys[0]]).replace('%', '')) / 100.0, 4),
|
||||
"2": round(float(str(ml_data[keys[1]]).replace('%', '')) / 100.0, 4),
|
||||
},
|
||||
}
|
||||
|
||||
if "Totals" in board:
|
||||
tot_data = board["Totals"]
|
||||
keys = list(tot_data.keys())
|
||||
if len(keys) >= 2:
|
||||
mb_out["TOTAL"] = {
|
||||
"line": 160.5,
|
||||
"pick": prediction.get("main_pick", ""),
|
||||
"confidence": 60.0,
|
||||
"probs": {
|
||||
"under": round(float(str(tot_data[keys[0]]).replace('%', '')) / 100.0, 4),
|
||||
"over": round(float(str(tot_data[keys[1]]).replace('%', '')) / 100.0, 4),
|
||||
},
|
||||
}
|
||||
|
||||
if "Spread" in board:
|
||||
spr_data = board["Spread"]
|
||||
keys = list(spr_data.keys())
|
||||
if len(keys) >= 2:
|
||||
mb_out["SPREAD"] = {
|
||||
"line_home": 0.0,
|
||||
"pick": prediction.get("main_pick", ""),
|
||||
"confidence": 60.0,
|
||||
"probs": {
|
||||
"away_cover": round(float(str(spr_data[keys[0]]).replace('%', '')) / 100.0, 4),
|
||||
"home_cover": round(float(str(spr_data[keys[1]]).replace('%', '')) / 100.0, 4),
|
||||
},
|
||||
}
|
||||
|
||||
return {
|
||||
"model_version": str(prediction.get("engine_version") or "v28.main.basketball"),
|
||||
"match_info": {
|
||||
"match_id": data.match_id,
|
||||
"match_name": f"{data.home_team_name} vs {data.away_team_name}",
|
||||
"home_team": data.home_team_name,
|
||||
"away_team": data.away_team_name,
|
||||
"league": data.league_name,
|
||||
"match_date_ms": data.match_date_ms,
|
||||
"sport": data.sport,
|
||||
},
|
||||
"data_quality": quality,
|
||||
"risk": {
|
||||
"level": risk_level,
|
||||
"score": round(risk_score, 1),
|
||||
"is_surprise_risk": False,
|
||||
"surprise_type": "",
|
||||
"warnings": [],
|
||||
},
|
||||
"engine_breakdown": prediction.get("engine_breakdown")
|
||||
or {
|
||||
"team": 60.0,
|
||||
"player": 60.0,
|
||||
"odds": 80.0,
|
||||
"referee": 50.0,
|
||||
},
|
||||
"main_pick": main_pick,
|
||||
"bet_advice": {
|
||||
"playable": bool(main_pick and main_pick.get("playable")),
|
||||
"suggested_stake_units": float(main_pick.get("stake_units", 0.0))
|
||||
if (main_pick and main_pick.get("playable"))
|
||||
else 0.0,
|
||||
"reason": "playable_pick_found"
|
||||
if (main_pick and main_pick.get("playable"))
|
||||
else "no_bet_conditions_met",
|
||||
},
|
||||
"bet_summary": bet_summary,
|
||||
"supporting_picks": supporting,
|
||||
"aggressive_pick": aggressive_pick,
|
||||
"scenario_top5": scenarios,
|
||||
"score_prediction": {
|
||||
"ft": f"{int(round(home_score))}-{int(round(away_score))}",
|
||||
"ht": f"{int(round(home_score * 0.52))}-{int(round(away_score * 0.52))}",
|
||||
"xg_home": round(float(home_score), 2),
|
||||
"xg_away": round(float(away_score), 2),
|
||||
"xg_total": round(float(total_score), 2),
|
||||
},
|
||||
"market_board": mb_out,
|
||||
"reasoning_factors": reasons,
|
||||
}
|
||||
|
||||
def _build_basketball_market_rows(
|
||||
self,
|
||||
data: MatchData,
|
||||
pred: Dict[str, Any],
|
||||
) -> List[Dict[str, Any]]:
|
||||
odds = data.odds_data
|
||||
|
||||
market_board = pred.get("market_board", {})
|
||||
|
||||
# 1. Moneyline
|
||||
ml_row = None
|
||||
if "ML" in market_board:
|
||||
ml_data = market_board["ML"]
|
||||
# To get specific pick (MS 1 or MS 2), look at the probability values
|
||||
probs = list(ml_data.values())
|
||||
keys = list(ml_data.keys())
|
||||
if len(probs) >= 2:
|
||||
prob_1 = float(str(probs[0]).replace('%', '')) / 100.0
|
||||
prob_2 = float(str(probs[1]).replace('%', '')) / 100.0
|
||||
max_prob = max(prob_1, prob_2)
|
||||
|
||||
# Derive pick string
|
||||
ml_pick_val = keys[0] if prob_1 >= prob_2 else keys[1]
|
||||
ml_pick = "1" if "1" in ml_pick_val else "2"
|
||||
ml_odd_key = "ml_h" if ml_pick == "1" else "ml_a"
|
||||
|
||||
# Find confidence from bet summary
|
||||
conf = 50.0
|
||||
for b in pred.get("bet_summary", []):
|
||||
if b["market"] == "Moneyline": conf = float(b["confidence"])
|
||||
|
||||
ml_row = {
|
||||
"market": "ML",
|
||||
"pick": ml_pick,
|
||||
"probability": round(max_prob, 4),
|
||||
"confidence": round(conf, 1),
|
||||
"odds": round(float(odds.get(ml_odd_key, 0.0)), 2),
|
||||
}
|
||||
|
||||
# 2. Totals
|
||||
tot_row = None
|
||||
if "Totals" in market_board:
|
||||
tot_data = market_board["Totals"]
|
||||
probs = list(tot_data.values())
|
||||
keys = list(tot_data.keys())
|
||||
if len(probs) >= 2:
|
||||
prob_u = float(str(probs[0]).replace('%', '')) / 100.0
|
||||
prob_o = float(str(probs[1]).replace('%', '')) / 100.0
|
||||
max_prob = max(prob_u, prob_o)
|
||||
|
||||
pick_str = keys[1] if prob_o >= prob_u else keys[0]
|
||||
tot_pick = "Over" if "Over" in pick_str else "Under"
|
||||
line_val = pick_str.replace("Over", "").replace("Under", "").strip()
|
||||
|
||||
conf = 50.0
|
||||
for b in pred.get("bet_summary", []):
|
||||
if b["market"] == "Totals": conf = float(b["confidence"])
|
||||
|
||||
tot_row = {
|
||||
"market": "TOTAL",
|
||||
"pick": f"{tot_pick} {line_val}",
|
||||
"probability": round(max_prob, 4),
|
||||
"confidence": round(conf, 1),
|
||||
"odds": round(float(odds.get("tot_o" if tot_pick == "Over" else "tot_u", 0.0)), 2),
|
||||
}
|
||||
|
||||
# 3. Spread
|
||||
spr_row = None
|
||||
if "Spread" in market_board:
|
||||
spr_data = market_board["Spread"]
|
||||
probs = list(spr_data.values())
|
||||
keys = list(spr_data.keys())
|
||||
if len(probs) >= 2:
|
||||
prob_a = float(str(probs[0]).replace('%', '')) / 100.0
|
||||
prob_h = float(str(probs[1]).replace('%', '')) / 100.0
|
||||
max_prob = max(prob_a, prob_h)
|
||||
|
||||
spr_pick = "Home" if prob_h >= prob_a else "Away"
|
||||
|
||||
conf = 50.0
|
||||
line_str = ""
|
||||
for b in pred.get("bet_summary", []):
|
||||
if b["market"] == "Spread":
|
||||
conf = float(b["confidence"])
|
||||
line_str = b["pick"]
|
||||
|
||||
spr_row = {
|
||||
"market": "SPREAD",
|
||||
"pick": spr_pick + " " + line_str,
|
||||
"probability": round(max_prob, 4),
|
||||
"confidence": round(conf, 1),
|
||||
"odds": round(float(odds.get("spread_h" if spr_pick == "Home" else "spread_a", 0.0)), 2),
|
||||
}
|
||||
|
||||
# Return valid rows
|
||||
rows = []
|
||||
if ml_row: rows.append(ml_row)
|
||||
if tot_row: rows.append(tot_row)
|
||||
if spr_row: rows.append(spr_row)
|
||||
return rows
|
||||
|
||||
def _decorate_basketball_market_row(
|
||||
self,
|
||||
data: MatchData,
|
||||
prediction: Dict[str, Any],
|
||||
quality: Dict[str, Any],
|
||||
row: Dict[str, Any],
|
||||
) -> Dict[str, Any]:
|
||||
market = str(row.get("market") or "")
|
||||
raw_conf = float(row.get("confidence") or 0.0)
|
||||
prob = float(row.get("probability") or 0.0)
|
||||
odd = float(row.get("odds") or 0.0)
|
||||
|
||||
calibration = {"ML": 0.90, "TOTAL": 0.88, "SPREAD": 0.86}.get(market, 0.88)
|
||||
min_conf = {"ML": 55.0, "TOTAL": 56.0, "SPREAD": 55.0}.get(market, 55.0)
|
||||
|
||||
calibrated_conf = max(1.0, min(99.0, raw_conf * calibration))
|
||||
implied_prob = (1.0 / odd) if odd > 1.0 else 0.0
|
||||
edge = prob - implied_prob if implied_prob > 0 else 0.0
|
||||
|
||||
risk_level = str(prediction.get("risk_level", "MEDIUM")).upper()
|
||||
risk_penalty = {"LOW": 0.0, "MEDIUM": 3.0, "HIGH": 8.0, "EXTREME": 12.0}.get(
|
||||
risk_level,
|
||||
4.0,
|
||||
)
|
||||
quality_label = str(quality.get("label") or "MEDIUM").upper()
|
||||
quality_penalty = {"HIGH": 0.0, "MEDIUM": 2.0, "LOW": 6.0}.get(
|
||||
quality_label,
|
||||
4.0,
|
||||
)
|
||||
|
||||
base_score = calibrated_conf + (edge * 100.0)
|
||||
play_score = max(0.0, min(100.0, base_score - risk_penalty - quality_penalty))
|
||||
|
||||
reasons: List[str] = []
|
||||
playable = True
|
||||
|
||||
min_play_score = self.market_min_play_score.get(market, 68.0)
|
||||
min_edge = self.market_min_edge.get(market, 0.02)
|
||||
|
||||
if calibrated_conf < min_conf:
|
||||
playable = False
|
||||
reasons.append("below_calibrated_conf_threshold")
|
||||
if market in self.ODDS_REQUIRED_MARKETS and odd <= 1.01:
|
||||
playable = False
|
||||
reasons.append("market_odds_missing")
|
||||
if risk_level in ("HIGH", "EXTREME") and quality_label == "LOW":
|
||||
playable = False
|
||||
reasons.append("high_risk_low_data_quality")
|
||||
if odd > 1.0 and edge < -0.05:
|
||||
playable = False
|
||||
reasons.append("negative_model_edge")
|
||||
|
||||
if not reasons:
|
||||
reasons.append("market_passed_all_gates")
|
||||
|
||||
if not playable:
|
||||
grade = "PASS"
|
||||
stake_units = 0.0
|
||||
elif play_score >= 72:
|
||||
grade = "A"
|
||||
stake_units = 1.0
|
||||
elif play_score >= 61:
|
||||
grade = "B"
|
||||
stake_units = 0.5
|
||||
else:
|
||||
grade = "C"
|
||||
stake_units = 0.25
|
||||
|
||||
out = dict(row)
|
||||
out.update(
|
||||
{
|
||||
"raw_confidence": round(raw_conf, 1),
|
||||
"calibrated_confidence": round(calibrated_conf, 1),
|
||||
"min_required_confidence": round(min_conf, 1),
|
||||
"edge": round(edge, 4),
|
||||
"play_score": round(play_score, 1),
|
||||
"playable": playable,
|
||||
"bet_grade": grade,
|
||||
"stake_units": stake_units,
|
||||
"decision_reasons": reasons[:3],
|
||||
},
|
||||
)
|
||||
return out
|
||||
|
||||
def _build_basketball_scenarios(
|
||||
self,
|
||||
prediction: Dict[str, Any],
|
||||
) -> List[Dict[str, Any]]:
|
||||
scores = prediction.get("score_prediction", {})
|
||||
home = float(scores.get("home_expected", 80.0))
|
||||
away = float(scores.get("away_expected", 80.0))
|
||||
templates = [
|
||||
(0.00, 0.23),
|
||||
(+3.5, 0.20),
|
||||
(-3.5, 0.19),
|
||||
(+6.0, 0.16),
|
||||
(-6.0, 0.14),
|
||||
]
|
||||
out: List[Dict[str, Any]] = []
|
||||
for delta, prob in templates:
|
||||
h = int(round(home + delta))
|
||||
a = int(round(away - delta))
|
||||
out.append({"score": f"{h}-{a}", "prob": prob})
|
||||
return out
|
||||
|
||||
def _build_basketball_reasoning_factors(
|
||||
self,
|
||||
data: MatchData,
|
||||
prediction: Dict[str, Any],
|
||||
quality: Dict[str, Any],
|
||||
) -> List[str]:
|
||||
factors: List[str] = []
|
||||
|
||||
# XGBoost models are odds-aware, weight it heavily
|
||||
factors.append("market_signal_dominant")
|
||||
|
||||
if quality.get("label") in ("HIGH", "MEDIUM"):
|
||||
factors.append("player_form_signal_strong")
|
||||
else:
|
||||
factors.append("player_form_signal_limited")
|
||||
|
||||
if prediction.get("is_surprise_risk"):
|
||||
factors.append("upset_risk_detected")
|
||||
if quality.get("label") == "LOW":
|
||||
factors.append("limited_data_confidence")
|
||||
|
||||
factors.append("basketball_points_model")
|
||||
return factors
|
||||
|
||||
def _compute_basketball_data_quality(self, data: MatchData) -> Dict[str, Any]:
|
||||
flags: List[str] = []
|
||||
|
||||
has_ml = float(data.odds_data.get("ml_h", 0.0)) > 1.0 and float(data.odds_data.get("ml_a", 0.0)) > 1.0
|
||||
has_total = (
|
||||
float(data.odds_data.get("tot_line", 0.0)) > 0.0
|
||||
and float(data.odds_data.get("tot_o", 0.0)) > 1.0
|
||||
and float(data.odds_data.get("tot_u", 0.0)) > 1.0
|
||||
)
|
||||
has_spread = (
|
||||
"spread_home_line" in data.odds_data
|
||||
and float(data.odds_data.get("spread_h", 0.0)) > 1.0
|
||||
and float(data.odds_data.get("spread_a", 0.0)) > 1.0
|
||||
)
|
||||
|
||||
odds_components = [has_ml, has_total, has_spread]
|
||||
odds_score = sum(1.0 for x in odds_components if x) / 3.0
|
||||
if not has_ml:
|
||||
flags.append("missing_moneyline_odds")
|
||||
if not has_total:
|
||||
flags.append("missing_total_odds")
|
||||
if not has_spread:
|
||||
flags.append("missing_spread_odds")
|
||||
|
||||
# Basketball live lineup/referee coverage is structurally lower in this project.
|
||||
# Keep neutral baseline and rely mostly on odds depth.
|
||||
lineup_score = 0.7
|
||||
ref_score = 0.7
|
||||
|
||||
total_score = (odds_score * 0.75) + (lineup_score * 0.15) + (ref_score * 0.10)
|
||||
if total_score >= 0.75:
|
||||
label = "HIGH"
|
||||
elif total_score >= 0.52:
|
||||
label = "MEDIUM"
|
||||
else:
|
||||
label = "LOW"
|
||||
|
||||
return {
|
||||
"label": label,
|
||||
"score": round(total_score, 3),
|
||||
"home_lineup_count": len(data.home_lineup or []),
|
||||
"away_lineup_count": len(data.away_lineup or []),
|
||||
"lineup_source": data.lineup_source,
|
||||
"flags": flags,
|
||||
}
|
||||
@@ -0,0 +1,444 @@
|
||||
"""Coupon Mixin — multi-match coupon builder + daily bankers.
|
||||
|
||||
Auto-extracted mixin module — split from services/single_match_orchestrator.py.
|
||||
All methods here are composed into SingleMatchOrchestrator via inheritance.
|
||||
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
|
||||
initialised in the main __init__.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, overload
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from schemas.prediction import FullMatchPrediction
|
||||
from schemas.match_data import MatchData
|
||||
from models.v25_ensemble import V25Predictor, get_v25_predictor
|
||||
try:
|
||||
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
|
||||
except ImportError:
|
||||
class V27Predictor: # type: ignore[no-redef]
|
||||
def __init__(self): self.models = {}
|
||||
def load_models(self): return False
|
||||
def predict_all(self, features): return {}
|
||||
def compute_divergence(*args, **kwargs):
|
||||
return {}
|
||||
def compute_value_edge(*args, **kwargs):
|
||||
return {}
|
||||
from features.odds_band_analyzer import OddsBandAnalyzer
|
||||
try:
|
||||
from models.basketball_v25 import (
|
||||
BasketballMatchPrediction,
|
||||
get_basketball_v25_predictor,
|
||||
)
|
||||
except ImportError:
|
||||
BasketballMatchPrediction = Any # type: ignore[misc]
|
||||
def get_basketball_v25_predictor() -> Any:
|
||||
raise ImportError("Basketball predictor is not available")
|
||||
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
|
||||
from services.feature_enrichment import FeatureEnrichmentService
|
||||
from services.betting_brain import BettingBrain
|
||||
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
|
||||
from services.match_commentary import generate_match_commentary
|
||||
from utils.top_leagues import load_top_league_ids
|
||||
from utils.league_reliability import load_league_reliability
|
||||
from config.config_loader import build_threshold_dict, get_threshold_default
|
||||
from models.calibration import get_calibrator
|
||||
|
||||
|
||||
class CouponMixin:
|
||||
def build_coupon(
    self,
    match_ids: List[str],
    strategy: str = "BALANCED",
    max_matches: Optional[int] = None,
    min_confidence: Optional[float] = None,
) -> Dict[str, Any]:
    """Build a multi-match coupon with at most one pick per match.

    Every match is analysed with ``self.analyze_match``. Each of its picks
    (main, value, supporting and bet-summary rows, de-duplicated by
    ``(market, pick)``) is tried against three tiers: the strategy's
    *strict* gate, then its *soft* gate, then a strategy-independent
    *fallback* gate. The best-scoring surviving pick represents the match,
    and the coupon keeps the top ``max_matches`` representatives.

    Args:
        match_ids: Matches to consider.
        strategy: One of SAFE, BALANCED, AGGRESSIVE, VALUE, MIRACLE
            (case-insensitive). An unknown name uses the BALANCED limits
            together with the MIRACLE gates.
        max_matches: Overrides the strategy's coupon size; at least one
            slot is always kept.
        min_confidence: Overrides the strategy's minimum calibrated
            confidence.

    Returns:
        A dict with ``strategy``, ``generated_at`` (UTC ISO timestamp
        ending in ``Z``), ``match_count``, ``bets``, ``total_odds``,
        ``expected_win_rate`` and ``rejected_matches``.
    """
    # Function-scope import keeps this block self-contained.
    from datetime import datetime, timezone

    strategy_name = (strategy or "BALANCED").upper()

    strategy_config = {
        "SAFE": {"max_matches": 4, "min_conf": 66.0},
        "BALANCED": {"max_matches": 5, "min_conf": 58.0},
        "AGGRESSIVE": {"max_matches": 8, "min_conf": 52.0},
        "VALUE": {"max_matches": 8, "min_conf": 48.0},
        "MIRACLE": {"max_matches": 10, "min_conf": 44.0},
    }
    cfg = strategy_config.get(strategy_name, strategy_config["BALANCED"])
    max_allowed = max_matches if max_matches is not None else cfg["max_matches"]
    min_conf = min_confidence if min_confidence is not None else cfg["min_conf"]

    def _strategy_tiers(
        calibrated_conf: float,
        odds: float,
        play_score: float,
        ev_edge: float,
        risk_level: str,
        data_quality: str,
        bet_grade: str,
        strict_candidate: bool,
    ) -> Tuple[bool, float, bool, float]:
        """Return (strict_pass, strict_score, soft_pass, soft_score)."""
        strict_pass = strict_candidate
        if strategy_name == "SAFE":
            if odds > 2.35 or play_score < 60.0 or risk_level in {"HIGH", "EXTREME"}:
                strict_pass = False
            if data_quality == "LOW" or ev_edge < 0.01 or bet_grade == "PASS":
                strict_pass = False
            # SAFE penalises distance from short odds instead of rewarding price.
            strict_score = (
                calibrated_conf * 1.10
                + play_score * 0.90
                + (ev_edge * 180.0)
                - abs(odds - 1.55) * 12.0
            )
            soft_pass = (
                calibrated_conf >= max(min_conf - 10.0, 56.0)
                and odds <= 2.70
                and play_score >= 50.0
                and risk_level != "EXTREME"
                and data_quality != "LOW"
                and ev_edge >= -0.01
            )
            soft_score = (
                calibrated_conf
                + play_score * 0.85
                + (ev_edge * 140.0)
                - abs(odds - 1.65) * 9.0
            )
        elif strategy_name == "BALANCED":
            if odds > 3.40 or play_score < 52.0 or risk_level == "EXTREME":
                strict_pass = False
            if ev_edge < 0.0 or bet_grade == "PASS":
                strict_pass = False
            strict_score = (
                calibrated_conf
                + play_score
                + (ev_edge * 220.0)
                + min(odds, 3.0) * 3.0
            )
            soft_pass = (
                calibrated_conf >= max(min_conf - 10.0, 48.0)
                and odds <= 4.20
                and play_score >= 44.0
                and risk_level != "EXTREME"
                and ev_edge >= -0.015
            )
            soft_score = (
                calibrated_conf * 0.95
                + play_score * 0.90
                + (ev_edge * 180.0)
                + min(odds, 3.5) * 3.5
            )
        elif strategy_name == "AGGRESSIVE":
            if odds < 1.35 or odds > 7.50 or play_score < 46.0:
                strict_pass = False
            if risk_level == "EXTREME" or bet_grade == "PASS":
                strict_pass = False
            strict_score = (
                calibrated_conf * 0.85
                + play_score * 0.75
                + (ev_edge * 260.0)
                + min(odds, 6.0) * 7.0
            )
            soft_pass = (
                calibrated_conf >= max(min_conf - 10.0, 42.0)
                and 1.25 <= odds <= 8.50
                and play_score >= 40.0
                and risk_level != "EXTREME"
                and ev_edge >= -0.02
            )
            soft_score = (
                calibrated_conf * 0.80
                + play_score * 0.70
                + (ev_edge * 210.0)
                + min(odds, 7.0) * 7.5
            )
        elif strategy_name == "VALUE":
            if odds < 1.55 or play_score < 48.0 or ev_edge < 0.03:
                strict_pass = False
            if risk_level == "EXTREME" or data_quality == "LOW" or bet_grade == "PASS":
                strict_pass = False
            strict_score = (
                calibrated_conf * 0.75
                + play_score * 0.85
                + (ev_edge * 320.0)
                + min(odds, 6.5) * 8.0
            )
            soft_pass = (
                calibrated_conf >= max(min_conf - 10.0, 40.0)
                and odds >= 1.35
                and play_score >= 40.0
                and risk_level != "EXTREME"
                and data_quality != "LOW"
                and ev_edge >= 0.0
            )
            soft_score = (
                calibrated_conf * 0.70
                + play_score * 0.80
                + (ev_edge * 260.0)
                + min(odds, 7.0) * 7.0
            )
        else:  # MIRACLE (also reached for unrecognised strategy names)
            if odds < 2.10 or play_score < 40.0 or ev_edge < 0.01:
                strict_pass = False
            if risk_level == "EXTREME" or bet_grade == "PASS":
                strict_pass = False
            strict_score = (
                calibrated_conf * 0.55
                + play_score * 0.60
                + (ev_edge * 260.0)
                + min(odds, 10.0) * 10.0
            )
            soft_pass = (
                calibrated_conf >= max(min_conf - 10.0, 36.0)
                and odds >= 1.60
                and play_score >= 34.0
                and risk_level != "EXTREME"
                and ev_edge >= -0.02
            )
            soft_score = (
                calibrated_conf * 0.50
                + play_score * 0.55
                + (ev_edge * 200.0)
                + min(odds, 10.0) * 9.0
            )
        return strict_pass, strict_score, soft_pass, soft_score

    def _rank_key(item: Dict[str, Any]) -> Tuple[float, float, float]:
        """Ordering used for both per-match and coupon-wide ranking."""
        return (
            float(item.get("strategy_score", 0.0)),
            float(item.get("confidence", 0.0)),
            float(item.get("ev_edge", 0.0)),
        )

    candidates: List[Dict[str, Any]] = []
    rejected: List[Dict[str, Any]] = []

    for match_id in match_ids:
        package = self.analyze_match(match_id)
        if not package:
            rejected.append({"match_id": match_id, "reason": "match_not_found"})
            continue

        risk_level = str(package.get("risk", {}).get("level", "MEDIUM")).upper()
        data_quality = str(package.get("data_quality", {}).get("label", "MEDIUM")).upper()

        # Headline picks come first so they win de-duplication over the
        # plain bet-summary rows.
        raw_picks = [
            candidate
            for candidate in [
                package.get("main_pick"),
                package.get("value_pick"),
                *(package.get("supporting_picks") or []),
                *(package.get("bet_summary") or []),
            ]
            if isinstance(candidate, dict)
        ]

        match_candidates: List[Dict[str, Any]] = []
        seen_keys = set()

        for candidate in raw_picks:
            market = str(candidate.get("market") or "")
            pick = str(candidate.get("pick") or "")
            if not market or not pick:
                continue

            dedupe_key = (market, pick)
            if dedupe_key in seen_keys:
                continue
            seen_keys.add(dedupe_key)

            calibrated_conf = float(
                candidate.get("calibrated_confidence", candidate.get("confidence", 0.0))
                or 0.0
            )
            odds = float(candidate.get("odds", 0.0) or 0.0)
            probability = float(candidate.get("probability", 0.0) or 0.0)
            play_score = float(candidate.get("play_score", 0.0) or 0.0)
            # Rows may carry the edge as either "ev_edge" or "edge".
            ev_edge = float(
                candidate.get("ev_edge", candidate.get("edge", 0.0)) or 0.0
            )
            playable = bool(candidate.get("playable"))
            bet_grade = str(candidate.get("bet_grade", "PASS")).upper()

            # A pick without a real price can never be part of a coupon.
            if odds <= 1.01:
                continue

            strict_candidate = (
                playable
                and calibrated_conf >= min_conf
                and bet_grade != "PASS"
            )
            strict_pass, strict_score, soft_pass, soft_score = _strategy_tiers(
                calibrated_conf,
                odds,
                play_score,
                ev_edge,
                risk_level,
                data_quality,
                bet_grade,
                strict_candidate,
            )

            # Last-resort tier shared by all strategies.
            fallback_pass = (
                calibrated_conf >= max(min_conf - 14.0, 34.0)
                and odds >= 1.20
                and play_score >= 32.0
                and risk_level != "EXTREME"
            )
            fallback_score = (
                calibrated_conf * 0.60
                + play_score * 0.65
                + (ev_edge * 120.0)
                + min(odds, 6.0) * 4.0
            )

            if strict_pass:
                strategy_score = strict_score
                selection_mode = "strict"
            elif soft_pass:
                strategy_score = soft_score
                selection_mode = "soft"
            elif fallback_pass:
                strategy_score = fallback_score
                selection_mode = "fallback"
            else:
                continue

            match_candidates.append(
                {
                    "match_id": package["match_info"]["match_id"],
                    "match_name": package["match_info"]["match_name"],
                    "market": market,
                    "pick": pick,
                    "probability": probability,
                    "confidence": calibrated_conf,
                    "odds": odds,
                    "risk_level": risk_level,
                    "data_quality": data_quality,
                    "bet_grade": bet_grade,
                    "playable": playable,
                    "play_score": round(play_score, 1),
                    "ev_edge": round(ev_edge, 4),
                    "selection_mode": selection_mode,
                    "strategy_score": round(strategy_score, 3),
                }
            )

        if not match_candidates:
            rejected.append(
                {
                    "match_id": match_id,
                    "reason": "no_strategy_fit",
                    "threshold": min_conf,
                }
            )
            continue

        # One representative per match: its best-ranked surviving pick.
        match_candidates.sort(key=_rank_key, reverse=True)
        candidates.append(match_candidates[0])

    candidates.sort(key=_rank_key, reverse=True)
    selected = candidates[: max(1, max_allowed)]

    total_odds = 1.0
    win_probability = 1.0
    for pick in selected:
        odd = float(pick.get("odds") or 1.0)
        prob = float(pick.get("probability") or 0.0)
        total_odds *= odd if odd > 1.0 else 1.0
        win_probability *= prob

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop tzinfo so the emitted string keeps its exact
    # previous shape ("YYYY-MM-DDTHH:MM:SS[.ffffff]Z").
    generated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    return {
        "strategy": strategy_name,
        "generated_at": generated_at,
        "match_count": len(selected),
        "bets": selected,
        "total_odds": round(total_odds, 2),
        "expected_win_rate": round(win_probability, 4),
        "rejected_matches": rejected,
    }
|
||||
|
||||
def get_daily_bankers_live(self, count: int = 3) -> List[Dict[str, Any]]:
    """Return up to ``count`` SAFE-strategy picks from the live match feed.

    Reads the ids of at most 60 rows of ``live_matches`` whose kickoff
    (``mst_utc``, epoch milliseconds) falls inside the next 24 hours, then
    delegates the selection to ``self.build_coupon``.

    Args:
        count: Desired number of picks. Values below 1 are treated as 1.

    Returns:
        The ``"bets"`` list of the generated coupon, truncated to
        ``max(1, count)`` entries; an empty list when no match is upcoming.
    """
    limit = max(1, count)

    # psycopg2's connection context manager only ends the transaction; it
    # does not close the connection, so close it explicitly.
    conn = psycopg2.connect(self.dsn)
    try:
        with conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(
                """
                SELECT id
                FROM live_matches
                WHERE mst_utc > EXTRACT(EPOCH FROM NOW()) * 1000
                  AND mst_utc < EXTRACT(EPOCH FROM NOW() + INTERVAL '24 hours') * 1000
                ORDER BY mst_utc ASC
                LIMIT 60
                """,
            )
            ids = [row["id"] for row in cur.fetchall()]
    finally:
        conn.close()

    if not ids:
        return []

    coupon = self.build_coupon(
        match_ids=ids,
        strategy="SAFE",
        max_matches=limit,
        min_confidence=78.0,
    )
    return coupon.get("bets", [])[:limit]
|
||||
|
||||
def get_daily_bankers(self, count: int = 3) -> List[Dict[str, Any]]:
    """Identify the safest, highest-value bets for the next 24 hours.

    Loads up to 50 not-started matches (``status = 'NS'``) that have at
    least one odds category, analyses each one and keeps the main pick when
    it meets the banker criteria: confidence >= 75 and odds >= 1.30.
    Candidates are ranked by ``confidence * (odds - 1)``.

    Args:
        count: Maximum number of bankers to return. Negative values are
            treated as 0.

    Returns:
        A list of banker dicts (``match_id``, ``match_name``, ``league``,
        ``pick``, ``confidence``, ``odds``, ``value_score``), best first.
    """
    now_ms = int(time.time() * 1000)
    horizon_ms = now_ms + (24 * 60 * 60 * 1000)

    # psycopg2's connection context manager only ends the transaction; it
    # does not close the connection, so close it explicitly.
    conn = psycopg2.connect(self.dsn)
    try:
        with conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute("""
                SELECT m.id, m.match_name, m.mst_utc
                FROM matches m
                WHERE m.mst_utc >= %s AND m.mst_utc <= %s
                  AND m.status = 'NS'
                  AND EXISTS (SELECT 1 FROM odd_categories oc WHERE oc.match_id = m.id)
                ORDER BY m.mst_utc ASC
                LIMIT 50
            """, (now_ms, horizon_ms))
            matches = cur.fetchall()
    finally:
        conn.close()

    potential_bankers = []
    print(f"🔍 Scanning {len(matches)} upcoming matches for Bankers...")

    for match in matches:
        try:
            data = self._load_match_data(match['id'])
            if data is None:
                continue

            result = self.analyze_match(match['id'])
            if not result or 'main_pick' not in result:
                continue

            pick = result['main_pick']
            conf = pick.get('calibrated_confidence', pick.get('confidence', 0))
            odds = pick.get('odds', 0)
            market = pick.get('market', '')
            pick_name = pick.get('pick', '')

            # Banker criteria: high confidence (>= 75%) AND decent odds (>= 1.30)
            if conf >= 75.0 and odds >= 1.30:
                potential_bankers.append({
                    "match_id": match['id'],
                    "match_name": match['match_name'] or f"{data.home_team_name} vs {data.away_team_name}",
                    "league": data.league_name,
                    "pick": f"{market} - {pick_name}",
                    "confidence": conf,
                    "odds": odds,
                    "value_score": conf * (odds - 1.0),
                })
        except Exception as exc:
            # Best effort: one broken match must not abort the scan, but the
            # failure should be visible instead of silently swallowed.
            print(f"⚠️ Banker scan skipped match {match.get('id')}: {exc}")

    potential_bankers.sort(key=lambda banker: banker['value_score'], reverse=True)
    # Clamp so a negative count cannot slice from the wrong end of the list.
    return potential_bankers[:max(0, count)]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,498 @@
|
||||
"""Feature Builder Mixin — V25/V28 feature vector assembly.
|
||||
|
||||
Auto-extracted mixin module — split from services/single_match_orchestrator.py.
|
||||
All methods here are composed into SingleMatchOrchestrator via inheritance.
|
||||
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
|
||||
initialised in the main __init__.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, overload
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from schemas.prediction import FullMatchPrediction
|
||||
from schemas.match_data import MatchData
|
||||
from models.v25_ensemble import V25Predictor, get_v25_predictor
|
||||
try:
|
||||
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
|
||||
except ImportError:
|
||||
class V27Predictor: # type: ignore[no-redef]
|
||||
def __init__(self): self.models = {}
|
||||
def load_models(self): return False
|
||||
def predict_all(self, features): return {}
|
||||
def compute_divergence(*args, **kwargs):
|
||||
return {}
|
||||
def compute_value_edge(*args, **kwargs):
|
||||
return {}
|
||||
from features.odds_band_analyzer import OddsBandAnalyzer
|
||||
try:
|
||||
from models.basketball_v25 import (
|
||||
BasketballMatchPrediction,
|
||||
get_basketball_v25_predictor,
|
||||
)
|
||||
except ImportError:
|
||||
BasketballMatchPrediction = Any # type: ignore[misc]
|
||||
def get_basketball_v25_predictor() -> Any:
|
||||
raise ImportError("Basketball predictor is not available")
|
||||
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
|
||||
from features.upset_engine import get_upset_engine
|
||||
from services.feature_enrichment import FeatureEnrichmentService
|
||||
from services.betting_brain import BettingBrain
|
||||
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
|
||||
from services.match_commentary import generate_match_commentary
|
||||
from utils.top_leagues import load_top_league_ids
|
||||
from utils.league_reliability import load_league_reliability
|
||||
from config.config_loader import build_threshold_dict, get_threshold_default
|
||||
from models.calibration import get_calibrator
|
||||
|
||||
|
||||
class FeatureBuilderMixin:
    """Mixin that assembles the pre-match feature vector for the V25 models.

    The methods read attributes provided by the composing class:
    ``self.dsn``, ``self.enrichment``, ``self.odds_band_analyzer``,
    ``self.V25_ODDS_FEATURE_KEYS`` and ``self._real_market_odds``.
    """

    def _build_v25_features(self, data: MatchData) -> Dict[str, float]:
        """Build the single authoritative V25 pre-match feature vector.

        Combines sanitized market odds, ELO ratings, enrichment statistics
        (team stats, H2H, form, referee, league, momentum, rolling, venue,
        rest days), upset-engine signals, calendar flags, derived interaction
        terms, squad features and odds-band features into one flat dict.

        Every enrichment step is best effort: on failure the step keeps its
        default values and the failure is reported in a single warning line.

        Side effects:
            Sets ``data.feature_source`` and ``data.odds_band_features``.

        Args:
            data: The loaded match to build features for.

        Returns:
            Mapping of feature name to value. Key order is stable.
        """
        odds = self._sanitize_v25_odds(data.odds_data or {})

        def odd(key: str) -> float:
            """Return the sanitized odd for ``key`` (0.0 when absent)."""
            return float(odds.get(key) or 0)

        ms_h, ms_d, ms_a = odd('ms_h'), odd('ms_d'), odd('ms_a')

        # Implied probabilities (vig-normalised)
        implied_home, implied_draw, implied_away = 0.33, 0.33, 0.33
        if ms_h > 0 and ms_d > 0 and ms_a > 0:
            raw_sum = 1 / ms_h + 1 / ms_d + 1 / ms_a
            implied_home = (1 / ms_h) / raw_sum
            implied_draw = (1 / ms_d) / raw_sum
            implied_away = (1 / ms_a) / raw_sum
        upset_potential = max(
            0.0,
            min(
                1.0,
                1.0 - abs(implied_home - implied_away) + (implied_draw * 0.35),
            ),
        )

        # Defaults — overridden by successful queries
        home_elo, away_elo = 1500.0, 1500.0
        home_venue_elo, away_venue_elo = 1500.0, 1500.0
        home_form_elo_val, away_form_elo_val = 1500.0, 1500.0
        enr = self.enrichment
        home_stats = dict(enr._DEFAULT_TEAM_STATS)
        away_stats = dict(enr._DEFAULT_TEAM_STATS)
        h2h = dict(enr._DEFAULT_H2H)
        home_form = dict(enr._DEFAULT_FORM)
        away_form = dict(enr._DEFAULT_FORM)
        ref = dict(enr._DEFAULT_REFEREE)
        league = dict(enr._DEFAULT_LEAGUE)
        home_momentum, away_momentum = 0.0, 0.0
        home_rolling = dict(enr._DEFAULT_ROLLING)
        away_rolling = dict(enr._DEFAULT_ROLLING)
        home_venue = dict(enr._DEFAULT_VENUE)
        away_venue = dict(enr._DEFAULT_VENUE)
        home_rest, away_rest = 7.0, 7.0
        odds_band_features = {}
        enrichment_failures = []

        # All enrichment queries share a single DB connection.
        try:
            conn = psycopg2.connect(self.dsn)
            try:
                with conn, conn.cursor(cursor_factory=RealDictCursor) as cur:

                    def record_failure(label: str, exc: Exception) -> None:
                        """Log a failed step and reset the transaction.

                        PostgreSQL rejects every further statement once a
                        statement in the transaction has failed; rolling back
                        lets the remaining, independent steps still run.
                        """
                        enrichment_failures.append(f"{label}:{exc}")
                        conn.rollback()

                    # ELO
                    try:
                        cur.execute(
                            "SELECT home_elo, away_elo, "
                            "       home_home_elo, away_away_elo, "
                            "       home_form_elo, away_form_elo "
                            "FROM football_ai_features "
                            "WHERE match_id = %s LIMIT 1",
                            (data.match_id,),
                        )
                        elo_row = cur.fetchone()
                        if elo_row:
                            home_elo = float(elo_row.get('home_elo') or 1500.0)
                            away_elo = float(elo_row.get('away_elo') or 1500.0)
                            home_venue_elo = float(elo_row.get('home_home_elo') or home_elo)
                            away_venue_elo = float(elo_row.get('away_away_elo') or away_elo)
                            home_form_elo_val = float(elo_row.get('home_form_elo') or home_elo)
                            away_form_elo_val = float(elo_row.get('away_form_elo') or away_elo)
                        else:
                            # No precomputed row: fall back to per-team ratings.
                            cur.execute(
                                "SELECT team_id, overall_elo, home_elo, away_elo, form_elo "
                                "FROM team_elo_ratings WHERE team_id IN (%s, %s)",
                                (data.home_team_id, data.away_team_id),
                            )
                            by_team = {str(r.get("team_id")): r for r in cur.fetchall()}
                            home_row = by_team.get(str(data.home_team_id))
                            away_row = by_team.get(str(data.away_team_id))
                            if home_row:
                                home_elo = float(home_row.get("overall_elo") or 1500.0)
                                home_venue_elo = float(home_row.get("home_elo") or home_elo)
                                home_form_elo_val = float(home_row.get("form_elo") or home_elo)
                            if away_row:
                                away_elo = float(away_row.get("overall_elo") or 1500.0)
                                away_venue_elo = float(away_row.get("away_elo") or away_elo)
                                away_form_elo_val = float(away_row.get("form_elo") or away_elo)
                        setattr(
                            data,
                            "feature_source",
                            "football_ai_features" if elo_row else "live_prematch_enrichment",
                        )
                        # Staleness check: both teams at exact 1500 → ELO was never computed
                        if home_elo == 1500.0 and away_elo == 1500.0:
                            enrichment_failures.append("elo_stale:both_teams_at_default_1500")
                    except Exception as e:
                        setattr(data, "feature_source", "fallback_defaults")
                        record_failure("elo", e)

                    # Team stats
                    try:
                        home_stats = enr.compute_team_stats(cur, data.home_team_id, data.match_date_ms)
                        away_stats = enr.compute_team_stats(cur, data.away_team_id, data.match_date_ms)
                    except Exception as e:
                        record_failure("team_stats", e)

                    # H2H
                    try:
                        h2h = enr.compute_h2h(cur, data.home_team_id, data.away_team_id, data.match_date_ms)
                    except Exception as e:
                        record_failure("h2h", e)

                    # Form
                    try:
                        home_form = enr.compute_form_streaks(cur, data.home_team_id, data.match_date_ms)
                        away_form = enr.compute_form_streaks(cur, data.away_team_id, data.match_date_ms)
                    except Exception as e:
                        record_failure("form", e)

                    # Referee
                    try:
                        ref = enr.compute_referee_stats(cur, data.referee_name, data.match_date_ms)
                    except Exception as e:
                        record_failure("referee", e)

                    # League
                    try:
                        league = enr.compute_league_averages(cur, data.league_id, data.match_date_ms)
                    except Exception as e:
                        record_failure("league", e)

                    # Momentum
                    try:
                        home_momentum = enr.compute_momentum(cur, data.home_team_id, data.match_date_ms)
                        away_momentum = enr.compute_momentum(cur, data.away_team_id, data.match_date_ms)
                    except Exception as e:
                        record_failure("momentum", e)

                    # V27 Rolling + Venue + Rest
                    try:
                        home_rolling = enr.compute_rolling_stats(cur, data.home_team_id, data.match_date_ms)
                        away_rolling = enr.compute_rolling_stats(cur, data.away_team_id, data.match_date_ms)
                        home_venue = enr.compute_venue_stats(cur, data.home_team_id, data.match_date_ms, is_home=True)
                        away_venue = enr.compute_venue_stats(cur, data.away_team_id, data.match_date_ms, is_home=False)
                        home_rest = enr.compute_days_rest(cur, data.home_team_id, data.match_date_ms)
                        away_rest = enr.compute_days_rest(cur, data.away_team_id, data.match_date_ms)
                    except Exception as e:
                        record_failure("rolling/venue", e)

                    # V28 Odds-Band
                    try:
                        odds_band_features = self.odds_band_analyzer.compute_all(
                            cur=cur,
                            home_team_id=data.home_team_id,
                            away_team_id=data.away_team_id,
                            league_id=data.league_id,
                            odds=odds,
                            before_ts=data.match_date_ms,
                            referee_name=data.referee_name,
                        )
                    except Exception as e:
                        record_failure("odds_band", e)
            finally:
                # The connection context manager only ends the transaction.
                conn.close()
        except Exception as e:
            enrichment_failures.append(f"db_connection:{e}")
            setattr(data, "feature_source", "fallback_defaults")

        setattr(data, "odds_band_features", odds_band_features)
        if enrichment_failures:
            print(f"⚠️ Enrichment partial failures for {data.match_id}: {', '.join(enrichment_failures)}")

        # Upset engine features
        upset_atmosphere, upset_motivation, upset_fatigue = 0.0, 0.0, 0.0
        try:
            upset_engine = get_upset_engine()
            upset_feats = upset_engine.get_features(
                home_team_name=getattr(data, 'home_team_name', '') or '',
                home_team_id=data.home_team_id,
                away_team_name=getattr(data, 'away_team_name', '') or '',
                league_name=getattr(data, 'league_name', '') or '',
                # Table positions are not available here; fixed placeholders.
                home_position=10,
                away_position=10,
                match_date_ms=data.match_date_ms,
                home_days_rest=int(home_rest),
                away_days_rest=int(away_rest),
            )
            upset_atmosphere = upset_feats.get('upset_atmosphere', 0.0)
            upset_motivation = upset_feats.get('upset_motivation', 0.0)
            upset_fatigue = upset_feats.get('upset_fatigue', 0.0)
        except Exception as e:
            print(f"⚠️ Upset engine failed: {e}")

        # Presence flags: 1.0 when a real price (> 1.01) exists for the market.
        presence_sources = (
            ('odds_ms_h_present', 'ms_h'),
            ('odds_ms_d_present', 'ms_d'),
            ('odds_ms_a_present', 'ms_a'),
            ('odds_ht_ms_h_present', 'ht_h'),
            ('odds_ht_ms_d_present', 'ht_d'),
            ('odds_ht_ms_a_present', 'ht_a'),
            ('odds_ou05_o_present', 'ou05_o'),
            ('odds_ou05_u_present', 'ou05_u'),
            ('odds_ou15_o_present', 'ou15_o'),
            ('odds_ou15_u_present', 'ou15_u'),
            ('odds_ou25_o_present', 'ou25_o'),
            ('odds_ou25_u_present', 'ou25_u'),
            ('odds_ou35_o_present', 'ou35_o'),
            ('odds_ou35_u_present', 'ou35_u'),
            ('odds_ht_ou05_o_present', 'ht_ou05_o'),
            ('odds_ht_ou05_u_present', 'ht_ou05_u'),
            ('odds_ht_ou15_o_present', 'ht_ou15_o'),
            ('odds_ht_ou15_u_present', 'ht_ou15_u'),
            ('odds_btts_y_present', 'btts_y'),
            ('odds_btts_n_present', 'btts_n'),
        )
        odds_presence = {
            flag: 1.0 if odd(source) > 1.01 else 0.0
            for flag, source in presence_sources
        }

        # ── Calendar features (V27) ──
        import datetime
        # Timezone-aware replacement for the deprecated utcfromtimestamp().
        match_dt = datetime.datetime.fromtimestamp(
            data.match_date_ms / 1000, tz=datetime.timezone.utc
        )
        match_month = match_dt.month
        is_season_start = 1.0 if match_month in (7, 8, 9) else 0.0
        is_season_end = 1.0 if match_month in (5, 6) else 0.0

        # ── Cup game detection: dampen home advantage in feature space ──
        _league_name = (getattr(data, 'league_name', '') or '').lower()
        _cup_keywords = ("kupa", "cup", "coupe", "copa", "coppa", "pokal",
                         "trophy", "shield", "ziraat", "süper kupa", "super cup")
        _is_cup = any(kw in _league_name for kw in _cup_keywords)

        # ── Derived / Interaction features (V27) ──
        # Cup games: home ELO advantage is ~30% weaker (rotation, lower motivation)
        elo_diff = (home_elo - away_elo) * (0.70 if _is_cup else 1.0)
        form_elo_diff = home_form_elo_val - away_form_elo_val
        attack_vs_defense_home = data.home_goals_avg - data.away_conceded_avg
        attack_vs_defense_away = data.away_goals_avg - data.home_conceded_avg
        # The "xga" features are the conceded-goal averages; "xg_diff" is
        # their difference (home minus away).
        xga_home = data.home_conceded_avg
        xga_away = data.away_conceded_avg
        xg_diff = xga_home - xga_away
        mom_diff = home_momentum - away_momentum
        form_momentum_interaction = mom_diff * form_elo_diff / 1000.0
        elo_form_consistency = 1.0 - abs(elo_diff - form_elo_diff) / max(abs(elo_diff), 100.0)
        upset_x_elo_gap = upset_potential * abs(elo_diff) / 500.0

        return {
            # META (1)
            'mst_utc': float(data.match_date_ms),
            # ELO (8)
            'home_overall_elo': home_elo,
            'away_overall_elo': away_elo,
            'elo_diff': elo_diff,
            'home_home_elo': home_venue_elo,
            'away_away_elo': away_venue_elo,
            'home_form_elo': home_form_elo_val,
            'away_form_elo': away_form_elo_val,
            'form_elo_diff': form_elo_diff,
            # Form (12)
            'home_goals_avg': data.home_goals_avg,
            'home_conceded_avg': data.home_conceded_avg,
            'away_goals_avg': data.away_goals_avg,
            'away_conceded_avg': data.away_conceded_avg,
            'home_clean_sheet_rate': home_form['clean_sheet_rate'],
            'away_clean_sheet_rate': away_form['clean_sheet_rate'],
            'home_scoring_rate': home_form['scoring_rate'],
            'away_scoring_rate': away_form['scoring_rate'],
            'home_winning_streak': home_form['winning_streak'],
            'away_winning_streak': away_form['winning_streak'],
            'home_unbeaten_streak': home_form['unbeaten_streak'],
            'away_unbeaten_streak': away_form['unbeaten_streak'],
            # H2H (10 — original 6 + V27 expanded 4)
            'h2h_total_matches': h2h['total_matches'],
            'h2h_home_win_rate': h2h['home_win_rate'],
            'h2h_draw_rate': h2h['draw_rate'],
            'h2h_avg_goals': h2h['avg_goals'],
            'h2h_btts_rate': h2h['btts_rate'],
            'h2h_over25_rate': h2h['over25_rate'],
            'h2h_home_goals_avg': h2h['home_goals_avg'],
            'h2h_away_goals_avg': h2h['away_goals_avg'],
            'h2h_recent_trend': h2h['recent_trend'],
            'h2h_venue_advantage': h2h['venue_advantage'],
            # Stats (8)
            'home_avg_possession': home_stats['avg_possession'],
            'away_avg_possession': away_stats['avg_possession'],
            'home_avg_shots_on_target': home_stats['avg_shots_on_target'],
            'away_avg_shots_on_target': away_stats['avg_shots_on_target'],
            'home_shot_conversion': home_stats['shot_conversion'],
            'away_shot_conversion': away_stats['shot_conversion'],
            'home_avg_corners': home_stats['avg_corners'],
            'away_avg_corners': away_stats['avg_corners'],
            # Odds (23 values, followed by 20 presence flags)
            'odds_ms_h': ms_h,
            'odds_ms_d': ms_d,
            'odds_ms_a': ms_a,
            'implied_home': implied_home,
            'implied_draw': implied_draw,
            'implied_away': implied_away,
            'odds_ht_ms_h': odd('ht_h'),
            'odds_ht_ms_d': odd('ht_d'),
            'odds_ht_ms_a': odd('ht_a'),
            'odds_ou05_o': odd('ou05_o'),
            'odds_ou05_u': odd('ou05_u'),
            'odds_ou15_o': odd('ou15_o'),
            'odds_ou15_u': odd('ou15_u'),
            'odds_ou25_o': odd('ou25_o'),
            'odds_ou25_u': odd('ou25_u'),
            'odds_ou35_o': odd('ou35_o'),
            'odds_ou35_u': odd('ou35_u'),
            'odds_ht_ou05_o': odd('ht_ou05_o'),
            'odds_ht_ou05_u': odd('ht_ou05_u'),
            'odds_ht_ou15_o': odd('ht_ou15_o'),
            'odds_ht_ou15_u': odd('ht_ou15_u'),
            'odds_btts_y': odd('btts_y'),
            'odds_btts_n': odd('btts_n'),
            **odds_presence,
            # League (9 — original 2 + V27 expanded 5 + xga 2)
            'home_xga': xga_home,
            'away_xga': xga_away,
            'league_avg_goals': league['avg_goals'],
            'league_zero_goal_rate': league['zero_goal_rate'],
            'league_home_win_rate': league['home_win_rate'],
            'league_draw_rate': league['draw_rate'],
            'league_btts_rate': league['btts_rate'],
            'league_ou25_rate': league['ou25_rate'],
            'league_reliability_score': league['reliability_score'],
            # Upset (4)
            'upset_atmosphere': upset_atmosphere,
            'upset_motivation': upset_motivation,
            'upset_fatigue': upset_fatigue,
            'upset_potential': upset_potential,
            # Referee (5)
            'referee_home_bias': ref['home_bias'],
            'referee_avg_goals': ref['avg_goals'],
            'referee_cards_total': ref['cards_total'],
            'referee_avg_yellow': ref['avg_yellow'],
            'referee_experience': ref['experience'],
            # Momentum (3)
            'home_momentum_score': home_momentum,
            'away_momentum_score': away_momentum,
            'momentum_diff': mom_diff,
            # ── V27 Rolling Stats (12) ──
            'home_rolling5_goals': home_rolling['rolling5_goals'],
            'home_rolling5_conceded': home_rolling['rolling5_conceded'],
            'home_rolling10_goals': home_rolling['rolling10_goals'],
            'home_rolling10_conceded': home_rolling['rolling10_conceded'],
            'home_rolling20_goals': home_rolling['rolling20_goals'],
            'home_rolling20_conceded': home_rolling['rolling20_conceded'],
            'away_rolling5_goals': away_rolling['rolling5_goals'],
            'away_rolling5_conceded': away_rolling['rolling5_conceded'],
            'away_rolling10_goals': away_rolling['rolling10_goals'],
            'away_rolling10_conceded': away_rolling['rolling10_conceded'],
            'home_rolling5_cs': home_rolling['rolling5_cs'],
            'away_rolling5_cs': away_rolling['rolling5_cs'],
            # ── V27 Venue Stats (4) ──
            'home_venue_goals': home_venue['venue_goals'],
            'home_venue_conceded': home_venue['venue_conceded'],
            'away_venue_goals': away_venue['venue_goals'],
            'away_venue_conceded': away_venue['venue_conceded'],
            # ── V27 Goal Trend (2) ──
            'home_goal_trend': home_rolling['rolling5_goals'] - home_rolling['rolling10_goals'],
            'away_goal_trend': away_rolling['rolling5_goals'] - away_rolling['rolling10_goals'],
            # ── V27 Calendar (5) ──
            'home_days_rest': home_rest,
            'away_days_rest': away_rest,
            'match_month': float(match_month),
            'is_season_start': is_season_start,
            'is_season_end': is_season_end,
            # ── V27 Interaction (6) ──
            'attack_vs_defense_home': attack_vs_defense_home,
            'attack_vs_defense_away': attack_vs_defense_away,
            'xg_diff': xg_diff,
            'form_momentum_interaction': form_momentum_interaction,
            'elo_form_consistency': elo_form_consistency,
            'upset_x_elo_gap': upset_x_elo_gap,
            # Squad Features (21) — PlayerPredictorEngine
            **self._get_squad_features(data),
            # V28 Odds-Band Historical Performance Features
            **odds_band_features,
        }

    def _get_squad_features(self, data: MatchData) -> Dict[str, float]:
        """Return the 21 player-level squad features; never raises.

        Queries the player predictor for the match. When the predictor (or
        any attribute of its result) fails, a warning is printed and neutral
        defaults are returned instead.
        """
        defaults = {
            'home_squad_quality': 12.0, 'away_squad_quality': 12.0, 'squad_diff': 0.0,
            'home_key_players': 3.0, 'away_key_players': 3.0,
            'home_missing_impact': 0.0, 'away_missing_impact': 0.0,
            'home_goals_form': 1.3, 'away_goals_form': 1.3,
            'home_lineup_goals_per90': 0.0, 'away_lineup_goals_per90': 0.0,
            'home_lineup_assists_per90': 0.0, 'away_lineup_assists_per90': 0.0,
            'home_squad_continuity': 0.5, 'away_squad_continuity': 0.5,
            'home_top_scorer_form': 0.0, 'away_top_scorer_form': 0.0,
            'home_avg_player_exp': 0.0, 'away_avg_player_exp': 0.0,
            'home_goals_diversity': 0.0, 'away_goals_diversity': 0.0,
        }
        try:
            engine = get_player_predictor()
            pred = engine.predict(
                match_id=data.match_id,
                home_team_id=data.home_team_id,
                away_team_id=data.away_team_id,
                home_lineup=data.home_lineup,
                away_lineup=data.away_lineup,
                sidelined_data=data.sidelined_data,
            )
            # A missing/zero prediction value becomes 0.0, except squad
            # continuity, which falls back to the neutral 0.5.
            result = {
                name: float(
                    getattr(pred, name)
                    or (0.5 if name.endswith('_squad_continuity') else 0.0)
                )
                for name in defaults
            }
            for side in ('home', 'away'):
                sq = result[f'{side}_squad_quality']
                if sq > 50 or sq < 0:
                    print(f"🚨 SCALE MISMATCH: {side}_squad_quality={sq:.1f} "
                          f"(expected 3-36). Check player_predictor formula!")
            return result
        except Exception as e:
            print(f"⚠️ Squad features failed: {e}")
            return defaults

    def _sanitize_v25_odds(self, odds_data: Dict[str, Any]) -> Dict[str, float]:
        """Return the odds subset used by the V25 feature vector.

        Every key in ``self.V25_ODDS_FEATURE_KEYS`` is always present in the
        result; the listed auxiliary markets are included only when they
        appear in ``odds_data``. Each value is passed through
        ``self._real_market_odds``.
        """
        sanitized: Dict[str, float] = {
            key: self._real_market_odds(odds_data, key)
            for key in self.V25_ODDS_FEATURE_KEYS
        }
        optional_keys = (
            "dc_1x", "dc_x2", "dc_12", "oe_odd", "oe_even",
            "cards_o", "cards_u", "hcap_h", "hcap_d", "hcap_a",
        )
        for key in optional_keys:
            if key in odds_data:
                sanitized[key] = self._real_market_odds(odds_data, key)
        return sanitized
|
||||
@@ -0,0 +1,231 @@
|
||||
"""HT/MS Mixin — analyze_match_htms endpoint and helpers.
|
||||
|
||||
Auto-extracted mixin module — split from services/single_match_orchestrator.py.
|
||||
All methods here are composed into SingleMatchOrchestrator via inheritance.
|
||||
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
|
||||
initialised in the main __init__.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, overload
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from schemas.prediction import FullMatchPrediction
|
||||
from schemas.match_data import MatchData
|
||||
from models.v25_ensemble import V25Predictor, get_v25_predictor
|
||||
try:
|
||||
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
|
||||
except ImportError:
|
||||
class V27Predictor: # type: ignore[no-redef]
|
||||
def __init__(self): self.models = {}
|
||||
def load_models(self): return False
|
||||
def predict_all(self, features): return {}
|
||||
def compute_divergence(*args, **kwargs):
|
||||
return {}
|
||||
def compute_value_edge(*args, **kwargs):
|
||||
return {}
|
||||
from features.odds_band_analyzer import OddsBandAnalyzer
|
||||
try:
|
||||
from models.basketball_v25 import (
|
||||
BasketballMatchPrediction,
|
||||
get_basketball_v25_predictor,
|
||||
)
|
||||
except ImportError:
|
||||
BasketballMatchPrediction = Any # type: ignore[misc]
|
||||
def get_basketball_v25_predictor() -> Any:
|
||||
raise ImportError("Basketball predictor is not available")
|
||||
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
|
||||
from services.feature_enrichment import FeatureEnrichmentService
|
||||
from services.betting_brain import BettingBrain
|
||||
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
|
||||
from services.match_commentary import generate_match_commentary
|
||||
from utils.top_leagues import load_top_league_ids
|
||||
from utils.league_reliability import load_league_reliability
|
||||
from config.config_loader import build_threshold_dict, get_threshold_default
|
||||
from models.calibration import get_calibrator
|
||||
|
||||
|
||||
class HtmsMixin:
    """Mixin providing the HT/MS (half-time / full-time) upset analysis.

    The methods read attributes provided by the composing class:
    ``self.top_league_ids``, ``self._load_match_data``,
    ``self.analyze_match`` and ``self._to_float``.
    """

    def analyze_match_htms(self, match_id: str) -> Optional[Dict[str, Any]]:
        """Build the HT/MS focused response for upset-hunting workflows.

        This endpoint is intentionally additive and does not mutate the
        standard /v20plus/analyze package contract.

        Args:
            match_id: Identifier of the match to analyse.

        Returns:
            ``None`` when the match cannot be loaded or analysed; a
            ``{"status": "skip", ...}`` dict when the match is not football,
            is outside the top leagues, or lacks MS/HT odds; otherwise a
            ``{"status": "ok", ...}`` dict with the ``htms_core`` and
            ``surprise_hunter`` sections.
        """
        data = self._load_match_data(match_id)
        if data is None:
            return None

        if str(data.sport or "").lower() != "football":
            return {
                "status": "skip",
                "match_id": match_id,
                "reason": "unsupported_sport",
                "engine_used": "htms_router",
            }

        is_top_league = self._is_top_league(data.league_id)
        engine_used = "v20plus_top_htms"

        # Hard gate: HT/MS upset model is trained on top leagues only.
        if not is_top_league:
            return {
                "status": "skip",
                "match_id": match_id,
                "reason": "out_of_training_scope",
                "engine_used": engine_used,
                "data_quality": {
                    "label": "LOW",
                    "flags": ["league_out_of_scope"],
                },
            }

        missing_requirements = self._missing_htms_requirements(data)
        if missing_requirements:
            return {
                "status": "skip",
                "match_id": match_id,
                "reason": "missing_critical_data",
                "missing": missing_requirements,
                "engine_used": engine_used,
                "data_quality": {
                    "label": "LOW",
                    "flags": [f"missing_{item}" for item in missing_requirements],
                },
            }

        base_package = self.analyze_match(match_id)
        if not base_package:
            return None

        # `or {}` also covers sections that are present but explicitly None.
        data_quality = base_package.get("data_quality") or {}
        market_board = base_package.get("market_board") or {}
        ms_market = market_board.get("MS") or {}
        ht_market = market_board.get("HT") or {}
        htft_probs = (market_board.get("HTFT") or {}).get("probs") or {}

        # Outcomes where the half-time state is overturned by full time.
        reversal_probs = {
            outcome: float(htft_probs.get(outcome) or 0.0)
            for outcome in ("1/2", "2/1", "X/1", "X/2")
        }
        top_reversal = max(reversal_probs.items(), key=lambda item: item[1])

        ms_conf = float(ms_market.get("confidence") or 0.0)
        ht_conf = float(ht_market.get("confidence") or 0.0)
        base_conf = (ms_conf + ht_conf) / 2.0

        # Each data-quality problem lowers the ceiling on reported confidence.
        confidence_cap = 100.0
        penalties: List[str] = []
        if data.lineup_source == "probable_xi":
            confidence_cap = min(confidence_cap, 72.0)
            penalties.append("lineup_probable_xi")
        if data.lineup_source == "none":
            confidence_cap = min(confidence_cap, 58.0)
            penalties.append("lineup_unavailable")
        if str(data_quality.get("label", "LOW")).upper() == "LOW":
            confidence_cap = min(confidence_cap, 55.0)
            penalties.append("low_data_quality")

        final_conf = min(base_conf, confidence_cap)

        upset_score = self._compute_htms_upset_score(
            reversal_probs=reversal_probs,
            odds_data=data.odds_data or {},
            is_top_league=is_top_league,
        )
        # NOTE: because of the hard gate above, only the top-league value is
        # reachable here today.
        upset_threshold = 58.0 if is_top_league else 54.0
        upset_playable = (
            upset_score >= upset_threshold
            and top_reversal[1] >= 0.045
            and final_conf >= 45.0
            and "low_data_quality" not in penalties
        )

        return {
            "status": "ok",
            "engine_used": engine_used,
            "match_info": base_package.get("match_info", {}),
            "data_quality": data_quality,
            "htms_core": {
                "ms_pick": ms_market.get("pick"),
                "ms_confidence": round(ms_conf, 1),
                "ht_pick": ht_market.get("pick"),
                "ht_confidence": round(ht_conf, 1),
                "combined_confidence": round(final_conf, 1),
                "confidence_cap": round(confidence_cap, 1),
                "penalties": penalties,
            },
            "surprise_hunter": {
                "upset_score": round(upset_score, 1),
                "threshold": upset_threshold,
                "playable": upset_playable,
                "top_reversal_pick": top_reversal[0],
                "top_reversal_prob": round(top_reversal[1], 4),
                "reversal_probs": {
                    key: round(value, 4) for key, value in reversal_probs.items()
                },
            },
            "risk": base_package.get("risk", {}),
            "reasoning_factors": base_package.get("reasoning_factors", []),
        }

    def _is_top_league(self, league_id: Optional[str]) -> bool:
        """Return True when ``league_id`` (compared as a string) is a top league."""
        if not league_id:
            return False
        return str(league_id) in self.top_league_ids

    def _missing_htms_requirements(self, data: MatchData) -> List[str]:
        """List the odds groups the HT/MS analysis needs but ``data`` lacks.

        A group counts as present only when all three of its prices are
        greater than 1.0.

        Returns:
            A list containing ``"ms_odds"`` and/or ``"ht_odds"``; empty when
            both groups are complete.
        """
        # odds_data may be None when no odds were loaded for the match.
        odds = data.odds_data or {}
        required_groups = (
            ("ms_odds", ("ms_h", "ms_d", "ms_a")),
            ("ht_odds", ("ht_h", "ht_d", "ht_a")),
        )
        return [
            label
            for label, keys in required_groups
            if not all(float(odds.get(key) or 0.0) > 1.0 for key in keys)
        ]

    def _compute_htms_upset_score(
        self,
        reversal_probs: Dict[str, float],
        odds_data: Dict[str, float],
        is_top_league: bool,
    ) -> float:
        """Score (0–100) how promising an HT/FT reversal bet looks.

        The score is a weighted sum of the highest reversal probability
        (60%), the total reversal probability (25%) and the home/away price
        gap (15%, saturating at a gap of 2.0).

        Args:
            reversal_probs: Probability per reversal outcome.
            odds_data: Match odds; only ``ms_h`` and ``ms_a`` are read.
                ``None`` is treated as "no odds".
            is_top_league: When False the score is scaled by 0.92.

        Returns:
            The score clamped to the range [0.0, 100.0].
        """
        odds_data = odds_data or {}
        ms_h = self._to_float(odds_data.get("ms_h"), 0.0)
        ms_a = self._to_float(odds_data.get("ms_a"), 0.0)
        # Without two real prices there is no measurable favourite.
        if ms_h <= 1.0 or ms_a <= 1.0:
            favorite_gap = 0.0
        else:
            favorite_gap = abs(ms_h - ms_a)

        reversal_max = max(reversal_probs.values()) if reversal_probs else 0.0
        reversal_sum = sum(reversal_probs.values())

        # Strong favorite + reversal probability is the core upset signal.
        gap_factor = min(1.0, favorite_gap / 2.0)
        score = (
            (reversal_max * 100.0 * 0.60)
            + (reversal_sum * 100.0 * 0.25)
            + (gap_factor * 100.0 * 0.15)
        )

        if not is_top_league:
            # Non-top leagues are noisier; keep it slightly conservative.
            score *= 0.92
        return max(0.0, min(100.0, score))
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,662 @@
|
||||
"""Prediction Mixin — V25 signal extraction and prediction building.
|
||||
|
||||
Auto-extracted mixin module — split from services/single_match_orchestrator.py.
|
||||
All methods here are composed into SingleMatchOrchestrator via inheritance.
|
||||
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
|
||||
initialised in the main __init__.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, overload
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from schemas.prediction import FullMatchPrediction
|
||||
from schemas.match_data import MatchData
|
||||
from models.v25_ensemble import V25Predictor, get_v25_predictor
|
||||
try:
|
||||
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
|
||||
except ImportError:
|
||||
class V27Predictor: # type: ignore[no-redef]
|
||||
def __init__(self): self.models = {}
|
||||
def load_models(self): return False
|
||||
def predict_all(self, features): return {}
|
||||
def compute_divergence(*args, **kwargs):
|
||||
return {}
|
||||
def compute_value_edge(*args, **kwargs):
|
||||
return {}
|
||||
from features.odds_band_analyzer import OddsBandAnalyzer
|
||||
try:
|
||||
from models.basketball_v25 import (
|
||||
BasketballMatchPrediction,
|
||||
get_basketball_v25_predictor,
|
||||
)
|
||||
except ImportError:
|
||||
BasketballMatchPrediction = Any # type: ignore[misc]
|
||||
def get_basketball_v25_predictor() -> Any:
|
||||
raise ImportError("Basketball predictor is not available")
|
||||
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
|
||||
from services.feature_enrichment import FeatureEnrichmentService
|
||||
from services.betting_brain import BettingBrain
|
||||
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
|
||||
from services.match_commentary import generate_match_commentary
|
||||
from utils.top_leagues import load_top_league_ids
|
||||
from utils.league_reliability import load_league_reliability
|
||||
from config.config_loader import build_threshold_dict, get_threshold_default, get_config
|
||||
from models.calibration import get_calibrator
|
||||
from models.league_model import get_league_model_loader, FILE_TO_SIGNAL
|
||||
|
||||
|
||||
class PredictionMixin:
|
||||
def _get_score_model(self) -> Optional[Dict]:
|
||||
"""Load XGBoost score prediction model (non-fatal)."""
|
||||
if hasattr(self, "_score_model_cache"):
|
||||
return self._score_model_cache
|
||||
score_model_path = os.path.join(
|
||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
|
||||
"models", "xgb_score.pkl",
|
||||
)
|
||||
try:
|
||||
if os.path.exists(score_model_path):
|
||||
with open(score_model_path, "rb") as f:
|
||||
model_data = pickle.load(f)
|
||||
if all(k in model_data for k in ("home_model", "away_model", "ht_home_model", "ht_away_model", "features")):
|
||||
self._score_model_cache = model_data
|
||||
print(f"[SCORE] ✅ Score model loaded ({len(model_data['features'])} features)")
|
||||
return self._score_model_cache
|
||||
except Exception as e:
|
||||
print(f"[SCORE] ⚠ Load failed (non-fatal, using heuristic): {e}")
|
||||
self._score_model_cache = None
|
||||
return None
|
||||
|
||||
def _predict_score_with_model(self, features: Dict[str, float]) -> Optional[Dict[str, float]]:
|
||||
"""Predict FT/HT scores using XGBoost score model."""
|
||||
score_model = self._get_score_model()
|
||||
if score_model is None:
|
||||
return None
|
||||
try:
|
||||
import pandas as _pd
|
||||
model_features = score_model["features"]
|
||||
row = {f: float(features.get(f, 0)) for f in model_features}
|
||||
df = _pd.DataFrame([row])
|
||||
ft_home = max(0.0, float(score_model["home_model"].predict(df)[0]))
|
||||
ft_away = max(0.0, float(score_model["away_model"].predict(df)[0]))
|
||||
ht_home = max(0.0, float(score_model["ht_home_model"].predict(df)[0]))
|
||||
ht_away = max(0.0, float(score_model["ht_away_model"].predict(df)[0]))
|
||||
return {
|
||||
"ft_home": round(ft_home, 2),
|
||||
"ft_away": round(ft_away, 2),
|
||||
"ht_home": round(ht_home, 2),
|
||||
"ht_away": round(ht_away, 2),
|
||||
}
|
||||
except Exception as e:
|
||||
print(f"[SCORE] ⚠ Prediction error (fallback to heuristic): {e}")
|
||||
return None
|
||||
|
||||
# Maps the lowercase V25 model keys to the UPPERCASE market keys used in the
# signal dict. _get_v25_signal looks model keys up here and falls back to
# model_key.upper() for anything not listed.
_V25_KEY_MAP = {
|
||||
"ms": "MS",
|
||||
"ou15": "OU15",
|
||||
"ou25": "OU25",
|
||||
"ou35": "OU35",
|
||||
"btts": "BTTS",
|
||||
"ht_result": "HT",
|
||||
"ht_ou05": "HT_OU05",
|
||||
"ht_ou15": "HT_OU15",
|
||||
"htft": "HTFT",
|
||||
"cards_ou45": "CARDS",
|
||||
"handicap_ms": "HCAP",
|
||||
"odd_even": "OE",
|
||||
}
|
||||
|
||||
def _get_v25_signal(
    self,
    data: MatchData,
    features: Optional[Dict[str, float]] = None,
) -> Dict[str, Any]:
    """
    Get V25 ensemble predictions for all available markets.

    Returns a dict keyed by UPPERCASE market name (MS, OU25, BTTS, etc.),
    each with a 'probs' sub-dict that _prob_map can consume.

    CRITICAL: Keys MUST be uppercase to match _build_v25_prediction lookups.

    Flow:
      1. If a league-specific model is available for ``data.league_id``, its
         markets are predicted first and stored as-is, tagged
         ``source="league_specific"``.
      2. Every market still missing is filled from the general V25 predictor
         and passed through temperature scaling plus per-outcome calibration.

    Args:
        data: Match context; only ``league_id`` is read directly here.
        features: Pre-built feature row. When falsy, the row is built with
            ``self._build_v25_features(data)``.

    Raises:
        RuntimeError: If not a single market could be predicted.
    """
    v25 = self._get_v25_predictor()
    feature_row = features or self._build_v25_features(data)

    signal: Dict[str, Any] = {}

    # ── League-specific model override ─────────────────────────────────
    league_id = getattr(data, "league_id", None)
    league_model = None
    if league_id:
        try:
            league_model = get_league_model_loader().get(league_id)
        except Exception:
            # Best effort: a broken league model must not block the general path.
            league_model = None

    if league_model:
        # Predict all available markets with league-specific XGBoost
        for mkey, sig_key in FILE_TO_SIGNAL.items():
            probs = league_model.predict_market(mkey, feature_row)
            if probs:
                best_label = max(probs, key=probs.__getitem__)
                signal[sig_key] = {
                    "probs": probs,
                    "raw_probs": probs,
                    "pick": best_label,
                    "probability": float(probs[best_label]),
                    "confidence": round(float(probs[best_label]) * 100.0, 1),
                    "source": "league_specific",
                }
        if signal:
            print(f" [LEAGUE-MODEL] {league_id}: {len(signal)} markets predicted")
        # Markets not covered by the league model fall through to the
        # general V25 prediction below.

    def _temperature_scale(probs_dict: Dict[str, float], temperature: float = 1.5) -> Dict[str, float]:
        """
        Apply temperature scaling to soften overconfident model outputs.

        LightGBM often produces extreme probabilities (e.g., 0.999 / 0.001).
        Temperature scaling takes log-probabilities, divides by T, then
        re-normalizes with a softmax. T=1.0 → no change, T>1 → softer.

        Standard approach for post-hoc model calibration (Guo et al., 2017).

        V34: Reduced from 2.5 to 1.5 — V25 model is already calibrated via
        odds-aware training. Excessive flattening was destroying signal.
        """
        eps = 1e-7  # numerical stability
        n = len(probs_dict)

        # V34: Reduced temperature — odds-aware model is already calibrated
        # Binary markets (2-class) tend to be more overconfident in LGB
        if n <= 2:
            T = max(temperature, 1.5)  # was 2.0
        elif n == 3:
            T = max(temperature * 0.8, 1.2)  # was 1.5 — 3-way slightly less aggressive
        else:
            T = max(temperature * 0.6, 1.0)  # was 1.3 — 9-way (HTFT) already spread

        # Clamp into (0, 1), take logs and apply the temperature
        labels = list(probs_dict.keys())
        log_odds = [
            math.log(max(eps, min(1.0 - eps, float(probs_dict[label])))) / T
            for label in labels
        ]

        # Softmax re-normalization (shifted by the max for stability)
        max_lo = max(log_odds)
        exp_vals = [math.exp(lo - max_lo) for lo in log_odds]
        total = sum(exp_vals)

        return {label: exp_vals[i] / total for i, label in enumerate(labels)}

    calibrator = get_calibrator()
    _temperature = float(get_config().get('model_ensemble.temperature', 1.5))

    # Map "<market_key>_<label>" (lowercased) → calibrator market key
    _CAL_KEY_MAP: Dict[str, str] = {
        "ms_1": "ms_home", "ms_x": "ms_draw", "ms_2": "ms_away",
        "ou15_over": "ou15", "ou15_under": "ou15",
        "ou25_over": "ou25", "ou25_under": "ou25",
        "ou35_over": "ou35", "ou35_under": "ou35",
        "btts_yes": "btts", "btts_no": "btts",
        "ht_1": "ht_home", "ht_x": "ht_draw", "ht_2": "ht_away",
        # The half-time market is enriched under its model key "ht_result",
        # so the generated keys are "ht_result_<label>"; without these aliases
        # the ht_* calibrators were never reached.
        "ht_result_1": "ht_home", "ht_result_x": "ht_draw", "ht_result_2": "ht_away",
    }

    def _enrich_signal_entry(probs_dict: Dict[str, float], market_key: str = "") -> Dict[str, Any]:
        """Temperature scaling + Isotonic calibration pipeline."""
        scaled_probs = _temperature_scale(probs_dict, temperature=_temperature)

        # Isotonic calibration per outcome (if trained models exist)
        if market_key:
            calibrated = {}
            for label, prob in scaled_probs.items():
                raw_key = f"{market_key}_{label}".lower().replace(" ", "_")
                cal_key = _CAL_KEY_MAP.get(raw_key, raw_key)
                calibrated[label] = calibrator.calibrate(cal_key, prob)
            total = sum(calibrated.values())
            # Adopt the calibrated values only when they can be renormalised;
            # otherwise keep the temperature-scaled distribution.
            if total > 0:
                scaled_probs = {k: v / total for k, v in calibrated.items()}

        best_label = max(scaled_probs, key=scaled_probs.__getitem__)
        best_prob = float(scaled_probs[best_label])
        return {
            "probs": scaled_probs,
            "raw_probs": probs_dict,
            "pick": best_label,
            "probability": best_prob,
            "confidence": round(best_prob * 100.0, 1),
        }

    # Core markets using dedicated methods (skip if league model already covered them)
    if "MS" not in signal:
        h, d, a = v25.predict_ms(feature_row)
        signal["MS"] = _enrich_signal_entry({"1": h, "X": d, "2": a}, "ms")
        print(f" [V25-SIGNAL] MS → H={h:.4f} D={d:.4f} A={a:.4f}")
    else:
        print(f" [LEAGUE-MODEL] MS → {signal['MS']['probs']}")

    if "OU25" not in signal:
        over25, under25 = v25.predict_ou25(feature_row)
        signal["OU25"] = _enrich_signal_entry({"Over": over25, "Under": under25}, "ou25")
        print(f" [V25-SIGNAL] OU25 → O={over25:.4f} U={under25:.4f}")

    if "BTTS" not in signal:
        btts_y, btts_n = v25.predict_btts(feature_row)
        signal["BTTS"] = _enrich_signal_entry({"Yes": btts_y, "No": btts_n}, "btts")
        print(f" [V25-SIGNAL] BTTS → Y={btts_y:.4f} N={btts_n:.4f}")

    # Additional markets via generic predict_market (skip if league model covered them).
    # label_map only supplies the outcome labels (in raw-output order) and the
    # class count; None marks the 9-way HTFT market.
    for model_key, label_map in [
        ("ou15", {"Over": 0, "Under": None}),
        ("ou35", {"Over": 0, "Under": None}),
        ("ht_result", {"1": 0, "X": 1, "2": 2}),
        ("ht_ou05", {"Over": 0, "Under": None}),
        ("ht_ou15", {"Over": 0, "Under": None}),
        ("htft", None),
        ("cards_ou45", {"Over": 0, "Under": None}),
        ("handicap_ms", {"1": 0, "X": 1, "2": 2}),
        ("odd_even", {"Odd": 0, "Even": None}),
    ]:
        out_key = str(self._V25_KEY_MAP.get(model_key, model_key.upper()))
        if out_key in signal:
            continue  # already predicted by league-specific model
        if not v25.has_market(model_key):
            continue
        raw = v25.predict_market(model_key, feature_row)
        if raw is None:
            continue

        if label_map is None:
            # HTFT — 9 combinations; missing trailing classes count as 0.0
            htft_labels = ["1/1", "1/X", "1/2", "X/1", "X/X", "X/2", "2/1", "2/X", "2/2"]
            probs_dict = {
                label: float(raw[i]) if i < len(raw) else 0.0
                for i, label in enumerate(htft_labels)
            }
            signal[out_key] = _enrich_signal_entry(probs_dict, model_key)
        elif len(label_map) == 2:
            # Binary market: raw[0] is the first label's probability
            labels = list(label_map.keys())
            p = float(raw[0]) if len(raw) >= 1 else None
            if p is None:
                print(f" [V25-SIGNAL] {out_key} → EMPTY raw output, skipped")
                continue
            signal[out_key] = _enrich_signal_entry({labels[0]: p, labels[1]: 1.0 - p}, model_key)
        elif len(label_map) == 3:
            # 3-class market: only stored when all three probabilities exist
            labels = list(label_map.keys())
            probs_dict = {}
            for i, label in enumerate(labels):
                if i >= len(raw):
                    print(f" [V25-SIGNAL] {out_key} → insufficient probabilities in raw output")
                    break
                probs_dict[label] = float(raw[i])
            else:
                signal[out_key] = _enrich_signal_entry(probs_dict, model_key)

        if out_key in signal:
            print(f" [V25-SIGNAL] {out_key} → {signal[out_key]['probs']}")

    print(f" [V25-SIGNAL] Total markets with real predictions: {len(signal)}")
    if not signal:
        raise RuntimeError("V25 model produced ZERO market predictions — cannot continue")

    return signal
|
||||
|
||||
@staticmethod
|
||||
def _prob_map(signal: Optional[Dict[str, Any]], market: str, defaults: Dict[str, float]) -> Dict[str, float]:
|
||||
"""Extract normalised probabilities from signal.
|
||||
|
||||
If the signal contains real model output for this market, use it.
|
||||
If the market is missing from the signal, log a warning and return
|
||||
the defaults as a LAST RESORT (so the pipeline doesn't crash).
|
||||
The defaults are ONLY used for non-core / secondary markets that
|
||||
may not have a trained model yet (e.g. CARDS, HCAP, OE).
|
||||
"""
|
||||
market_payload = signal.get(market, {}) if isinstance(signal, dict) else {}
|
||||
probs = market_payload.get("probs", {}) if isinstance(market_payload, dict) else {}
|
||||
if not isinstance(probs, dict) or not probs:
|
||||
print(f" ⚠️ [PROB_MAP] Market '{market}' NOT found in V25 signal — model output missing")
|
||||
return dict(defaults)
|
||||
out = {key: float(probs.get(key, value)) for key, value in defaults.items()}
|
||||
total = sum(out.values())
|
||||
if total <= 0:
|
||||
print(f" ⚠️ [PROB_MAP] Market '{market}' has zero total probability")
|
||||
return dict(defaults)
|
||||
return {key: value / total for key, value in out.items()}
|
||||
|
||||
@staticmethod
|
||||
def _is_cup_game(league_name: str) -> bool:
|
||||
"""Detect cup/knockout competitions where home advantage is significantly weaker."""
|
||||
name = (league_name or "").lower()
|
||||
cup_keywords = (
|
||||
"kupa", "cup", "coupe", "copa", "coppa", "pokal",
|
||||
"trophy", "shield", "challenge",
|
||||
"ziraat", "süper kupa", "super cup",
|
||||
)
|
||||
return any(kw in name for kw in cup_keywords)
|
||||
|
||||
@staticmethod
|
||||
def _best_prob_pick(prob_map: Dict[str, float]) -> Tuple[str, float]:
|
||||
if not prob_map:
|
||||
return "", 0.0
|
||||
pick = max(prob_map, key=prob_map.__getitem__)
|
||||
return pick, float(prob_map[pick])
|
||||
|
||||
@staticmethod
|
||||
def _poisson_score_top5(home_xg: float, away_xg: float, max_goals: int = 5) -> List[Dict[str, Any]]:
|
||||
def poisson_p(lmbda: float, k: int) -> float:
|
||||
return math.exp(-lmbda) * (lmbda ** k) / math.factorial(k)
|
||||
|
||||
scores: List[Tuple[str, float]] = []
|
||||
for home_goals in range(max_goals + 1):
|
||||
for away_goals in range(max_goals + 1):
|
||||
prob = poisson_p(home_xg, home_goals) * poisson_p(away_xg, away_goals)
|
||||
scores.append((f"{home_goals}-{away_goals}", prob))
|
||||
scores.sort(key=lambda item: item[1], reverse=True)
|
||||
return [
|
||||
{"score": score, "prob": round(prob, 4)}
|
||||
for score, prob in scores[:5]
|
||||
]
|
||||
|
||||
def _build_v25_prediction(
|
||||
self,
|
||||
data: MatchData,
|
||||
features: Dict[str, float],
|
||||
v25_signal: Dict[str, Any],
|
||||
) -> FullMatchPrediction:
# Builds a FullMatchPrediction from the V25 signal:
#   1. reads a normalised probability map per market via _prob_map,
#   2. fills per-market probabilities, picks and confidences (x100),
#   3. estimates xG / scorelines (score model first, heuristic otherwise),
#   4. derives risk, upset and surprise fields plus per-engine confidences.
# NOTE(review): this listing has lost its indentation, so the exact nesting of
# the cup-game xG block and the predicted_*_score assignments that follow it,
# relative to the `else:` branch below, cannot be confirmed from here.
|
||||
prediction = FullMatchPrediction(
|
||||
match_id=data.match_id,
|
||||
home_team=data.home_team_name,
|
||||
away_team=data.away_team_name,
|
||||
)
|
||||
|
||||
# Third argument of each _prob_map call is the fallback distribution used
# when the market is missing from v25_signal.
ms_probs = self._prob_map(v25_signal, "MS", {"1": 0.33, "X": 0.34, "2": 0.33})
|
||||
ou15_probs = self._prob_map(v25_signal, "OU15", {"Under": 0.5, "Over": 0.5})
|
||||
ou25_probs = self._prob_map(v25_signal, "OU25", {"Under": 0.5, "Over": 0.5})
|
||||
ou35_probs = self._prob_map(v25_signal, "OU35", {"Under": 0.5, "Over": 0.5})
|
||||
btts_probs = self._prob_map(v25_signal, "BTTS", {"No": 0.5, "Yes": 0.5})
|
||||
ht_probs = self._prob_map(v25_signal, "HT", {"1": 0.33, "X": 0.34, "2": 0.33})
|
||||
ht_ou05_probs = self._prob_map(v25_signal, "HT_OU05", {"Under": 0.5, "Over": 0.5})
|
||||
ht_ou15_probs = self._prob_map(v25_signal, "HT_OU15", {"Under": 0.5, "Over": 0.5})
|
||||
htft_probs = self._prob_map(
|
||||
v25_signal,
|
||||
"HTFT",
|
||||
{"1/1": 1 / 9, "1/X": 1 / 9, "1/2": 1 / 9, "X/1": 1 / 9, "X/X": 1 / 9, "X/2": 1 / 9, "2/1": 1 / 9, "2/X": 1 / 9, "2/2": 1 / 9},
|
||||
)
|
||||
oe_probs = self._prob_map(v25_signal, "OE", {"Even": 0.5, "Odd": 0.5})
|
||||
cards_probs = self._prob_map(v25_signal, "CARDS", {"Under": 0.5, "Over": 0.5})
|
||||
hcap_probs = self._prob_map(v25_signal, "HCAP", {"1": 0.33, "X": 0.34, "2": 0.33})
|
||||
|
||||
# Cup game: dampen home advantage — model trained on league data overestimates home edge
|
||||
is_cup = self._is_cup_game(getattr(data, "league_name", "") or "")
|
||||
if is_cup:
|
||||
# Shift 8% of home probability toward away and draw (rotation, neutral venue effect)
|
||||
cup_transfer = ms_probs["1"] * 0.08
|
||||
ms_probs = {
|
||||
"1": ms_probs["1"] - cup_transfer,
|
||||
"X": ms_probs["X"] + cup_transfer * 0.4,
|
||||
"2": ms_probs["2"] + cup_transfer * 0.6,
|
||||
}
|
||||
total = sum(ms_probs.values())
|
||||
ms_probs = {k: v / total for k, v in ms_probs.items()}
|
||||
|
||||
prediction.ms_home_prob = ms_probs["1"]
|
||||
prediction.ms_draw_prob = ms_probs["X"]
|
||||
prediction.ms_away_prob = ms_probs["2"]
|
||||
prediction.ms_pick, ms_top = self._best_prob_pick(ms_probs)
|
||||
prediction.ms_confidence = ms_top * 100.0
|
||||
|
||||
# Double-chance probabilities are sums of the matching 1X2 outcomes.
prediction.dc_1x_prob = prediction.ms_home_prob + prediction.ms_draw_prob
|
||||
prediction.dc_x2_prob = prediction.ms_draw_prob + prediction.ms_away_prob
|
||||
prediction.dc_12_prob = prediction.ms_home_prob + prediction.ms_away_prob
|
||||
dc_probs = {"1X": prediction.dc_1x_prob, "X2": prediction.dc_x2_prob, "12": prediction.dc_12_prob}
|
||||
prediction.dc_pick, dc_top = self._best_prob_pick(dc_probs)
|
||||
prediction.dc_confidence = dc_top * 100.0
|
||||
|
||||
# Binary markets below: the "Over"/"Yes"/"Odd" side wins ties (>=).
prediction.over_15_prob = ou15_probs["Over"]
|
||||
prediction.under_15_prob = ou15_probs["Under"]
|
||||
prediction.ou15_pick = "1.5 Üst" if prediction.over_15_prob >= prediction.under_15_prob else "1.5 Alt"
|
||||
prediction.ou15_confidence = max(prediction.over_15_prob, prediction.under_15_prob) * 100.0
|
||||
|
||||
prediction.over_25_prob = ou25_probs["Over"]
|
||||
prediction.under_25_prob = ou25_probs["Under"]
|
||||
prediction.ou25_pick = "2.5 Üst" if prediction.over_25_prob >= prediction.under_25_prob else "2.5 Alt"
|
||||
prediction.ou25_confidence = max(prediction.over_25_prob, prediction.under_25_prob) * 100.0
|
||||
|
||||
prediction.over_35_prob = ou35_probs["Over"]
|
||||
prediction.under_35_prob = ou35_probs["Under"]
|
||||
prediction.ou35_pick = "3.5 Üst" if prediction.over_35_prob >= prediction.under_35_prob else "3.5 Alt"
|
||||
prediction.ou35_confidence = max(prediction.over_35_prob, prediction.under_35_prob) * 100.0
|
||||
|
||||
prediction.btts_yes_prob = btts_probs["Yes"]
|
||||
prediction.btts_no_prob = btts_probs["No"]
|
||||
prediction.btts_pick = "KG Var" if prediction.btts_yes_prob >= prediction.btts_no_prob else "KG Yok"
|
||||
prediction.btts_confidence = max(prediction.btts_yes_prob, prediction.btts_no_prob) * 100.0
|
||||
|
||||
prediction.ht_home_prob = ht_probs["1"]
|
||||
prediction.ht_draw_prob = ht_probs["X"]
|
||||
prediction.ht_away_prob = ht_probs["2"]
|
||||
prediction.ht_pick, ht_top = self._best_prob_pick(ht_probs)
|
||||
prediction.ht_confidence = ht_top * 100.0
|
||||
|
||||
prediction.ht_over_05_prob = ht_ou05_probs["Over"]
|
||||
prediction.ht_under_05_prob = ht_ou05_probs["Under"]
|
||||
prediction.ht_ou_pick = "İY 0.5 Üst" if prediction.ht_over_05_prob >= prediction.ht_under_05_prob else "İY 0.5 Alt"
|
||||
|
||||
prediction.ht_over_15_prob = ht_ou15_probs["Over"]
|
||||
prediction.ht_under_15_prob = ht_ou15_probs["Under"]
|
||||
prediction.ht_ou15_pick = "İY 1.5 Üst" if prediction.ht_over_15_prob >= prediction.ht_under_15_prob else "İY 1.5 Alt"
|
||||
|
||||
prediction.ht_ft_probs = htft_probs
|
||||
|
||||
prediction.odd_prob = oe_probs["Odd"]
|
||||
prediction.even_prob = oe_probs["Even"]
|
||||
prediction.odd_even_pick = "Tek" if prediction.odd_prob >= prediction.even_prob else "Çift"
|
||||
|
||||
prediction.cards_over_prob = cards_probs["Over"]
|
||||
prediction.cards_under_prob = cards_probs["Under"]
|
||||
prediction.card_pick = "4.5 Üst" if prediction.cards_over_prob >= prediction.cards_under_prob else "4.5 Alt"
|
||||
prediction.cards_confidence = max(prediction.cards_over_prob, prediction.cards_under_prob) * 100.0
|
||||
|
||||
prediction.handicap_home_prob = hcap_probs["1"]
|
||||
prediction.handicap_draw_prob = hcap_probs["X"]
|
||||
prediction.handicap_away_prob = hcap_probs["2"]
|
||||
prediction.handicap_pick, hcap_top = self._best_prob_pick(hcap_probs)
|
||||
prediction.handicap_confidence = hcap_top * 100.0
|
||||
|
||||
# ── Score Prediction: Model-first, heuristic fallback ──────────
|
||||
# ms_edge: home-win probability minus away-win probability (signed).
ms_edge = prediction.ms_home_prob - prediction.ms_away_prob
|
||||
score_result = self._predict_score_with_model(features)
|
||||
if score_result is not None:
|
||||
# ML model predicted scores
|
||||
prediction.home_xg = score_result["ft_home"]
|
||||
prediction.away_xg = score_result["ft_away"]
|
||||
prediction.total_xg = round(prediction.home_xg + prediction.away_xg, 2)
|
||||
ht_home_xg = score_result["ht_home"]
|
||||
ht_away_xg = score_result["ht_away"]
|
||||
prediction.predicted_ft_score = f"{int(round(prediction.home_xg))}-{int(round(prediction.away_xg))}"
|
||||
prediction.predicted_ht_score = f"{int(round(ht_home_xg))}-{int(round(ht_away_xg))}"
|
||||
else:
|
||||
# Heuristic fallback (original formula)
|
||||
base_home_xg = max(0.25, (float(data.home_goals_avg or 1.3) + float(features.get("away_xga", data.away_conceded_avg) or 1.2)) / 2.0)
|
||||
base_away_xg = max(0.25, (float(data.away_goals_avg or 1.3) + float(features.get("home_xga", data.home_conceded_avg) or 1.2)) / 2.0)
|
||||
# ms_edge already computed above
|
||||
# total_target: blended goal expectation, clamped to [1.4, 4.8].
total_target = max(
|
||||
1.4,
|
||||
min(
|
||||
4.8,
|
||||
(float(features.get("league_avg_goals", 2.7)) * 0.55)
|
||||
+ ((float(data.home_goals_avg or 1.3) + float(data.away_goals_avg or 1.3)) * 0.45)
|
||||
+ ((prediction.over_25_prob - prediction.under_25_prob) * 1.15),
|
||||
),
|
||||
)
|
||||
home_xg = max(0.2, base_home_xg + (ms_edge * 0.55) + ((prediction.btts_yes_prob - 0.5) * 0.18))
|
||||
away_xg = max(0.2, base_away_xg - (ms_edge * 0.55) + ((prediction.btts_yes_prob - 0.5) * 0.18))
|
||||
# Rescale both sides so their sum equals total_target.
scale = total_target / max(home_xg + away_xg, 0.1)
|
||||
prediction.home_xg = round(home_xg * scale, 2)
|
||||
prediction.away_xg = round(away_xg * scale, 2)
|
||||
prediction.total_xg = round(prediction.home_xg + prediction.away_xg, 2)
|
||||
|
||||
# Cup game: reduce xG by 20% — rotation + lower motivation + defensive tactics
|
||||
if is_cup:
|
||||
prediction.home_xg = round(prediction.home_xg * 0.80, 2)
|
||||
prediction.away_xg = round(prediction.away_xg * 0.80, 2)
|
||||
prediction.total_xg = round(prediction.home_xg + prediction.away_xg, 2)
|
||||
prediction.predicted_ft_score = f"{int(round(prediction.home_xg))}-{int(round(prediction.away_xg))}"
|
||||
prediction.predicted_ht_score = f"{int(round(prediction.home_xg * 0.45))}-{int(round(prediction.away_xg * 0.45))}"
|
||||
prediction.ft_scores_top5 = self._poisson_score_top5(prediction.home_xg, prediction.away_xg)
|
||||
|
||||
# Score prediction: find the most likely scoreline consistent with the MS pick
|
||||
# Instead of just rounding xG (misleading), filter Poisson top scores by result direction
|
||||
ms_pick = prediction.ms_pick # "1", "X", or "2"
|
||||
top5 = prediction.ft_scores_top5
|
||||
if top5 and ms_pick in ("1", "X", "2"):
|
||||
def _result_of(score_str: str) -> str:
|
||||
try:
|
||||
h, a = map(int, score_str.split("-"))
|
||||
if h > a: return "1"
|
||||
if h < a: return "2"
|
||||
return "X"
|
||||
except Exception:
|
||||
return "?"
|
||||
|
||||
# Filter to scorelines matching the predicted result
|
||||
matching = [s for s in top5 if _result_of(s["score"]) == ms_pick]
|
||||
if matching:
|
||||
best = matching[0] # already sorted by probability desc
|
||||
h_str, a_str = best["score"].split("-")
|
||||
prediction.predicted_ft_score = best["score"]
|
||||
# Recalculate HT score proportionally from the FT pick
|
||||
h_val, a_val = int(h_str), int(a_str)
|
||||
prediction.predicted_ht_score = f"{int(round(h_val * 0.45))}-{int(round(a_val * 0.45))}"
|
||||
|
||||
# Risk score = 100 - best market confidence + penalties, clamped to [10, 100].
max_market_conf = max(
|
||||
prediction.ms_confidence,
|
||||
prediction.ou15_confidence,
|
||||
prediction.ou25_confidence,
|
||||
prediction.ou35_confidence,
|
||||
prediction.btts_confidence,
|
||||
prediction.ht_confidence,
|
||||
prediction.cards_confidence,
|
||||
prediction.handicap_confidence,
|
||||
)
|
||||
lineup_conf = max(0.0, min(1.0, float(getattr(data, "lineup_confidence", 0.0) or 0.0)))
|
||||
# Lineup penalty: 12 with no lineup; 1.5..8 for a probable XI (shrinks as lineup_conf rises); else 0.
lineup_penalty = 12.0 if data.lineup_source == "none" else max(1.5, (1.0 - lineup_conf) * 8.0) if data.lineup_source == "probable_xi" else 0.0
|
||||
referee_penalty = 6.0 if not data.referee_name else 0.0
|
||||
parity_penalty = 8.0 if abs(ms_edge) < 0.08 else 0.0
|
||||
# Cup game penalty: model trained on league data has lower reliability for cup matches
|
||||
cup_penalty = 10.0 if is_cup else 0.0
|
||||
# Bookmaker margin penalty: high margin signals that even the market is uncertain
|
||||
bm_margin = 0.0
|
||||
odds_data = getattr(data, "odds_data", {}) or {}
|
||||
_h, _d, _a = float(odds_data.get("ms_h") or 0), float(odds_data.get("ms_d") or 0), float(odds_data.get("ms_a") or 0)
|
||||
if _h > 1.01 and _d > 1.01 and _a > 1.01:
|
||||
# Margin = sum of implied probabilities minus 1 (the overround).
bm_margin = (1 / _h + 1 / _d + 1 / _a) - 1
|
||||
bookmaker_penalty = 12.0 if bm_margin > 0.20 else 6.0 if bm_margin > 0.15 else 0.0
|
||||
prediction.risk_score = round(min(100.0, max(10.0, 100.0 - max_market_conf + lineup_penalty + referee_penalty + parity_penalty + cup_penalty + bookmaker_penalty)), 1)
|
||||
# Risk bands: >= 78 EXTREME, >= 62 HIGH, >= 40 MEDIUM, otherwise LOW.
if prediction.risk_score >= 78:
|
||||
prediction.risk_level = "EXTREME"
|
||||
elif prediction.risk_score >= 62:
|
||||
prediction.risk_level = "HIGH"
|
||||
elif prediction.risk_score >= 40:
|
||||
prediction.risk_level = "MEDIUM"
|
||||
else:
|
||||
prediction.risk_level = "LOW"
|
||||
prediction.is_surprise_risk = prediction.risk_level in {"HIGH", "EXTREME"} or prediction.ms_draw_prob >= 0.30
|
||||
prediction.surprise_type = "balanced_match_risk" if abs(ms_edge) < 0.08 else "draw_pressure" if prediction.ms_draw_prob >= 0.30 else ""
|
||||
prediction.risk_warnings = []
|
||||
if is_cup:
|
||||
prediction.risk_warnings.append("cup_game_home_advantage_reduced")
|
||||
if bookmaker_penalty > 0:
|
||||
prediction.risk_warnings.append(f"bookmaker_margin_high_{bm_margin*100:.0f}pct")
|
||||
if data.lineup_source == "probable_xi":
|
||||
prediction.risk_warnings.append("lineup_probable_not_confirmed")
|
||||
if lineup_conf < 0.65:
|
||||
prediction.risk_warnings.append("lineup_projection_low_confidence")
|
||||
if data.lineup_source == "none":
|
||||
prediction.risk_warnings.append("lineup_unavailable")
|
||||
if not data.referee_name:
|
||||
prediction.risk_warnings.append("missing_referee")
|
||||
if prediction.ms_draw_prob >= 0.30:
|
||||
prediction.risk_warnings.append("draw_probability_elevated")
|
||||
|
||||
# Upset score: (draw probability + the smaller of home/away probability) as a 0-100 integer.
prediction.upset_score = int(round(max(0.0, min(100.0, (prediction.ms_draw_prob + min(prediction.ms_home_prob, prediction.ms_away_prob)) * 100.0))))
|
||||
prediction.upset_level = "HIGH" if prediction.upset_score >= 65 else "MEDIUM" if prediction.upset_score >= 45 else "LOW"
|
||||
prediction.upset_reasons = [prediction.surprise_type] if prediction.surprise_type else []
|
||||
surprise = self._build_surprise_profile(data, prediction)
|
||||
prediction.surprise_score = surprise["score"]
|
||||
prediction.surprise_comment = surprise["comment"]
|
||||
prediction.surprise_reasons = surprise["reasons"]
|
||||
prediction.surprise_breakdown = surprise.get("breakdown", [])
|
||||
# Auto-flag is_surprise_risk when score crosses 45 even if other paths didn't fire
|
||||
if surprise["score"] >= 45.0:
|
||||
prediction.is_surprise_risk = True
|
||||
|
||||
# Per-engine confidence heuristics, each clamped to a fixed range and rounded to 1 decimal.
prediction.team_confidence = round(max(35.0, min(95.0, 45.0 + (abs(ms_edge) * 85.0) + (abs(float(features.get("form_elo_diff", 0.0))) / 40.0))), 1)
|
||||
prediction.player_confidence = round(max(20.0, min(95.0, 38.0 + (float(features.get("home_key_players", 0.0)) + float(features.get("away_key_players", 0.0))) * 2.0 - (float(features.get("home_missing_impact", 0.0)) + float(features.get("away_missing_impact", 0.0))) * 22.0)), 1)
|
||||
prediction.odds_confidence = round(max(30.0, min(95.0, float(np.mean([prediction.ms_confidence, prediction.ou25_confidence, prediction.btts_confidence])))), 1)
|
||||
prediction.referee_confidence = 62.0 if data.referee_name else 35.0
|
||||
|
||||
# Cards use one of two fixed totals depending on the cards pick; corners shift around 8.8 with the over-2.5 lean.
prediction.total_cards_pred = 4.8 if prediction.cards_over_prob >= prediction.cards_under_prob else 4.1
|
||||
prediction.total_corners_pred = round(8.8 + (prediction.over_25_prob - 0.5) * 2.5, 1)
|
||||
prediction.corner_pick = "9.5 Üst" if prediction.total_corners_pred >= 9.5 else "9.5 Alt"
|
||||
prediction.analysis_details = {
|
||||
"primary_model": "v25",
|
||||
"features_source": "v25.pre_match",
|
||||
"market_count": len([key for key in v25_signal.keys() if key != "value_bets"]),
|
||||
"lineup_source": data.lineup_source,
|
||||
}
|
||||
return prediction
|
||||
|
||||
def _build_engine_breakdown(self, prediction: FullMatchPrediction) -> Dict[str, Any]:
|
||||
"""
|
||||
Engine breakdown with backward-compatible flat scores + rich detail siblings.
|
||||
|
||||
Shape:
|
||||
{
|
||||
team: 74.1, player: 55.7, odds: 55.2, referee: 62.0, # legacy flat scores
|
||||
detail: { team: {score, label, ...}, player: {...}, ... }
|
||||
}
|
||||
"""
|
||||
components = {
|
||||
"team": ("Takım modeli", float(prediction.team_confidence)),
|
||||
"player": ("Oyuncu / kadro modeli", float(prediction.player_confidence)),
|
||||
"odds": ("Oran piyasası", float(prediction.odds_confidence)),
|
||||
"referee": ("Hakem etkisi", float(prediction.referee_confidence)),
|
||||
}
|
||||
flat: Dict[str, Any] = {}
|
||||
detail: Dict[str, Any] = {}
|
||||
for key, (display, raw) in components.items():
|
||||
score = round(raw, 1)
|
||||
label, interpretation = self._confidence_label(score)
|
||||
flat[key] = score
|
||||
detail[key] = {
|
||||
"score": score,
|
||||
"label": label,
|
||||
"display_name": display,
|
||||
"interpretation": interpretation,
|
||||
}
|
||||
flat["detail"] = detail
|
||||
return flat
|
||||
@@ -0,0 +1,469 @@
|
||||
"""Reversal Mixin — HT/FT reversal watchlist and cycle metrics.
|
||||
|
||||
Auto-extracted mixin module — split from services/single_match_orchestrator.py.
|
||||
All methods here are composed into SingleMatchOrchestrator via inheritance.
|
||||
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
|
||||
initialised in the main __init__.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, overload
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from schemas.prediction import FullMatchPrediction
|
||||
from schemas.match_data import MatchData
|
||||
from models.v25_ensemble import V25Predictor, get_v25_predictor
|
||||
try:
|
||||
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
|
||||
except ImportError:
|
||||
class V27Predictor: # type: ignore[no-redef]
|
||||
def __init__(self): self.models = {}
|
||||
def load_models(self): return False
|
||||
def predict_all(self, features): return {}
|
||||
def compute_divergence(*args, **kwargs):
|
||||
return {}
|
||||
def compute_value_edge(*args, **kwargs):
|
||||
return {}
|
||||
from features.odds_band_analyzer import OddsBandAnalyzer
|
||||
try:
|
||||
from models.basketball_v25 import (
|
||||
BasketballMatchPrediction,
|
||||
get_basketball_v25_predictor,
|
||||
)
|
||||
except ImportError:
|
||||
BasketballMatchPrediction = Any # type: ignore[misc]
|
||||
def get_basketball_v25_predictor() -> Any:
|
||||
raise ImportError("Basketball predictor is not available")
|
||||
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
|
||||
from services.feature_enrichment import FeatureEnrichmentService
|
||||
from services.betting_brain import BettingBrain
|
||||
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
|
||||
from services.match_commentary import generate_match_commentary
|
||||
from utils.top_leagues import load_top_league_ids
|
||||
from utils.league_reliability import load_league_reliability
|
||||
from config.config_loader import build_threshold_dict, get_threshold_default
|
||||
from models.calibration import get_calibrator
|
||||
|
||||
|
||||
class ReversalMixin:
    """HT/FT reversal watchlist and per-team reversal cycle metrics.

    The methods rely on members supplied by the composing class:
    ``self.dsn``, ``self._is_top_league``, ``self._load_match_data``,
    ``self.analyze_match`` and ``self._to_float``.
    """

    def get_reversal_watchlist(
        self,
        count: int = 20,
        horizon_hours: int = 72,
        min_score: float = 45.0,
        top_leagues_only: bool = False,
    ) -> Dict[str, Any]:
        """Rank upcoming football matches by half-time/full-time reversal risk.

        Args:
            count: Maximum number of watchlist entries; clamped to 1..100.
            horizon_hours: Look-ahead window in hours; clamped to 6..168.
            min_score: Minimum ``watch_score`` for an entry to be listed;
                clamped to 0..100.
            top_leagues_only: When true, keep only candidates for which
                ``self._is_top_league(league_id)`` is truthy.

        Returns:
            A dict with run metadata (``engine``, ``generated_at``, the clamped
            inputs, ``scanned_matches``, ``candidate_matches``,
            ``listed_matches``), the filtered ``watchlist`` and a
            ``top_candidates_preview`` holding the five best-scored items
            regardless of ``min_score``.
        """
        # Local import: this method is the only user of these names here.
        from datetime import datetime, timezone

        safe_count = max(1, min(100, int(count)))
        safe_horizon = max(6, min(168, int(horizon_hours)))
        safe_min_score = max(0.0, min(100.0, float(min_score)))
        now_ms = int(time.time() * 1000)
        horizon_ms = now_ms + (safe_horizon * 60 * 60 * 1000)

        # A psycopg2 connection used as a context manager only commits or
        # rolls back; it is not closed. Close it explicitly so it is not held
        # open while the (slow) per-match analysis below runs.
        conn = psycopg2.connect(self.dsn)
        try:
            with conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
                cur.execute(
                    """
                    SELECT
                        lm.id,
                        lm.home_team_id,
                        lm.away_team_id,
                        lm.league_id,
                        lm.mst_utc
                    FROM live_matches lm
                    WHERE lm.sport = 'football'
                      AND lm.mst_utc >= %s
                      AND lm.mst_utc <= %s
                    ORDER BY lm.mst_utc ASC
                    LIMIT 200
                    """,
                    (now_ms, horizon_ms),
                )
                raw_candidates = cur.fetchall()

                candidates = [
                    row
                    for row in raw_candidates
                    if row.get("home_team_id") and row.get("away_team_id")
                ]
                if top_leagues_only:
                    candidates = [
                        row for row in candidates if self._is_top_league(row.get("league_id"))
                    ]

                team_ids: Set[str] = set()
                pair_keys: Set[Tuple[str, str]] = set()
                for row in candidates:
                    home_id = str(row["home_team_id"])
                    away_id = str(row["away_team_id"])
                    team_ids.update((home_id, away_id))
                    h, a = sorted((home_id, away_id))
                    pair_keys.add((h, a))

                team_cycle = self._fetch_team_reversal_cycle_metrics(cur, team_ids, now_ms)
                h2h_ctx = self._fetch_h2h_reversal_context(cur, pair_keys, now_ms)
        finally:
            conn.close()

        watch_items_all: List[Dict[str, Any]] = []
        scanned = 0
        for row in candidates:
            match_id = str(row["id"])
            data = self._load_match_data(match_id)
            if data is None:
                continue

            package = self.analyze_match(match_id)
            if not package:
                continue

            scanned += 1
            # Tolerate explicit None values anywhere along the HTFT path.
            market_board = package.get("market_board") or {}
            htft_probs = (market_board.get("HTFT") or {}).get("probs") or {}
            prob_12 = float(htft_probs.get("1/2") or 0.0)
            prob_21 = float(htft_probs.get("2/1") or 0.0)
            if prob_12 <= 0.0 and prob_21 <= 0.0:
                continue

            overall_htft_pick = None
            overall_htft_prob = 0.0
            if htft_probs:
                overall_htft_pick, overall_htft_prob = max(
                    htft_probs.items(),
                    key=lambda item: float(item[1] or 0.0),
                )

            reversal_sum = prob_12 + prob_21
            reversal_max = max(prob_12, prob_21)
            top_pick = "2/1" if prob_21 >= prob_12 else "1/2"
            top_prob = prob_21 if top_pick == "2/1" else prob_12

            ms_h = self._to_float(data.odds_data.get("ms_h"), 0.0)
            ms_a = self._to_float(data.odds_data.get("ms_a"), 0.0)
            both_priced = ms_h > 1.0 and ms_a > 1.0
            gap = abs(ms_h - ms_a) if both_priced else 0.0
            favorite_odd = min(ms_h, ms_a) if both_priced else 0.0

            # Reversal events are rare (~5% baseline), so convert raw probs to a more useful
            # watchlist scale where p in [0.02, 0.08] becomes meaningfully separable.
            base_score = (reversal_max * 100.0 * 8.0) + (reversal_sum * 100.0 * 4.0)

            # Closer home/away prices earn a larger bonus (up to 7 points);
            # identical prices (gap == 0) get a flat 2 points.
            balance_bonus = 0.0
            if gap > 0.0:
                balance_bonus = max(0.0, (1.0 - min(gap, 1.2) / 1.2) * 7.0)
            elif both_priced:
                balance_bonus = 2.0

            favorite_bonus = 0.0
            if favorite_odd > 0.0 and favorite_odd <= 1.70 and reversal_max >= 0.02:
                favorite_bonus = min(8.0, (1.70 - favorite_odd) * 12.0)

            # Both lookup tables are keyed by string ids, so normalise the
            # ids read from the match data before using them as keys.
            home_key = str(data.home_team_id)
            away_key = str(data.away_team_id)
            home_metrics = team_cycle.get(home_key, {})
            away_metrics = team_cycle.get(away_key, {})
            cycle_pressure = max(
                float(home_metrics.get("cycle_pressure", 0.0)),
                float(away_metrics.get("cycle_pressure", 0.0)),
            )
            cycle_bonus = cycle_pressure * 10.0

            h, a = sorted((home_key, away_key))
            pair_ctx = h2h_ctx.get((h, a), {})
            blowout_bonus = 0.0
            last_diff = int(pair_ctx.get("goal_diff", 0))
            if abs(last_diff) >= 3:
                blowout_bonus = 6.0
            if abs(last_diff) >= 5:
                blowout_bonus += 3.0

            ou25_o = self._to_float(data.odds_data.get("ou25_o"), 0.0)
            tempo_bonus = 0.0
            if ou25_o > 1.0 and ou25_o <= 1.72:
                tempo_bonus = 2.5

            watch_score = max(
                0.0,
                min(
                    100.0,
                    base_score + balance_bonus + favorite_bonus + cycle_bonus + blowout_bonus + tempo_bonus,
                ),
            )

            reason_codes: List[str] = []
            if top_prob >= 0.045:
                reason_codes.append("reversal_prob_hot")
            elif top_prob >= 0.030:
                reason_codes.append("reversal_prob_warm")
            if gap > 0.0 and gap <= 0.80:
                reason_codes.append("balanced_matchup")
            if favorite_bonus > 0.0:
                reason_codes.append("strong_favorite_reversal_window")
            if cycle_pressure >= 0.55:
                reason_codes.append("team_reversal_cycle_pressure")
            if blowout_bonus > 0.0:
                reason_codes.append("h2h_blowout_rematch")
            if tempo_bonus > 0.0:
                reason_codes.append("high_tempo_profile")
            if not reason_codes:
                reason_codes.append("model_signal_only")

            watch_items_all.append(
                {
                    "match_id": data.match_id,
                    "match_name": f"{data.home_team_name} vs {data.away_team_name}",
                    "match_date_ms": data.match_date_ms,
                    "league_id": data.league_id,
                    "league": data.league_name,
                    "risk_band": self._watchlist_risk_band(watch_score),
                    "watch_score": round(watch_score, 2),
                    "top_pick": top_pick,
                    "top_pick_prob": round(top_prob, 4),
                    "top_pick_scope": "reversal_only",
                    "overall_htft_pick": overall_htft_pick,
                    "overall_htft_pick_prob": round(float(overall_htft_prob or 0.0), 4),
                    "reversal_probs": {
                        "1/2": round(prob_12, 4),
                        "2/1": round(prob_21, 4),
                    },
                    "odds_snapshot": {
                        "ms_h": round(ms_h, 2) if ms_h > 0 else None,
                        "ms_a": round(ms_a, 2) if ms_a > 0 else None,
                        "ms_gap": round(gap, 3),
                        "favorite_odd": round(favorite_odd, 2) if favorite_odd > 0 else None,
                    },
                    "pattern_signals": {
                        "home_cycle_pressure": round(float(home_metrics.get("cycle_pressure", 0.0)), 3),
                        "away_cycle_pressure": round(float(away_metrics.get("cycle_pressure", 0.0)), 3),
                        "home_matches_since_last_reversal": int(home_metrics.get("matches_since_last_reversal", 99)),
                        "away_matches_since_last_reversal": int(away_metrics.get("matches_since_last_reversal", 99)),
                        "h2h_last_goal_diff": last_diff if pair_ctx else None,
                        "h2h_last_result": pair_ctx.get("result"),
                    },
                    "reason_codes": reason_codes,
                }
            )

        # Best score first; ties broken by the probability of the top pick.
        watch_items_all.sort(
            key=lambda item: (
                float(item.get("watch_score", 0.0)),
                float(item.get("top_pick_prob", 0.0)),
            ),
            reverse=True,
        )

        selected = [
            item for item in watch_items_all if float(item.get("watch_score", 0.0)) >= safe_min_score
        ][:safe_count]
        preview = watch_items_all[:5]

        # Same naive-UTC ISO string plus "Z" as before, built from an aware
        # "now" instead of the deprecated datetime.utcnow().
        generated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
        return {
            "engine": "v28.main",
            "generated_at": generated_at,
            "horizon_hours": safe_horizon,
            "min_score": round(safe_min_score, 2),
            "top_leagues_only": bool(top_leagues_only),
            "scanned_matches": scanned,
            "candidate_matches": len(candidates),
            "listed_matches": len(selected),
            "watchlist": selected,
            "top_candidates_preview": preview,
        }

    def _fetch_team_reversal_cycle_metrics(
        self,
        cur: "RealDictCursor",
        team_ids: Set[str],
        now_ms: int,
    ) -> Dict[str, Dict[str, float]]:
        """Compute reversal rhythm metrics from each team's finished matches.

        For every team the query returns up to its 80 most recent ``FT``
        matches (before ``now_ms``) with two codes from the team's point of
        view: ``ht_state`` is ``'L'`` (leading), ``'T'`` (trailing) or ``'D'``
        (level) at half time; ``ft_state`` is ``'W'``, ``'L'`` or ``'D'`` at
        full time. A *reversal* is leading at half time and losing, or
        trailing at half time and winning.

        Args:
            cur: Open cursor whose rows support ``row["col"]`` and ``row.get``.
            team_ids: Team ids to evaluate.
            now_ms: Upper bound (exclusive) for ``mst_utc``.

        Returns:
            ``{str(team_id): {recent_reversal_rate, matches_since_last_reversal,
            avg_gap_matches, cycle_pressure}}``. Teams without rows get the
            defaults ``0.0 / 99.0 / 12.0 / 0.0``. Empty input returns ``{}``
            without querying.
        """
        if not team_ids:
            return {}

        cur.execute(
            """
            WITH team_matches AS (
                SELECT
                    m.home_team_id AS team_id,
                    m.mst_utc,
                    CASE
                        WHEN m.ht_score_home > m.ht_score_away THEN 'L'
                        WHEN m.ht_score_home < m.ht_score_away THEN 'T'
                        ELSE 'D'
                    END AS ht_state,
                    CASE
                        WHEN m.score_home > m.score_away THEN 'W'
                        WHEN m.score_home < m.score_away THEN 'L'
                        ELSE 'D'
                    END AS ft_state
                FROM matches m
                WHERE m.status = 'FT'
                  AND m.score_home IS NOT NULL
                  AND m.score_away IS NOT NULL
                  AND m.ht_score_home IS NOT NULL
                  AND m.ht_score_away IS NOT NULL
                  AND m.home_team_id = ANY(%s)
                  AND m.mst_utc < %s
                UNION ALL
                SELECT
                    m.away_team_id AS team_id,
                    m.mst_utc,
                    CASE
                        WHEN m.ht_score_away > m.ht_score_home THEN 'L'
                        WHEN m.ht_score_away < m.ht_score_home THEN 'T'
                        ELSE 'D'
                    END AS ht_state,
                    CASE
                        WHEN m.score_away > m.score_home THEN 'W'
                        WHEN m.score_away < m.score_home THEN 'L'
                        ELSE 'D'
                    END AS ft_state
                FROM matches m
                WHERE m.status = 'FT'
                  AND m.score_home IS NOT NULL
                  AND m.score_away IS NOT NULL
                  AND m.ht_score_home IS NOT NULL
                  AND m.ht_score_away IS NOT NULL
                  AND m.away_team_id = ANY(%s)
                  AND m.mst_utc < %s
            ),
            ranked AS (
                SELECT
                    team_id,
                    mst_utc,
                    ht_state,
                    ft_state,
                    ROW_NUMBER() OVER (PARTITION BY team_id ORDER BY mst_utc DESC) AS rn
                FROM team_matches
            )
            SELECT team_id, mst_utc, ht_state, ft_state
            FROM ranked
            WHERE rn <= 80
            ORDER BY team_id ASC, mst_utc DESC
            """,
            (list(team_ids), now_ms, list(team_ids), now_ms),
        )
        rows = cur.fetchall()

        by_team: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
        for row in rows:
            by_team[str(row["team_id"])].append(row)

        out: Dict[str, Dict[str, float]] = {}
        for team_id in team_ids:
            team_key = str(team_id)
            team_rows = by_team.get(team_key, [])
            if not team_rows:
                out[team_key] = {
                    "recent_reversal_rate": 0.0,
                    "matches_since_last_reversal": 99.0,
                    "avg_gap_matches": 12.0,
                    "cycle_pressure": 0.0,
                }
                continue

            # 1-based positions (1 = most recent match) of every reversal.
            reversal_indexes: List[int] = []
            for idx, row in enumerate(team_rows, start=1):
                ht_state = str(row.get("ht_state") or "")
                ft_state = str(row.get("ft_state") or "")
                # Led at half time and lost, or trailed at half time and won.
                if (ht_state == "L" and ft_state == "L") or (ht_state == "T" and ft_state == "W"):
                    reversal_indexes.append(idx)

            # The "recent" window is the latest 15 matches (or fewer).
            recent_n = min(15, len(team_rows))
            recent_reversal = sum(1 for idx in reversal_indexes if idx <= recent_n)
            recent_rate = (recent_reversal / recent_n) if recent_n > 0 else 0.0
            since_last = float(reversal_indexes[0]) if reversal_indexes else 99.0

            gaps = [
                float(later - earlier)
                for earlier, later in zip(reversal_indexes, reversal_indexes[1:])
            ]
            avg_gap = (sum(gaps) / len(gaps)) if gaps else 12.0
            if avg_gap <= 0:
                avg_gap = 12.0

            # Pressure peaks at 1.0 when the matches played since the last
            # reversal equal the team's average gap between reversals, and
            # falls linearly to 0 at `tolerance` matches away from it.
            cycle_pressure = 0.0
            if reversal_indexes:
                tolerance = max(3.0, avg_gap * 0.7)
                diff = abs(since_last - avg_gap)
                cycle_pressure = max(0.0, 1.0 - (diff / tolerance))

            out[team_key] = {
                "recent_reversal_rate": round(recent_rate, 4),
                "matches_since_last_reversal": round(since_last, 2),
                "avg_gap_matches": round(avg_gap, 2),
                "cycle_pressure": round(cycle_pressure, 4),
            }
        return out

    def _fetch_h2h_reversal_context(
        self,
        cur: "RealDictCursor",
        pair_keys: Set[Tuple[str, str]],
        now_ms: int,
    ) -> Dict[Tuple[str, str], Dict[str, Any]]:
        """Return the most recent finished head-to-head result per team pair.

        Args:
            cur: Open cursor whose rows support ``row["col"]``.
            pair_keys: Pairs of string team ids, each sorted ascending.
            now_ms: Upper bound (exclusive) for ``mst_utc``.

        Returns:
            ``{pair_key: {goal_diff, result, match_date_ms}}`` where
            ``goal_diff`` is home score minus away score of that match and
            ``result`` is ``"<home>-<away>"``. Pairs with no row in the
            (at most 4000) fetched matches are absent. Empty input returns
            ``{}`` without querying.
        """
        if not pair_keys:
            return {}

        team_ids = sorted({team_id for pair in pair_keys for team_id in pair})
        cur.execute(
            """
            SELECT
                m.home_team_id,
                m.away_team_id,
                m.score_home,
                m.score_away,
                m.ht_score_home,
                m.ht_score_away,
                m.mst_utc
            FROM matches m
            WHERE m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.home_team_id = ANY(%s)
              AND m.away_team_id = ANY(%s)
              AND m.mst_utc < %s
            ORDER BY m.mst_utc DESC
            LIMIT 4000
            """,
            (team_ids, team_ids, now_ms),
        )
        rows = cur.fetchall()

        out: Dict[Tuple[str, str], Dict[str, Any]] = {}
        for row in rows:
            home_id = str(row["home_team_id"])
            away_id = str(row["away_team_id"])
            h, a = sorted((home_id, away_id))
            key = (h, a)
            # Rows arrive newest first, so the first hit per pair wins.
            if key not in pair_keys or key in out:
                continue

            score_home = int(row["score_home"])
            score_away = int(row["score_away"])
            out[key] = {
                "goal_diff": score_home - score_away,
                "result": f"{score_home}-{score_away}",
                "match_date_ms": int(row["mst_utc"] or 0),
            }
            if len(out) >= len(pair_keys):
                break

        return out

    @staticmethod
    def _watchlist_risk_band(score: float) -> str:
        """Map a watch score to ``"HIGH"`` (>= 68), ``"MEDIUM"`` (>= 54) or ``"LOW"``."""
        if score >= 68.0:
            return "HIGH"
        if score >= 54.0:
            return "MEDIUM"
        return "LOW"
|
||||
@@ -0,0 +1,350 @@
|
||||
"""Upper Brain Mixin — V27 cross-check guards and assessments.
|
||||
|
||||
Auto-extracted mixin module — split from services/single_match_orchestrator.py.
|
||||
All methods here are composed into SingleMatchOrchestrator via inheritance.
|
||||
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
|
||||
initialised in the main __init__.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, overload
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from schemas.prediction import FullMatchPrediction
|
||||
from schemas.match_data import MatchData
|
||||
from models.v25_ensemble import V25Predictor, get_v25_predictor
|
||||
try:
|
||||
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
|
||||
except ImportError:
|
||||
class V27Predictor: # type: ignore[no-redef]
|
||||
def __init__(self): self.models = {}
|
||||
def load_models(self): return False
|
||||
def predict_all(self, features): return {}
|
||||
def compute_divergence(*args, **kwargs):
|
||||
return {}
|
||||
def compute_value_edge(*args, **kwargs):
|
||||
return {}
|
||||
from features.odds_band_analyzer import OddsBandAnalyzer
|
||||
try:
|
||||
from models.basketball_v25 import (
|
||||
BasketballMatchPrediction,
|
||||
get_basketball_v25_predictor,
|
||||
)
|
||||
except ImportError:
|
||||
BasketballMatchPrediction = Any # type: ignore[misc]
|
||||
def get_basketball_v25_predictor() -> Any:
|
||||
raise ImportError("Basketball predictor is not available")
|
||||
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
|
||||
from services.feature_enrichment import FeatureEnrichmentService
|
||||
from services.betting_brain import BettingBrain
|
||||
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
|
||||
from services.match_commentary import generate_match_commentary
|
||||
from utils.top_leagues import load_top_league_ids
|
||||
from utils.league_reliability import load_league_reliability
|
||||
from config.config_loader import build_threshold_dict, get_threshold_default
|
||||
from models.calibration import get_calibrator
|
||||
|
||||
|
||||
class UpperBrainMixin:
    """V27 cross-check helpers for bet picks.

    The helper methods rely on ``self._safe_float`` being supplied by the
    composing class.
    """

    def _apply_upper_brain_guards(self, package: Dict[str, Any]) -> Dict[str, Any]:
        """Run the package through ``BettingBrain().judge`` and return its result.

        This method previously carried a long in-line guard pipeline after an
        unconditional ``return``; that code could never execute and has been
        removed. The runtime behaviour is unchanged.

        Args:
            package: Prediction package to be judged.

        Returns:
            Whatever ``BettingBrain().judge(package)`` returns.
        """
        return BettingBrain().judge(package)

    def _upper_brain_assessment(
        self,
        item: Dict[str, Any],
        package: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Cross-check one pick against the V27 engine output in ``package``.

        Rules applied:
          * model-vs-V27 probability divergence >= 0.18 vetoes the pick,
            >= 0.12 downgrades it (only when both probabilities are known);
          * with a triple-value entry for the pick: ``DC`` is vetoed when the
            band sample is below 8 or the entry is not flagged as value;
            ``MS``/``OU25`` are downgraded for a band sample in 1..7;
            ``OU15``/``HT_OU05`` are downgraded for a band sample below 8;
          * a ``DISAGREE`` consensus downgrades ``MS``/``DC`` picks that were
            not already vetoed.

        Args:
            item: Pick dict; reads ``market``, ``pick`` and (via the
                probability helper) ``probability``.
            package: Prediction package; reads ``v27_engine`` and, through
                the probability helper, ``market_board``.

        Returns:
            ``{"applies": False}`` when market or pick is empty. Otherwise a
            dict with ``applies``, ``veto``, ``downgrade``, ``reason_codes``,
            ``model_prob``, ``v27_prob``, ``divergence``, ``triple_key`` and
            ``triple_value``. ``applies`` is true when any reason fired, a
            triple-value entry exists or V27 supplied a probability.
        """
        market = str(item.get("market") or "")
        pick = str(item.get("pick") or "")
        if not market or not pick:
            return {"applies": False}

        v27_engine = package.get("v27_engine") or {}
        triple_value = v27_engine.get("triple_value") or {}
        model_prob = self._upper_brain_market_probability(item, package)
        v27_prob = self._upper_brain_v27_probability(market, pick, v27_engine)
        triple_key = self._upper_brain_triple_key(market, pick)
        triple = triple_value.get(triple_key) if triple_key else None

        veto = False
        downgrade = False
        reasons: List[str] = []
        divergence = None

        if model_prob is not None and v27_prob is not None:
            divergence = abs(float(model_prob) - float(v27_prob))
            if divergence >= 0.18:
                veto = True
                reasons.append("upper_brain_v25_v27_divergence")
            elif divergence >= 0.12:
                downgrade = True
                reasons.append("upper_brain_v25_v27_warning")

        if isinstance(triple, dict):
            band_sample = int(float(triple.get("band_sample", 0) or 0))
            is_value = bool(triple.get("is_value"))
            if market == "DC":
                if band_sample < 8:
                    veto = True
                    reasons.append("upper_brain_band_sample_too_low")
                elif not is_value:
                    veto = True
                    reasons.append("upper_brain_triple_value_rejected")
            elif market in {"MS", "OU25"} and band_sample > 0 and band_sample < 8:
                downgrade = True
                reasons.append("upper_brain_band_sample_thin")
            elif market in {"OU15", "HT_OU05"} and band_sample < 8:
                downgrade = True
                reasons.append("upper_brain_band_sample_thin")

        consensus = str(v27_engine.get("consensus") or "").upper()
        if consensus == "DISAGREE" and market in {"MS", "DC"} and not veto:
            downgrade = True
            reasons.append("upper_brain_consensus_disagree")

        applies = bool(reasons or triple is not None or v27_prob is not None)
        return {
            "applies": applies,
            "veto": veto,
            "downgrade": downgrade,
            "reason_codes": reasons,
            "model_prob": round(float(model_prob), 4) if model_prob is not None else None,
            "v27_prob": round(float(v27_prob), 4) if v27_prob is not None else None,
            "divergence": round(float(divergence), 4) if divergence is not None else None,
            "triple_key": triple_key,
            "triple_value": triple,
        }

    def _upper_brain_market_probability(
        self,
        item: Dict[str, Any],
        package: Dict[str, Any],
    ) -> Optional[float]:
        """Return the main model's probability for a pick, if it can be found.

        The pick's own ``probability`` wins when it converts to ``float``;
        otherwise the value is looked up in
        ``package["market_board"][market]["probs"]`` under the key derived by
        ``_upper_brain_prob_key``.

        Returns:
            The probability, or ``None`` when neither source yields one.
        """
        raw_prob = item.get("probability")
        if raw_prob is not None:
            try:
                return float(raw_prob)
            except (TypeError, ValueError):
                pass  # fall through to the market board lookup

        market = str(item.get("market") or "")
        pick = str(item.get("pick") or "")
        board = package.get("market_board") or {}
        payload = board.get(market) if isinstance(board, dict) else None
        probs = payload.get("probs") if isinstance(payload, dict) else None
        if not isinstance(probs, dict):
            return None

        prob_key = self._upper_brain_prob_key(market, pick)
        if prob_key is None:
            return None
        return self._safe_float(probs.get(prob_key))

    def _upper_brain_v27_probability(
        self,
        market: str,
        pick: str,
        v27_engine: Dict[str, Any],
    ) -> Optional[float]:
        """Return V27's probability for a pick, or ``None`` if V27 has none.

        Only ``MS``, ``DC`` (sum of the two covered ``MS`` outcomes) and
        ``OU25`` are derived, from ``v27_engine["predictions"]``.

        ``None`` is returned for any other market, an unrecognised pick, or a
        missing/non-numeric prediction. Returning ``0.0`` in those cases, as
        this method used to, made the caller treat "no V27 data" as a real
        probability of zero and report a large divergence.
        """
        predictions = v27_engine.get("predictions") or {}
        ms = predictions.get("ms") or {}
        ou25 = predictions.get("ou25") or {}

        if market == "MS":
            ms_key = {"1": "home", "X": "draw", "2": "away"}.get(pick or "")
            return self._safe_float(ms.get(ms_key)) if ms_key else None

        if market == "DC":
            legs = {
                "1X": ("home", "draw"),
                "X2": ("draw", "away"),
                "12": ("home", "away"),
            }.get(pick)
            if legs is None:
                return None
            first = self._safe_float(ms.get(legs[0]))
            second = self._safe_float(ms.get(legs[1]))
            if first is None or second is None:
                return None
            return first + second

        if market == "OU25":
            prob_key = self._upper_brain_prob_key(market, pick)
            return self._safe_float(ou25.get(prob_key)) if prob_key else None

        return None

    @staticmethod
    def _upper_brain_prob_key(market: str, pick: str) -> Optional[str]:
        """Translate a pick label into the key used by probability dicts.

        Matching on free-text picks is by case-folded substring, accepting the
        English and Turkish fragments listed in the code (e.g. ``over``/``st``,
        ``under``/``alt``, ``yes``/``var``, ``no``/``yok``, ``odd``/``tek``,
        ``even``/``ift``).

        Returns:
            The key, or ``None`` when the market or pick is not recognised.
        """
        pick_norm = str(pick or "").strip().casefold()
        if market in {"MS", "HT", "HCAP"}:
            return pick if pick in {"1", "X", "2"} else None
        if market == "DC":
            return pick.upper() if pick.upper() in {"1X", "X2", "12"} else None
        if market in {"OU15", "OU25", "OU35", "HT_OU05", "HT_OU15", "CARDS"}:
            if "over" in pick_norm or "st" in pick_norm:
                return "over"
            if "under" in pick_norm or "alt" in pick_norm:
                return "under"
        if market == "BTTS":
            if "yes" in pick_norm or "var" in pick_norm:
                return "yes"
            if "no" in pick_norm or "yok" in pick_norm:
                return "no"
        if market == "OE":
            if "odd" in pick_norm or "tek" in pick_norm:
                return "odd"
            if "even" in pick_norm or "ift" in pick_norm:
                return "even"
        if market == "HTFT" and "/" in pick:
            return pick
        return None

    def _upper_brain_triple_key(self, market: str, pick: str) -> Optional[str]:
        """Return the ``triple_value`` key for a pick, or ``None`` if it has none.

        Keys exist only for one side of most markets: ``MS``/``HT`` home and
        away (no draw), the ``over`` side of the over/under and cards markets,
        ``BTTS`` yes, ``OE`` odd, plus every ``DC`` and ``HTFT`` pick.
        """
        prob_key = self._upper_brain_prob_key(market, pick)
        if market == "MS":
            return {"1": "home", "2": "away"}.get(pick)
        if market == "DC":
            return f"dc_{pick.lower()}" if pick.upper() in {"1X", "X2", "12"} else None
        if market in {"OU15", "OU25", "OU35"} and prob_key == "over":
            return f"{market.lower()}_over"
        if market == "BTTS" and prob_key == "yes":
            return "btts_yes"
        if market == "HT":
            return {"1": "ht_home", "2": "ht_away"}.get(pick)
        if market in {"HT_OU05", "HT_OU15"} and prob_key == "over":
            return f"{market.lower()}_over"
        if market == "OE" and prob_key == "odd":
            return "oe_odd"
        if market == "CARDS" and prob_key == "over":
            return "cards_over"
        if market == "HTFT" and "/" in pick:
            return f"htft_{pick.replace('/', '').lower()}"
        return None
|
||||
@@ -0,0 +1,174 @@
|
||||
"""Utility Mixin — generic helpers (safe_float, label normalisation, JSON parsing).
|
||||
|
||||
Auto-extracted mixin module — split from services/single_match_orchestrator.py.
|
||||
All methods here are composed into SingleMatchOrchestrator via inheritance.
|
||||
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
|
||||
initialised in the main __init__.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, overload
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from schemas.prediction import FullMatchPrediction
|
||||
from schemas.match_data import MatchData
|
||||
from models.v25_ensemble import V25Predictor, get_v25_predictor
|
||||
try:
    from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
except ImportError:
    # The V27 module is optional: when it cannot be imported, inert
    # stand-ins with the same names are defined so importing this module
    # still succeeds.

    class V27Predictor:  # type: ignore[no-redef]
        """Inert placeholder used when ``models.v27_predictor`` is missing."""

        def __init__(self):
            # No models are ever loaded by the placeholder.
            self.models = {}

        def load_models(self):
            """Report that nothing was loaded."""
            return False

        def predict_all(self, features):
            """Return an empty prediction mapping regardless of input."""
            return {}

    def compute_divergence(*args, **kwargs):
        """Placeholder: accepts any arguments and returns an empty dict."""
        return {}

    def compute_value_edge(*args, **kwargs):
        """Placeholder: accepts any arguments and returns an empty dict."""
        return {}
|
||||
from features.odds_band_analyzer import OddsBandAnalyzer
|
||||
try:
    from models.basketball_v25 import BasketballMatchPrediction, get_basketball_v25_predictor
except ImportError:
    # Basketball support is optional. Keep the type name usable in
    # annotations, and defer the failure to the moment a predictor is
    # actually requested.
    BasketballMatchPrediction = Any  # type: ignore[misc]

    def get_basketball_v25_predictor() -> Any:
        """Always raise ``ImportError``: the basketball predictor is absent."""
        raise ImportError("Basketball predictor is not available")
|
||||
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
|
||||
from services.feature_enrichment import FeatureEnrichmentService
|
||||
from services.betting_brain import BettingBrain
|
||||
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
|
||||
from services.match_commentary import generate_match_commentary
|
||||
from utils.top_leagues import load_top_league_ids
|
||||
from utils.league_reliability import load_league_reliability
|
||||
from config.config_loader import build_threshold_dict, get_threshold_default
|
||||
from models.calibration import get_calibrator
|
||||
|
||||
|
||||
class UtilsMixin:
    """Generic helpers: float coercion, calibrator/label mapping, text
    normalisation and JSON parsing.

    The static helpers are stateless; the instance methods only call other
    helpers defined on this class.
    """

    @staticmethod
    @overload
    def _safe_float(value: Any, default: float = ...) -> float: ...

    @staticmethod
    @overload
    def _safe_float(value: Any, default: None) -> Optional[float]: ...

    @staticmethod
    def _safe_float(value: Any, default: Optional[float] = 0.0) -> Optional[float]:
        """Convert ``value`` to ``float``, returning ``default`` on failure.

        This is the single implementation of the helper. An earlier
        duplicate definition (defaulting to ``None``) was shadowed by a
        later one defaulting to ``0.0``; the effective runtime default of
        ``0.0`` is kept so existing callers see no change, and the
        overloads above now describe that behaviour.

        Args:
            value: Anything ``float()`` may accept (str, int, Decimal, ...).
            default: Returned when conversion fails. Pass ``None``
                explicitly to get ``None`` back for unparseable input.

        Returns:
            The converted float, or ``default``.
        """
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: float() rejects ints too large to represent;
            # a "safe" conversion should fall back instead of raising.
            return default

    @staticmethod
    def _calibrator_key(market: str, pick: str) -> Optional[str]:
        """Map (market, pick) -> trained-calibrator key in models/calibration.

        The market is matched case-insensitively; the pick is stripped and
        case-folded. Returns ``None`` when no key applies to the pair.
        """
        m = (market or "").upper()
        p = (pick or "").strip().casefold()

        # Full-time and half-time result share the same pick vocabulary;
        # "x" and "0" both denote the draw.
        if m in ("MS", "HT"):
            side = {"1": "home", "x": "draw", "0": "draw", "2": "away"}.get(p)
            return f"{m.lower()}_{side}" if side else None

        # These markets use one key regardless of the pick.
        if m == "DC":
            return "dc"
        if m == "HTFT":
            return "ht_ft"

        # Totals: only the "over" side is mapped (English or Turkish label,
        # with or without the diacritic).
        if m in ("OU15", "OU25", "OU35") and any(
            token in p for token in ("over", "üst", "ust")
        ):
            return m.lower()

        # Both-teams-to-score: only the "yes" side ("var" is the Turkish label).
        if m == "BTTS" and ("yes" in p or "var" in p):
            return "btts"

        return None

    @staticmethod
    def _confidence_label(score: float) -> Tuple[str, str]:
        """Turkish UX label + interpretation for a 0-100 confidence score.

        Returns:
            ``(label, interpretation)`` for the highest band whose lower
            bound ``score`` reaches; the lowest band otherwise.
        """
        bands = (
            (75, "YUKSEK", "Bu sinyal güçlü ve güvenilir"),
            (60, "ORTA", "Sinyal makul, çelişen veri yok"),
            (45, "DUSUK", "Sinyal zayıf, dikkatli yorumla"),
        )
        for lower_bound, label, interpretation in bands:
            if score >= lower_bound:
                return label, interpretation
        return "COK_DUSUK", "Veri yetersiz veya çelişkili — bu motoru bu maç için ihmal et"

    @staticmethod
    def _to_float(value: Any, default: float) -> float:
        """Convert ``value`` to ``float``; ``None`` or any failure yields ``default``."""
        if value is None:
            return default
        try:
            return float(value)
        except Exception:
            # Deliberately broad: this helper is best-effort and must never
            # raise, whatever object it is handed.
            return default

    @staticmethod
    def _normalize_text(value: Any) -> str:
        """Case-fold ``value`` and collapse all whitespace runs to one space.

        Falsy values normalise to the empty string.
        """
        # Case-folding a dotted capital I yields "i" + U+0307 (combining dot
        # above); strip the combining mark so it compares equal to plain "i".
        text = str(value or "").casefold().replace("i\u0307", "i")
        return " ".join(text.split())

    def _selection_value(
        self,
        selections: Dict[str, Any],
        aliases: Tuple[str, ...],
        default: float,
    ) -> float:
        """Look up a numeric value in ``selections`` by any of ``aliases``.

        Keys and aliases are compared after ``_normalize_text``. An exact
        key match wins; otherwise the first key *containing* an alias is
        used.

        Args:
            selections: Mapping of selection label -> value.
            aliases: Accepted labels for the wanted selection.
            default: Returned when ``selections`` is not a dict, nothing
                matches, or the matched value is not convertible to float.

        Returns:
            The matched value as a float, or ``default``.
        """
        if not isinstance(selections, dict):
            return default

        normalized_aliases = {self._normalize_text(alias) for alias in aliases}

        # First pass: exact label match.
        for key, value in selections.items():
            if self._normalize_text(key) in normalized_aliases:
                return self._to_float(value, default)

        # Secondary match for entries like "2,5 Üst" or "Toplam Alt"
        for key, value in selections.items():
            key_norm = self._normalize_text(key)
            if any(alias in key_norm for alias in normalized_aliases):
                return self._to_float(value, default)

        return default

    def _parse_json_dict(self, payload: Any) -> Optional[Dict[str, Any]]:
        """Return ``payload`` as a dict, decoding it from JSON text if needed.

        Returns:
            The dict, or ``None`` when ``payload`` is invalid JSON or does
            not represent a dict.
        """
        if isinstance(payload, str):
            try:
                payload = json.loads(payload)
            except Exception:
                # Best-effort parse: any decoding failure means "no dict".
                return None
        return payload if isinstance(payload, dict) else None
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,75 +0,0 @@
|
||||
import sys
|
||||
import unittest
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
# Parent of the directory containing this file; it is prepended to sys.path
# (once) before the project imports that follow — presumably so they
# resolve when this module is run directly.
AI_ENGINE_ROOT = Path(__file__).resolve().parents[1]
if sys.path.count(str(AI_ENGINE_ROOT)) == 0:
    sys.path.insert(0, str(AI_ENGINE_ROOT))
|
||||
|
||||
from core.engines.odds_predictor import OddsPredictorEngine
|
||||
from features.sidelined_analyzer import SidelinedAnalyzer
|
||||
|
||||
|
||||
class EngineNullSafetyTests(unittest.TestCase):
    """Checks that engines accept Decimal and non-numeric input values."""

    def test_odds_predictor_accepts_decimal_inputs_without_crashing(self):
        """Decimal odds must produce strictly positive market probabilities."""
        decimal_odds = {
            "ms_h": Decimal("2.10"),
            "ms_d": Decimal("3.25"),
            "ms_a": Decimal("3.60"),
            "ou25_o": Decimal("1.90"),
        }

        prediction = OddsPredictorEngine().predict(odds_data=decimal_odds)

        for field in ("market_home_prob", "market_draw_prob", "market_away_prob"):
            self.assertGreater(getattr(prediction, field), 0.0)

    def test_sidelined_analyzer_handles_non_numeric_fields(self):
        """Placeholder strings such as "N/A" / "?" must not break analysis."""
        # Build the analyzer without running __init__, then set the
        # configuration attributes by hand.
        analyzer = SidelinedAnalyzer.__new__(SidelinedAnalyzer)
        settings = {
            "position_weights": {"K": 0.35, "D": 0.20, "O": 0.25, "F": 0.30},
            "max_rating": 10,
            "adaptation_threshold": 10,
            "adaptation_discount": 0.5,
            "goalkeeper_penalty": 0.15,
            "confidence_boost": 10,
            "max_impact": 0.85,
            "key_player_threshold": 3,
            "recent_matches_lookback": 15,
        }
        for attribute, setting in settings.items():
            setattr(analyzer, attribute, setting)
        analyzer._fetch_player_stats = MagicMock(return_value={})

        # First player carries non-numeric placeholders; second carries
        # numeric values encoded as strings.
        player_with_placeholders = {
            "playerId": "p1",
            "playerName": "Player One",
            "positionShort": "O",
            "matchesMissed": "N/A",
            "average": "?",
            "type": "injury",
        }
        player_with_numeric_strings = {
            "playerId": "p2",
            "playerName": "Player Two",
            "positionShort": "K",
            "matchesMissed": "12",
            "average": "6.7",
            "type": "suspension",
        }
        sidelined_payload = {
            "totalSidelined": 2,
            "players": [player_with_placeholders, player_with_numeric_strings],
        }

        result = analyzer.analyze(sidelined_payload)

        self.assertEqual(result.total_sidelined, 2)
        self.assertGreaterEqual(result.impact_score, 0.0)
        self.assertTrue(2 <= len(result.player_details))
|
||||
|
||||
|
||||
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -8,9 +8,10 @@ AI_ENGINE_ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(AI_ENGINE_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(AI_ENGINE_ROOT))
|
||||
|
||||
from models.v20_ensemble import FullMatchPrediction
|
||||
from schemas.prediction import FullMatchPrediction
|
||||
from schemas.match_data import MatchData
|
||||
from models.basketball_v25 import BasketballMatchPrediction
|
||||
from services.single_match_orchestrator import MatchData, SingleMatchOrchestrator
|
||||
from services.single_match_orchestrator import SingleMatchOrchestrator
|
||||
|
||||
|
||||
class _CursorContext:
|
||||
|
||||
Reference in New Issue
Block a user