main
Deploy Iddaai Backend / build-and-deploy (push) Successful in 37s

This commit is contained in:
2026-05-17 02:17:22 +03:00
parent 17ace9bd12
commit 94c7a4481a
53 changed files with 29602 additions and 7832 deletions
+11
View File
@@ -1,3 +1,14 @@
model_ensemble:
xgb_weight: 0.50
lgb_weight: 0.50
temperature: 1.5
default_ms_odds:
home: 2.65
draw: 3.20
away: 2.65
elo_staleness_days: 14
odds_staleness_hours: 48
engine_weights:
team: 0.30
player: 0.25
+1 -7
View File
@@ -1,16 +1,10 @@
# ai-engine/core/engines/__init__.py
"""
V20 Ensemble Prediction Engines
Prediction Engines
"""
from .team_predictor import TeamPredictorEngine, get_team_predictor
from .player_predictor import PlayerPredictorEngine, get_player_predictor
from .odds_predictor import OddsPredictorEngine, get_odds_predictor
from .referee_predictor import RefereePredictorEngine, get_referee_predictor
__all__ = [
"TeamPredictorEngine", "get_team_predictor",
"PlayerPredictorEngine", "get_player_predictor",
"OddsPredictorEngine", "get_odds_predictor",
"RefereePredictorEngine", "get_referee_predictor"
]
-237
View File
@@ -1,237 +0,0 @@
"""
Odds Predictor Engine - V20 Ensemble Component
Uses market odds and Poisson mathematics for predictions.
Weight: 30% in ensemble
"""
import os
import sys
from typing import Dict, Optional
from dataclasses import dataclass
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from features.poisson_engine import get_poisson_engine
from features.value_calculator import get_value_calculator
@dataclass
class OddsPrediction:
    """Result container returned by the odds engine.

    Probability fields are scaled by 100 when exported through ``to_dict``.
    """

    # 1X2 probabilities implied by the market
    market_home_prob: float = 0.33
    market_draw_prob: float = 0.33
    market_away_prob: float = 0.33

    # Expected goals from the Poisson model
    poisson_home_xg: float = 1.3
    poisson_away_xg: float = 1.1

    # Total-goals lines
    over_15_prob: float = 0.75
    over_25_prob: float = 0.55
    over_35_prob: float = 0.30

    # Both teams to score
    btts_yes_prob: float = 0.50

    # Three most likely scorelines
    most_likely_score: str = "1-1"
    second_likely_score: str = "1-0"
    third_likely_score: str = "2-1"

    # Detected value bets; None is swapped for a fresh list after init
    value_bets: Optional[list] = None
    confidence: float = 0.0

    def __post_init__(self):
        # Give every instance its own list rather than a shared mutable default.
        self.value_bets = [] if self.value_bets is None else self.value_bets

    def to_dict(self) -> dict:
        """Return a plain dict with rounded values (probabilities as percentages)."""

        def as_percent(fraction):
            return round(fraction * 100, 1)

        payload = {
            name: as_percent(getattr(self, name))
            for name in ("market_home_prob", "market_draw_prob", "market_away_prob")
        }
        payload["poisson_home_xg"] = round(self.poisson_home_xg, 2)
        payload["poisson_away_xg"] = round(self.poisson_away_xg, 2)
        for name in ("over_15_prob", "over_25_prob", "over_35_prob", "btts_yes_prob"):
            payload[name] = as_percent(getattr(self, name))
        for name in (
            "most_likely_score",
            "second_likely_score",
            "third_likely_score",
            "value_bets",
        ):
            payload[name] = getattr(self, name)
        payload["confidence"] = round(self.confidence, 1)
        return payload
class OddsPredictorEngine:
    """
    Odds-based prediction engine.

    Uses:
    - Market odds to extract implied probabilities
    - Poisson distribution for mathematical xG
    - Value calculator for EV+ opportunities
    """

    def __init__(self):
        self.poisson_engine = get_poisson_engine()
        try:
            self.value_calc = get_value_calculator()
        except Exception:
            # Best-effort: no method of this class uses the calculator.
            self.value_calc = None  # type: ignore[assignment]
        # Fallback 1X2 odds used when the caller supplies no usable price.
        self.default_ms_h = 2.65
        self.default_ms_d = 3.20
        self.default_ms_a = 2.65
        print("✅ OddsPredictorEngine initialized")

    def _odds_to_prob(self, odds: float) -> float:
        """Convert decimal odds to the raw implied probability.

        Returns 0.0 for anything that is not a number greater than 1.0
        (None, non-numeric strings, NaN, odds <= 1.0).
        """
        try:
            odds = float(odds)
        except (TypeError, ValueError):
            return 0.0
        # Written as "not >" so that NaN, which compares False, is rejected too.
        if not odds > 1.0:
            return 0.0
        return 1.0 / odds

    def _market_1x2(self, odds_data: Dict[str, float]) -> tuple:
        """Return vig-free (home, draw, away) probabilities from 1X2 odds.

        Missing keys use the engine defaults. If none of the three supplied
        prices is usable, the defaults are used for all three so the result
        still sums to 1 instead of being 0/0/0.
        """
        defaults = (self.default_ms_h, self.default_ms_d, self.default_ms_a)
        raw_probs = [
            self._odds_to_prob(odds_data.get(key, fallback))
            for key, fallback in zip(("ms_h", "ms_d", "ms_a"), defaults)
        ]
        total = sum(raw_probs)
        if total <= 0:
            raw_probs = [self._odds_to_prob(fallback) for fallback in defaults]
            total = sum(raw_probs) or 1
        return tuple(prob / total for prob in raw_probs)

    def _find_value_bets(self, poisson_pred, market_home: float,
                         market_away: float, odds_data: Dict[str, float]) -> list:
        """Return value-bet dicts where the Poisson model beats the market.

        Each entry has the keys ``market``, ``edge`` (percentage points) and
        ``confidence``. An entry is only produced for a positive edge.
        """
        bets = []

        # 1X2: triggered by a >10 point disagreement on the home win.
        home_edge = poisson_pred.home_win_prob - market_home
        if abs(home_edge) > 0.10:
            if home_edge > 0:
                bets.append({
                    "market": "MS 1",
                    "edge": round(home_edge * 100, 1),
                    "confidence": "medium"
                })
            else:
                # An overpriced home side does not imply value on the away
                # side (the draw may hold it), so require a positive edge.
                away_edge = poisson_pred.away_win_prob - market_away
                if away_edge > 0:
                    bets.append({
                        "market": "MS 2",
                        "edge": round(away_edge * 100, 1),
                        "confidence": "medium"
                    })

        # Over/Under 2.5: skipped when the price is unusable, otherwise a
        # zero market probability would look like a huge "edge".
        market_over25 = self._odds_to_prob(odds_data.get("ou25_o", 1.9))
        if market_over25 > 0:
            gap = poisson_pred.over_25_prob - market_over25
            if abs(gap) > 0.08:
                edge = abs(gap) * 100
                bets.append({
                    "market": "2.5 Üst" if gap > 0 else "2.5 Alt",
                    "edge": round(edge, 1),
                    "confidence": "high" if edge > 10 else "medium"
                })
        return bets

    def predict(self,
                odds_data: Dict[str, float],
                home_goals_avg: float = 1.5,
                home_conceded_avg: float = 1.2,
                away_goals_avg: float = 1.2,
                away_conceded_avg: float = 1.4) -> "OddsPrediction":
        """
        Generate odds-based prediction.

        Args:
            odds_data: Dict with keys like 'ms_h', 'ms_d', 'ms_a', 'ou25_o', 'btts_y'
            home_goals_avg: Home team's average goals scored
            home_conceded_avg: Home team's average goals conceded
            away_goals_avg: Away team's average goals scored
            away_conceded_avg: Away team's average goals conceded

        Returns:
            OddsPrediction with market and Poisson analysis
        """
        # 1. Vig-free market probabilities
        market_home, market_draw, market_away = self._market_1x2(odds_data)

        # 2. Poisson prediction
        poisson_pred = self.poisson_engine.predict(
            home_goals_avg, home_conceded_avg,
            away_goals_avg, away_conceded_avg
        )

        # 3. Top three scorelines, padded with fixed fallbacks
        fallback_scores = ["1-1", "1-0", "2-1"]
        top = poisson_pred.most_likely_scores[:3] if poisson_pred.most_likely_scores else []
        scores = [entry["score"] for entry in top] + fallback_scores[len(top):]

        # 4. Value bet detection
        value_bets = self._find_value_bets(
            poisson_pred, market_home, market_away, odds_data
        )

        # 5. Confidence: grows with market/Poisson agreement on the home win,
        #    plus a fixed bonus per detected value bet.
        agreement = 1.0 - abs(poisson_pred.home_win_prob - market_home)
        confidence = 50.0 + (agreement * 40) + (len(value_bets) * 5)

        return OddsPrediction(
            market_home_prob=market_home,
            market_draw_prob=market_draw,
            market_away_prob=market_away,
            poisson_home_xg=poisson_pred.home_xg,
            poisson_away_xg=poisson_pred.away_xg,
            over_15_prob=poisson_pred.over_15_prob,
            over_25_prob=poisson_pred.over_25_prob,
            over_35_prob=poisson_pred.over_35_prob,
            btts_yes_prob=poisson_pred.btts_yes_prob,
            most_likely_score=scores[0],
            second_likely_score=scores[1],
            third_likely_score=scores[2],
            value_bets=value_bets,
            confidence=min(99.9, confidence)
        )
# Module-level cache for the lazily created engine
_engine: Optional[OddsPredictorEngine] = None


def get_odds_predictor() -> OddsPredictorEngine:
    """Return the shared OddsPredictorEngine, creating it on first call."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = OddsPredictorEngine()
    return _engine
if __name__ == "__main__":
    # Manual smoke test: run one prediction with sample odds and print it.
    sample_odds = {
        "ms_h": 1.85,
        "ms_d": 3.40,
        "ms_a": 4.20,
        "ou25_o": 1.90
    }
    engine = get_odds_predictor()
    print("\n🧪 Odds Predictor Engine Test")
    print("=" * 50)
    pred = engine.predict(
        odds_data=sample_odds,
        home_goals_avg=1.8,
        home_conceded_avg=1.0,
        away_goals_avg=1.2,
        away_conceded_avg=1.5
    )
    print(f"\n📊 Prediction:")
    for key, value in pred.to_dict().items():
        print(f" {key}: {value}")
+161 -53
View File
@@ -24,32 +24,29 @@ class PlayerPrediction:
extract_training_data.py so that inference values match the
distribution the model was trained on (~3-36 range).
"""
home_squad_quality: float = 12.0 # training-scale composite (~3-36)
home_squad_quality: float = 12.0
away_squad_quality: float = 12.0
squad_diff: float = 0.0 # home - away (training scale)
squad_diff: float = 0.0
home_key_players: int = 0
away_key_players: int = 0
home_missing_impact: float = 0.0 # 0-1, how much weaker due to missing players
home_missing_impact: float = 0.0
away_missing_impact: float = 0.0
home_goals_form: int = 0 # Goals in last 5 matches
home_goals_form: int = 0
away_goals_form: int = 0
home_lineup_goals_per90: float = 0.0
away_lineup_goals_per90: float = 0.0
home_lineup_assists_per90: float = 0.0
away_lineup_assists_per90: float = 0.0
home_squad_continuity: float = 0.5
away_squad_continuity: float = 0.5
home_top_scorer_form: int = 0
away_top_scorer_form: int = 0
home_avg_player_exp: float = 0.0
away_avg_player_exp: float = 0.0
home_goals_diversity: float = 0.0
away_goals_diversity: float = 0.0
lineup_available: bool = False
confidence: float = 0.0
def to_dict(self) -> dict:
    """Return the summary fields as a plain dict with rounded floats."""
    payload = {
        name: round(getattr(self, name), 1)
        for name in ("home_squad_quality", "away_squad_quality", "squad_diff")
    }
    # Counts are passed through untouched.
    payload["home_key_players"] = self.home_key_players
    payload["away_key_players"] = self.away_key_players
    for name in ("home_missing_impact", "away_missing_impact"):
        payload[name] = round(getattr(self, name), 2)
    for name in ("home_goals_form", "away_goals_form", "lineup_available"):
        payload[name] = getattr(self, name)
    payload["confidence"] = round(self.confidence, 1)
    return payload
class PlayerPredictorEngine:
@@ -90,8 +87,9 @@ class PlayerPredictorEngine:
"""
# Get squad features
home_analysis = None
away_analysis = None
if home_lineup and away_lineup:
# Use provided lineups (for live matches)
home_analysis = self.squad_engine.analyze_squad_from_list(
home_lineup, home_team_id
)
@@ -99,7 +97,6 @@ class PlayerPredictorEngine:
away_lineup, away_team_id
)
lineup_available = True
# Build features dict from analysis objects
features = {
"home_starting_11": home_analysis.starting_count or 11,
"home_goals_last_5": home_analysis.total_goals_last_5,
@@ -113,7 +110,6 @@ class PlayerPredictorEngine:
"away_forwards": away_analysis.forward_count or 2,
}
elif match_id:
# Try to get from database
try:
features = self.squad_engine.get_features(
match_id, home_team_id, away_team_id
@@ -132,58 +128,42 @@ class PlayerPredictorEngine:
home_team_id, away_team_id
)
lineup_available = False
# Extract features
home_goals = int(features.get("home_goals_last_5", 0))
away_goals = int(features.get("away_goals_last_5", 0))
home_key = int(features.get("home_key_players", 0))
away_key = int(features.get("away_key_players", 0))
home_assists = features.get("home_assists_last_5", 0)
away_assists = features.get("away_assists_last_5", 0)
home_starting = features.get("home_starting_11", 11)
away_starting = features.get("away_starting_11", 11)
home_fwd = features.get("home_forwards", 2)
away_fwd = features.get("away_forwards", 2)
# Calculate squad quality — MUST match extract_training_data.py formula
# Formula: starting_count * 0.3 + goals * 2.0 + assists * 1.0
# + key_players * 3.0 + fwd_count * 1.5
# Typical range: ~3 36 (model trained on this distribution)
home_quality = (
home_starting * 0.3 +
home_goals * 2.0 +
home_assists * 1.0 +
home_key * 3.0 +
home_fwd * 1.5
)
away_quality = (
away_starting * 0.3 +
away_goals * 2.0 +
away_assists * 1.0 +
away_key * 3.0 +
away_fwd * 1.5
)
# Squad difference
# Squad quality — matches V25 extract_training_data.py:579
home_quality = home_starting * 0.3 + home_key * 3.0 + home_fwd * 1.5
away_quality = away_starting * 0.3 + away_key * 3.0 + away_fwd * 1.5
squad_diff = home_quality - away_quality
# Missing player impact
# Priority: sidelined data (position-weighted) > lineup count (basic)
if sidelined_data:
home_impact, away_impact = self.sidelined_analyzer.analyze_match(sidelined_data)
home_missing = min(1.0, max(0.0, home_impact.impact_score))
away_missing = min(1.0, max(0.0, away_impact.impact_score))
sidelined_available = True
else:
# Fallback: basic lineup count method
expected_xi = 11
actual_home_xi = features.get("home_starting_11", 11)
actual_away_xi = features.get("away_starting_11", 11)
home_missing = (expected_xi - actual_home_xi) / expected_xi if actual_home_xi < expected_xi else 0
away_missing = (expected_xi - actual_away_xi) / expected_xi if actual_away_xi < expected_xi else 0
sidelined_available = False
# Confidence: more data sources = higher confidence
# Player-level features (matches extract_training_data.py:594-650)
player_feats = self._compute_player_level_features(
home_lineup or [], away_lineup or [],
home_team_id, away_team_id,
home_analysis, away_analysis,
)
confidence = 70.0 if lineup_available else 35.0
if home_goals + away_goals > 10:
confidence += 15
@@ -191,7 +171,7 @@ class PlayerPredictorEngine:
confidence += self.sidelined_analyzer.config.get("sidelined.confidence_boost", 10)
if not lineup_available:
confidence -= 5.0
return PlayerPrediction(
home_squad_quality=home_quality,
away_squad_quality=away_quality,
@@ -202,9 +182,137 @@ class PlayerPredictorEngine:
away_missing_impact=away_missing,
home_goals_form=home_goals,
away_goals_form=away_goals,
home_lineup_goals_per90=player_feats['home_lineup_goals_per90'],
away_lineup_goals_per90=player_feats['away_lineup_goals_per90'],
home_lineup_assists_per90=player_feats['home_lineup_assists_per90'],
away_lineup_assists_per90=player_feats['away_lineup_assists_per90'],
home_squad_continuity=player_feats['home_squad_continuity'],
away_squad_continuity=player_feats['away_squad_continuity'],
home_top_scorer_form=player_feats['home_top_scorer_form'],
away_top_scorer_form=player_feats['away_top_scorer_form'],
home_avg_player_exp=player_feats['home_avg_player_exp'],
away_avg_player_exp=player_feats['away_avg_player_exp'],
home_goals_diversity=player_feats['home_goals_diversity'],
away_goals_diversity=player_feats['away_goals_diversity'],
lineup_available=lineup_available,
confidence=max(5.0, confidence)
)
def _compute_player_level_features(
self,
home_lineup: List[str],
away_lineup: List[str],
home_team_id: str,
away_team_id: str,
home_analysis,
away_analysis,
) -> Dict[str, float]:
defaults = {
'home_lineup_goals_per90': 0.0, 'away_lineup_goals_per90': 0.0,
'home_lineup_assists_per90': 0.0, 'away_lineup_assists_per90': 0.0,
'home_squad_continuity': 0.5, 'away_squad_continuity': 0.5,
'home_top_scorer_form': 0, 'away_top_scorer_form': 0,
'home_avg_player_exp': 0.0, 'away_avg_player_exp': 0.0,
'home_goals_diversity': 0.0, 'away_goals_diversity': 0.0,
}
conn = self.squad_engine.get_conn()
if conn is None:
return defaults
try:
from psycopg2.extras import RealDictCursor
result = {}
for prefix, lineup, team_id in [
('home', home_lineup, home_team_id),
('away', away_lineup, away_team_id),
]:
if not lineup:
for k in ('lineup_goals_per90', 'lineup_assists_per90',
'squad_continuity', 'top_scorer_form',
'avg_player_exp', 'goals_diversity'):
result[f'{prefix}_{k}'] = defaults[f'{prefix}_{k}']
continue
g90, a90, total_exp = 0.0, 0.0, 0
best_scorer_total, best_scorer_id = 0, None
scorers_in_lineup = 0
with conn.cursor(cursor_factory=RealDictCursor) as cur:
for pid in lineup:
cur.execute("""
SELECT
COUNT(*) as starts,
COALESCE(SUM(CASE WHEN e.event_type = 'goal'
AND (e.event_subtype IS NULL OR e.event_subtype NOT ILIKE '%%penaltı kaçırma%%')
THEN 1 ELSE 0 END), 0) as goals,
COALESCE((SELECT COUNT(*) FROM match_player_events
WHERE assist_player_id = %s), 0) as assists
FROM match_player_participation mpp
LEFT JOIN match_player_events e
ON e.match_id = mpp.match_id AND e.player_id = mpp.player_id
WHERE mpp.player_id = %s AND mpp.is_starting = true
""", (pid, pid))
row = cur.fetchone()
if not row or not row['starts']:
continue
starts = row['starts']
goals = row['goals'] or 0
assists = row['assists'] or 0
g90 += goals / starts
a90 += assists / starts
total_exp += starts
if goals > 0:
scorers_in_lineup += 1
if goals > best_scorer_total:
best_scorer_total = goals
best_scorer_id = pid
n_st = len(lineup) or 1
# Top scorer recent form (goals in last 5 starts)
top_scorer_form = 0
if best_scorer_id:
cur.execute("""
SELECT COUNT(*) as goals
FROM match_player_events mpe
WHERE mpe.player_id = %s AND mpe.event_type = 'goal'
AND mpe.match_id IN (
SELECT match_id FROM match_player_participation
WHERE player_id = %s AND is_starting = true
ORDER BY match_id DESC LIMIT 5
)
""", (best_scorer_id, best_scorer_id))
tsf_row = cur.fetchone()
if tsf_row:
top_scorer_form = tsf_row['goals'] or 0
# Squad continuity (overlap with previous match lineup)
squad_continuity = 0.5
cur.execute("""
SELECT mpp.player_id
FROM match_player_participation mpp
JOIN matches m ON mpp.match_id = m.id
WHERE mpp.team_id = %s AND mpp.is_starting = true
AND m.status = 'FT'
ORDER BY m.mst_utc DESC
LIMIT 11
""", (team_id,))
prev_starters = {r['player_id'] for r in cur.fetchall()}
if prev_starters:
overlap = len(set(lineup) & prev_starters)
squad_continuity = overlap / n_st
result[f'{prefix}_lineup_goals_per90'] = round(g90, 3)
result[f'{prefix}_lineup_assists_per90'] = round(a90, 3)
result[f'{prefix}_squad_continuity'] = round(squad_continuity, 3)
result[f'{prefix}_top_scorer_form'] = top_scorer_form
result[f'{prefix}_avg_player_exp'] = round(total_exp / n_st, 1)
result[f'{prefix}_goals_diversity'] = round(scorers_in_lineup / n_st, 3)
return result
except Exception as e:
print(f"[PlayerPredictor] Player-level features failed: {e}")
return defaults
def get_1x2_modifier(self, prediction: PlayerPrediction) -> Dict[str, float]:
"""
-188
View File
@@ -1,188 +0,0 @@
"""
Referee Predictor Engine - V20 Ensemble Component
Analyzes referee patterns for cards, goals, and home bias.
Weight: 15% in ensemble
"""
import os
import sys
from typing import Dict, Optional
from dataclasses import dataclass
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from features.referee_engine import get_referee_engine
@dataclass
class RefereePrediction:
    """Result container returned by the referee engine.

    Rate fields are scaled by 100 when exported through ``to_dict``.
    """

    referee_name: str = ""
    matches_officiated: int = 0

    # Cards
    avg_yellow_cards: float = 4.0
    avg_red_cards: float = 0.2
    is_card_heavy: bool = False  # more cards than average

    # Goals
    avg_goals_per_match: float = 2.5
    over_25_rate: float = 0.50
    is_high_scoring: bool = False  # more goals than average

    # Home bias
    home_win_rate: float = 0.45
    home_bias: float = 0.0  # -1 to +1, positive favours the home side

    # Penalties
    penalty_rate: float = 0.15
    confidence: float = 0.0

    def to_dict(self) -> dict:
        """Return a plain dict with rounded values (rates as percentages)."""

        def as_percent(rate):
            return round(rate * 100, 1)

        payload = {
            "referee_name": self.referee_name,
            "matches_officiated": self.matches_officiated,
        }
        payload["avg_yellow_cards"] = round(self.avg_yellow_cards, 1)
        payload["avg_red_cards"] = round(self.avg_red_cards, 2)
        payload["is_card_heavy"] = self.is_card_heavy
        payload["avg_goals_per_match"] = round(self.avg_goals_per_match, 2)
        payload["over_25_rate"] = as_percent(self.over_25_rate)
        payload["is_high_scoring"] = self.is_high_scoring
        payload["home_win_rate"] = as_percent(self.home_win_rate)
        payload["home_bias"] = round(self.home_bias, 2)
        payload["penalty_rate"] = as_percent(self.penalty_rate)
        payload["confidence"] = round(self.confidence, 1)
        return payload
class RefereePredictorEngine:
    """
    Referee-based prediction engine.

    Analyzes:
    - Card tendency (average yellow/red cards)
    - Goal tendency (goals per match, over-2.5 rate)
    - Home bias (rate of decisions favouring the home side)
    - Penalty tendency (penalty award rate)
    """

    # League average benchmarks
    LEAGUE_AVG_GOALS = 2.65
    LEAGUE_AVG_YELLOW = 4.0
    LEAGUE_HOME_WIN_RATE = 0.45

    def __init__(self):
        self.referee_engine = get_referee_engine()
        print("✅ RefereePredictorEngine initialized")

    @staticmethod
    def _feature(features, key, default):
        """Return ``features[key]``, or ``default`` when it is missing or None."""
        value = features.get(key)
        return default if value is None else value

    def predict(self,
                match_id: Optional[str] = None,
                referee_name: Optional[str] = None,
                league_id: Optional[str] = None) -> "RefereePrediction":
        """
        Generate referee-based prediction.

        Args:
            match_id: Match ID to find referee
            referee_name: Or provide referee name directly
            league_id: League ID to scope stats (prevents name collisions)

        Returns:
            RefereePrediction with referee analysis. A default prediction
            with confidence 10 is returned when neither id nor name is
            given, and one with confidence 20 when the referee has fewer
            than 5 recorded matches.
        """
        league = league_id or ""

        if match_id:
            features = self.referee_engine.get_features(match_id, league_id=league)
            # Live flows may already have referee_name while match_officials table is sparse.
            # Prefer the richer profile if direct-name lookup has more history.
            if referee_name:
                name_features = self.referee_engine.get_features_by_name(referee_name, league_id=league)
                if (name_features.get("referee_matches", 0) or 0) > (features.get("referee_matches", 0) or 0):
                    features = name_features
        elif referee_name:
            features = self.referee_engine.get_features_by_name(referee_name, league_id=league)
        else:
            return RefereePrediction(confidence=10.0)

        ref_name = str(features.get("referee_name", "Unknown"))
        # `or 0` matches the None guard used in the comparison above.
        matches = int(features.get("referee_matches") or 0)
        if matches < 5:
            # Not enough data
            return RefereePrediction(
                referee_name=ref_name,
                matches_officiated=matches,
                confidence=20.0
            )

        # None-valued features fall back to the same defaults as missing ones,
        # so the arithmetic below cannot fail on a null column.
        avg_yellow = self._feature(features, "referee_avg_yellow", 4.0)
        avg_red = self._feature(features, "referee_avg_red", 0.2)
        avg_goals = self._feature(features, "referee_avg_goals", 2.5)
        over25_rate = self._feature(features, "referee_over25_rate", 0.5)
        home_win_rate = self._feature(features, "referee_home_win_rate", self.LEAGUE_HOME_WIN_RATE)
        home_bias = self._feature(features, "referee_home_bias", 0.0)
        penalty_rate = self._feature(features, "referee_penalty_rate", 0.15)

        # A red card is weighted as four yellows for the card-heavy flag.
        is_card_heavy = (avg_yellow + avg_red * 4) > (self.LEAGUE_AVG_YELLOW + 1)
        is_high_scoring = avg_goals > self.LEAGUE_AVG_GOALS

        # Confidence grows with matches officiated, capped at 90.
        confidence = min(90.0, 30.0 + matches * 2)

        return RefereePrediction(
            referee_name=ref_name,
            matches_officiated=matches,
            avg_yellow_cards=avg_yellow,
            avg_red_cards=avg_red,
            is_card_heavy=is_card_heavy,
            avg_goals_per_match=avg_goals,
            over_25_rate=over25_rate,
            is_high_scoring=is_high_scoring,
            home_win_rate=home_win_rate,
            home_bias=home_bias,
            penalty_rate=penalty_rate,
            confidence=confidence
        )

    def get_modifiers(self, prediction: "RefereePrediction") -> Dict[str, float]:
        """
        Get modifiers to apply to other predictions based on referee profile.

        Returns:
            Dict with multiplicative ``home_modifier``, ``over_25_modifier``
            and ``cards_modifier`` values centred on 1.0.
        """
        return {
            # Home team gets slight boost if referee has home bias
            "home_modifier": 1.0 + (prediction.home_bias * 0.05),
            # O/U modifier
            "over_25_modifier": 1.0 + (prediction.avg_goals_per_match - self.LEAGUE_AVG_GOALS) * 0.1,
            # Card modifier for card markets
            "cards_modifier": 1.0 + (prediction.avg_yellow_cards - self.LEAGUE_AVG_YELLOW) * 0.05
        }
# Module-level cache for the lazily created engine
_engine: Optional[RefereePredictorEngine] = None


def get_referee_predictor() -> RefereePredictorEngine:
    """Return the shared RefereePredictorEngine, creating it on first call."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = RefereePredictorEngine()
    return _engine
if __name__ == "__main__":
    # Manual smoke test: look up one referee by name and print the result.
    engine = get_referee_predictor()
    print("\n🧪 Referee Predictor Engine Test")
    print("=" * 50)
    pred = engine.predict(referee_name="Cüneyt Çakır")
    print(f"\n📊 Prediction:")
    for key, value in pred.to_dict().items():
        print(f" {key}: {value}")
-286
View File
@@ -1,286 +0,0 @@
"""
Team Predictor Engine - V20 Ensemble Component
Combines ELO ratings, form stats, H2H records and team statistics.
Weight: 30% in ensemble
"""
import os
import sys
from typing import Dict, Optional, Tuple, Any
from dataclasses import dataclass, field
# Add parent to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from features.elo_system import get_elo_system
from features.h2h_engine import get_h2h_engine
from features.momentum_engine import get_momentum_engine, MomentumData
from features.team_stats_engine import get_team_stats_engine
@dataclass
class TeamPrediction:
    """Result container returned by the team engine.

    Probability fields are scaled by 100 when exported through ``to_dict``;
    ``raw_features`` is not part of that export.
    """

    home_win_prob: float = 0.33
    draw_prob: float = 0.33
    away_win_prob: float = 0.33
    home_xg: float = 1.3
    away_xg: float = 1.1
    form_advantage: float = 0.0  # -1 to +1, positive favours the home side
    h2h_advantage: float = 0.0  # -1 to +1
    elo_diff: float = 0.0
    confidence: float = 0.0
    raw_features: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return a plain dict with rounded values (probabilities as percentages)."""
        payload = {
            name: round(getattr(self, name) * 100, 1)
            for name in ("home_win_prob", "draw_prob", "away_win_prob")
        }
        for name in ("home_xg", "away_xg", "form_advantage", "h2h_advantage"):
            payload[name] = round(getattr(self, name), 2)
        payload["elo_diff"] = round(self.elo_diff, 0)
        payload["confidence"] = round(self.confidence, 1)
        return payload
class TeamPredictorEngine:
    """
    Team-based prediction engine.

    Uses:
    - ELO Rating System (venue-adjusted, league-weighted)
    - H2H Engine (head-to-head history)
    - Momentum Engine (recent form)
    - Team Stats Engine (possession, shots, corners)
    """

    def __init__(self):
        self.elo_system = get_elo_system()
        self.h2h_engine = get_h2h_engine()
        self.momentum_engine = get_momentum_engine()
        self.team_stats_engine = get_team_stats_engine()
        print("✅ TeamPredictorEngine initialized")

    @staticmethod
    def _combine_1x2(elo_home: float, elo_away: float,
                     h2h_home_rate: float, h2h_away_rate: float,
                     form_diff: float) -> Tuple[float, float, float]:
        """Blend ELO (60%), H2H (20%) and form (20%) into 1X2 probabilities.

        The draw is the remainder after home and away; it is clamped at 0
        so inconsistent inputs cannot produce a negative probability, and
        the three values are then rescaled to sum to 1.
        """
        final_home = elo_home * 0.6 + h2h_home_rate * 0.2 + (0.5 + form_diff * 0.3) * 0.2
        final_away = elo_away * 0.6 + h2h_away_rate * 0.2 + (0.5 - form_diff * 0.3) * 0.2
        final_draw = max(0.0, 1.0 - final_home - final_away)

        total = final_home + final_draw + final_away
        if total > 0:
            final_home /= total
            final_draw /= total
            final_away /= total
        return final_home, final_draw, final_away

    @staticmethod
    def _estimate_xg(home_stats, away_stats, home_mom_data, away_mom_data,
                     form_diff: float, h2h_avg_goals: float) -> Tuple[float, float]:
        """Estimate (home_xg, away_xg) from team stats, form and H2H scoring.

        The result is clamped to 0.5-3.5 for the home side and 0.3-3.0 for
        the away side.
        """
        home_conversion = home_stats.get("shot_conversion_rate", 0.1)
        away_conversion = away_stats.get("shot_conversion_rate", 0.1)
        base_home_xg = 1.35 + (home_conversion * 3.0)
        base_away_xg = 1.10 + (away_conversion * 2.5)

        # The opponent's own shot accuracy is used as the defence proxy.
        away_def_weakness = away_stats.get("shot_accuracy", 0.35)
        home_def_weakness = home_stats.get("shot_accuracy", 0.35)
        home_xg = base_home_xg * (1 + form_diff * 0.15) * (0.8 + away_def_weakness * 0.6)
        away_xg = base_away_xg * (1 - form_diff * 0.15) * (0.8 + home_def_weakness * 0.6)

        # xG underperformance penalty: subtract half of it, at most 0.5.
        home_under = getattr(home_mom_data, 'xg_underperformance', 0)
        if home_under > 0.2:
            home_xg -= min(0.5, home_under * 0.5)
        away_under = getattr(away_mom_data, 'xg_underperformance', 0)
        if away_under > 0.2:
            away_xg -= min(0.5, away_under * 0.5)

        # H2H adjustment (more conservative)
        if h2h_avg_goals > 3.0:
            home_xg *= 1.05
            away_xg *= 1.05
        elif h2h_avg_goals < 2.0:
            home_xg *= 0.95
            away_xg *= 0.95

        # Clamp xG to reasonable range
        home_xg = max(0.5, min(3.5, home_xg))
        away_xg = max(0.3, min(3.0, away_xg))
        return home_xg, away_xg

    @staticmethod
    def _confidence(elo_home: float, elo_draw: float, elo_away: float,
                    h2h_home_rate: float, h2h_away_rate: float,
                    form_diff: float, max_prob: float) -> float:
        """Scale the top probability by how many of ELO, H2H and form agree."""
        elo_winner = "H" if elo_home > max(elo_draw, elo_away) else ("A" if elo_away > elo_draw else "D")
        h2h_winner = "H" if h2h_home_rate > h2h_away_rate else "A"
        form_winner = "H" if form_diff > 0.1 else ("A" if form_diff < -0.1 else "D")
        agreement = sum([
            elo_winner == h2h_winner,
            elo_winner == form_winner,
            h2h_winner == form_winner
        ])
        return max_prob * 100 * (0.7 + agreement * 0.1)

    def predict(self,
                home_team_id: str,
                away_team_id: str,
                match_date_ms: int,
                home_team_name: str = "",
                away_team_name: str = "") -> "TeamPrediction":
        """
        Generate team-based prediction.

        Args:
            home_team_id: Home team ID
            away_team_id: Away team ID
            match_date_ms: Match date in milliseconds
            home_team_name: Home team name (accepted but not read here)
            away_team_name: Away team name (accepted but not read here)

        Returns:
            TeamPrediction with 1X2 probabilities and xG
        """
        # 1. ELO predictions
        elo_pred = self.elo_system.predict_match(home_team_id, away_team_id)
        elo_features = self.elo_system.get_match_features(home_team_id, away_team_id)

        # 2. H2H features, with neutral fallbacks on failure
        try:
            h2h_features = self.h2h_engine.get_features(
                home_team_id, away_team_id, match_date_ms
            )
        except Exception as e:
            # Logged like the momentum failure below instead of being dropped.
            print(f"⚠️ H2HEngine error: {e}")
            h2h_features = {
                "h2h_home_win_rate": 0.5,
                "h2h_away_win_rate": 0.5,
                "h2h_avg_goals": 2.5,
                "h2h_btts_rate": 0.5
            }

        # 3. Momentum/Form: momentum_score is mapped to a form score via (x + 1) / 2
        try:
            home_mom_data = self.momentum_engine.calculate_momentum(home_team_id, match_date_ms)
            away_mom_data = self.momentum_engine.calculate_momentum(away_team_id, match_date_ms)
            home_form_score = (home_mom_data.momentum_score + 1) / 2
            away_form_score = (away_mom_data.momentum_score + 1) / 2
        except Exception as e:
            print(f"⚠️ MomentumEngine error: {e}")
            home_mom_data = MomentumData()
            away_mom_data = MomentumData()
            home_form_score = 0.5
            away_form_score = 0.5
        form_diff = home_form_score - away_form_score

        # 4. Team stats
        home_stats = self.team_stats_engine.get_features(home_team_id, match_date_ms)
        away_stats = self.team_stats_engine.get_features(away_team_id, match_date_ms)

        # 5. Combine 1X2 probabilities
        elo_home = elo_pred.get("home_win_prob", 0.33)
        elo_draw = elo_pred.get("draw_prob", 0.33)
        elo_away = elo_pred.get("away_win_prob", 0.33)
        h2h_home_rate = h2h_features.get("h2h_home_win_rate", 0.5)
        h2h_away_rate = h2h_features.get("h2h_away_win_rate", 0.5)
        final_home, final_draw, final_away = self._combine_1x2(
            elo_home, elo_away, h2h_home_rate, h2h_away_rate, form_diff
        )

        # 6. Expected goals
        home_xg, away_xg = self._estimate_xg(
            home_stats, away_stats, home_mom_data, away_mom_data,
            form_diff, h2h_features.get("h2h_avg_goals", 2.5)
        )

        # 7. Confidence
        confidence = self._confidence(
            elo_home, elo_draw, elo_away, h2h_home_rate, h2h_away_rate,
            form_diff, max(final_home, final_draw, final_away)
        )

        # 8. Raw features for XGBoost
        raw_features = {
            **elo_features,
            # Form features; several are proxies or fixed placeholders.
            "home_goals_avg": 1.5 + home_mom_data.goals_trend,  # Proxy
            "home_conceded_avg": 1.5 - home_mom_data.conceded_trend,  # Proxy
            "away_goals_avg": 1.5 + away_mom_data.goals_trend,
            "away_conceded_avg": 1.5 - away_mom_data.conceded_trend,
            "home_clean_sheet_rate": 0.2,  # Not in new MomentumData
            "away_clean_sheet_rate": 0.2,
            "home_scoring_rate": 0.8,
            "away_scoring_rate": 0.8,
            "home_winning_streak": home_mom_data.winning_streak,
            "away_winning_streak": away_mom_data.winning_streak,
            "home_unbeaten_streak": home_mom_data.unbeaten_streak,
            "away_unbeaten_streak": away_mom_data.unbeaten_streak,
            # H2H Features
            **h2h_features,
            # Team Stats
            "home_avg_possession": home_stats.get("avg_possession", 0.5),
            "away_avg_possession": away_stats.get("avg_possession", 0.5),
            "home_avg_shots_on_target": home_stats.get("avg_shots_on_target", 3.5),
            "away_avg_shots_on_target": away_stats.get("avg_shots_on_target", 3.5),
            "home_shot_conversion": home_stats.get("shot_conversion_rate", 0.1),
            "away_shot_conversion": away_stats.get("shot_conversion_rate", 0.1),
            "home_avg_corners": home_stats.get("avg_corners", 4.5),
            "away_avg_corners": away_stats.get("avg_corners", 4.5),
            # Derived
            "home_xga": 1.5 - home_mom_data.conceded_trend,  # reusing as proxy
            "away_xga": 1.5 - away_mom_data.conceded_trend
        }

        return TeamPrediction(
            home_win_prob=final_home,
            draw_prob=final_draw,
            away_win_prob=final_away,
            home_xg=home_xg,
            away_xg=away_xg,
            form_advantage=form_diff,
            h2h_advantage=h2h_home_rate - h2h_away_rate,
            elo_diff=elo_features.get("elo_diff", 0),
            confidence=confidence,
            raw_features=raw_features
        )
# Module-level cache for the lazily created engine
_engine: Optional[TeamPredictorEngine] = None


def get_team_predictor() -> TeamPredictorEngine:
    """Return the shared TeamPredictorEngine, creating it on first call."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = TeamPredictorEngine()
    return _engine
if __name__ == "__main__":
    # Manual smoke test: run one prediction with placeholder team ids.
    sample_match = {
        "home_team_id": "test_home",
        "away_team_id": "test_away",
        "match_date_ms": 1707393600000,
    }
    engine = get_team_predictor()
    print("\n🧪 Team Predictor Engine Test")
    print("=" * 50)
    pred = engine.predict(**sample_match)
    print(f"\n📊 Prediction:")
    for key, value in pred.to_dict().items():
        print(f" {key}: {value}")
-4
View File
@@ -15,13 +15,9 @@ Orijinal Faktörler:
- Tarihsel upset pattern
"""
import os
import sys
from typing import Dict, Any, Optional, Tuple, List
from dataclasses import dataclass, field
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
try:
import psycopg2
from psycopg2.extras import RealDictCursor
+46 -18
View File
@@ -21,6 +21,7 @@ except ImportError:
HAS_BASKETBALL = False
from services.single_match_orchestrator import get_single_match_orchestrator
from services.v26_shadow_engine import get_v26_shadow_engine
from models.league_model import get_league_model_loader
load_dotenv()
@@ -123,7 +124,15 @@ def health_check() -> dict[str, Any]:
try:
orchestrator = get_single_match_orchestrator()
shadow_engine = get_v26_shadow_engine()
# Per-market V25 model status
v25_readiness: dict[str, Any] = {"fully_loaded": False}
try:
v25_predictor = orchestrator._get_v25_predictor()
v25_readiness = v25_predictor.readiness_summary()
except Exception as v25_err:
v25_readiness = {"fully_loaded": False, "error": str(v25_err)}
if HAS_BASKETBALL:
basketball_predictor = get_basketball_v25_predictor()
basketball_readiness = basketball_predictor.readiness_summary()
@@ -131,35 +140,52 @@ def health_check() -> dict[str, Any]:
else:
basketball_readiness = {"fully_loaded": False, "error": "Basketball module not found"}
ready = True
league_readiness = get_league_model_loader().readiness_summary()
overall_ready = ready and v25_readiness.get("fully_loaded", False)
return {
"status": "healthy" if ready else "degraded",
"status": "healthy" if overall_ready else "degraded",
"engine": "v28.main",
"mode": os.getenv("AI_ENGINE_MODE", "v28"),
"ready": ready,
"ready": overall_ready,
"v25_football": v25_readiness,
"league_specific": league_readiness,
"basketball_v25": basketball_readiness,
"v26_shadow": shadow_engine.readiness_summary(),
"prediction_service_ready": True,
"model_loaded": ready,
"model_loaded": overall_ready,
"orchestrator_mode": getattr(orchestrator, "engine_mode", "v28"),
}
except Exception as error:
return {"status": "unhealthy", "ready": False, "error": str(error)}
_REQUIRED_RESPONSE_FIELDS = ("match_info", "market_board", "main_pick", "bet_summary", "data_quality")
@app.post("/v20plus/analyze/{match_id}")
async def analyze_match_v20plus(match_id: str) -> dict[str, Any]:
    """Run the single-match analysis for ``match_id`` and attach its timing.

    The orchestrator call is blocking, so it is executed exactly once in a
    worker thread; the event loop stays free while it runs.

    Returns:
        The orchestrator's result dict with an extra ``timing_ms`` entry.

    Raises:
        HTTPException: 404 when the orchestrator returns a falsy result.
    """
    started_at = time.time()
    orchestrator = get_single_match_orchestrator()
    # Single invocation, off the event loop. A leftover synchronous call to
    # orchestrator.analyze_match() used to precede this line, which ran the
    # whole analysis twice and blocked the loop for the first run.
    result = await asyncio.to_thread(orchestrator.analyze_match, match_id)
    elapsed_ms = int((time.time() - started_at) * 1000)

    if not result:
        raise HTTPException(status_code=404, detail=f"Match not found: {match_id}")

    # Response validation: log missing required fields (non-fatal)
    missing_fields = [f for f in _REQUIRED_RESPONSE_FIELDS if f not in result]
    if missing_fields:
        print(f"⚠️ [API] analyze/{match_id} response missing fields: {missing_fields} ({elapsed_ms}ms)")

    result["timing_ms"] = elapsed_ms
    return result
@app.get("/v20plus/analyze-htms/{match_id}")
async def analyze_match_htms_v20plus(match_id: str) -> dict[str, Any]:
    """Return the orchestrator's HT/MS analysis for ``match_id``.

    The blocking orchestrator call runs exactly once, in a worker thread.

    Raises:
        HTTPException: 404 when the orchestrator returns a falsy result.
    """
    orchestrator = get_single_match_orchestrator()
    # Single invocation, off the event loop. The earlier synchronous call to
    # orchestrator.analyze_match_htms() duplicated the work and blocked the loop.
    result = await asyncio.to_thread(orchestrator.analyze_match_htms, match_id)
    if not result:
        raise HTTPException(status_code=404, detail=f"Match not found: {match_id}")
    return result
@@ -230,11 +256,12 @@ async def analyze_match_htft_v20plus(match_id: str, timeout_sec: int = 30) -> di
@app.post("/v20plus/coupon")
async def generate_coupon_v20plus(request: CouponRequest) -> dict[str, Any]:
orchestrator = get_single_match_orchestrator()
return orchestrator.build_coupon(
match_ids=request.match_ids,
strategy=request.strategy or "BALANCED",
max_matches=request.max_matches,
min_confidence=request.min_confidence,
return await asyncio.to_thread(
orchestrator.build_coupon,
request.match_ids,
request.strategy or "BALANCED",
request.max_matches,
request.min_confidence,
)
@@ -244,7 +271,7 @@ async def get_daily_banker_v20plus(count: int = 3) -> dict[str, Any]:
raise HTTPException(status_code=400, detail="count must be >= 1")
orchestrator = get_single_match_orchestrator()
bankers = orchestrator.get_daily_bankers(count=count)
bankers = await asyncio.to_thread(orchestrator.get_daily_bankers, count)
return {"count": len(bankers), "bankers": bankers}
@app.get("/v20plus/reversal-watchlist")
@@ -262,11 +289,12 @@ async def get_reversal_watchlist_v20plus(
raise HTTPException(status_code=400, detail="min_score must be between 0 and 100")
orchestrator = get_single_match_orchestrator()
return orchestrator.get_reversal_watchlist(
count=count,
horizon_hours=horizon_hours,
min_score=min_score,
top_leagues_only=top_leagues_only,
return await asyncio.to_thread(
orchestrator.get_reversal_watchlist,
count,
horizon_hours,
min_score,
top_leagues_only,
)
+6
View File
@@ -46,6 +46,9 @@ SUPPORTED_MARKETS = [
"ht_ft", # Half-Time/Full-Time
"dc", # Double Chance
"ht", # Half-Time Result
"ht_home", # Half-Time Home win
"ht_draw", # Half-Time Draw
"ht_away", # Half-Time Away win
]
@@ -111,6 +114,9 @@ class Calibrator:
"ht_ft": 0.92,
"dc": 0.97,
"ht": 0.92,
"ht_home": 0.92,
"ht_draw": 0.92,
"ht_away": 0.92,
}
self._load_calibrators()
+191
View File
@@ -0,0 +1,191 @@
"""
League-Specific Model Loader
=============================
Loads per-league XGBoost models + isotonic calibrators trained by
scripts/train_league_models.py and provides a unified prediction interface.
Falls back to general V25 for any market/league without a dedicated model.
"""
import os
import json
import pickle
from functools import lru_cache
from typing import Dict, Optional, Tuple
import numpy as np
import pandas as pd
import xgboost as xgb
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
LEAGUE_MODEL_DIR = os.path.join(AI_ENGINE_DIR, "models", "league_specific")
# Market file name → (num_class, label_list)
MARKET_META: Dict[str, Tuple[int, list]] = {
"ms": (3, ["1", "X", "2"]),
"ou15": (2, ["Over", "Under"]),
"ou25": (2, ["Over", "Under"]),
"ou35": (2, ["Over", "Under"]),
"btts": (2, ["Yes", "No"]),
"ht": (3, ["1", "X", "2"]),
"ht_ou05": (2, ["Over", "Under"]),
"ht_ou15": (2, ["Over", "Under"]),
"htft": (9, ["1/1","1/X","1/2","X/1","X/X","X/2","2/1","2/X","2/2"]),
"oe": (2, ["Odd", "Even"]),
"cards": (2, ["Over", "Under"]),
"handicap": (3, ["1", "X", "2"]),
}
# Signal key map (file key → uppercase signal key used in _get_v25_signal)
FILE_TO_SIGNAL = {
"ms": "MS", "ou15": "OU15", "ou25": "OU25", "ou35": "OU35",
"btts": "BTTS", "ht": "HT", "ht_ou05": "HT_OU05", "ht_ou15": "HT_OU15",
"htft": "HTFT", "oe": "OE", "cards": "CARDS", "handicap": "HCAP",
}
class LeagueModel:
    """Holds XGBoost models + isotonic calibrators for one league.

    Artifacts are read from ``<LEAGUE_MODEL_DIR>/<league_id>/``:

    * ``feature_cols.json`` (optional) - ordered feature column names
    * ``xgb_<market>.json`` - one booster per key of ``MARKET_META``
    * ``cal_<key>.pkl`` - pickled calibrators; ``<key>`` is the market key for
      two-class markets and ``<market>_<class index>`` for multi-class ones
    """

    def __init__(self, league_id: str):
        self.league_id = league_id
        self.league_dir = os.path.join(LEAGUE_MODEL_DIR, league_id)
        self.models: Dict[str, xgb.Booster] = {}  # market_key → booster
        self.calibrators: Dict[str, object] = {}  # cal_key → isotonic
        self.feature_cols: Optional[list] = None
        self._loaded = False

    def load(self) -> bool:
        """Load every artifact found in the league directory.

        Returns:
            True when at least one booster or calibrator was loaded; False when
            the directory is missing, empty of artifacts, or loading raised.
        """
        if not os.path.isdir(self.league_dir):
            return False
        try:
            fc_path = os.path.join(self.league_dir, "feature_cols.json")
            if os.path.exists(fc_path):
                # Explicit encoding: do not depend on the platform locale.
                with open(fc_path, encoding="utf-8") as f:
                    self.feature_cols = json.load(f)

            for mkey in MARKET_META:
                xgb_path = os.path.join(self.league_dir, f"xgb_{mkey}.json")
                # Files of 100 bytes or less are treated as placeholders and skipped.
                if os.path.exists(xgb_path) and os.path.getsize(xgb_path) > 100:
                    b = xgb.Booster()
                    b.load_model(xgb_path)
                    self.models[mkey] = b

            for fname in os.listdir(self.league_dir):
                if fname.startswith("cal_") and fname.endswith(".pkl"):
                    cal_key = fname[4:-4]  # strip cal_ and .pkl
                    # SECURITY: pickle.load can execute arbitrary code. This is
                    # only safe while the model directory holds files produced
                    # by our own training scripts.
                    with open(os.path.join(self.league_dir, fname), "rb") as f:
                        self.calibrators[cal_key] = pickle.load(f)

            self._loaded = bool(self.models or self.calibrators)
            return self._loaded
        except Exception as e:
            print(f"[LeagueModel] Load failed for {self.league_id}: {e}")
            return False

    def has_market(self, mkey: str) -> bool:
        """Return True when a booster for ``mkey`` has been loaded."""
        return mkey in self.models

    def _calibrate_multiclass(self, mkey: str, labels: list, class_probs) -> Dict[str, float]:
        """Calibrate per-class probabilities and renormalise them.

        Each class ``i`` that has a ``"<mkey>_<i>"`` calibrator is replaced by
        its calibrated value clamped to [0.01, 0.99]; the others keep their raw
        value. When at least one class was calibrated, the result is divided by
        the sum over ALL classes so it sums to 1. (Dividing by the sum of only
        the calibrated classes, as before, inflated every probability whenever
        a calibrator file was missing for some class.)
        """
        probs: Dict[str, float] = {}
        calibrated_any = False
        for i, label in enumerate(labels):
            p = float(class_probs[i])
            cal_key = f"{mkey}_{i}"
            if cal_key in self.calibrators:
                p_cal = float(self.calibrators[cal_key].predict([class_probs[i]])[0])
                p = max(0.01, min(0.99, p_cal))
                calibrated_any = True
            probs[label] = p

        if not calibrated_any:
            # Nothing was changed: return the raw model output untouched.
            return probs

        total = sum(probs.values())
        if total > 0:
            probs = {k: v / total for k, v in probs.items()}
        return probs

    def predict_market(
        self,
        mkey: str,
        feature_row: Dict[str, float],
    ) -> Optional[Dict[str, float]]:
        """
        Predict one market using league-specific XGBoost + isotonic calibration.
        Returns {label: prob} dict or None if no model available.

        Features missing from ``feature_row`` default to 0.0. Any failure while
        predicting is logged and reported as None.
        """
        if mkey not in self.models:
            return None

        num_class, labels = MARKET_META[mkey]
        fc = self.feature_cols
        if fc is None:
            # Fallback to whatever the booster expects (it knows its feature names)
            fc = list(self.models[mkey].feature_names or [])

        try:
            X = pd.DataFrame([{col: feature_row.get(col, 0.0) for col in fc}])
            dmat = xgb.DMatrix(X)
            raw = self.models[mkey].predict(dmat)

            if num_class > 2:
                probs_arr = raw.reshape(-1, num_class)[0]
                probs = self._calibrate_multiclass(mkey, labels, probs_arr)
            else:
                # The single output is used as the probability of labels[0].
                p = float(raw[0])
                cal_key = mkey
                if cal_key in self.calibrators:
                    p = float(self.calibrators[cal_key].predict([p])[0])
                p = max(0.01, min(0.99, p))
                probs = {labels[0]: p, labels[1]: 1.0 - p}

            return probs
        except Exception as e:
            print(f"[LeagueModel] predict_market({mkey}) failed for {self.league_id}: {e}")
            return None
class LeagueModelLoader:
    """
    In-memory cache of LeagueModel objects, keyed by league id.

    A league whose load fails is cached as None, so it is not retried on every
    lookup. Once ``max_cached`` entries exist, the entry that was inserted
    first is dropped to make room (cache hits do not refresh an entry).

    NOTE(review): no lock guards the cache - confirm that callers do not
    invoke get() concurrently from several worker threads.
    """

    def __init__(self, max_cached: int = 80):
        self._cache: Dict[str, Optional[LeagueModel]] = {}
        self._max_cached = max_cached

    def get(self, league_id: str) -> Optional[LeagueModel]:
        """Return loaded LeagueModel for this league, or None if unavailable."""
        try:
            return self._cache[league_id]
        except KeyError:
            pass

        # Cache full: drop the entry that was inserted first.
        if len(self._cache) >= self._max_cached:
            self._cache.pop(next(iter(self._cache)))

        model = LeagueModel(league_id)
        if not model.load():
            # Remember the miss so the directory is not probed again.
            self._cache[league_id] = None
            return None

        self._cache[league_id] = model
        n_models = len(model.models)
        n_cals = len(model.calibrators)
        print(f"[LeagueModel] Loaded {league_id}: {n_models} XGB models, {n_cals} calibrators")
        return model

    def available_leagues(self) -> list:
        """List the sub-directory names found under LEAGUE_MODEL_DIR."""
        if not os.path.isdir(LEAGUE_MODEL_DIR):
            return []
        found = []
        for entry in os.listdir(LEAGUE_MODEL_DIR):
            if os.path.isdir(os.path.join(LEAGUE_MODEL_DIR, entry)):
                found.append(entry)
        return found

    def readiness_summary(self) -> dict:
        """Report the model directory, league count on disk and loaded cache size."""
        on_disk = self.available_leagues()
        loaded_in_cache = sum(1 for cached in self._cache.values() if cached is not None)
        return {
            "league_specific_dir": LEAGUE_MODEL_DIR,
            "available_leagues": len(on_disk),
            "cached": loaded_in_cache,
        }
# ── Singleton ──────────────────────────────────────────────────────
_loader: Optional[LeagueModelLoader] = None
def get_league_model_loader() -> LeagueModelLoader:
    """Return the shared LeagueModelLoader, creating it on first call."""
    global _loader
    if _loader is not None:
        return _loader
    _loader = LeagueModelLoader()
    return _loader
File diff suppressed because it is too large Load Diff
+78 -18
View File
@@ -20,6 +20,13 @@ from dataclasses import dataclass, field
import xgboost as xgb
import lightgbm as lgb
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
try:
from config.config_loader import get_config as _get_cfg
except ImportError:
_get_cfg = None # type: ignore[assignment]
# CatBoost is optional
try:
from catboost import CatBoostClassifier
@@ -228,7 +235,7 @@ class V25Predictor:
print(f"[V25] Using fallback feature columns ({len(V25Predictor._FALLBACK_FEATURE_COLS)} features)")
return V25Predictor._FALLBACK_FEATURE_COLS
# Model weights for ensemble
# Model weights for ensemble (overridden from config in __init__)
DEFAULT_WEIGHTS = {
'xgb': 0.50,
'lgb': 0.50,
@@ -245,6 +252,16 @@ class V25Predictor:
self.models = {} # market -> {'xgb': model, 'lgb': model}
self._loaded = False
self.FEATURE_COLS = self._load_feature_cols()
# Load weights from config (falls back to class default 0.50/0.50)
if _get_cfg is not None:
try:
cfg = _get_cfg()
self.DEFAULT_WEIGHTS = {
'xgb': float(cfg.get('model_ensemble.xgb_weight', 0.50)),
'lgb': float(cfg.get('model_ensemble.lgb_weight', 0.50)),
}
except Exception:
pass # keep class-level defaults
# All trained market models available in V25
ALL_MARKETS = [
@@ -275,21 +292,34 @@ class V25Predictor:
xgb_content = f.read()
booster = xgb.Booster()
booster.load_model(bytearray(xgb_content, 'utf-8'))
self.models[market]['xgb'] = booster
loaded_count += 1
# Corruption detection: verify model can run a dummy prediction
try:
_dummy = pd.DataFrame([{col: 0.0 for col in self.FEATURE_COLS}])
booster.predict(xgb.DMatrix(_dummy))
self.models[market]['xgb'] = booster
loaded_count += 1
except Exception as _ce:
print(f"[V25] ⚠️ XGB model for {market} failed integrity check: {_ce} — skipping")
# Load LightGBM (read content in Python to avoid non-ASCII path issues)
lgb_path = os.path.join(self.models_dir, f'lgb_v25_{market}.txt')
if os.path.exists(lgb_path) and os.path.getsize(lgb_path) > 0:
with open(lgb_path, 'r', encoding='utf-8') as f:
model_str = f.read()
self.models[market]['lgb'] = lgb.Booster(model_str=model_str)
loaded_count += 1
lgb_model = lgb.Booster(model_str=model_str)
# Corruption detection: verify model can run a dummy prediction
try:
_dummy = pd.DataFrame([{col: 0.0 for col in self.FEATURE_COLS}])
lgb_model.predict(_dummy)
self.models[market]['lgb'] = lgb_model
loaded_count += 1
except Exception as _ce:
print(f"[V25] ⚠️ LGB model for {market} failed integrity check: {_ce} — skipping")
# Remove empty entries
if not self.models[market]:
del self.models[market]
print(f"[V25] Loaded {loaded_count} model files across {len(self.models)} markets: {list(self.models.keys())}")
self._loaded = loaded_count > 0
return self._loaded
@@ -305,7 +335,27 @@ class V25Predictor:
if not self._loaded:
if not self.load_models():
raise RuntimeError("Failed to load V25 models")
def readiness_summary(self) -> Dict[str, Any]:
    """Return per-market model status for health check endpoint.

    Triggers ``load_models()`` first when nothing has been loaded yet. A
    market counts as ready when it has a non-empty entry in ``self.models``.
    """
    if not self._loaded:
        self.load_models()

    ready_flags = {
        market: bool(self.models.get(market, {}))
        for market in self.ALL_MARKETS
    }
    loaded_markets = [market for market, is_ready in ready_flags.items() if is_ready]
    missing_markets = [market for market in self.ALL_MARKETS if market not in loaded_markets]

    summary = {
        "fully_loaded": len(loaded_markets) == len(self.ALL_MARKETS),
        "loaded_markets": loaded_markets,
        "missing_markets": missing_markets,
        "weights": self.DEFAULT_WEIGHTS,
    }
    return summary
def _prepare_features(self, features: Dict[str, float]) -> pd.DataFrame:
"""Prepare feature vector for prediction."""
X = pd.DataFrame([{col: features.get(col, 0.0) for col in self.FEATURE_COLS}])
@@ -563,13 +613,23 @@ class V25Predictor:
) -> List[ValueBet]:
"""Detect value bets based on model vs market odds."""
value_bets = []
min_edge = 0.05 # 5% minimum edge
# Market-specific minimum edge thresholds
# MS: higher variance → require more edge
# OU/BTTS: binary markets → tighter edge acceptable
EDGE_THRESHOLDS = {
'MS': 0.06,
'OU25': 0.04,
'BTTS': 0.04,
}
ms_edge = EDGE_THRESHOLDS['MS']
ou_edge = EDGE_THRESHOLDS['OU25']
btts_edge = EDGE_THRESHOLDS['BTTS']
# MS value bets
if 'ms_h' in odds and odds['ms_h'] > 0:
implied = 1 / odds['ms_h']
edge = home_prob - implied
if edge > min_edge:
if edge > ms_edge:
value_bets.append(ValueBet(
market_type='MS',
pick='1',
@@ -582,7 +642,7 @@ class V25Predictor:
if 'ms_d' in odds and odds['ms_d'] > 0:
implied = 1 / odds['ms_d']
edge = draw_prob - implied
if edge > min_edge:
if edge > ms_edge:
value_bets.append(ValueBet(
market_type='MS',
pick='X',
@@ -595,7 +655,7 @@ class V25Predictor:
if 'ms_a' in odds and odds['ms_a'] > 0:
implied = 1 / odds['ms_a']
edge = away_prob - implied
if edge > min_edge:
if edge > ms_edge:
value_bets.append(ValueBet(
market_type='MS',
pick='2',
@@ -609,7 +669,7 @@ class V25Predictor:
if 'ou25_o' in odds and odds['ou25_o'] > 0:
implied = 1 / odds['ou25_o']
edge = over_prob - implied
if edge > min_edge:
if edge > ou_edge:
value_bets.append(ValueBet(
market_type='OU25',
pick='Over',
@@ -622,7 +682,7 @@ class V25Predictor:
if 'ou25_u' in odds and odds['ou25_u'] > 0:
implied = 1 / odds['ou25_u']
edge = under_prob - implied
if edge > min_edge:
if edge > ou_edge:
value_bets.append(ValueBet(
market_type='OU25',
pick='Under',
@@ -636,7 +696,7 @@ class V25Predictor:
if 'btts_y' in odds and odds['btts_y'] > 0:
implied = 1 / odds['btts_y']
edge = btts_yes_prob - implied
if edge > min_edge:
if edge > btts_edge:
value_bets.append(ValueBet(
market_type='BTTS',
pick='Yes',
@@ -649,7 +709,7 @@ class V25Predictor:
if 'btts_n' in odds and odds['btts_n'] > 0:
implied = 1 / odds['btts_n']
edge = btts_no_prob - implied
if edge > min_edge:
if edge > btts_edge:
value_bets.append(ValueBet(
market_type='BTTS',
pick='No',
+160
View File
@@ -0,0 +1,160 @@
{
"total_test": 23039,
"thresholds": {
"0.0": {
"n_matches": 22227,
"pct": 96.5,
"markets": {
"ms": {
"hit_rate": 0.5363,
"avg_roi": -0.0046,
"total_roi": -103.02
},
"ou15": {
"hit_rate": 0.7463,
"avg_roi": 0.0144,
"total_roi": 319.02
},
"ou25": {
"hit_rate": 0.6111,
"avg_roi": -0.006,
"total_roi": -134.41
},
"ou35": {
"hit_rate": 0.7302,
"avg_roi": -0.014,
"total_roi": -310.51
},
"btts": {
"hit_rate": 0.5848,
"avg_roi": 0.0031,
"total_roi": 69.5
}
}
},
"0.1": {
"n_matches": 23033,
"pct": 100.0,
"markets": {
"ms": {
"hit_rate": 0.546,
"avg_roi": -0.0045,
"total_roi": -104.38
},
"ou15": {
"hit_rate": 0.7533,
"avg_roi": 0.0145,
"total_roi": 335.02
},
"ou25": {
"hit_rate": 0.6193,
"avg_roi": -0.0042,
"total_roi": -96.97
},
"ou35": {
"hit_rate": 0.7277,
"avg_roi": -0.0147,
"total_roi": -338.57
},
"btts": {
"hit_rate": 0.5886,
"avg_roi": 0.0025,
"total_roi": 57.21
}
}
},
"0.2": {
"n_matches": 23034,
"pct": 100.0,
"markets": {
"ms": {
"hit_rate": 0.5459,
"avg_roi": -0.0046,
"total_roi": -105.38
},
"ou15": {
"hit_rate": 0.7533,
"avg_roi": 0.0146,
"total_roi": 335.26
},
"ou25": {
"hit_rate": 0.6193,
"avg_roi": -0.0043,
"total_roi": -97.97
},
"ou35": {
"hit_rate": 0.7276,
"avg_roi": -0.0147,
"total_roi": -339.57
},
"btts": {
"hit_rate": 0.5887,
"avg_roi": 0.0025,
"total_roi": 57.62
}
}
},
"0.3": {
"n_matches": 23039,
"pct": 100.0,
"markets": {
"ms": {
"hit_rate": 0.546,
"avg_roi": -0.0045,
"total_roi": -103.45
},
"ou15": {
"hit_rate": 0.7534,
"avg_roi": 0.0146,
"total_roi": 335.6
},
"ou25": {
"hit_rate": 0.6194,
"avg_roi": -0.0042,
"total_roi": -97.44
},
"ou35": {
"hit_rate": 0.7277,
"avg_roi": -0.0147,
"total_roi": -339.26
},
"btts": {
"hit_rate": 0.5887,
"avg_roi": 0.0025,
"total_roi": 58.61
}
}
},
"0.5": {
"n_matches": 23039,
"pct": 100.0,
"markets": {
"ms": {
"hit_rate": 0.546,
"avg_roi": -0.0045,
"total_roi": -103.45
},
"ou15": {
"hit_rate": 0.7534,
"avg_roi": 0.0146,
"total_roi": 335.6
},
"ou25": {
"hit_rate": 0.6194,
"avg_roi": -0.0042,
"total_roi": -97.44
},
"ou35": {
"hit_rate": 0.7277,
"avg_roi": -0.0147,
"total_roi": -339.26
},
"btts": {
"hit_rate": 0.5887,
"avg_roi": 0.0025,
"total_roi": 58.61
}
}
}
}
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,5 @@
[
{
"market": "MS-Ev",
"min_edge": 0.02,
"n":
+267
View File
@@ -0,0 +1,267 @@
{
"generated_at": "2026-05-15T21:40:57.995899",
"matches_processed": 3000,
"matches_skipped": 0,
"markets": {
"MS": {
"overall_accuracy": 54.97,
"total_matches": 3000,
"by_confidence_band": {
"<50%": {
"accuracy": 38.87,
"count": 759,
"mean_confidence": 45.58
},
"50-65%": {
"accuracy": 52.62,
"count": 1300,
"mean_confidence": 57.19
},
"65-75%": {
"accuracy": 66.99,
"count": 624,
"mean_confidence": 69.49
},
"75%+": {
"accuracy": 79.5,
"count": 317,
"mean_confidence": 80.69
}
},
"by_league": {
"Bundesliga": {
"accuracy": 46.77,
"count": 62
},
"Ligue 1": {
"accuracy": 58.73,
"count": 63
},
"Serie A": {
"accuracy": 56.25,
"count": 64
},
"Other": {
"accuracy": 55.03,
"count": 2811
}
},
"by_pick_direction": {
"1": {
"accuracy": 58.38,
"count": 1946,
"mean_confidence": 60.84
},
"2": {
"accuracy": 48.72,
"count": 1053,
"mean_confidence": 56.44
},
"X": {
"accuracy": 0.0,
"count": 1,
"mean_confidence": 56.07
}
}
},
"OU15": {
"overall_accuracy": 74.4,
"total_matches": 3000,
"by_confidence_band": {
"50-65%": {
"accuracy": 70.97,
"count": 62,
"mean_confidence": 59.63
},
"65-75%": {
"accuracy": 68.0,
"count": 275,
"mean_confidence": 71.1
},
"75%+": {
"accuracy": 75.14,
"count": 2663,
"mean_confidence": 89.44
}
},
"by_league": {
"Bundesliga": {
"accuracy": 67.74,
"count": 62
},
"Ligue 1": {
"accuracy": 76.19,
"count": 63
},
"Serie A": {
"accuracy": 70.31,
"count": 64
},
"Other": {
"accuracy": 74.6,
"count": 2811
}
},
"by_pick_direction": {
"Over": {
"accuracy": 74.4,
"count": 3000,
"mean_confidence": 87.14
}
}
},
"OU25": {
"overall_accuracy": 51.77,
"total_matches": 3000,
"by_confidence_band": {
"50-65%": {
"accuracy": 49.33,
"count": 1267,
"mean_confidence": 57.13
},
"65-75%": {
"accuracy": 54.53,
"count": 453,
"mean_confidence": 69.42
},
"75%+": {
"accuracy": 53.2,
"count": 1280,
"mean_confidence": 90.2
}
},
"by_league": {
"Bundesliga": {
"accuracy": 41.94,
"count": 62
},
"Ligue 1": {
"accuracy": 50.79,
"count": 63
},
"Serie A": {
"accuracy": 43.75,
"count": 64
},
"Other": {
"accuracy": 52.19,
"count": 2811
}
},
"by_pick_direction": {
"Over": {
"accuracy": 51.03,
"count": 2432,
"mean_confidence": 76.11
},
"Under": {
"accuracy": 54.93,
"count": 568,
"mean_confidence": 60.17
}
}
},
"BTTS": {
"overall_accuracy": 51.83,
"total_matches": 3000,
"by_confidence_band": {
"50-65%": {
"accuracy": 48.74,
"count": 2214,
"mean_confidence": 58.66
},
"65-75%": {
"accuracy": 60.42,
"count": 758,
"mean_confidence": 68.19
},
"75%+": {
"accuracy": 64.29,
"count": 28,
"mean_confidence": 77.44
}
},
"by_league": {
"Bundesliga": {
"accuracy": 54.84,
"count": 62
},
"Ligue 1": {
"accuracy": 50.79,
"count": 63
},
"Serie A": {
"accuracy": 57.81,
"count": 64
},
"Other": {
"accuracy": 51.65,
"count": 2811
}
},
"by_pick_direction": {
"No": {
"accuracy": 50.26,
"count": 2099,
"mean_confidence": 61.56
},
"Yes": {
"accuracy": 55.49,
"count": 901,
"mean_confidence": 60.51
}
}
}
},
"calibration": {
"ms_home": {
"brier_score": 0.2054,
"calibration_error": 0.0,
"sample_count": 3000,
"last_trained": "2026-05-15T21:40:58.026574",
"mean_predicted": 0.4942,
"mean_actual": 0.46
},
"ms_draw": {
"brier_score": 0.1846,
"calibration_error": 0.0,
"sample_count": 3000,
"last_trained": "2026-05-15T21:40:58.030886",
"mean_predicted": 0.149,
"mean_actual": 0.2493
},
"ms_away": {
"brier_score": 0.1726,
"calibration_error": 0.0,
"sample_count": 3000,
"last_trained": "2026-05-15T21:40:58.033980",
"mean_predicted": 0.3567,
"mean_actual": 0.2907
},
"ou15": {
"brier_score": 0.1884,
"calibration_error": 0.0,
"sample_count": 3000,
"last_trained": "2026-05-15T21:40:58.037204",
"mean_predicted": 0.8714,
"mean_actual": 0.744
},
"ou25": {
"brier_score": 0.247,
"calibration_error": 0.0,
"sample_count": 3000,
"last_trained": "2026-05-15T21:40:58.041152",
"mean_predicted": 0.6924,
"mean_actual": 0.499
},
"btts": {
"brier_score": 0.2453,
"calibration_error": 0.0,
"sample_count": 3000,
"last_trained": "2026-05-15T21:40:58.044344",
"mean_predicted": 0.4506,
"mean_actual": 0.5147
}
},
"runtime_seconds": 94.1
}
File diff suppressed because it is too large Load Diff
+40
View File
@@ -0,0 +1,40 @@
"""
MatchData dataclass — core data transfer object used throughout the engine.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
@dataclass
class MatchData:
    """Core data transfer object describing one match and its inputs."""

    # --- identity ---
    match_id: str
    home_team_id: str
    away_team_id: str
    home_team_name: str
    away_team_name: str
    match_date_ms: int
    sport: str

    # --- competition context ---
    league_id: Optional[str]
    league_name: str
    referee_name: Optional[str]

    # --- market odds ---
    odds_data: Dict[str, float]

    # --- squads ---
    home_lineup: Optional[List[str]]
    away_lineup: Optional[List[str]]
    sidelined_data: Optional[Dict[str, Any]]

    # --- scoring averages and table positions ---
    home_goals_avg: float
    home_conceded_avg: float
    away_goals_avg: float
    away_conceded_avg: float
    home_position: int
    away_position: int

    lineup_source: str

    # --- optional fields (defaults below) ---
    status: str = ""
    state: Optional[str] = None
    substate: Optional[str] = None
    current_score_home: Optional[int] = None
    current_score_away: Optional[int] = None
    lineup_confidence: float = 0.0
    source_table: str = "matches"
+292
View File
@@ -0,0 +1,292 @@
"""
Shared prediction dataclasses used across the AI engine.
These were originally defined in models/v20_ensemble.py and are extracted here
so they can be used without importing the full V20 ensemble.
"""
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
from core.calculators.score_calculator import ScorePrediction
@dataclass
class MarketPrediction:
    """Prediction for a single betting market."""

    market_type: str
    pick: str
    probability: float
    confidence: float
    odds: float = 0.0
    is_recommended: bool = False
    is_value_bet: bool = False
    edge: float = 0.0  # Expected edge over market

    def to_dict(self) -> dict:
        """Serialise the prediction; ``probability`` is emitted as a percentage."""
        probability_pct = round(self.probability * 100, 1)
        payload = {
            "market_type": self.market_type,
            "pick": self.pick,
            "probability": probability_pct,
            "confidence": round(self.confidence, 1),
            "odds": self.odds,
            "is_recommended": self.is_recommended,
            "is_value_bet": self.is_value_bet,
            "edge": round(self.edge, 1),
        }
        return payload
@dataclass
class FullMatchPrediction:
    """Complete prediction for a match with ALL markets.

    ``*_prob`` fields hold fractions; ``to_dict`` multiplies them by 100.
    ``*_confidence`` fields are emitted as stored, rounded to one decimal.
    """

    match_id: str
    home_team: str
    away_team: str
    match_date: str = ""

    # === MATCH RESULT (1X2) ===
    ms_home_prob: float = 0.33
    ms_draw_prob: float = 0.33
    ms_away_prob: float = 0.33
    ms_pick: str = ""
    ms_confidence: float = 0.0

    # === DOUBLE CHANCE ===
    dc_1x_prob: float = 0.66
    dc_x2_prob: float = 0.66
    dc_12_prob: float = 0.66
    dc_pick: str = ""
    dc_confidence: float = 0.0

    # === OVER/UNDER GOALS ===
    # 1.5
    over_15_prob: float = 0.70
    under_15_prob: float = 0.30
    ou15_pick: str = ""
    ou15_confidence: float = 0.0
    # 2.5
    over_25_prob: float = 0.50
    under_25_prob: float = 0.50
    ou25_pick: str = ""
    ou25_confidence: float = 0.0
    # 3.5
    over_35_prob: float = 0.30
    under_35_prob: float = 0.70
    ou35_pick: str = ""
    ou35_confidence: float = 0.0

    # === BOTH TEAMS TO SCORE (BTTS) ===
    btts_yes_prob: float = 0.50
    btts_no_prob: float = 0.50
    btts_pick: str = ""
    btts_confidence: float = 0.0

    # === HALF-TIME RESULT ===
    ht_home_prob: float = 0.30
    ht_draw_prob: float = 0.40
    ht_away_prob: float = 0.30
    ht_pick: str = ""
    ht_confidence: float = 0.0

    # === SCORE PREDICTIONS ===
    score: Optional[ScorePrediction] = None
    predicted_ft_score: str = "1-1"
    predicted_ht_score: str = "0-0"
    ft_scores_top5: List[Dict] = field(default_factory=list)

    # === xG (Expected Goals) ===
    home_xg: float = 1.3
    away_xg: float = 1.1
    total_xg: float = 2.4

    # === RISK ASSESSMENT ===
    risk_level: str = "MEDIUM"  # LOW, MEDIUM, HIGH, EXTREME
    risk_score: float = 0.0
    is_surprise_risk: bool = False
    surprise_type: str = ""
    risk_warnings: List[str] = field(default_factory=list)
    ht_ft_probs: Dict[str, float] = field(default_factory=dict)

    # === GLM-5 UPSET SCORE ===
    upset_score: int = 0  # upset score in the 0-100 range
    upset_level: str = "LOW"  # LOW, MEDIUM, HIGH, EXTREME
    upset_reasons: List[str] = field(default_factory=list)

    # === SURPRISE PROFILE ===
    surprise_score: float = 0.0  # 0-100 overall surprise risk score
    surprise_comment: str = ""  # Human-readable surprise commentary
    surprise_reasons: List[str] = field(default_factory=list)  # Flagged risk reasons
    surprise_breakdown: List[Dict[str, Any]] = field(default_factory=list)  # Per-factor {code, points, label}

    # === ENGINE CONTRIBUTIONS ===
    team_confidence: float = 0.0
    player_confidence: float = 0.0
    odds_confidence: float = 0.0
    referee_confidence: float = 0.0

    # === CORNERS, CARDS & OTHER ===
    total_corners_pred: float = 9.5
    corner_pick: str = "9.5 Üst"
    total_cards_pred: float = 4.5
    card_pick: str = "4.5 Alt"
    cards_over_prob: float = 0.50
    cards_under_prob: float = 0.50
    cards_confidence: float = 0.0
    handicap_pick: str = ""
    handicap_home_prob: float = 0.33
    handicap_draw_prob: float = 0.34
    handicap_away_prob: float = 0.33
    handicap_confidence: float = 0.0
    ht_over_05_prob: float = 0.65
    ht_under_05_prob: float = 0.35
    ht_over_15_prob: float = 0.30
    ht_under_15_prob: float = 0.70
    ht_ou_pick: str = "İY 0.5 Üst"
    ht_ou15_pick: str = "İY 1.5 Alt"
    odd_even_pick: str = "Çift"
    odd_prob: float = 0.50  # probability of an odd goal total
    even_prob: float = 0.50  # probability of an even goal total

    # === RECOMMENDATIONS ===
    best_bet: Optional[MarketPrediction] = None
    recommended_bets: List[MarketPrediction] = field(default_factory=list)
    alternative_bet: Optional[MarketPrediction] = None
    expert_recommendation: Dict[str, Any] = field(default_factory=dict)

    # === DETAILED ANALYSIS ===
    analysis_details: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialise the prediction into the nested response dictionary.

        Probabilities are converted to percentages with one decimal; xG values
        keep two decimals. Section and key order is fixed.
        """

        def pct(fraction):
            # Fraction -> percentage with one decimal place.
            return round(fraction * 100, 1)

        def one_dp(value):
            return round(value, 1)

        match_info = {
            "match_id": self.match_id,
            "home_team": self.home_team,
            "away_team": self.away_team,
            "match_date": self.match_date
        }

        match_result = {
            "1": pct(self.ms_home_prob),
            "X": pct(self.ms_draw_prob),
            "2": pct(self.ms_away_prob),
            "pick": self.ms_pick,
            "confidence": one_dp(self.ms_confidence)
        }

        double_chance = {
            "1X": pct(self.dc_1x_prob),
            "X2": pct(self.dc_x2_prob),
            "12": pct(self.dc_12_prob),
            "pick": self.dc_pick,
            "confidence": one_dp(self.dc_confidence)
        }

        # One entry per goal line, all sharing the same four-key layout.
        goal_lines = (
            ("1.5", self.over_15_prob, self.under_15_prob, self.ou15_pick, self.ou15_confidence),
            ("2.5", self.over_25_prob, self.under_25_prob, self.ou25_pick, self.ou25_confidence),
            ("3.5", self.over_35_prob, self.under_35_prob, self.ou35_pick, self.ou35_confidence),
        )
        over_under = {
            line: {
                "over": pct(over_p),
                "under": pct(under_p),
                "pick": line_pick,
                "confidence": one_dp(line_conf)
            }
            for line, over_p, under_p, line_pick, line_conf in goal_lines
        }

        btts = {
            "yes": pct(self.btts_yes_prob),
            "no": pct(self.btts_no_prob),
            "pick": self.btts_pick,
            "confidence": one_dp(self.btts_confidence)
        }

        first_half = {
            "1": pct(self.ht_home_prob),
            "X": pct(self.ht_draw_prob),
            "2": pct(self.ht_away_prob),
            "pick": self.ht_pick,
            "confidence": one_dp(self.ht_confidence),
            "over_under_05": {
                "over": pct(self.ht_over_05_prob),
                "under": pct(self.ht_under_05_prob),
                "pick": self.ht_ou_pick
            },
            "over_under_15": {
                "over": pct(self.ht_over_15_prob),
                "under": pct(self.ht_under_15_prob),
                "pick": self.ht_ou15_pick
            }
        }

        scores = {
            "predicted_ft": self.predicted_ft_score,
            "predicted_ht": self.predicted_ht_score,
            "top_5_ft_scores": self.ft_scores_top5
        }

        others = {
            "handicap": {
                "pick": self.handicap_pick,
                "confidence": one_dp(self.handicap_confidence),
                "home": pct(self.handicap_home_prob),
                "draw": pct(self.handicap_draw_prob),
                "away": pct(self.handicap_away_prob)
            },
            "corners": {
                "total": one_dp(self.total_corners_pred),
                "pick": self.corner_pick
            },
            "cards": {
                "total": one_dp(self.total_cards_pred),
                "pick": self.card_pick,
                "confidence": one_dp(self.cards_confidence),
                "over": pct(self.cards_over_prob),
                "under": pct(self.cards_under_prob)
            },
            "odd_even": {
                "pick": self.odd_even_pick,
                "tek": pct(self.odd_prob),
                "cift": pct(self.even_prob)
            }
        }

        xg = {
            "home": round(self.home_xg, 2),
            "away": round(self.away_xg, 2),
            "total": round(self.total_xg, 2)
        }

        # Falsy containers are serialised as empty ones.
        if self.ht_ft_probs:
            ht_ft_pct = {k: pct(v) for k, v in self.ht_ft_probs.items()}
        else:
            ht_ft_pct = {}

        if self.recommended_bets:
            all_recommended = [b.to_dict() for b in self.recommended_bets]
        else:
            all_recommended = []

        return {
            "match_info": match_info,
            "predictions": {
                "match_result": match_result,
                "double_chance": double_chance,
                "over_under": over_under,
                "btts": btts,
                "first_half": first_half,
                "scores": scores,
                "others": others,
                "xg": xg
            },
            "risk": {
                "level": self.risk_level,
                "score": one_dp(self.risk_score),
                "is_surprise_risk": self.is_surprise_risk,
                "surprise_type": self.surprise_type,
                "ht_ft_probs": ht_ft_pct,
                "warnings": self.risk_warnings
            },
            "upset_analysis": {
                "score": self.upset_score,
                "level": self.upset_level,
                "reasons": self.upset_reasons
            },
            "engine_breakdown": {
                "team_engine": one_dp(self.team_confidence),
                "player_engine": one_dp(self.player_confidence),
                "odds_engine": one_dp(self.odds_confidence),
                "referee_engine": one_dp(self.referee_confidence)
            },
            "recommendations": {
                "best_bet": self.best_bet.to_dict() if self.best_bet else None,
                "all_recommended": all_recommended,
                "alternative_bet": self.alternative_bet.to_dict() if self.alternative_bet else None
            },
            "analysis_details": self.analysis_details
        }
+510
View File
@@ -0,0 +1,510 @@
"""
Calibration Backfill Script
============================
Runs V25 model against historical matches (using pre-computed ai_features + odds)
to generate calibration training data, then trains isotonic calibration models.
Usage:
python ai-engine/scripts/backfill_calibration.py
python ai-engine/scripts/backfill_calibration.py --limit 5000
python ai-engine/scripts/backfill_calibration.py --min-samples 50
"""
import argparse
import json
import os
import sys
import time
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
import pandas as pd
import psycopg2
from psycopg2.extras import RealDictCursor
from dotenv import load_dotenv
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, AI_ENGINE_DIR)
from models.v25_ensemble import V25Predictor
from models.calibration import get_calibrator
load_dotenv()
def _normalize_pick(pick) -> str:
return str(pick or "").strip().casefold()
def resolve_actual(market, pick, score_home, score_away, ht_home, ht_away):
    """Return 1 if *pick* on *market* won, 0 if it lost, None if unresolvable.

    Args:
        market: Market code (case-insensitive): MS, HT, HTFT, DC, BTTS,
            OU15, OU25 or OU35.
        pick: Selection label; normalized with ``_normalize_pick``.
        score_home, score_away: Full-time goals. ``None`` means the match
            cannot be resolved at all.
        ht_home, ht_away: Half-time goals; required only for HT and HTFT.

    Returns:
        ``1`` / ``0`` for a won / lost pick, or ``None`` when the market or
        pick is not recognized or the required scores are missing.
    """
    if score_home is None or score_away is None:
        return None

    market = (market or "").upper()
    p = _normalize_pick(pick)

    def outcome(home, away):
        # "1" = home ahead, "2" = away ahead, "x" = level.
        if home > away:
            return "1"
        if away > home:
            return "2"
        return "x"

    def side(token):
        # "0" is accepted as an alternative draw notation for every
        # 1/X/2-style pick (MS, HT and both halves of HTFT).
        token = token.strip()
        return "x" if token == "0" else token

    if market in {"MS", "HT"}:
        if market == "HT":
            if ht_home is None or ht_away is None:
                return None
            actual = outcome(ht_home, ht_away)
        else:
            actual = outcome(score_home, score_away)
        chosen = side(p)
        return int(chosen == actual) if chosen in {"1", "x", "2"} else None

    if market in {"OU15", "OU25", "OU35"}:
        line = {"OU15": 1.5, "OU25": 2.5, "OU35": 3.5}[market]
        total = score_home + score_away
        if "over" in p or "üst" in p or "ust" in p:
            return int(total > line)
        if "under" in p or "alt" in p:
            return int(total < line)
        return None

    if market == "BTTS":
        both = score_home > 0 and score_away > 0
        if "yes" in p or "var" in p:
            return int(both)
        if "no" in p or "yok" in p:
            return int(not both)
        return None

    if market == "HTFT":
        if ht_home is None or ht_away is None:
            return None
        parts = p.split("/")
        # Exactly "<ht>/<ft>" is required; anything else is unresolvable
        # rather than an unpacking error.
        if len(parts) != 2:
            return None
        ht_pick, ft_pick = side(parts[0]), side(parts[1])
        return int(
            ht_pick == outcome(ht_home, ht_away)
            and ft_pick == outcome(score_home, score_away)
        )

    if market == "DC":
        norm = p.replace("-", "").upper()
        if norm == "1X":
            return int(score_home >= score_away)
        if norm == "X2":
            return int(score_away >= score_home)
        if norm == "12":
            return int(score_home != score_away)
        return None

    return None
def calibrator_key(market, pick):
    """Map a (market, pick) pair to its calibration-model key.

    Only the positive side of two-way markets has a key ("over" for the OU
    markets, "yes" for BTTS); the opposite side returns ``None``. The "over"
    spellings accepted here ("over", "üst", "ust") are the same ones
    ``resolve_actual`` recognizes.

    Returns:
        The key string (e.g. ``"ms_home"``, ``"ou25"``, ``"ht_ft"``) or
        ``None`` when the pair has no calibrator.
    """
    m = (market or "").upper()
    p = _normalize_pick(pick)

    if m in {"MS", "HT"}:
        prefix = m.lower()
        if p == "1":
            return f"{prefix}_home"
        if p in {"x", "0"}:
            return f"{prefix}_draw"
        if p == "2":
            return f"{prefix}_away"
        return None

    if m == "DC":
        return "dc"
    if m == "HTFT":
        return "ht_ft"

    if m in {"OU15", "OU25", "OU35"}:
        is_over = "over" in p or "üst" in p or "ust" in p
        return m.lower() if is_over else None

    if m == "BTTS":
        return "btts" if ("yes" in p or "var" in p) else None

    return None
def get_conn():
    """Open a psycopg2 connection to the database named by ``DATABASE_URL``.

    Everything from ``?schema=`` onwards is cut off the URL before
    connecting. Rows come back as dicts (``RealDictCursor``).

    Raises:
        ValueError: If ``DATABASE_URL`` is unset or empty.
    """
    dsn, _, _ = os.getenv("DATABASE_URL", "").partition("?schema=")
    if dsn:
        return psycopg2.connect(dsn, cursor_factory=RealDictCursor)
    raise ValueError("DATABASE_URL not set")
# Odds categories whose selections are matched by exact name:
# lower-cased category name -> {lower-cased selection name -> odds key}.
ODD_CAT_MAP = {
    "maç sonucu": {"1": "ms_h", "0": "ms_d", "x": "ms_d", "2": "ms_a"},
    "1. yarı sonucu": {"1": "ht_ms_h", "0": "ht_ms_d", "x": "ht_ms_d", "2": "ht_ms_a"},
}
# Odds categories whose selections are matched by substring:
# lower-cased category name -> {keyword contained in the selection -> odds key}.
ODD_CAT_KEYWORD_MAP = {
    "karşılıklı gol": {"var": "btts_y", "yok": "btts_n"},
    "0,5 alt/üst": {"alt": "ou05_u", "üst": "ou05_o"},
    "1,5 alt/üst": {"alt": "ou15_u", "üst": "ou15_o"},
    "2,5 alt/üst": {"alt": "ou25_u", "üst": "ou25_o"},
    "3,5 alt/üst": {"alt": "ou35_u", "üst": "ou35_o"},
    "ilk yarı 0,5 alt/üst": {"alt": "ht_ou05_u", "üst": "ht_ou05_o"},
    "ilk yarı 1,5 alt/üst": {"alt": "ht_ou15_u", "üst": "ht_ou15_o"},
}
def load_matches(cur, limit: int) -> List[Dict]:
    """Fetch up to *limit* finished football matches that have AI features.

    Only rows with status ``FT`` and both full-time scores present are
    returned, newest kick-off (``mst_utc``) first. The result is whatever
    ``cur.fetchall()`` yields.
    """
    query = """
        SELECT m.id, m.score_home, m.score_away,
        m.ht_score_home, m.ht_score_away
        FROM matches m
        JOIN football_ai_features f ON f.match_id = m.id
        WHERE m.status = 'FT'
        AND m.sport = 'football'
        AND m.score_home IS NOT NULL
        AND m.score_away IS NOT NULL
        ORDER BY m.mst_utc DESC
        LIMIT %s
        """
    cur.execute(query, (limit,))
    rows = cur.fetchall()
    return rows
def load_ai_features_batch(cur, match_ids: List[str]) -> Dict[str, Dict]:
    """Load pre-computed AI features for *match_ids*, keyed by match id.

    Columns are aliased in SQL to the names used as feature keys by the
    caller; several columns are filled with constant 0 placeholders.

    Returns:
        ``{str(match_id): row_dict}``; an empty dict when *match_ids* is
        empty (no query is issued in that case).
    """
    if len(match_ids) == 0:
        return {}

    # One "%s" placeholder per id; the values themselves stay parameterized.
    ph = ",".join("%s" for _ in match_ids)
    query = f"""
        SELECT match_id,
        home_elo AS home_overall_elo,
        away_elo AS away_overall_elo,
        elo_diff,
        home_home_elo, away_away_elo,
        home_form_elo, away_form_elo,
        (home_form_elo - away_form_elo) AS form_elo_diff,
        home_goals_avg_5 AS home_goals_avg,
        home_conceded_avg_5 AS home_conceded_avg,
        away_goals_avg_5 AS away_goals_avg,
        away_conceded_avg_5 AS away_conceded_avg,
        home_clean_sheet_rate, away_clean_sheet_rate,
        home_scoring_rate, away_scoring_rate,
        home_win_streak AS home_winning_streak,
        away_win_streak AS away_winning_streak,
        0 AS home_unbeaten_streak,
        0 AS away_unbeaten_streak,
        h2h_total AS h2h_total_matches,
        h2h_home_win_rate,
        (1.0 - h2h_home_win_rate - 0.33) AS h2h_draw_rate,
        h2h_avg_goals,
        h2h_btts_rate, h2h_over25_rate,
        home_avg_possession, away_avg_possession,
        home_avg_shots_on_target, away_avg_shots_on_target,
        home_shot_conversion, away_shot_conversion,
        0.0 AS home_avg_corners, 0.0 AS away_avg_corners,
        implied_home, implied_draw, implied_away,
        league_avg_goals,
        0.0 AS league_zero_goal_rate,
        0.0 AS home_xga, 0.0 AS away_xga,
        0.0 AS upset_atmosphere, 0.0 AS upset_motivation,
        0.0 AS upset_fatigue, 0.0 AS upset_potential,
        referee_home_bias, referee_avg_goals,
        referee_avg_cards AS referee_cards_total,
        0.0 AS referee_avg_yellow,
        0.0 AS referee_experience,
        0.0 AS home_momentum_score, 0.0 AS away_momentum_score,
        0.0 AS momentum_diff,
        0.0 AS home_squad_quality, 0.0 AS away_squad_quality,
        0.0 AS squad_diff,
        0 AS home_key_players, 0 AS away_key_players,
        missing_players_impact AS home_missing_impact,
        0.0 AS away_missing_impact,
        home_goals_avg_5 AS home_goals_form,
        away_goals_avg_5 AS away_goals_form
        FROM football_ai_features
        WHERE match_id IN ({ph})
        """
    cur.execute(query, match_ids)

    by_match: Dict[str, Dict] = {}
    for record in cur.fetchall():
        by_match[str(record["match_id"])] = dict(record)
    return by_match
def load_odds_batch(cur, match_ids: List[str]) -> Dict[str, Dict[str, float]]:
    """Load bookmaker odds for *match_ids* and map them to short odds keys.

    Category and selection names are lower-cased and matched against
    ``ODD_CAT_MAP`` (exact selection name) or ``ODD_CAT_KEYWORD_MAP``
    (first keyword contained in the selection name). Rows with a missing
    or non-positive odd value are ignored.

    Returns:
        ``{str(match_id): {odds_key: odd_value}}``. A match that has at
        least one positive odd gets an entry even if none of its
        categories is mapped.
    """
    if not match_ids:
        return {}

    ph = ",".join(["%s"] * len(match_ids))
    cur.execute(f"""
        SELECT oc.match_id, oc.name AS cat_name,
        os.name AS sel_name, os.odd_value
        FROM odd_selections os
        JOIN odd_categories oc ON os.odd_category_db_id = oc.db_id
        WHERE oc.match_id IN ({ph})
        """, match_ids)

    def fold(text):
        # str.lower() turns the Turkish capital dotted I (U+0130) into
        # "i" + U+0307 (combining dot above), which would never equal the
        # plain "i" used in the map keys (e.g. "ilk yarı ..."); drop the
        # combining dot so such names can match.
        return (text or "").lower().replace("i\u0307", "i").strip()

    odds: Dict[str, Dict[str, float]] = {}
    for row in cur.fetchall():
        raw_value = row["odd_value"]
        val = float(raw_value) if raw_value else 0
        if val <= 0:
            continue

        match_odds = odds.setdefault(str(row["match_id"]), {})
        cat = fold(row["cat_name"])
        sel = fold(row["sel_name"])

        exact = ODD_CAT_MAP.get(cat)
        if exact is not None:
            key = exact.get(sel)
            if key:
                match_odds[key] = val
            continue

        # Keyword categories: the first keyword found in the selection wins.
        for keyword, key in ODD_CAT_KEYWORD_MAP.get(cat, {}).items():
            if keyword in sel:
                match_odds[key] = val
                break
    return odds
# (market code, pick label, extractor) triples; each extractor takes a
# probability sequence ``p`` and returns the entry for that pick.
# NOTE(review): not referenced by any function visible in this script —
# confirm whether other modules import it before changing or removing it.
MARKETS_TO_PREDICT = [
    ("MS", "1", lambda p: p[0]),
    ("MS", "X", lambda p: p[1]),
    ("MS", "2", lambda p: p[2]),
    ("OU25", "Over 2.5", lambda p: p[0]),
    ("BTTS", "Yes", lambda p: p[0]),
    ("OU15", "Over 1.5", lambda p: p[0]),
    ("OU35", "Over 3.5", lambda p: p[0]),
    ("HT", "1", lambda p: p[0]),
    ("HT", "X", lambda p: p[1]),
    ("HT", "2", lambda p: p[2]),
]
def run_backfill(args):
    """Build calibration samples from finished matches and fit calibrators.

    Loads matches, AI features and odds, runs the V25 predictor on every
    match, pairs each raw probability with the realized outcome, and trains
    one calibration model per calibrator key.

    Args:
        args: Parsed CLI namespace providing ``limit`` (max matches) and
            ``min_samples`` (minimum samples needed to train a key).

    The database cursor and connection are closed on every exit path,
    including when an exception propagates.
    """
    print("=" * 70)
    print("CALIBRATION BACKFILL")
    print("=" * 70)

    conn = get_conn()
    cur = None
    try:
        cur = conn.cursor(cursor_factory=RealDictCursor)
        _run_backfill_with_cursor(cur, args)
    finally:
        if cur is not None:
            cur.close()
        conn.close()


# Model keys queried per match, in the order their samples are emitted.
_BACKFILL_MODEL_KEYS = ("ms", "ou25", "btts", "ou15", "ou35", "ht_result")


def _build_feature_dict(feature_cols, feat_row, odds_row):
    """Assemble the model input dict for one match from features and odds."""
    feat_dict = {}
    for col in feature_cols:
        if feat_row.get(col) is not None:
            feat_dict[col] = float(feat_row[col])
        elif col.startswith("odds_") and not col.endswith("_present"):
            feat_dict[col] = float(odds_row.get(col.replace("odds_", ""), 0))
        elif col.endswith("_present"):
            odds_key = col.replace("_present", "").replace("odds_", "")
            feat_dict[col] = 1.0 if odds_row.get(odds_key, 0) > 0 else 0.0
        else:
            feat_dict[col] = 0.0

    # Real 1X2 odds, when present, override whatever the loop produced.
    for side in ("ms_h", "ms_d", "ms_a"):
        if odds_row.get(side, 0) > 0:
            feat_dict[f"odds_{side}"] = odds_row[side]

    ms_h = feat_dict.get("odds_ms_h", 0)
    ms_d = feat_dict.get("odds_ms_d", 0)
    ms_a = feat_dict.get("odds_ms_a", 0)
    if ms_h > 0 and ms_d > 0 and ms_a > 0:
        # Normalize inverse odds so the three implied probabilities sum to 1.
        raw_sum = 1 / ms_h + 1 / ms_d + 1 / ms_a
        feat_dict["implied_home"] = (1 / ms_h) / raw_sum
        feat_dict["implied_draw"] = (1 / ms_d) / raw_sum
        feat_dict["implied_away"] = (1 / ms_a) / raw_sum
    return feat_dict


def _collect_match_samples(predictor, mid, feat_dict, match_row):
    """Return the calibration samples for one match (may be empty).

    Samples are gathered in a local list, so an exception raised part-way
    through leaves no partial samples behind for this match.
    """
    sh = match_row["score_home"]
    sa = match_row["score_away"]
    ht_h = match_row.get("ht_score_home")
    ht_a = match_row.get("ht_score_away")
    collected = []

    def add(market, pick, prob):
        actual = resolve_actual(market, pick, sh, sa, ht_h, ht_a)
        key = calibrator_key(market, pick)
        if actual is not None and key:
            collected.append({
                "match_id": mid,
                "market": market,
                "pick": pick,
                "key": key,
                "raw_prob": float(prob),
                "actual": int(actual),
            })

    for model_key in _BACKFILL_MODEL_KEYS:
        if model_key not in predictor.models:
            continue
        probs = predictor.predict_market(model_key, feat_dict)
        if probs is None:
            continue

        if model_key in ("ms", "ht_result"):
            market = "MS" if model_key == "ms" else "HT"
            if market == "HT" and (ht_h is None or ht_a is None):
                continue
            for idx, pick in enumerate(("1", "X", "2")):
                add(market, pick, probs[idx])
        elif model_key in ("ou25", "ou15", "ou35"):
            over_prob = float(probs[0]) if len(probs) > 0 else 0.5
            add(model_key.upper(), "Over", over_prob)
        elif model_key == "btts":
            yes_prob = float(probs[0]) if len(probs) > 0 else 0.5
            add("BTTS", "Yes", yes_prob)
    return collected


def _train_calibrators(df, min_samples):
    """Fit one calibration model per key in *df*; return a per-key summary."""
    calibrator = get_calibrator()
    results: Dict[str, Any] = {}
    for key in sorted(df["key"].unique()):
        sub = df[df["key"] == key].copy()
        sub = sub.drop_duplicates(subset=["match_id", "key"], keep="first")
        sub = sub.dropna(subset=["raw_prob", "actual"])
        # Keep only probabilities strictly inside (0, 1).
        sub = sub[(sub["raw_prob"] > 0.0) & (sub["raw_prob"] < 1.0)]
        n = len(sub)
        if n < min_samples:
            results[key] = {"status": "skipped", "samples": n}
            continue
        metrics = calibrator.train_calibration(
            df=sub,
            market=key,
            prob_col="raw_prob",
            actual_col="actual",
            min_samples=min_samples,
            save=True,
        )
        results[key] = {
            "status": "trained",
            "samples": metrics.sample_count,
            "brier": round(metrics.brier_score, 4),
            "ece": round(metrics.calibration_error, 4),
            "mean_predicted": round(metrics.mean_predicted, 4),
            "mean_actual": round(metrics.mean_actual, 4),
        }
    return results


def _run_backfill_with_cursor(cur, args):
    """Run the whole backfill using an already-open cursor."""
    t0 = time.time()
    print(f"Loading matches (limit={args.limit})...")
    matches = load_matches(cur, args.limit)
    print(f" Found {len(matches)} finished matches with ai_features")
    match_ids = [str(m["id"]) for m in matches]
    match_map = {str(m["id"]): m for m in matches}

    print("Loading ai_features...")
    features_map = load_ai_features_batch(cur, match_ids)
    print(f" Loaded features for {len(features_map)} matches")
    print("Loading odds...")
    odds_map = load_odds_batch(cur, match_ids)
    print(f" Loaded odds for {len(odds_map)} matches")
    print(f"Data loading: {time.time() - t0:.1f}s")

    print("\nLoading V25 model...")
    predictor = V25Predictor()
    predictor.load_models()
    feature_cols = predictor.FEATURE_COLS

    samples: List[Dict[str, Any]] = []
    skipped = 0    # matches without features plus matches that raised
    errors = 0     # matches that raised; gates the error log on its own
    processed = 0
    print(f"\nRunning predictions on {len(match_ids)} matches...")
    t1 = time.time()
    for i, mid in enumerate(match_ids):
        feat_row = features_map.get(mid)
        if feat_row is None:
            skipped += 1
        else:
            feat_dict = _build_feature_dict(
                feature_cols, feat_row, odds_map.get(mid, {})
            )
            try:
                samples.extend(
                    _collect_match_samples(predictor, mid, feat_dict, match_map[mid])
                )
                processed += 1
            except Exception as e:
                # Best effort per match: count it, show the first few errors.
                skipped += 1
                errors += 1
                if errors <= 5:
                    print(f" Error on {mid}: {e}")
        if (i + 1) % 5000 == 0:
            elapsed = time.time() - t1
            rate = (i + 1) / elapsed if elapsed > 0 else 0.0
            print(f" Processed {i+1}/{len(match_ids)} ({rate:.0f} matches/s)")
    elapsed = time.time() - t1
    print(f"\nPrediction complete: {processed} matches, {skipped} skipped, {elapsed:.1f}s")

    if not samples:
        print("No calibration samples generated!")
        return

    df = pd.DataFrame(samples)
    print(f"\nTotal calibration samples: {len(df)}")
    print(f"Unique matches: {df['match_id'].nunique()}")
    print(f"\nPer-key counts:")
    for key, count in df["key"].value_counts().items():
        print(f" {key:<14} {count}")

    print(f"\nTraining isotonic calibration models (min_samples={args.min_samples})...")
    results = _train_calibrators(df, args.min_samples)

    print("\n" + "=" * 70)
    print("CALIBRATION RESULTS")
    print("=" * 70)
    print(f"{'market':<14} {'status':<10} {'n':<8} {'brier':<9} {'ece':<8} {'pred_avg':<9} {'actual_avg'}")
    print("-" * 70)
    for key, info in sorted(results.items()):
        if info["status"] == "trained":
            print(
                f"{key:<14} {'OK':<10} {info['samples']:<8} "
                f"{info['brier']:<9.4f} {info['ece']:<8.4f} "
                f"{info['mean_predicted']:<9.4f} {info['mean_actual']}"
            )
        else:
            print(f"{key:<14} {'SKIP':<10} {info['samples']:<8}")
    print("=" * 70)

    total_time = time.time() - t0
    print(f"\nTotal time: {total_time:.1f}s")
    print(f"Calibration models saved to: {os.path.join(AI_ENGINE_DIR, 'models', 'calibration')}/")
def main():
    """Parse the command-line options and run the calibration backfill."""
    cli = argparse.ArgumentParser(description="Backfill calibration from historical matches")
    cli.add_argument(
        "--limit",
        type=int,
        default=50000,
        help="Max matches to process (default: 50000)",
    )
    cli.add_argument(
        "--min-samples",
        type=int,
        default=100,
        help="Min samples per market for calibration (default: 100)",
    )
    run_backfill(cli.parse_args())
if __name__ == "__main__":
main()
+352
View File
@@ -0,0 +1,352 @@
"""
Tutarsızlık Bazlı Backtest
============================
Modeller arası tutarsızlığı ölçer, tutarlı maçlarda bahis açılsaydı
ROI ne olurdu hesaplar.
Mantık:
- Her maç için market'ler arası çelişkileri tespit et
- Tutarsız maçları filtrele
- Tutarlı maçlarda hit rate ve ROI hesapla
Usage:
python scripts/backtest_consistency.py
"""
import os, sys, json
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import accuracy_score
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Training CSV and V25 model directory, both resolved relative to the
# parent of this script's directory.
DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                         'data', 'training_data.csv')
MODELS_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                          'models', 'v25')
# CSV columns excluded from the feature matrix: identifiers, the kick-off
# timestamp, raw scores and every label column.
SKIP_COLS = {
    'match_id','home_team_id','away_team_id','league_id','mst_utc',
    'score_home','score_away','total_goals','ht_score_home','ht_score_away','ht_total_goals',
    'label_ms','label_ou05','label_ou15','label_ou25','label_ou35','label_btts',
    'label_ht_result','label_ht_ou05','label_ht_ou15','label_ht_ft',
    'label_odd_even','label_yellow_cards','label_cards_ou45','label_handicap_ms',
}
def load_model(market: str):
    """Load the XGBoost booster stored for *market*.

    Looks for ``xgb_v25_<market>.json`` under ``MODELS_DIR``.

    Returns:
        The loaded ``xgb.Booster``, or ``None`` when the file does not exist.
    """
    model_file = os.path.join(MODELS_DIR, f'xgb_v25_{market}.json')
    if os.path.exists(model_file):
        booster = xgb.Booster()
        booster.load_model(model_file)
        return booster
    return None
def predict_proba(model, X: np.ndarray, feature_cols: list, n_class: int):
    """Return per-class scores for the rows of *X* as a 2-D array.

    For ``n_class > 2`` the raw model output is reshaped to
    ``(-1, n_class)``. Otherwise the raw output is treated as the score of
    class 1 and ``1 - raw`` is placed in column 0.
    """
    frame = pd.DataFrame(X, columns=feature_cols)
    scores = model.predict(xgb.DMatrix(frame))
    if n_class <= 2:
        return np.column_stack([1 - scores, scores])
    return scores.reshape(-1, n_class)
def consistency_score(probs: dict) -> tuple[float, list]:
    """Score how strongly the per-market probabilities contradict each other.

    0 means fully consistent, 1 means fully contradictory.

    Checks, in order:
      1. Over/under hierarchy: OU25-over must not exceed OU15-over, and
         OU35-over must not exceed OU25-over (0.05 tolerance).
      2. A first-half goal is expected (HT_OU05 over > 70%) while a
         half-time draw is also favoured (> 50%).
      3. HT_OU15-over must not exceed HT_OU05-over (0.05 tolerance).
      4. One side is a strong favourite (> 65%) while BTTS-yes is also
         high (> 65%).
      5. BTTS-yes is high (> 65%) but OU25-over is low (< 45%).
      6. OU35-over is high (> 45%) but BTTS-yes is low (< 40%).

    Missing keys fall back to neutral defaults.

    Returns:
        ``(score, conflicts)`` where ``conflicts`` is a list of
        human-readable messages, one per triggered check.
    """
    # Each entry is (message, penalty); one entry per triggered check.
    found = []

    ou15_over = probs.get('ou15_over', 0.5)
    ou25_over = probs.get('ou25_over', 0.5)
    ou35_over = probs.get('ou35_over', 0.5)

    # Over/under hierarchy: ou35 <= ou25 <= ou15 must hold.
    if ou25_over > ou15_over + 0.05:
        found.append((
            f'OU25>{ou25_over:.0%} > OU15>{ou15_over:.0%} (imkansız)',
            (ou25_over - ou15_over) * 2,
        ))
    if ou35_over > ou25_over + 0.05:
        found.append((
            f'OU35>{ou35_over:.0%} > OU25>{ou25_over:.0%} (imkansız)',
            (ou35_over - ou25_over) * 2,
        ))

    # First-half goals versus first-half result.
    ht_ou05_over = probs.get('ht_ou05_over', 0.5)
    ht_draw_prob = probs.get('ht_draw', 0.34)
    if ht_ou05_over > 0.70 and ht_draw_prob > 0.50:
        found.append((
            f'HT_OU05>{ht_ou05_over:.0%} ama HT_Draw>{ht_draw_prob:.0%}',
            min(ht_ou05_over - 0.5, ht_draw_prob - 0.4),
        ))

    # First-half over/under hierarchy.
    ht_ou15_over = probs.get('ht_ou15_over', 0.3)
    if ht_ou15_over > ht_ou05_over + 0.05:
        found.append((
            f'HT_OU15>{ht_ou15_over:.0%} > HT_OU05>{ht_ou05_over:.0%} (imkansız)',
            (ht_ou15_over - ht_ou05_over) * 2,
        ))

    # Match result versus both-teams-to-score.
    ms_home = probs.get('ms_home', 0.33)
    ms_away = probs.get('ms_away', 0.33)
    btts_yes = probs.get('btts_yes', 0.5)
    dominant = max(ms_home, ms_away)
    if dominant > 0.65 and btts_yes > 0.65:
        found.append((
            f'MS dominant>{dominant:.0%} ama BTTS_Yes>{btts_yes:.0%}',
            ((dominant - 0.5) * (btts_yes - 0.5)) * 0.5,
        ))

    # BTTS implies at least two goals, so OU25-over should be high as well.
    if btts_yes > 0.65 and ou25_over < 0.45:
        found.append((
            f'BTTS_Yes>{btts_yes:.0%} ama OU25>{ou25_over:.0%} düşük',
            btts_yes - ou25_over,
        ))

    # Many goals expected but only one side expected to score.
    if ou35_over > 0.45 and btts_yes < 0.40:
        found.append((
            f'OU35>{ou35_over:.0%} ama BTTS_Yes<{btts_yes:.0%}',
            (ou35_over - 0.35) * (0.5 - btts_yes),
        ))

    conflicts = [message for message, _ in found]
    total_conflict = sum(penalty for _, penalty in found)
    total_weight = len(found)
    score = min(1.0, total_conflict / max(total_weight * 0.3, 0.1))
    return score, conflicts
def main():
    """Backtest hit rate and ROI as a function of cross-market consistency.

    Uses the most recent 20% of the training CSV (sorted by ``mst_utc``) as
    the test set, predicts every available V25 market, scores each match
    with ``consistency_score`` and prints/saves hit rate and ROI for several
    consistency thresholds. ROI uses the model's own probability as the
    odds (odds = 1 / predicted probability), not bookmaker odds.
    """
    print('Loading data...')
    df = pd.read_csv(DATA_PATH, low_memory=False)

    # Chronological hold-out: the last 20% of matches.
    df = df.sort_values('mst_utc')
    n_test = int(len(df) * 0.20)
    df_test = df.tail(n_test).copy()
    print(f'Test seti: {len(df_test):,} maç')
    if df_test.empty:
        # Nothing to evaluate; the percentage math below would divide by zero.
        return

    feature_cols = [c for c in df.columns if c not in SKIP_COLS]

    # Load whichever market models exist on disk.
    print('Modeller yükleniyor...')
    model_specs = {
        'ms': 3, 'ou15': 2, 'ou25': 2, 'ou35': 2, 'btts': 2,
        'ht_result': 3, 'ht_ou05': 2, 'ht_ou15': 2,
    }
    models = {}
    for mkey, n_class in model_specs.items():
        booster = load_model(mkey)
        if booster is not None:
            models[mkey] = (booster, n_class)
    print(f'Yüklenen model: {list(models.keys())}')

    X = df_test[feature_cols].fillna(0).values

    print('Tahminler yapılıyor...')
    preds = {
        mkey: predict_proba(model, X, feature_cols, n_class)
        for mkey, (model, n_class) in models.items()
    }

    markets = ['ms', 'ou15', 'ou25', 'ou35', 'btts']

    # Realized labels per market as int arrays; a missing column or a NaN
    # label becomes -1 ("unknown") instead of crashing int() on NaN.
    actual = {}
    for m in markets:
        col = f'label_{m}'
        if col in df_test.columns:
            actual[m] = df_test[col].fillna(-1).astype(int).to_numpy()
        else:
            actual[m] = np.full(len(df_test), -1, dtype=int)

    # (model key, ((probs key, class index), ...)) in output order.
    prob_layout = [
        ('ms', (('ms_home', 0), ('ms_draw', 1), ('ms_away', 2))),
        ('ou15', (('ou15_over', 1),)),
        ('ou25', (('ou25_over', 1),)),
        ('ou35', (('ou35_over', 1),)),
        ('btts', (('btts_yes', 1),)),
        ('ht_result', (('ht_home', 0), ('ht_draw', 1), ('ht_away', 2))),
        ('ht_ou05', (('ht_ou05_over', 1),)),
        ('ht_ou15', (('ht_ou15_over', 1),)),
    ]

    # Consistency score and per-market outcome for every test match.
    results = []
    for i in range(len(df_test)):
        probs = {}
        for mkey, fields in prob_layout:
            if mkey in preds:
                for name, cls in fields:
                    probs[name] = preds[mkey][i][cls]

        c_score, conflicts = consistency_score(probs)

        market_results = {}
        for mkt in markets:
            label = int(actual[mkt][i])
            if mkt not in preds or label < 0:
                continue
            pred_class = int(np.argmax(preds[mkt][i]))
            correct = int(pred_class == label)
            # Odds derived from the model's own probability (odds = 1/prob).
            pred_prob = float(preds[mkt][i][pred_class])
            implied_odds = 1 / pred_prob if pred_prob > 0.01 else 10.0
            # One-unit stake: win (odds - 1) when correct, lose 1 otherwise.
            roi = (implied_odds - 1) * correct - (1 - correct)
            market_results[mkt] = {
                'pred': pred_class,
                'actual': label,
                'correct': correct,
                'prob': pred_prob,
                'roi': roi,
            }

        results.append({
            'idx': i,
            'consistency_score': c_score,
            'conflicts': conflicts,
            'probs': probs,
            'market_results': market_results,
        })

    df_results = pd.DataFrame([{
        'consistency_score': r['consistency_score'],
        'n_conflicts': len(r['conflicts']),
        **{f'{m}_correct': r['market_results'].get(m, {}).get('correct', None)
           for m in markets},
        **{f'{m}_roi': r['market_results'].get(m, {}).get('roi', None)
           for m in markets},
    } for r in results])

    # ── Analysis ────────────────────────────────────────────────────────
    print(f'\n{"="*70}')
    print('TUTARSIZLIK ANALİZİ')
    print(f'{"="*70}')

    thresholds = [0.0, 0.1, 0.2, 0.3, 0.5]

    for t in thresholds:
        mask = df_results['consistency_score'] <= t
        n = mask.sum()
        if n < 50:
            continue
        print(f'\n[Tutarsızlık <= {t:.1f}] → {n:,} maç ({n/len(df_results)*100:.0f}%)')
        print(f' {"Market":<8} {"HitRate":>8} {"ROI/bahis":>10} {"Toplam ROI":>12}')
        print(f' {"-"*42}')
        for m in markets:
            col_c = f'{m}_correct'
            col_r = f'{m}_roi'
            if col_c not in df_results.columns:
                continue
            sub = df_results[mask][col_c].dropna()
            roi_sub = df_results[mask][col_r].dropna()
            if len(sub) < 20:
                continue
            hit = sub.mean()
            avg_roi = roi_sub.mean()
            total_roi = roi_sub.sum()
            print(f' {m:<8} {hit:>7.1%} {avg_roi:>+9.3f} {total_roi:>+11.1f}')

    # Breakdown by conflict message.
    print(f'\n{"="*70}')
    print('EN SIK ÇELIŞKILER')
    print(f'{"="*70}')
    all_conflicts = [c for r in results for c in r['conflicts']]
    from collections import Counter
    for conflict, cnt in Counter(all_conflicts).most_common(10):
        print(f' {cnt:>5}x {conflict}')

    # Distribution of the consistency score.
    print(f'\n{"="*70}')
    print('TUTARSIZLIK DAĞILIMI')
    print(f'{"="*70}')
    for label, lo, hi in [
        ('Tamamen tutarlı', 0.0, 0.05),
        ('Çok tutarlı', 0.05, 0.15),
        ('Orta', 0.15, 0.30),
        ('Tutarsız', 0.30, 0.50),
        ('Çok tutarsız', 0.50, 1.01),
    ]:
        mask = (df_results['consistency_score'] >= lo) & (df_results['consistency_score'] < hi)
        n = mask.sum()
        ou25_hit = df_results[mask]['ou25_correct'].mean()
        ms_hit = df_results[mask]['ms_correct'].mean()
        print(f' {label:<20} {n:>6,} maç ({n/len(df_results)*100:>4.0f}%) | '
              f'MS={ms_hit:.0%} OU25={ou25_hit:.0%}')

    # Save the report.
    report = {
        'total_test': len(df_results),
        'thresholds': {},
    }
    for t in thresholds:
        mask = df_results['consistency_score'] <= t
        n = mask.sum()
        report['thresholds'][str(t)] = {
            'n_matches': int(n),
            'pct': round(n/len(df_results)*100, 1),
            'markets': {},
        }
        for m in markets:
            col_c = f'{m}_correct'
            col_r = f'{m}_roi'
            if col_c not in df_results.columns:
                continue
            sub_c = df_results[mask][col_c].dropna()
            sub_r = df_results[mask][col_r].dropna()
            if len(sub_c) > 0:
                report['thresholds'][str(t)]['markets'][m] = {
                    'hit_rate': round(float(sub_c.mean()), 4),
                    'avg_roi': round(float(sub_r.mean()), 4),
                    'total_roi': round(float(sub_r.sum()), 2),
                }

    out_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                            'reports', 'backtest_consistency.json')
    # The reports directory may not exist on a fresh checkout.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, 'w') as f:
        json.dump(report, f, indent=2)
    print(f'\nRapor: {out_path}')
if __name__ == '__main__':
main()
+310
View File
@@ -0,0 +1,310 @@
"""
League Model Backtest — Son 100+ Maç
======================================
Her lig için en son 100-200 maçı (eğitim datasından bağımsız, test seti)
lig bazlı modelle tahmin eder ve gerçek sonuçla karşılaştırır.
Usage:
python scripts/backtest_league_models.py
python scripts/backtest_league_models.py --min-matches 150
"""
import os, sys, json, warnings, argparse
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import accuracy_score
warnings.filterwarnings("ignore")
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from models.league_model import get_league_model_loader, MARKET_META, FILE_TO_SIGNAL
# Paths resolved relative to the parent of this script's directory;
# QL_PATH sits one level above that.
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_PATH = os.path.join(AI_ENGINE_DIR, "data", "training_data.csv")
REPORTS_DIR = os.path.join(AI_ENGINE_DIR, "reports")
QL_PATH = os.path.join(os.path.dirname(AI_ENGINE_DIR), "qualified_leagues.json")
# Ground-truth label columns (from the CSV), keyed by signal key.
LABEL_COLS = {
    "MS": "label_ms",
    "OU15": "label_ou15",
    "OU25": "label_ou25",
    "OU35": "label_ou35",
    "BTTS": "label_btts",
    "HT": "label_ht_result",
    "HT_OU05": "label_ht_ou05",
    "HT_OU15": "label_ht_ou15",
    "HTFT": "label_ht_ft",
    "OE": "label_odd_even",
    "CARDS": "label_cards_ou45",
    "HCAP": "label_handicap_ms",
}
# Signal key -> model file key (the inverse of FILE_TO_SIGNAL).
SIGNAL_TO_FILE = {v: k for k, v in FILE_TO_SIGNAL.items()}
# CSV columns excluded from the feature matrix: identifiers, the kick-off
# timestamp, raw scores and every label column.
SKIP_COLS = {
    "match_id","home_team_id","away_team_id","league_id","mst_utc",
    "score_home","score_away","total_goals","ht_score_home","ht_score_away","ht_total_goals",
    "label_ms","label_ou05","label_ou15","label_ou25","label_ou35","label_btts",
    "label_ht_result","label_ht_ou05","label_ht_ou15","label_ht_ft",
    "label_odd_even","label_yellow_cards","label_cards_ou45","label_handicap_ms",
}
def backtest_league(
    league_id: str,
    df_league: pd.DataFrame,
    feature_cols: list,
    league_model,
    n_test: int,
) -> dict:
    """Backtest the last *n_test* matches of one league, market by market.

    Only markets served by *league_model* are evaluated. For each such
    market, every test match with a known label is predicted; label and
    prediction are collected together, so accuracy is always computed on
    matching pairs even when some labels or predictions are missing.

    Returns:
        ``{signal_key: {"accuracy", "n", "source"}}``; empty when there is
        no league model.
    """
    results = {}
    if not league_model:
        return results

    df_test = df_league.sort_values("mst_utc").tail(n_test)

    for sig_key, mfile_key in SIGNAL_TO_FILE.items():
        label_col = LABEL_COLS.get(sig_key)
        if not label_col or label_col not in df_test.columns:
            continue

        labelled = df_test[df_test[label_col].notna()]
        if len(labelled) < 30:
            continue
        if not league_model.has_market(mfile_key):
            continue

        class_labels = MARKET_META[mfile_key][1]
        y_true = []
        y_pred = []
        for _, row in labelled.iterrows():
            feat = row[feature_cols].fillna(0).to_dict()
            probs = league_model.predict_market(mfile_key, feat)
            if not probs:
                continue
            best = max(probs, key=probs.__getitem__)
            y_pred.append(class_labels.index(best))
            y_true.append(row[label_col])

        if not y_pred:
            continue

        acc = accuracy_score(y_true, y_pred)
        results[sig_key] = {
            "accuracy": round(acc, 4),
            "n": len(y_pred),
            "source": "league_specific",
        }
    return results
def backtest_with_general_v25(
    df_test: pd.DataFrame,
    feature_cols: list,
) -> dict:
    """Backtest *df_test* with the XGBoost part of the general V25 model.

    Evaluates MS, OU15, OU25, OU35 and BTTS. Rows whose label is missing
    are excluded from both the labels and the predictions, so the two
    arrays always line up.

    Returns:
        ``{signal_key: {"accuracy", "n", "source"}}``; empty when the V25
        predictor cannot be imported or loaded.
    """
    try:
        from models.v25_ensemble import get_v25_predictor
        v25 = get_v25_predictor()
        if not v25._loaded:
            v25.load_models()
    except Exception:
        # Deliberate best effort: without the general model there is
        # simply nothing to report for this call.
        return {}

    X = df_test[feature_cols].fillna(0)
    results = {}
    mkey_map = {
        "MS": "ms",
        "OU15": "ou15",
        "OU25": "ou25",
        "OU35": "ou35",
        "BTTS": "btts",
    }
    dmat = None  # built lazily, then shared by all markets

    for sig_key, mkey in mkey_map.items():
        label_col = LABEL_COLS.get(sig_key)
        if not label_col or label_col not in df_test.columns:
            continue

        has_label = df_test[label_col].notna().to_numpy()
        y_true = df_test[label_col].to_numpy()[has_label]
        if len(y_true) < 30 or not v25.has_market(mkey):
            continue

        try:
            models_v25 = v25.models.get(mkey, {})
            if "xgb" not in models_v25:
                continue
            if dmat is None:
                dmat = xgb.DMatrix(X.values, feature_names=feature_cols)
            raw = models_v25["xgb"].predict(dmat)
            num_class = list(MARKET_META.get(mkey, (2,)))[0]
            if num_class > 2:
                preds = np.argmax(raw.reshape(-1, num_class), axis=1)
            else:
                # NOTE(review): treats the raw output as the score of
                # label 1 — confirm against the label encoding in the CSV.
                preds = (raw >= 0.5).astype(int)
            preds = preds[has_label]
            acc = accuracy_score(y_true, preds)
            results[sig_key] = {
                "accuracy": round(acc, 4),
                "n": len(preds),
                "source": "general_v25",
            }
        except Exception:
            # Best effort per market: a failing market is left out.
            continue
    return results
def main():
    """Backtest league-specific models on each qualified league.

    For every qualified league with enough matches, the most recent
    ``n_test`` matches are predicted with the league's own XGBoost models;
    markets the league model does not cover fall back to the general V25
    model. Prints one accuracy row per league, a summary, and writes a JSON
    report under ``REPORTS_DIR``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--min-matches", type=int, default=100)
    parser.add_argument("--test-size", type=int, default=150,
                        help="Son kaç maçı test için kullan (min 100)")
    args = parser.parse_args()
    n_test = max(args.min_matches, args.test_size)

    print(f"Loading training data ...")
    df = pd.read_csv(DATA_PATH, low_memory=False)
    feature_cols = [c for c in df.columns if c not in SKIP_COLS]
    print(f" {len(df):,} maç | {len(feature_cols)} feature")

    qualified = []
    if os.path.exists(QL_PATH):
        with open(QL_PATH, encoding="utf-8") as fh:
            qualified = json.load(fh)
    loader = get_league_model_loader()

    # League names are cosmetic; fall back to ids when the DB is unreachable.
    league_names = {}
    try:
        import psycopg2
        from data.db import get_clean_dsn
        conn = psycopg2.connect(get_clean_dsn())
        try:
            cur = conn.cursor()
            cur.execute("SELECT id, name FROM leagues WHERE id = ANY(%s)", (qualified,))
            league_names = {r[0]: r[1] for r in cur.fetchall()}
        finally:
            conn.close()
    except Exception:
        league_names = {}

    counts = df[df["league_id"].isin(qualified)].groupby("league_id").size()
    leagues_to_test = counts[counts >= n_test].index.tolist()
    print(f"\nBacktest: {len(leagues_to_test)} lig (>={n_test} maç) | son {n_test} maç kullanılacak\n")

    all_results = []
    markets_order = ["MS", "OU15", "OU25", "OU35", "BTTS", "HT", "HT_OU05", "HT_OU15", "HTFT", "OE", "CARDS", "HCAP"]
    header = f"{'Liga':<35} {'Maç':>5} | " + " | ".join(f"{m:>7}" for m in markets_order)
    print(header)
    print("-" * len(header))

    for league_id in leagues_to_test:
        df_league = df[df["league_id"] == league_id].copy()
        # str() keeps the fallback working for non-string league ids.
        name = league_names.get(league_id, str(league_id)[:20])
        league_model = loader.get(league_id)
        df_test = df_league.sort_values("mst_utc").tail(n_test)

        if league_model and league_model.models:
            mkt_results = _league_xgb_results(league_model, df_test, feature_cols)
            # Fill markets the league model does not cover with general V25.
            gen_results = backtest_with_general_v25(df_test, feature_cols)
            for k, v in gen_results.items():
                if k not in mkt_results:
                    mkt_results[k] = {**v, "source": "general_v25_fallback"}
        else:
            # No league model: use general V25 for everything.
            mkt_results = backtest_with_general_v25(df_test, feature_cols)
            for k in mkt_results:
                mkt_results[k]["source"] = "general_v25"

        n_used = min(n_test, len(df_league))

        accs = []
        for m in markets_order:
            r = mkt_results.get(m, {})
            if "accuracy" in r:
                accs.append(f"{r['accuracy']*100:>6.1f}%")
            else:
                accs.append(f"{'':>7}")
        print(f"{name:<35} {n_used:>5} | " + " | ".join(accs))

        all_results.append({
            "league_id": league_id,
            "league_name": name,
            "n_tested": n_used,
            "markets": mkt_results,
        })

    # ── Summary ───────────────────────────────────────────────────
    print("\n" + "=" * len(header))
    print("ORTALAMA DOĞRULUK (tüm ligler):")
    for m in markets_order:
        accs = [r["markets"][m]["accuracy"] for r in all_results if m in r["markets"] and "accuracy" in r["markets"][m]]
        if accs:
            print(f" {m:<10}: {np.mean(accs)*100:.1f}% (min={min(accs)*100:.1f}% max={max(accs)*100:.1f}% n_leagues={len(accs)})")

    # Best / worst leagues by MS accuracy.
    ms_sorted = sorted(
        [(r["league_name"], r["markets"].get("MS", {}).get("accuracy", 0), r["n_tested"])
         for r in all_results if "MS" in r["markets"] and "accuracy" in r["markets"]["MS"]],
        key=lambda x: x[1], reverse=True
    )
    print("\nEN İYİ MS (Top 10):")
    for name, acc, n in ms_sorted[:10]:
        print(f" {name:<35} {acc*100:.1f}% ({n} maç)")
    print("\nEN KÖTÜ MS (Bottom 10):")
    for name, acc, n in ms_sorted[-10:]:
        print(f" {name:<35} {acc*100:.1f}% ({n} maç)")

    # Save the report; the reports directory may not exist yet.
    report = {"generated_at": pd.Timestamp.now().isoformat(), "n_test_per_league": n_test, "results": all_results}
    os.makedirs(REPORTS_DIR, exist_ok=True)
    out_path = os.path.join(REPORTS_DIR, "backtest_league_results.json")
    with open(out_path, "w") as f:
        json.dump(report, f, indent=2)
    print(f"\nRapor: {out_path}")


def _league_xgb_results(league_model, df_test, feature_cols):
    """Score every market of *league_model* on *df_test* with its raw boosters.

    Rows without a label are dropped from both labels and predictions so
    the two stay aligned. A market that raises is reported as
    ``{"error": message}``.
    """
    X = df_test[feature_cols].fillna(0)
    out = {}
    for mfile_key, booster in list(league_model.models.items()):
        sig_key = FILE_TO_SIGNAL.get(mfile_key)
        if not sig_key:
            continue
        label_col = LABEL_COLS.get(sig_key)
        if not label_col or label_col not in df_test.columns:
            continue

        has_label = df_test[label_col].notna().to_numpy()
        y_true = df_test[label_col].to_numpy()[has_label]
        if len(y_true) < 30:
            continue

        try:
            dmat = xgb.DMatrix(X.values, feature_names=feature_cols)
            raw = booster.predict(dmat)
            nc = MARKET_META[mfile_key][0]
            if nc > 2:
                preds = np.argmax(raw.reshape(-1, nc), axis=1)
            else:
                preds = (raw >= 0.5).astype(int)
            preds = preds[has_label]
            acc = accuracy_score(y_true, preds)
            out[sig_key] = {"accuracy": round(float(acc), 4), "n": len(preds), "source": "league_xgb"}
        except Exception as e:
            out[sig_key] = {"error": str(e)}
    return out
if __name__ == "__main__":
main()
+113 -200
View File
@@ -1,223 +1,136 @@
"""
Real AI Engine Backtest Script
==============================
Uses the ACTUAL models (V20/V25 Ensemble) to predict historical matches.
Usage:
python ai-engine/scripts/backtest_real.py
Gerçek Odds Bazlı Backtest
============================
Model olasılığı vs gerçek bookmaker odds karşılaştırır.
Edge varsa bahis açıldığı varsayılır, gerçek ROI hesaplanır.
"""
import os
import sys
import json
import time
import psycopg2
from psycopg2.extras import RealDictCursor
from datetime import datetime
import os, sys, json
import numpy as np
import pandas as pd
import xgboost as xgb
# Add paths
AI_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(AI_DIR)
sys.path.insert(0, ROOT_DIR)
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Fix for Windows path issues in scripts
if "scripts" in os.path.basename(AI_DIR):
ROOT_DIR = os.path.dirname(ROOT_DIR) # One level up if inside scripts folder
DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data', 'training_data.csv')
MODELS_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'models', 'v25')
REPORT_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'reports')
from services.single_match_orchestrator import get_single_match_orchestrator, MatchData
# Columns that are never fed to the models: identifiers, the kickoff
# timestamp, final/half-time scores and every label_* target. Any CSV column
# not listed here is treated as a model feature.
SKIP_COLS = {
    'match_id','home_team_id','away_team_id','league_id','mst_utc',
    'score_home','score_away','total_goals','ht_score_home','ht_score_away','ht_total_goals',
    'label_ms','label_ou05','label_ou15','label_ou25','label_ou35','label_btts',
    'label_ht_result','label_ht_ou05','label_ht_ou15','label_ht_ft',
    'label_odd_even','label_yellow_cards','label_cards_ou45','label_handicap_ms',
}
def get_clean_dsn() -> str:
return "postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db"
# One entry per bettable selection:
#   (model_key, n_class, pred_class, label_col, odds_col, display_name)
# - model_key    selects the model file xgb_v25_<model_key>.json
# - n_class      number of classes the model predicts
# - pred_class   probability column to read; also the label value that counts as a win
# - label_col    CSV column holding the actual outcome
# - odds_col     CSV column holding the bookmaker odds for this selection
# - display_name short name used in the printed report and the JSON output
MARKETS = [
    ('ms', 3, 0, 'label_ms', 'odds_ms_h', 'MS-Ev'),
    ('ms', 3, 1, 'label_ms', 'odds_ms_d', 'MS-Ber'),
    ('ms', 3, 2, 'label_ms', 'odds_ms_a', 'MS-Dep'),
    ('ou15', 2, 1, 'label_ou15', 'odds_ou15_o', 'OU15-Ust'),
    ('ou15', 2, 0, 'label_ou15', 'odds_ou15_u', 'OU15-Alt'),
    ('ou25', 2, 1, 'label_ou25', 'odds_ou25_o', 'OU25-Ust'),
    ('ou25', 2, 0, 'label_ou25', 'odds_ou25_u', 'OU25-Alt'),
    ('ou35', 2, 1, 'label_ou35', 'odds_ou35_o', 'OU35-Ust'),
    ('ou35', 2, 0, 'label_ou35', 'odds_ou35_u', 'OU35-Alt'),
    ('btts', 2, 1, 'label_btts', 'odds_btts_y', 'BTTS-Var'),
    ('btts', 2, 0, 'label_btts', 'odds_btts_n', 'BTTS-Yok'),
]
def run_backtest():
print("🚀 REAL AI BACKTEST: Sept 13, 2024 - Top Leagues")
print("🧠 Engine: V30 Ensemble (V20+V25)")
print("="*60)
MIN_ODDS = 1.10
MAX_ODDS = 10.0
# Load Top Leagues
leagues_path = os.path.join(ROOT_DIR, "top_leagues.json")
try:
with open(leagues_path, 'r') as f:
top_leagues = json.load(f)
league_ids = tuple(str(lid) for lid in top_leagues)
print(f"📋 Loaded {len(top_leagues)} top leagues.")
except Exception as e:
print(f"❌ Error loading top_leagues.json: {e}")
return
# Date Range (Sept 13, 2024)
start_dt = datetime(2024, 9, 13, 0, 0, 0)
end_dt = datetime(2024, 9, 13, 23, 59, 59)
start_ts = int(start_dt.timestamp() * 1000)
end_ts = int(end_dt.timestamp() * 1000)
def load_model(market):
    """Load the saved XGBoost booster for *market*.

    The model is expected at ``MODELS_DIR/xgb_v25_<market>.json``. A missing
    file is not treated as an error: ``None`` is returned so the caller can
    skip that market.
    """
    model_file = os.path.join(MODELS_DIR, f'xgb_v25_{market}.json')
    if os.path.exists(model_file):
        booster = xgb.Booster()
        booster.load_model(model_file)
        return booster
    return None
dsn = get_clean_dsn()
conn = psycopg2.connect(dsn)
cur = conn.cursor(cursor_factory=RealDictCursor)
# Fetch Matches
cur.execute("""
SELECT m.id, m.match_name, m.home_team_id, m.away_team_id,
m.mst_utc, m.league_id, m.status, m.score_home, m.score_away,
t1.name as home_team, t2.name as away_team,
l.name as league_name
FROM matches m
LEFT JOIN teams t1 ON m.home_team_id = t1.id
LEFT JOIN teams t2 ON m.away_team_id = t2.id
LEFT JOIN leagues l ON m.league_id = l.id
WHERE m.mst_utc BETWEEN %s AND %s
AND m.league_id IN %s
AND m.status = 'FT'
ORDER BY m.mst_utc ASC
LIMIT 20 -- Limit to 20 matches to avoid running for hours on a single backtest
""", (start_ts, end_ts, league_ids))
rows = cur.fetchall()
print(f"📊 Found {len(rows)} finished matches. Starting AI Analysis...")
def main():
print('Veri yukleniyor...')
df = pd.read_csv(DATA_PATH, low_memory=False)
df = df.sort_values('mst_utc')
n_test = int(len(df) * 0.20)
df_test = df.tail(n_test).copy().reset_index(drop=True)
print(f'Test seti: {len(df_test):,} mac')
if not rows:
print("⚠️ No matches found for this date.")
cur.close()
conn.close()
return
feature_cols = [c for c in df.columns if c not in SKIP_COLS]
X = df_test[feature_cols].fillna(0).values
# Initialize AI Engine
try:
orchestrator = get_single_match_orchestrator()
print("✅ AI Engine (SingleMatchOrchestrator) Loaded.")
except Exception as e:
print(f"❌ Failed to load AI Engine: {e}")
print("💡 Make sure models are trained/present in ai-engine/models/")
cur.close()
conn.close()
return
# Modelleri yukle
loaded = {}
for mkey, n_class, *_ in MARKETS:
if mkey not in loaded:
m = load_model(mkey)
if m:
loaded[mkey] = (m, n_class)
print(f'Modeller: {list(loaded.keys())}')
# ─── Backtest Loop ───
total_matches_analyzed = 0
bets_skipped = 0
bets_played = 0
bets_won = 0
total_profit = 0.0
# Thresholds matching the NEW Skip Logic
MIN_CONF = 45.0
# Toplu tahmin
raw_preds = {}
for mkey, (model, n_class) in loaded.items():
dmat = xgb.DMatrix(pd.DataFrame(X, columns=feature_cols))
raw = model.predict(dmat)
raw_preds[mkey] = raw.reshape(-1, n_class) if n_class > 2 else np.column_stack([1-raw, raw])
start_time = time.time()
# Backtest
all_results = []
print(f'\n{"Market":<12} {"Edge>=":>7} {"Bahis":>7} {"Hit%":>7} {"AvgOdds":>9} {"ROI/b":>8} {"Toplam":>10}')
print('-' * 65)
for i, row in enumerate(rows):
match_id = str(row['id'])
home_team = row['home_team']
away_team = row['away_team']
home_score = row['score_home']
away_score = row['score_away']
print(f"\n[{i+1}/{len(rows)}] Analyzing: {home_team} vs {away_team} ...")
for mkey, n_class, pred_cls, label_col, odds_col, isim in MARKETS:
if mkey not in raw_preds or label_col not in df_test.columns or odds_col not in df_test.columns:
continue
try:
# 1. AI PREDICTION (Actual Model Call)
prediction = orchestrator.analyze_match(match_id)
if not prediction:
print(f" ⚠️ AI returned no prediction.")
mp = raw_preds[mkey][:, pred_cls]
act = pd.to_numeric(df_test[label_col], errors='coerce').values
bko = pd.to_numeric(df_test[odds_col], errors='coerce').values
valid = (~np.isnan(act) & ~np.isnan(bko) &
(bko >= MIN_ODDS) & (bko <= MAX_ODDS))
mp, act, bko = mp[valid], act[valid].astype(int), bko[valid]
implied = 1.0 / bko
edge = mp - implied
print(f'\n{isim}:')
for min_e in [0.02, 0.03, 0.05, 0.07, 0.10]:
mask = edge >= min_e
n = mask.sum()
if n < 20:
continue
won = (act[mask] == pred_cls).astype(int)
roi = (bko[mask] - 1) * won - (1 - won)
hit = won.mean()
avg_roi = roi.mean()
total = roi.sum()
avg_odds = bko[mask].mean()
sign = '+' if total > 0 else ''
print(f' edge>={min_e:+.0%} n={n:>5,} hit={hit:.1%} odds={avg_odds:.2f} roi/b={avg_roi:+.3f} toplam={sign}{total:.1f}')
all_results.append({'market': isim, 'min_edge': min_e, 'n': n,
'hit': round(hit, 4), 'avg_odds': round(avg_odds, 3),
'avg_roi': round(avg_roi, 4), 'total_roi': round(total, 2)})
total_matches_analyzed += 1
# 2. Extract Main Pick
main_pick = prediction.get("main_pick") or {}
pick_name = main_pick.get("pick")
confidence = main_pick.get("confidence", 0)
odds = main_pick.get("odds", 0)
# En iyi
winners = sorted([r for r in all_results if r['total_roi'] > 0],
key=lambda x: x['avg_roi'], reverse=True)
print(f'\n{"="*65}')
print('KAZANCLI KOMBINASYONLAR (total_roi > 0):')
print(f'{"="*65}')
for r in winners[:20]:
print(f' {r["market"]:<12} edge>={r["min_edge"]:+.0%} | n={r["n"]:>5,} | '
f'hit={r["hit"]:.0%} | roi/b={r["avg_roi"]:+.3f} | toplam={r["total_roi"]:+.1f}')
if not pick_name or not confidence:
print(f" ⚠️ No main pick found in prediction.")
continue
os.makedirs(REPORT_DIR, exist_ok=True)
with open(os.path.join(REPORT_DIR, 'backtest_real_odds.json'), 'w') as f:
json.dump(all_results, f, indent=2)
print(f'\nRapor kaydedildi.')
print(f" 🤖 Pick: {pick_name} | Conf: {confidence}% | Odds: {odds}")
# 3. Apply Skip Logic (New Backtest Logic)
if confidence < MIN_CONF:
print(f" 🚫 SKIPPED (Confidence {confidence}% < {MIN_CONF}%)")
bets_skipped += 1
continue
if odds > 0:
implied_prob = 1.0 / odds
my_prob = confidence / 100.0
if my_prob - implied_prob < -0.03: # Negative edge
print(f" 🚫 SKIPPED (Negative Edge)")
bets_skipped += 1
continue
# 4. Bet Played
bets_played += 1
print(f" 🎲 BET PLAYED: {pick_name} @ {odds}")
# 5. Resolve Bet
won = False
# Basic resolution logic (Need to parse pick_name like "1", "X", "2", "2.5 Üst", etc.)
pick_clean = str(pick_name).upper()
# MS
if pick_clean in ["1", "MS 1"] and home_score > away_score: won = True
elif pick_clean in ["X", "MS X"] and home_score == away_score: won = True
elif pick_clean in ["2", "MS 2"] and away_score > home_score: won = True
# OU25
elif "ÜST" in pick_clean or "OVER" in pick_clean:
if (home_score + away_score) > 2.5: won = True
elif "ALT" in pick_clean or "UNDER" in pick_clean:
if (home_score + away_score) < 2.5: won = True
# BTTS
elif "VAR" in pick_clean and home_score > 0 and away_score > 0: won = True
elif "YOK" in pick_clean and (home_score == 0 or away_score == 0): won = True
if won:
bets_won += 1
profit = odds - 1.0
print(f" ✅ WON! (+{profit:.2f} units)")
else:
profit = -1.0
print(f" ❌ LOST! (-1.00 units)")
total_profit += profit
except Exception as e:
print(f" 💥 Error during analysis: {e}")
elapsed = time.time() - start_time
# ─── FINAL REPORT ───
print("\n" + "="*60)
print("📈 REAL AI BACKTEST RESULTS")
print(f"🕒 Time taken: {elapsed:.1f} seconds")
print("="*60)
print(f"📊 Matches Analyzed: {total_matches_analyzed}")
print(f"🚫 Bets SKIPPED: {bets_skipped}")
print(f"✅ Bets PLAYED: {bets_played}")
if bets_played > 0:
win_rate = (bets_won / bets_played) * 100
roi = (total_profit / bets_played) * 100
yield_val = total_profit # Net Units
print(f"🏆 Bets Won: {bets_won}")
print(f"💀 Bets Lost: {bets_played - bets_won}")
print("-" * 40)
print(f" Win Rate: {win_rate:.2f}%")
print(f"💰 Total Profit (Units): {total_profit:.2f}")
print(f"📊 ROI: {roi:.2f}%")
if roi > 0:
print("🟢 STRATEGY IS PROFITABLE!")
else:
print("🔴 STRATEGY IS LOSING")
else:
print("⚠️ No bets were played. All were skipped or failed.")
cur.close()
conn.close()
if __name__ == "__main__":
run_backtest()
if __name__ == '__main__':
main()
+176 -15
View File
@@ -128,7 +128,40 @@ FEATURE_COLS = [
"home_top_scorer_form", "away_top_scorer_form",
"home_avg_player_exp", "away_avg_player_exp",
"home_goals_diversity", "away_goals_diversity",
# V27 H2H Expanded (4)
"h2h_home_goals_avg", "h2h_away_goals_avg",
"h2h_recent_trend", "h2h_venue_advantage",
# V27 Rolling Stats (12)
"home_rolling5_goals", "home_rolling5_conceded",
"home_rolling10_goals", "home_rolling10_conceded",
"home_rolling20_goals", "home_rolling20_conceded",
"away_rolling5_goals", "away_rolling5_conceded",
"away_rolling10_goals", "away_rolling10_conceded",
"home_rolling5_cs", "away_rolling5_cs",
# V27 Venue Stats (4)
"home_venue_goals", "home_venue_conceded",
"away_venue_goals", "away_venue_conceded",
# V27 Goal Trend (2)
"home_goal_trend", "away_goal_trend",
# V27 Calendar (5)
"home_days_rest", "away_days_rest",
"match_month", "is_season_start", "is_season_end",
# V27 Interaction (6)
"attack_vs_defense_home", "attack_vs_defense_away",
"xg_diff", "form_momentum_interaction",
"elo_form_consistency", "upset_x_elo_gap",
# V27 League Expanded (5)
"league_home_win_rate", "league_draw_rate",
"league_btts_rate", "league_ou25_rate",
"league_reliability_score",
# Labels
"score_home", "score_away", "total_goals",
"ht_score_home", "ht_score_away", "ht_total_goals",
@@ -296,6 +329,10 @@ class BatchDataLoader:
SELECT league_id,
AVG(score_home + score_away) as avg_goals,
AVG(CASE WHEN score_home = 0 AND score_away = 0 THEN 1.0 ELSE 0.0 END) as zero_rate,
AVG(CASE WHEN score_home > score_away THEN 1.0 ELSE 0.0 END) as home_win_rate,
AVG(CASE WHEN score_home = score_away THEN 1.0 ELSE 0.0 END) as draw_rate,
AVG(CASE WHEN score_home > 0 AND score_away > 0 THEN 1.0 ELSE 0.0 END) as btts_rate,
AVG(CASE WHEN score_home + score_away > 2.5 THEN 1.0 ELSE 0.0 END) as ou25_rate,
COUNT(*) as match_count
FROM matches
WHERE status = 'FT'
@@ -304,12 +341,17 @@ class BatchDataLoader:
AND league_id IN ({ph})
GROUP BY league_id
""", self.top_league_ids)
for league_id, avg_goals, zero_rate, cnt in self.cur.fetchall():
for row in self.cur.fetchall():
league_id, avg_goals, zero_rate, home_win_rate, draw_rate, btts_rate, ou25_rate, cnt = row
self.league_stats_cache[league_id] = {
"avg_goals": float(avg_goals) if avg_goals else 2.5,
"zero_rate": float(zero_rate) if zero_rate else 0.07,
"match_count": cnt
"home_win_rate": float(home_win_rate) if home_win_rate else 0.45,
"draw_rate": float(draw_rate) if draw_rate else 0.25,
"btts_rate": float(btts_rate) if btts_rate else 0.50,
"ou25_rate": float(ou25_rate) if ou25_rate else 0.50,
"match_count": cnt,
}
def _load_team_history(self):
@@ -666,6 +708,9 @@ class FeatureExtractor:
print(f"\n🔄 Extracting features for {total} matches...", flush=True)
_last_print = t_start
_PRINT_INTERVAL = 60 # her dakika bir ilerleme
# Process chronologically — ELO grows as we go
for i, m in enumerate(matches):
(
@@ -683,17 +728,25 @@ class FeatureExtractor:
league_name,
) = m
if i % 100 == 0 and i > 0:
elapsed = time.time() - t_start
rate = i / elapsed # matches per second
now = time.time()
if now - _last_print >= _PRINT_INTERVAL and i > 0:
elapsed = now - t_start
rate = i / elapsed
remaining = (total - i) / rate if rate > 0 else 0
pct = i / total * 100
pct = i / total * 100
eta_h = int(remaining // 3600)
eta_m = int((remaining % 3600) // 60)
eta_s = int(remaining % 60)
eta_str = (f"{eta_h}s {eta_m}dk" if eta_h else f"{eta_m}dk {eta_s}s")
print(
f" [{i}/{total}] ({pct:.0f}%) | {rate:.1f} maç/s | "
f"ETA: {remaining/60:.1f} dk | skipped: {skipped} | "
f"dq_rejected: {dq_rejected}",
f" [{i:>6}/{total}] %{pct:>4.1f} | "
f"{rate:.1f} maç/s | "
f"bitti: {len(rows):,} | "
f"atlanan: {skipped+dq_rejected} | "
f"ETA: {eta_str}",
flush=True,
)
_last_print = now
row = self._extract_one(
mid, hid, aid, sh, sa, hth, hta, mst, lid,
@@ -882,7 +935,10 @@ class FeatureExtractor:
}
# === LEAGUE FEATURES ===
league = self.loader.league_stats_cache.get(lid, {"avg_goals": 2.5, "zero_rate": 0.07})
league = self.loader.league_stats_cache.get(lid, {
"avg_goals": 2.5, "zero_rate": 0.07, "home_win_rate": 0.45,
"draw_rate": 0.25, "btts_rate": 0.50, "ou25_rate": 0.50, "match_count": 0,
})
league_features = {
"league_avg_goals": league["avg_goals"],
"league_zero_goal_rate": league["zero_rate"],
@@ -953,6 +1009,11 @@ class FeatureExtractor:
home_goals_form = home_sq.get('goals_form', 0)
away_goals_form = away_sq.get('goals_form', 0)
# === V27 ROLLING / VENUE / CALENDAR FEATURES ===
v27 = self._compute_v27_features(hid, aid, mst, elo_features, form_features,
home_momentum_score, away_momentum_score,
upset_feats, h2h_features, league)
# === ASSEMBLE ROW ===
row = {
"match_id": mid,
@@ -960,13 +1021,13 @@ class FeatureExtractor:
"away_team_id": aid,
"league_id": lid,
"mst_utc": mst,
**elo_features,
**form_features,
**h2h_features,
**stats_features,
**odds_features,
"home_xga": form_features["home_conceded_avg"],
"away_xga": form_features["away_conceded_avg"],
**league_features,
@@ -1007,7 +1068,10 @@ class FeatureExtractor:
"away_avg_player_exp": away_sq.get('avg_player_exp', 0.0),
"home_goals_diversity": home_sq.get('goals_diversity', 0.0),
"away_goals_diversity": away_sq.get('goals_diversity', 0.0),
# V27 Features
**v27,
# Labels
"score_home": sh,
"score_away": sa,
@@ -1033,6 +1097,103 @@ class FeatureExtractor:
return row
def _compute_v27_features(self, hid, aid, mst, elo_features, form_features,
                          home_momentum, away_momentum, upset_feats, h2h_features, league):
    """Compute the V27 rolling, venue, calendar, interaction and league features.

    All history-based values use only matches played strictly before ``mst``
    so that no information from the match itself (or later) leaks in.

    Args:
        hid: Home team id (key into ``self.loader.team_matches``).
        aid: Away team id.
        mst: Kickoff time of the match in epoch milliseconds.
        elo_features: Dict providing ``elo_diff`` and ``form_elo_diff``.
        form_features: Dict providing ``home_goals_avg``, ``away_goals_avg``,
            ``home_conceded_avg`` and ``away_conceded_avg``.
        home_momentum: Momentum score of the home team.
        away_momentum: Momentum score of the away team.
        upset_feats: Dict optionally providing ``upset_potential``.
        h2h_features: Unused; kept so the call signature stays stable.
        league: League stats dict (``match_count``, ``home_win_rate``,
            ``draw_rate``, ``btts_rate``, ``ou25_rate``); missing keys fall
            back to defaults.

    Returns:
        Dict mapping the 38 V27 feature names to floats.
    """
    from datetime import datetime, timezone

    # History entries are indexed positionally; presumably
    # (mst, is_home, goals_for, goals_against, opponent_id) — confirm against
    # the loader. Filter each team's history once instead of on every helper call.
    home_prior = [m for m in self.loader.team_matches.get(hid, []) if m[0] < mst]
    away_prior = [m for m in self.loader.team_matches.get(aid, []) if m[0] < mst]

    def _rolling(prior, n):
        """Return (goals avg, conceded avg, clean-sheet rate) over the last n matches."""
        recent = prior[-n:]
        if not recent:
            return 1.3, 1.1, 0.0
        count = len(recent)
        goals = sum(m[2] for m in recent) / count
        conceded = sum(m[3] for m in recent) / count
        cs = sum(1 for m in recent if m[3] == 0) / count
        return round(goals, 3), round(conceded, 3), round(cs, 3)

    def _venue(prior, is_home):
        """Return (goals avg, conceded avg) over the last 10 matches at the given venue."""
        recent = [m for m in prior if m[1] == is_home][-10:]
        if not recent:
            return 1.3, 1.1
        count = len(recent)
        goals = sum(m[2] for m in recent) / count
        conceded = sum(m[3] for m in recent) / count
        return round(goals, 3), round(conceded, 3)

    def _days_rest(prior):
        """Return days since the previous match, capped at 30 (7 if no history)."""
        if not prior:
            return 7.0
        # Relies on the history being in chronological order.
        last = prior[-1][0]
        return round(min((mst - last) / 86400000.0, 30.0), 1)

    h5g, h5c, h5cs = _rolling(home_prior, 5)
    h10g, h10c, _ = _rolling(home_prior, 10)
    h20g, h20c, _ = _rolling(home_prior, 20)
    a5g, a5c, a5cs = _rolling(away_prior, 5)
    a10g, a10c, _ = _rolling(away_prior, 10)

    hvg, hvc = _venue(home_prior, True)
    avg, avc = _venue(away_prior, False)

    home_rest = _days_rest(home_prior)
    away_rest = _days_rest(away_prior)

    # Timezone-aware replacement for the deprecated datetime.utcfromtimestamp().
    match_month = datetime.fromtimestamp(mst / 1000, tz=timezone.utc).month

    elo_diff = elo_features["elo_diff"]
    form_elo_diff = elo_features["form_elo_diff"]
    mom_diff = home_momentum - away_momentum

    home_conceded = form_features["home_conceded_avg"]
    away_conceded = form_features["away_conceded_avg"]
    home_goals = form_features["home_goals_avg"]
    away_goals = form_features["away_goals_avg"]

    upset_potential = upset_feats.get("upset_potential", 0.0)

    # Head-to-head, seen from the home team's side of its own history.
    h2h_prior = [m for m in home_prior if m[4] == aid]
    if h2h_prior:
        h2h_home_goals_avg = sum(m[2] for m in h2h_prior) / len(h2h_prior)
        h2h_away_goals_avg = sum(m[3] for m in h2h_prior) / len(h2h_prior)
    else:
        h2h_home_goals_avg = 1.3
        h2h_away_goals_avg = 1.1

    # +1 per win, -1 per loss, 0 per draw over the last three meetings.
    recent_h2h = h2h_prior[-3:]
    h2h_recent_trend = sum(
        1 if m[2] > m[3] else -1 if m[2] < m[3] else 0 for m in recent_h2h
    ) / max(len(recent_h2h), 1)

    # Win rate in meetings where the home team also played at home.
    venue_h2h = [m for m in h2h_prior if m[1]]
    if venue_h2h:
        h2h_venue_advantage = sum(1 for m in venue_h2h if m[2] > m[3]) / len(venue_h2h)
    else:
        h2h_venue_advantage = 0.5

    league_count = league.get("match_count", 0)

    return {
        "h2h_home_goals_avg": round(h2h_home_goals_avg, 3),
        "h2h_away_goals_avg": round(h2h_away_goals_avg, 3),
        "h2h_recent_trend": round(h2h_recent_trend, 3),
        "h2h_venue_advantage": round(h2h_venue_advantage, 3),
        "home_rolling5_goals": h5g, "home_rolling5_conceded": h5c,
        "home_rolling10_goals": h10g, "home_rolling10_conceded": h10c,
        "home_rolling20_goals": h20g, "home_rolling20_conceded": h20c,
        "away_rolling5_goals": a5g, "away_rolling5_conceded": a5c,
        "away_rolling10_goals": a10g, "away_rolling10_conceded": a10c,
        "home_rolling5_cs": h5cs, "away_rolling5_cs": a5cs,
        "home_venue_goals": hvg, "home_venue_conceded": hvc,
        "away_venue_goals": avg, "away_venue_conceded": avc,
        "home_goal_trend": round(h5g - h10g, 3),
        "away_goal_trend": round(a5g - a10g, 3),
        "home_days_rest": home_rest, "away_days_rest": away_rest,
        "match_month": float(match_month),
        "is_season_start": 1.0 if match_month in (7, 8, 9) else 0.0,
        "is_season_end": 1.0 if match_month in (5, 6) else 0.0,
        "attack_vs_defense_home": round(home_goals - away_conceded, 3),
        "attack_vs_defense_away": round(away_goals - home_conceded, 3),
        # NOTE(review): named "xg_diff" but computed from the conceded
        # averages; home_goals - away_goals looks like the intent. Left
        # unchanged so existing feature values do not shift — confirm and
        # regenerate training data if it is corrected.
        "xg_diff": round(home_conceded - away_conceded, 3),
        "form_momentum_interaction": round(mom_diff * form_elo_diff / 1000.0, 4),
        "elo_form_consistency": round(1.0 - abs(elo_diff - form_elo_diff) / max(abs(elo_diff), 100.0), 4),
        "upset_x_elo_gap": round(upset_potential * abs(elo_diff) / 500.0, 4),
        "league_home_win_rate": league.get("home_win_rate", 0.45),
        "league_draw_rate": league.get("draw_rate", 0.25),
        "league_btts_rate": league.get("btts_rate", 0.50),
        "league_ou25_rate": league.get("ou25_rate", 0.50),
        "league_reliability_score": min(1.0, league_count / 500.0) if league_count else 0.3,
    }
def _validate_row_quality(
self,
row: dict,
@@ -0,0 +1,166 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": ["# Training Data Extraction — Google Colab\n", "SSH tunnel ile sunucuya bağlanır, DB'den 270K+ maç çeker, Drive'a kaydeder.\n"]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 1. Gerekli paketler\n",
"!pip install sshtunnel psycopg2-binary pandas numpy -q\n",
"print('Paketler hazır')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 2. Drive bağla\n",
"from google.colab import drive\n",
"drive.mount('/content/drive')\n",
"import os\n",
"DRIVE_DIR = '/content/drive/MyDrive/iddaai'\n",
"os.makedirs(DRIVE_DIR, exist_ok=True)\n",
"print('Drive hazır:', DRIVE_DIR)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 3. SSH private key upload\n",
"# Mac'te terminalde şunu çalıştır, çıktıyı kopyala:\n",
"# cat ~/.ssh/id_ed25519\n",
"# Aşağıya yapıştır (BEGIN ve END satırları dahil)\n",
"\n",
"SSH_PRIVATE_KEY = \"\"\"-----BEGIN OPENSSH PRIVATE KEY-----\n",
"BURAYA_KEY_ICERIGINI_YAPISTIR\n",
"-----END OPENSSH PRIVATE KEY-----\"\"\"\n",
"\n",
"# Key dosyasına yaz\n",
"key_path = '/root/.ssh/id_ed25519'\n",
"os.makedirs('/root/.ssh', exist_ok=True)\n",
"with open(key_path, 'w') as f:\n",
" f.write(SSH_PRIVATE_KEY.strip() + '\\n')\n",
"os.chmod(key_path, 0o600)\n",
"print('SSH key hazır')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 4. SSH Tunnel aç + DB bağlantısını test et\n",
"from sshtunnel import SSHTunnelForwarder\n",
"import psycopg2\n",
"\n",
"tunnel = SSHTunnelForwarder(\n",
" ('95.70.252.214', 2222),\n",
" ssh_username='haruncan',\n",
" ssh_pkey=key_path,\n",
" remote_bind_address=('localhost', 5432),\n",
" local_bind_address=('localhost', 15432),\n",
")\n",
"tunnel.start()\n",
"print(f'Tunnel açık: localhost:{tunnel.local_bind_port}')\n",
"\n",
"conn = psycopg2.connect(\n",
" host='localhost',\n",
" port=15432,\n",
" dbname='iddaai_db',\n",
" user='iddaai_user',\n",
" password='IddaA1_S4crET!',\n",
")\n",
"cur = conn.cursor()\n",
"cur.execute(\"SELECT COUNT(*) FROM matches WHERE status='FT' AND score_home IS NOT NULL\")\n",
"print(f'DB bağlantısı OK — FT maç sayısı: {cur.fetchone()[0]:,}')\n",
"conn.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 5. extract_training_data.py kodunu Drive'dan veya doğrudan çalıştır\n",
"# Önce repo'yu Drive'a kopyala (yoksa)\n",
"import subprocess\n",
"\n",
"REPO_DIR = f'{DRIVE_DIR}/ai-engine'\n",
"SCRIPT = f'{REPO_DIR}/scripts/extract_training_data.py'\n",
"\n",
"if not os.path.exists(SCRIPT):\n",
" print('Script bulunamadı — ai-engine klasörünü Drive a yükle:')\n",
" print(' Yerel makinede: cp -r /Users/piton/Documents/GitHub/iddaai/iddaai-be/ai-engine ~/Google\\ Drive/MyDrive/iddaai/')\n",
"else:\n",
" print('Script hazır:', SCRIPT)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 6. Run the extraction\n",
"import sys, os\n",
"sys.path.insert(0, REPO_DIR)\n",
"\n",
"# Point the DB URL at the local end of the SSH tunnel\n",
"# NOTE(review): credentials are hardcoded in a committed notebook; rotate them and load from a secret instead\n",
"os.environ['DATABASE_URL'] = 'postgresql://iddaai_user:IddaA1_S4crET!@localhost:15432/iddaai_db'\n",
"\n",
"# Write the output CSV to Drive\n",
"OUTPUT_CSV = f'{DRIVE_DIR}/training_data_full.csv'\n",
"\n",
"# Import the script as a module and run its main()\n",
"import importlib.util\n",
"spec = importlib.util.spec_from_file_location('extract', SCRIPT)\n",
"# module_from_spec is the real importlib API; load_from_spec does not exist and raised AttributeError\n",
"mod = importlib.util.module_from_spec(spec)\n",
"spec.loader.exec_module(mod)\n",
"\n",
"# Override the script's output and league-list paths\n",
"mod.OUTPUT_CSV = OUTPUT_CSV\n",
"mod.TOP_LEAGUES_PATH = f'{DRIVE_DIR}/qualified_leagues.json'\n",
"\n",
"mod.main()\n",
"print(f'\\nKaydedildi: {OUTPUT_CSV}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 7. Tunnel kapat\n",
"tunnel.stop()\n",
"print('Tunnel kapatıldı')\n",
"\n",
"# Dosya boyutunu kontrol et\n",
"size_mb = os.path.getsize(OUTPUT_CSV) / 1024 / 1024\n",
"import pandas as pd\n",
"df = pd.read_csv(OUTPUT_CSV, nrows=5)\n",
"print(f'CSV: {size_mb:.1f} MB')\n",
"print(f'Kolonlar: {len(df.columns)}')"
]
}
],
"metadata": {
"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
"language_info": {"name": "python", "version": "3.10.0"}
},
"nbformat": 4,
"nbformat_minor": 4
}
@@ -0,0 +1,806 @@
"""
V25 Backtest + Calibration Training Script
==========================================
Runs a full backtest on historical football matches, measures model accuracy
by market / confidence band / league, and trains isotonic calibration models
for MS, OU15, OU25, and BTTS markets.
Usage:
venv/bin/python scripts/run_backtest_and_calibrate.py
"""
from __future__ import annotations
import os
import sys
import json
import pickle
import time
from collections import defaultdict
from datetime import datetime
from typing import Dict, List, Optional, Tuple, Any
import numpy as np
import pandas as pd
import psycopg2
from psycopg2.extras import RealDictCursor
# ---------------------------------------------------------------------------
# Path setup — works whether executed from ai-engine/ or project root
# ---------------------------------------------------------------------------
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
AI_ENGINE_DIR = os.path.dirname(SCRIPT_DIR)
sys.path.insert(0, AI_ENGINE_DIR)
from data.db import get_clean_dsn
from models.calibration import Calibrator
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# JSON file with the qualified league IDs; lives one directory above ai-engine/.
QUALIFIED_LEAGUES_PATH = os.path.join(AI_ENGINE_DIR, "..", "qualified_leagues.json")
# Output directories, both inside ai-engine/.
CALIBRATION_DIR = os.path.join(AI_ENGINE_DIR, "models", "calibration")
REPORTS_DIR = os.path.join(AI_ENGINE_DIR, "reports")
MAX_MATCHES = 3000 # target upper bound; passed as the SQL LIMIT when fetching matches
PROGRESS_INTERVAL = 100 # print every N matches
# Import-time side effect: make sure both output directories exist.
os.makedirs(CALIBRATION_DIR, exist_ok=True)
os.makedirs(REPORTS_DIR, exist_ok=True)
# Two-level mapping from the odds data to internal feature keys:
#   Turkish category name -> {selection name -> feature key}
# Categories, in order: match result, over/under 1.5 / 2.5 / 3.5 / 0.5,
# both teams to score, first-half result, first-half over/under 0.5 / 1.5.
# Selections: "1"/"X"/"2" = home/draw/away, "Üst"/"Alt" = over/under,
# "Var"/"Yok" = yes/no.
ODDS_CATEGORY_MAP = {
    "Maç Sonucu": {
        "1": "odds_ms_h",
        "X": "odds_ms_d",
        "2": "odds_ms_a",
    },
    "1,5 Alt/Üst": {
        "Üst": "odds_ou15_o",
        "Alt": "odds_ou15_u",
    },
    "2,5 Alt/Üst": {
        "Üst": "odds_ou25_o",
        "Alt": "odds_ou25_u",
    },
    "3,5 Alt/Üst": {
        "Üst": "odds_ou35_o",
        "Alt": "odds_ou35_u",
    },
    "0,5 Alt/Üst": {
        "Üst": "odds_ou05_o",
        "Alt": "odds_ou05_u",
    },
    "Karşılıklı Gol": {
        "Var": "odds_btts_y",
        "Yok": "odds_btts_n",
    },
    "1. Yarı Sonucu": {
        "1": "odds_ht_ms_h",
        "X": "odds_ht_ms_d",
        "2": "odds_ht_ms_a",
    },
    "1. Yarı 0,5 Alt/Üst": {
        "Üst": "odds_ht_ou05_o",
        "Alt": "odds_ht_ou05_u",
    },
    "1. Yarı 1,5 Alt/Üst": {
        "Üst": "odds_ht_ou15_o",
        "Alt": "odds_ht_ou15_u",
    },
}
# Names of the top 5 leagues that get an individual breakdown.
# NOTE(review): the original comment said these "will be matched by league_id",
# but the set holds names — confirm how the lookup is done. Names such as
# "Premier League", "Bundesliga" and "Serie A" are presumably not unique
# across countries, so matching by name alone may pick up other leagues.
TOP5_LEAGUE_NAMES = {
    "Premier League",
    "La Liga",
    "Bundesliga",
    "Serie A",
    "Ligue 1",
}
# ============================================================================
# STEP 1 — Load qualified league IDs
# ============================================================================
def load_qualified_leagues() -> List[str]:
    """Read the qualified-league file and return its IDs as strings.

    The file location comes from ``QUALIFIED_LEAGUES_PATH``; every entry is
    converted with ``str`` so integer and string IDs end up in one form.
    """
    leagues_file = os.path.abspath(QUALIFIED_LEAGUES_PATH)
    with open(leagues_file, "r") as handle:
        raw_ids = json.load(handle)
    print(f"[Step 1] Loaded {len(raw_ids)} qualified league IDs.")
    return list(map(str, raw_ids))
# ============================================================================
# STEP 1b — Fetch matches + pre-computed features in batch
# ============================================================================
def fetch_matches(conn, league_ids: List[str]) -> pd.DataFrame:
    """Fetch finished matches with their pre-computed features in one query.

    Joins ``matches`` with ``football_ai_features`` and the league name, and
    keeps only matches that have at least one football row in
    ``odd_categories``. Only full-time matches with both scores present and a
    league in ``league_ids`` are returned, newest first, at most
    ``MAX_MATCHES`` rows.

    Args:
        conn: Open psycopg2 connection.
        league_ids: League IDs to include.

    Returns:
        DataFrame with one row per match. The column set is taken from the
        query itself, so it is present even when no match qualifies.
    """
    print("[Step 1b] Fetching matches with pre-computed features and odds ...")
    # The context manager closes the cursor even if the query raises.
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(
            """
            SELECT
            m.id AS match_id,
            m.league_id,
            l.name AS league_name,
            m.score_home,
            m.score_away,
            m.mst_utc,
            -- From football_ai_features
            f.home_elo AS home_overall_elo,
            f.away_elo AS away_overall_elo,
            f.elo_diff,
            f.home_home_elo,
            f.away_away_elo,
            f.home_form_elo,
            f.away_form_elo,
            f.home_goals_avg_5 AS home_goals_avg,
            f.away_goals_avg_5 AS away_goals_avg,
            f.home_conceded_avg_5 AS home_conceded_avg,
            f.away_conceded_avg_5 AS away_conceded_avg,
            f.home_clean_sheet_rate,
            f.away_clean_sheet_rate,
            f.home_scoring_rate,
            f.away_scoring_rate,
            f.home_win_streak AS home_winning_streak,
            f.away_win_streak AS away_winning_streak,
            f.home_avg_possession,
            f.away_avg_possession,
            f.home_avg_shots_on_target,
            f.away_avg_shots_on_target,
            f.home_shot_conversion,
            f.away_shot_conversion,
            f.home_avg_corners,
            f.away_avg_corners,
            f.h2h_total AS h2h_total_matches,
            f.h2h_home_win_rate,
            f.h2h_avg_goals,
            f.h2h_over25_rate,
            f.h2h_btts_rate,
            f.league_avg_goals,
            f.league_home_win_pct AS league_home_win_rate,
            f.league_over25_pct AS league_ou25_rate,
            f.referee_avg_cards AS referee_cards_total,
            f.referee_home_bias,
            f.referee_avg_goals,
            f.missing_players_impact AS home_missing_impact,
            f.implied_home,
            f.implied_draw,
            f.implied_away
            FROM matches m
            JOIN football_ai_features f ON f.match_id = m.id
            -- Only matches that have odds data
            JOIN (SELECT DISTINCT match_id FROM odd_categories WHERE sport = 'football') oc
            ON oc.match_id = m.id
            LEFT JOIN leagues l ON l.id = m.league_id
            WHERE m.status = 'FT'
            AND m.score_home IS NOT NULL
            AND m.score_away IS NOT NULL
            AND m.league_id = ANY(%s)
            ORDER BY m.mst_utc DESC
            LIMIT %s
            """,
            (league_ids, MAX_MATCHES),
        )
        rows = cur.fetchall()
        # Take the column names from the cursor so an empty result still
        # yields a DataFrame with the expected columns.
        columns = [col[0] for col in cur.description]
    df = pd.DataFrame([dict(r) for r in rows], columns=columns)
    print(f"[Step 1b] Fetched {len(df)} matches with features + odds coverage.")
    return df
# ============================================================================
# STEP 1c — Fetch all odds for the matched match IDs in one query
# ============================================================================
def fetch_odds_bulk(conn, match_ids: List[str]) -> Dict[str, Dict[str, float]]:
    """Fetch odds for many matches in one query and map them to feature keys.

    Only categories and selections listed in ``ODDS_CATEGORY_MAP`` are kept,
    and only values that parse as a number greater than 1.0.

    Args:
        conn: Open psycopg2 connection.
        match_ids: IDs of the matches to load odds for.

    Returns:
        ``{match_id: {feature_key: odd_value, ...}}``. Matches without any
        usable odds are absent from the dict.
    """
    print(f"[Step 1c] Fetching odds for {len(match_ids)} matches ...")
    # The context manager closes the cursor even if the query raises.
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        # Raw string: the regex contains "\.", which is an invalid escape
        # sequence in a normal string literal.
        cur.execute(
            r"""
            SELECT oc.match_id, oc.name AS cat_name, os.name AS sel_name, os.odd_value
            FROM odd_categories oc
            JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
            WHERE oc.match_id = ANY(%s)
            AND oc.name = ANY(%s)
            AND oc.sport = 'football'
            AND os.odd_value IS NOT NULL
            AND os.odd_value ~ '^[0-9]+(\.[0-9]+)?$'
            """,
            (match_ids, list(ODDS_CATEGORY_MAP)),
        )
        rows = cur.fetchall()

    # Build nested dict: match_id -> {feature_key -> value}
    odds_map: Dict[str, Dict[str, float]] = defaultdict(dict)
    for r in rows:
        feat_key = ODDS_CATEGORY_MAP.get(r["cat_name"], {}).get(r["sel_name"])
        if feat_key is None:
            continue
        try:
            val = float(r["odd_value"])
        except (TypeError, ValueError):
            # Not expected after the SQL regex filter; skip defensively.
            continue
        if val > 1.0:
            # Keep the first value seen for each key. The query has no
            # ORDER BY, so which row comes first is up to the database.
            odds_map[r["match_id"]].setdefault(feat_key, val)
    print(f"[Step 1c] Odds loaded for {len(odds_map)} matches.")
    return dict(odds_map)
# ============================================================================
# STEP 2 — Build 114-feature vector per match
# ============================================================================
def load_feature_cols() -> List[str]:
    """Return the parsed contents of ``models/v25/feature_cols.json`` under ``AI_ENGINE_DIR``."""
    cols_file = os.path.join(AI_ENGINE_DIR, "models", "v25", "feature_cols.json")
    with open(cols_file, "r") as handle:
        feature_cols = json.load(handle)
    return feature_cols
def build_feature_vector(
    match_row: pd.Series,
    odds: Dict[str, float],
    feature_cols: List[str],
) -> Dict[str, float]:
    """
    Construct the full feature dict for one match.

    Every name in ``feature_cols`` starts at 0.0 and is then filled from, in
    order: same-named columns of ``match_row``, the ``odds`` dict, implied
    probabilities derived from the 1X2 odds, the kick-off timestamp, and a few
    interaction terms. Missing values fall back to 0.0.

    Args:
        match_row: one match record; ``mst_utc`` is read as epoch milliseconds.
        odds: {feature_key: odd_value} for this match (may be empty).
        feature_cols: names of the features the model expects.

    Returns:
        Dict with one float per feature. ``form_elo_diff`` and the
        ``implied_*`` keys are written whenever they can be computed, even if
        they are not listed in ``feature_cols``.
    """
    from datetime import timezone  # local: only needed for the UTC conversion

    feat: Dict[str, float] = {col: 0.0 for col in feature_cols}

    # ---- Direct columns from match row (same name in row and feature set) ----
    # NOTE(review): "home_missing_impact" has no "away_missing_impact"
    # counterpart here - confirm that is intentional.
    direct_cols = (
        "home_overall_elo",
        "away_overall_elo",
        "elo_diff",
        "home_home_elo",
        "away_away_elo",
        "home_form_elo",
        "away_form_elo",
        "home_goals_avg",
        "away_goals_avg",
        "home_conceded_avg",
        "away_conceded_avg",
        "home_clean_sheet_rate",
        "away_clean_sheet_rate",
        "home_scoring_rate",
        "away_scoring_rate",
        "home_winning_streak",
        "away_winning_streak",
        "home_avg_possession",
        "away_avg_possession",
        "home_avg_shots_on_target",
        "away_avg_shots_on_target",
        "home_shot_conversion",
        "away_shot_conversion",
        "home_avg_corners",
        "away_avg_corners",
        "h2h_total_matches",
        "h2h_home_win_rate",
        "h2h_avg_goals",
        "h2h_over25_rate",
        "h2h_btts_rate",
        "league_avg_goals",
        "league_home_win_rate",
        "league_ou25_rate",
        "referee_cards_total",
        "referee_home_bias",
        "referee_avg_goals",
        "home_missing_impact",
        "implied_home",
        "implied_draw",
        "implied_away",
    )
    for col in direct_cols:
        if col in feat and col in match_row.index:
            val = match_row.get(col)
            if val is not None and not (isinstance(val, float) and np.isnan(val)):
                feat[col] = float(val)

    # ---- Derived elo features (only when both form elos are non-zero) ----
    if feat.get("home_form_elo", 0) and feat.get("away_form_elo", 0):
        feat["form_elo_diff"] = feat["home_form_elo"] - feat["away_form_elo"]

    # ---- Odds features from relational tables ----
    odds_features = [
        "odds_ms_h", "odds_ms_d", "odds_ms_a",
        "odds_ht_ms_h", "odds_ht_ms_d", "odds_ht_ms_a",
        "odds_ou05_o", "odds_ou05_u",
        "odds_ou15_o", "odds_ou15_u",
        "odds_ou25_o", "odds_ou25_u",
        "odds_ou35_o", "odds_ou35_u",
        "odds_ht_ou05_o", "odds_ht_ou05_u",
        "odds_ht_ou15_o", "odds_ht_ou15_u",
        "odds_btts_y", "odds_btts_n",
    ]
    for ok in odds_features:
        if ok in odds:
            feat[ok] = odds[ok]
            presence_key = f"{ok}_present"
            if presence_key in feat:
                feat[presence_key] = 1.0

    # Implied probabilities: whenever all three 1X2 odds are valid, the
    # overround-normalised values replace whatever the match row supplied.
    if feat.get("odds_ms_h", 0) > 1 and feat.get("odds_ms_d", 0) > 1 and feat.get("odds_ms_a", 0) > 1:
        raw_h = 1.0 / feat["odds_ms_h"]
        raw_d = 1.0 / feat["odds_ms_d"]
        raw_a = 1.0 / feat["odds_ms_a"]
        total = raw_h + raw_d + raw_a
        if total > 0:
            feat["implied_home"] = raw_h / total
            feat["implied_draw"] = raw_d / total
            feat["implied_away"] = raw_a / total

    # ---- Derived match metadata ----
    mst = match_row.get("mst_utc")
    if mst is not None:
        try:
            # stored as epoch ms; timezone-aware replacement for the
            # deprecated datetime.utcfromtimestamp()
            kickoff = datetime.fromtimestamp(int(mst) / 1000, tz=timezone.utc)
        except (TypeError, ValueError, OverflowError, OSError):
            # Unparseable timestamp (NaN, NaT, out of range): keep 0.0 defaults.
            kickoff = None
        if kickoff is not None:
            if "match_month" in feat:
                feat["match_month"] = float(kickoff.month)
            # Season markers: Aug-Oct = start, April-May = end
            if "is_season_start" in feat:
                feat["is_season_start"] = 1.0 if kickoff.month in (8, 9, 10) else 0.0
            if "is_season_end" in feat:
                feat["is_season_end"] = 1.0 if kickoff.month in (4, 5) else 0.0

    # ---- Interaction features ----
    if "attack_vs_defense_home" in feat:
        feat["attack_vs_defense_home"] = feat.get("home_goals_avg", 0) - feat.get("away_conceded_avg", 0)
    if "attack_vs_defense_away" in feat:
        feat["attack_vs_defense_away"] = feat.get("away_goals_avg", 0) - feat.get("home_conceded_avg", 0)
    if "form_momentum_interaction" in feat:
        # NOTE(review): the momentum scores are never filled in by this
        # function, so this term is always 0.0 here - confirm.
        feat["form_momentum_interaction"] = (
            feat.get("home_momentum_score", 0) * feat.get("home_goals_avg", 0)
            - feat.get("away_momentum_score", 0) * feat.get("away_goals_avg", 0)
        )
    if "elo_form_consistency" in feat:
        feat["elo_form_consistency"] = feat.get("elo_diff", 0) * feat.get("home_goals_avg", 0)
    return feat
# ============================================================================
# STEP 3 — Run V25 predictions
# ============================================================================
def load_predictor():
    """Obtain the V25 predictor via get_v25_predictor(), logging before and after."""
    # Imported lazily, inside the function, exactly as the original did.
    from models.v25_ensemble import get_v25_predictor

    print("[Step 3] Loading V25 predictor ...")
    predictor = get_v25_predictor()
    print("[Step 3] V25 predictor ready.")
    return predictor
# ============================================================================
# STEP 4 — Compute actual outcomes from scores
# ============================================================================
def compute_actuals(score_home: int, score_away: int) -> Dict[str, Any]:
    """
    Derive the realised outcome labels of a match from its final score.

    Returns a dict with the 1X2 result ("1"/"X"/"2"), Over/Under 1.5 and 2.5
    ("Over"/"Under") and both-teams-to-score ("Yes"/"No").
    """
    if score_home > score_away:
        result = "1"
    elif score_home == score_away:
        result = "X"
    else:
        result = "2"

    goals = score_home + score_away
    both_scored = score_home > 0 and score_away > 0

    actuals: Dict[str, Any] = {}
    actuals["ms_actual"] = result
    actuals["ou15_actual"] = "Over" if goals >= 2 else "Under"
    actuals["ou25_actual"] = "Over" if goals >= 3 else "Under"
    actuals["btts_actual"] = "Yes" if both_scored else "No"
    return actuals
# ============================================================================
# STEP 5 — Accuracy helpers
# ============================================================================
def confidence_band(prob: float) -> str:
    """Map a probability to its reporting bucket: <50%, 50-65%, 65-75% or 75%+."""
    # Upper bounds are exclusive; anything not below 0.75 lands in the top band.
    for upper_bound, label in ((0.50, "<50%"), (0.65, "50-65%"), (0.75, "65-75%")):
        if prob < upper_bound:
            return label
    return "75%+"
def pick_from_ms(home_prob: float, draw_prob: float, away_prob: float) -> Tuple[str, float]:
    """
    Return the most probable 1X2 outcome and its probability.

    Ties resolve in the order "1", "X", "2" (a later outcome must be strictly
    more likely to win).
    """
    best_label, best_prob = "1", home_prob
    for label, prob in (("X", draw_prob), ("2", away_prob)):
        if prob > best_prob:
            best_label, best_prob = label, prob
    return best_label, best_prob
def pick_from_binary(yes_prob: float, no_prob: float, yes_label: str, no_label: str) -> Tuple[str, float]:
    """Return the likelier side of a two-way market as (label, probability); ties go to the yes side."""
    chose_no = yes_prob < no_prob
    return (no_label, no_prob) if chose_no else (yes_label, yes_prob)
# ============================================================================
# MAIN
# ============================================================================
def main():
    """
    Run the V25 backtest end to end and train calibration models.

    Steps: load qualified leagues, fetch matches and odds from the database,
    build a feature vector per match, predict MS / OU15 / OU25 / BTTS with the
    V25 predictor, print an accuracy report (overall, by confidence band, by
    league group, by pick direction), train one calibrator per market from the
    collected (probability, outcome) pairs, and write the combined report to
    ``backtest_results.json`` under REPORTS_DIR.

    Matches with unparseable scores or a failing prediction are counted as
    skipped; the first few prediction failures are printed so that a
    systematic problem is not hidden.
    """
    t_start = time.time()
    print("=" * 70)
    print(" V25 Backtest + Calibration Training")
    print(f" Run at: {datetime.utcnow().isoformat()} UTC")
    print("=" * 70)

    # ------------------------------------------------------------------
    # Step 1 — Load qualified leagues
    # ------------------------------------------------------------------
    league_ids = load_qualified_leagues()

    # ------------------------------------------------------------------
    # Step 1b / 1c — Fetch matches with features, then odds in bulk
    # ------------------------------------------------------------------
    conn = psycopg2.connect(get_clean_dsn())
    try:
        matches_df = fetch_matches(conn, league_ids)
        if matches_df.empty:
            print("[ERROR] No matches found. Check DB connection and league IDs.")
            return
        match_ids = matches_df["match_id"].tolist()
        odds_map = fetch_odds_bulk(conn, match_ids)
    finally:
        conn.close()

    # ------------------------------------------------------------------
    # Step 2 — Build feature vectors
    # ------------------------------------------------------------------
    print(f"\n[Step 2] Building feature vectors for {len(matches_df)} matches ...")
    feature_cols = load_feature_cols()

    # ------------------------------------------------------------------
    # Step 3 — Load V25 predictor
    # ------------------------------------------------------------------
    predictor = load_predictor()

    # ------------------------------------------------------------------
    # Main loop — predict each match, collect results
    # ------------------------------------------------------------------
    print("\n[Loop] Running predictions ...")

    # Storage for calibration training: market -> [(prob, 1 if outcome hit)]
    calib_data: Dict[str, List[Tuple[float, int]]] = {
        "ms_home": [],
        "ms_draw": [],
        "ms_away": [],
        "ou15": [],
        "ou25": [],
        "btts": [],
    }
    # Storage for accuracy reporting
    records = []
    skipped = 0
    processed = 0
    prediction_failures = 0
    max_reported_failures = 5  # keep the log readable on systematic failure

    for _, row in matches_df.iterrows():
        match_id = row["match_id"]

        # Validate scores
        try:
            score_home = int(row.get("score_home"))
            score_away = int(row.get("score_away"))
        except (TypeError, ValueError):
            skipped += 1
            continue

        # Build features
        match_odds = odds_map.get(match_id, {})
        feat = build_feature_vector(row, match_odds, feature_cols)

        # Run predictions
        try:
            home_prob, draw_prob, away_prob = predictor.predict_ms(feat)
            over25_prob, under25_prob = predictor.predict_ou25(feat)
            btts_yes_prob, btts_no_prob = predictor.predict_btts(feat)
            # ou15 is loaded via predict_market (returns np.ndarray for binary)
            ou15_arr = predictor.predict_market("ou15", feat)
            if ou15_arr is not None and len(ou15_arr) > 0:
                over15_prob = float(ou15_arr[0])
                under15_prob = 1.0 - over15_prob
            else:
                # No OU15 output available: fall back to a coin flip.
                over15_prob = 0.5
                under15_prob = 0.5
        except Exception as e:
            # Best effort: one bad match must not abort the backtest, but do
            # not hide the reason either.
            skipped += 1
            prediction_failures += 1
            if prediction_failures <= max_reported_failures:
                print(f" [WARN] Prediction failed for match {match_id}: {e!r}")
            continue

        # Compute actuals
        actuals = compute_actuals(score_home, score_away)

        # MS picks
        ms_pick, ms_conf = pick_from_ms(home_prob, draw_prob, away_prob)
        ms_correct = int(ms_pick == actuals["ms_actual"])
        # OU15
        ou15_pick, ou15_conf = pick_from_binary(over15_prob, under15_prob, "Over", "Under")
        ou15_correct = int(ou15_pick == actuals["ou15_actual"])
        # OU25
        ou25_pick, ou25_conf = pick_from_binary(over25_prob, under25_prob, "Over", "Under")
        ou25_correct = int(ou25_pick == actuals["ou25_actual"])
        # BTTS
        btts_pick, btts_conf = pick_from_binary(btts_yes_prob, btts_no_prob, "Yes", "No")
        btts_correct = int(btts_pick == actuals["btts_actual"])

        # Collect calibration data
        calib_data["ms_home"].append((home_prob, int(actuals["ms_actual"] == "1")))
        calib_data["ms_draw"].append((draw_prob, int(actuals["ms_actual"] == "X")))
        calib_data["ms_away"].append((away_prob, int(actuals["ms_actual"] == "2")))
        calib_data["ou15"].append((over15_prob, int(actuals["ou15_actual"] == "Over")))
        calib_data["ou25"].append((over25_prob, int(actuals["ou25_actual"] == "Over")))
        calib_data["btts"].append((btts_yes_prob, int(actuals["btts_actual"] == "Yes")))

        # Determine league group
        league_name = str(row.get("league_name", "Other") or "Other")
        league_group = league_name if league_name in TOP5_LEAGUE_NAMES else "Other"

        records.append({
            "match_id": match_id,
            "league_name": league_name,
            "league_group": league_group,
            "score_home": score_home,
            "score_away": score_away,
            # MS
            "ms_pick": ms_pick,
            "ms_actual": actuals["ms_actual"],
            "ms_conf": ms_conf,
            "ms_conf_band": confidence_band(ms_conf),
            "ms_correct": ms_correct,
            "ms_home_prob": home_prob,
            "ms_draw_prob": draw_prob,
            "ms_away_prob": away_prob,
            # OU15
            "ou15_pick": ou15_pick,
            "ou15_actual": actuals["ou15_actual"],
            "ou15_conf": ou15_conf,
            "ou15_conf_band": confidence_band(ou15_conf),
            "ou15_correct": ou15_correct,
            "ou15_over_prob": over15_prob,
            # OU25
            "ou25_pick": ou25_pick,
            "ou25_actual": actuals["ou25_actual"],
            "ou25_conf": ou25_conf,
            "ou25_conf_band": confidence_band(ou25_conf),
            "ou25_correct": ou25_correct,
            "ou25_over_prob": over25_prob,
            # BTTS
            "btts_pick": btts_pick,
            "btts_actual": actuals["btts_actual"],
            "btts_conf": btts_conf,
            "btts_conf_band": confidence_band(btts_conf),
            "btts_correct": btts_correct,
            "btts_yes_prob": btts_yes_prob,
        })
        processed += 1
        if processed % PROGRESS_INTERVAL == 0:
            elapsed = time.time() - t_start
            print(f" [Progress] {processed}/{len(matches_df)} matches | "
                  f"skipped={skipped} | elapsed={elapsed:.1f}s")

    print(f"\n[Loop] Done. Processed={processed}, Skipped={skipped}")
    if prediction_failures:
        print(f"[Loop] {prediction_failures} of the skipped matches failed during prediction.")
    if not records:
        print("[ERROR] No records to analyze. Exiting.")
        return
    results_df = pd.DataFrame(records)

    # ------------------------------------------------------------------
    # Step 5 — Accuracy report
    # ------------------------------------------------------------------
    print("\n" + "=" * 70)
    print(" ACCURACY REPORT")
    print("=" * 70)
    # (label, correct column, confidence column, band column, pick column)
    markets = [
        ("MS", "ms_correct", "ms_conf", "ms_conf_band", "ms_pick"),
        ("OU15", "ou15_correct", "ou15_conf", "ou15_conf_band", "ou15_pick"),
        ("OU25", "ou25_correct", "ou25_conf", "ou25_conf_band", "ou25_pick"),
        ("BTTS", "btts_correct", "btts_conf", "btts_conf_band", "btts_pick"),
    ]
    summary: Dict[str, Any] = {
        "generated_at": datetime.utcnow().isoformat(),
        "matches_processed": processed,
        "matches_skipped": skipped,
        "markets": {},
    }
    for market_label, correct_col, conf_col, band_col, pick_col in markets:
        print(f"\n--- {market_label} ---")
        sub = results_df[[correct_col, conf_col, band_col, pick_col, "league_group"]].copy()
        total = len(sub)
        overall_acc = sub[correct_col].mean() * 100
        print(f" Overall accuracy: {overall_acc:.1f}% ({sub[correct_col].sum()}/{total})")
        market_summary = {
            "overall_accuracy": round(overall_acc, 2),
            "total_matches": total,
            "by_confidence_band": {},
            "by_league": {},
            "by_pick_direction": {},
        }

        # By confidence band
        print(" By confidence band:")
        bands = ["<50%", "50-65%", "65-75%", "75%+"]
        for band in bands:
            mask = sub[band_col] == band
            n = mask.sum()
            if n > 0:
                acc = sub.loc[mask, correct_col].mean() * 100
                mean_conf = sub.loc[mask, conf_col].mean() * 100
                print(f" {band:8s}: {acc:5.1f}% acc | {n:4d} matches | "
                      f"mean_conf={mean_conf:.1f}%")
                market_summary["by_confidence_band"][band] = {
                    "accuracy": round(acc, 2),
                    "count": int(n),
                    "mean_confidence": round(mean_conf, 2),
                }

        # By league group
        print(" By league:")
        league_groups = list(results_df["league_group"].unique())
        # Sort: named leagues first, then Other
        named = sorted([g for g in league_groups if g != "Other"])
        ordered = named + (["Other"] if "Other" in league_groups else [])
        for lg in ordered:
            mask = sub["league_group"] == lg
            n = mask.sum()
            if n > 0:
                acc = sub.loc[mask, correct_col].mean() * 100
                print(f" {lg[:20]:20s}: {acc:5.1f}% ({n} matches)")
                market_summary["by_league"][lg] = {
                    "accuracy": round(acc, 2),
                    "count": int(n),
                }

        # By pick direction
        print(" By pick direction:")
        for pick_val in sorted(sub[pick_col].unique()):
            mask = sub[pick_col] == pick_val
            n = mask.sum()
            if n > 0:
                acc = sub.loc[mask, correct_col].mean() * 100
                mean_conf = sub.loc[mask, conf_col].mean() * 100
                print(f" {pick_val:8s}: {acc:5.1f}% acc | {n:4d} matches | "
                      f"mean_conf={mean_conf:.1f}%")
                market_summary["by_pick_direction"][pick_val] = {
                    "accuracy": round(acc, 2),
                    "count": int(n),
                    "mean_confidence": round(mean_conf, 2),
                }
        summary["markets"][market_label] = market_summary

    # ------------------------------------------------------------------
    # Step 6 — Train calibration models
    # ------------------------------------------------------------------
    print("\n" + "=" * 70)
    print(" CALIBRATION TRAINING")
    print("=" * 70)
    calibrator = Calibrator()
    calibration_results: Dict[str, Dict] = {}
    for market_key, pairs in calib_data.items():
        if len(pairs) < 100:
            print(f"[Calib] {market_key}: only {len(pairs)} samples — skipping.")
            continue
        probs = np.array([p for p, _ in pairs])
        actuals_bin = np.array([a for _, a in pairs])
        # Build a tiny DataFrame to use Calibrator.train_calibration
        calib_df = pd.DataFrame({
            "prob": probs,
            "actual": actuals_bin,
        })
        metrics = calibrator.train_calibration(
            df=calib_df,
            market=market_key,
            prob_col="prob",
            actual_col="actual",
            min_samples=100,
            save=True,
        )
        calibration_results[market_key] = metrics.to_dict()
        print(f" [Calib] {market_key}: Brier={metrics.brier_score:.4f} | "
              f"ECE={metrics.calibration_error:.4f} | n={metrics.sample_count}")

    # ------------------------------------------------------------------
    # Step 7 — Save results
    # ------------------------------------------------------------------
    output_path = os.path.join(REPORTS_DIR, "backtest_results.json")
    full_report = {
        **summary,
        "calibration": calibration_results,
        "runtime_seconds": round(time.time() - t_start, 1),
    }
    with open(output_path, "w") as f:
        json.dump(full_report, f, indent=2)
    print(f"\n[Step 7] Report saved to {output_path}")

    # ------------------------------------------------------------------
    # Final summary table
    # ------------------------------------------------------------------
    print("\n" + "=" * 70)
    print(" FINAL SUMMARY TABLE")
    print("=" * 70)
    print(f"{'Market':<8} {'Overall Acc':>12} {'Matches':>8} "
          f"{'Best Band (acc)':>18}")
    print("-" * 70)
    for market_label, _, _, _, _ in markets:
        ms = summary["markets"].get(market_label, {})
        overall = ms.get("overall_accuracy", 0)
        total_m = ms.get("total_matches", 0)
        bands_d = ms.get("by_confidence_band", {})
        # Find best accuracy band with >= 50 matches
        best_band = "-"
        best_acc = 0.0
        for band, bdata in bands_d.items():
            if bdata["count"] >= 50 and bdata["accuracy"] > best_acc:
                best_acc = bdata["accuracy"]
                best_band = f"{band} ({best_acc:.1f}%)"
        print(f"{market_label:<8} {overall:>11.1f}% {total_m:>8d} {best_band:>18s}")

    elapsed_total = time.time() - t_start
    print(f"\nTotal runtime: {elapsed_total:.1f}s")
    print("=" * 70)
if __name__ == "__main__":
main()
+459
View File
@@ -0,0 +1,459 @@
"""
League-Specific Model Trainer
==============================
Trains dedicated XGBoost models + isotonic calibration for each qualified league.
Tiers:
- >=500 FT matches → full XGBoost (12 markets) + calibration
- 100-499 matches → isotonic calibration only (over general V25 predictions)
- <100 matches → skipped
Usage:
python scripts/train_league_models.py
python scripts/train_league_models.py --min-samples 300 # lower the full-model threshold (default: 500)
python scripts/train_league_models.py --colab # Colab-friendly output
"""
import os
import sys
import json
import pickle
import argparse
import time
import warnings
from datetime import datetime
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import accuracy_score, log_loss
warnings.filterwarnings("ignore")
optuna_available = False
try:
import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)
optuna_available = True
except ImportError:
pass
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_PATH = os.path.join(AI_ENGINE_DIR, "data", "training_data.csv")
MODELS_DIR = os.path.join(AI_ENGINE_DIR, "models", "league_specific")
REPORTS_DIR = os.path.join(AI_ENGINE_DIR, "reports", "league_models")
QUALIFIED_LEAGUES_PATH = os.path.join(os.path.dirname(AI_ENGINE_DIR), "qualified_leagues.json")
os.makedirs(MODELS_DIR, exist_ok=True)
os.makedirs(REPORTS_DIR, exist_ok=True)
# ─── Markets ────────────────────────────────────────────────────────
# Per-market training configuration, keyed by market code:
#   label       - name of the target column in the training data
#   num_class   - number of classes (2 = binary, >2 = multi-class)
#   min_samples - minimum number of usable training rows for the market
MARKETS = {
    "MS": {"label": "label_ms", "num_class": 3, "min_samples": 200},
    "OU15": {"label": "label_ou15", "num_class": 2, "min_samples": 150},
    "OU25": {"label": "label_ou25", "num_class": 2, "min_samples": 150},
    "OU35": {"label": "label_ou35", "num_class": 2, "min_samples": 150},
    "BTTS": {"label": "label_btts", "num_class": 2, "min_samples": 150},
    "HT": {"label": "label_ht_result", "num_class": 3, "min_samples": 150},
    "HT_OU05": {"label": "label_ht_ou05", "num_class": 2, "min_samples": 150},
    "HT_OU15": {"label": "label_ht_ou15", "num_class": 2, "min_samples": 150},
    "HTFT": {"label": "label_ht_ft", "num_class": 9, "min_samples": 300},
    "OE": {"label": "label_odd_even", "num_class": 2, "min_samples": 150},
    "CARDS": {"label": "label_cards_ou45", "num_class": 2, "min_samples": 150},
    "HANDICAP": {"label": "label_handicap_ms", "num_class": 3, "min_samples": 200},
}
# Columns that are never used as features: identifiers, timestamps, raw
# scores and every label column. Everything else in training_data.csv is
# treated as a feature. Note that label_ou05 and label_yellow_cards are
# excluded here although no MARKETS entry trains on them.
SKIP_COLS = {
    "match_id", "home_team_id", "away_team_id", "league_id", "mst_utc",
    "score_home", "score_away", "total_goals", "ht_score_home", "ht_score_away",
    "ht_total_goals",
    "label_ms", "label_ou05", "label_ou15", "label_ou25", "label_ou35",
    "label_btts", "label_ht_result", "label_ht_ou05", "label_ht_ou15",
    "label_ht_ft", "label_odd_even", "label_yellow_cards", "label_cards_ou45",
    "label_handicap_ms",
}
# Fixed XGBoost hyper-parameters for binary markets (fast path, no Optuna
# tuning). The seed is fixed for reproducibility.
XGB_PARAMS_BINARY = {
    "objective": "binary:logistic",
    "eval_metric": "logloss",
    "max_depth": 4,
    "eta": 0.05,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "min_child_weight": 5,
    "gamma": 0.1,
    "reg_lambda": 1.0,
    "verbosity": 0,
    "seed": 42,
    "nthread": -1,
}
# Multi-class variant: same settings, softmax-probability objective and
# multi-class log loss. "num_class" is not set here.
XGB_PARAMS_MULTI = {
    **XGB_PARAMS_BINARY,
    "objective": "multi:softprob",
    "eval_metric": "mlogloss",
}
def load_data() -> pd.DataFrame:
    """Read the training CSV at DATA_PATH into a DataFrame and print its dimensions."""
    print(f"Loading training data from {DATA_PATH} ...")
    frame = pd.read_csv(DATA_PATH, low_memory=False)
    n_rows = len(frame)
    n_cols = len(frame.columns)
    print(f" {n_rows:,} rows, {n_cols} columns")
    return frame
def get_feature_cols(df: pd.DataFrame) -> list:
    """Return the columns of ``df`` that are not listed in SKIP_COLS, in column order."""
    features = []
    for column in df.columns:
        if column in SKIP_COLS:
            continue
        features.append(column)
    return features
def load_qualified_leagues() -> list:
    """
    Load the qualified league list from QUALIFIED_LEAGUES_PATH.

    Returns an empty list when the file does not exist.
    """
    if not os.path.exists(QUALIFIED_LEAGUES_PATH):
        # fallback: empty list when the file is missing
        return []
    with open(QUALIFIED_LEAGUES_PATH) as handle:
        leagues = json.load(handle)
    return leagues
def train_xgb_market(
    X_train: np.ndarray,
    y_train: np.ndarray,
    X_test: np.ndarray,
    y_test: np.ndarray,
    num_class: int,
    feature_cols: list,
) -> tuple:
    """
    Train XGBoost for one market. Returns (model, accuracy, logloss).

    Uses XGB_PARAMS_MULTI with ``num_class`` set when ``num_class > 2`` and
    XGB_PARAMS_BINARY otherwise. Training runs for at most 300 rounds with
    early stopping (30 rounds) monitored on the test split; accuracy and log
    loss are then computed on that same split.

    NOTE(review): because early stopping and evaluation share one split, the
    reported metrics are likely optimistic - consider a separate validation set.
    """
    is_multiclass = num_class > 2
    params = dict(XGB_PARAMS_MULTI if is_multiclass else XGB_PARAMS_BINARY)
    if is_multiclass:
        params["num_class"] = num_class

    dtrain = xgb.DMatrix(X_train, label=y_train, feature_names=feature_cols)
    dtest = xgb.DMatrix(X_test, label=y_test, feature_names=feature_cols)
    model = xgb.train(
        params,
        dtrain,
        num_boost_round=300,
        evals=[(dtest, "val")],
        early_stopping_rounds=30,
        verbose_eval=False,
    )

    raw = model.predict(dtest)
    # Pass the label set explicitly: log_loss otherwise infers it from y_test
    # and raises when the test split happens to miss a class.
    if is_multiclass:
        probs = raw.reshape(-1, num_class)
        preds = np.argmax(probs, axis=1)
        ll = log_loss(y_test, probs, labels=list(range(num_class)))
    else:
        preds = (raw >= 0.5).astype(int)
        ll = log_loss(y_test, raw, labels=[0, 1])
    acc = accuracy_score(y_test, preds)
    return model, acc, ll
def train_isotonic(raw_probs: np.ndarray, y_true: np.ndarray) -> IsotonicRegression:
    """Fit an isotonic calibrator mapping raw probabilities to observed outcomes.

    Inputs outside the fitted range are clipped (``out_of_bounds="clip"``).
    """
    # fit() returns the fitted estimator itself.
    return IsotonicRegression(out_of_bounds="clip").fit(raw_probs, y_true)
def get_general_v25_probs(df_league: pd.DataFrame, feature_cols: list, market: str, num_class: int):
    """
    Use general V25 model to get predictions on this league's matches (for cal-only leagues).

    Only rows with no missing feature or label value are scored, using the
    "xgb" model registered for the market on the V25 predictor.

    Returns:
        (probs, y): predicted probabilities (2-D for multi-class, 1-D for
        binary) and the matching labels, or (None, None) when fewer than 50
        rows are usable, the market or its "xgb" model is unavailable, or any
        error occurs.
    """
    batch_size = 500  # rows scored per DMatrix
    try:
        from models.v25_ensemble import get_v25_predictor
        v25 = get_v25_predictor()
        if not v25._loaded:
            v25.load_models()

        label_col = MARKETS[market]["label"]
        valid = df_league[feature_cols + [label_col]].dropna()
        if len(valid) < 50:
            return None, None

        market_key_map = {
            "MS": "ms", "OU15": "ou15", "OU25": "ou25", "OU35": "ou35",
            "BTTS": "btts", "HT": "ht_result", "HT_OU05": "ht_ou05",
            "HT_OU15": "ht_ou15", "HTFT": "htft", "OE": "odd_even",
            "CARDS": "cards_ou45", "HANDICAP": "handicap_ms",
        }
        mkey = market_key_map.get(market)
        if not mkey or not v25.has_market(mkey):
            return None, None

        booster = v25.models.get(mkey, {}).get("xgb")
        if booster is None:
            return None, None

        # dropna() above already removed every row with a missing value.
        X = valid[feature_cols].values
        y = valid[label_col].values

        all_probs = []
        for start in range(0, len(X), batch_size):
            df_batch = pd.DataFrame(X[start:start + batch_size], columns=feature_cols)
            p = booster.predict(xgb.DMatrix(df_batch))
            if num_class > 2:
                p = p.reshape(-1, num_class)
            all_probs.append(p)
        if not all_probs:
            return None, None

        probs = np.vstack(all_probs) if num_class > 2 else np.concatenate(all_probs)
        return probs, y
    except Exception as e:
        # Best effort by design: callers treat (None, None) as "not available".
        print(f" [WARN] V25 predictions unavailable for market {market}: {e!r}")
        return None, None
# Maps MARKETS codes to the market keys used by the general V25 predictor.
_V25_MARKET_KEYS = {
    "MS": "ms", "OU15": "ou15", "OU25": "ou25", "OU35": "ou35",
    "BTTS": "btts", "HT": "ht_result", "HT_OU05": "ht_ou05",
    "HT_OU15": "ht_ou15", "HTFT": "htft", "OE": "odd_even",
    "CARDS": "cards_ou45", "HANDICAP": "handicap_ms",
}


def _save_isotonic_calibrators(out_dir: str, market: str, raw: np.ndarray, y_true: np.ndarray, num_class: int) -> None:
    """Fit and pickle isotonic calibrators for one market (one per class when multi-class)."""
    stem = f"cal_{market.lower()}"
    if num_class > 2:
        raw = raw.reshape(-1, num_class)
        for cls_idx in range(num_class):
            # One-vs-rest: calibrate each class probability separately.
            iso = train_isotonic(raw[:, cls_idx], (y_true == cls_idx).astype(int))
            with open(os.path.join(out_dir, f"{stem}_{cls_idx}.pkl"), "wb") as f:
                pickle.dump(iso, f)
    else:
        iso = train_isotonic(raw, y_true)
        with open(os.path.join(out_dir, f"{stem}.pkl"), "wb") as f:
            pickle.dump(iso, f)


def process_league(
    league_id: str,
    df_league: pd.DataFrame,
    feature_cols: list,
    full_model: bool,
    league_name: str,
) -> dict:
    """
    Train models for one league. Returns metrics dict.

    Rows are sorted by ``mst_utc``; the first 80% form the training split and
    the last 20% the test split. For every market in MARKETS with enough
    usable rows:

    * ``full_model=True``: train an XGBoost model, save it as
      ``xgb_<market>.json`` and fit isotonic calibrators on its test-split
      predictions.
    * ``full_model=False``: fit isotonic calibrators on the general V25 "xgb"
      model's predictions over all usable rows of the league.

    Artifacts and ``metrics.json`` are written to MODELS_DIR/<league_id>. A
    failure in one market is recorded under that market's "error" key and
    does not stop the remaining markets.
    """
    n = len(df_league)
    out_dir = os.path.join(MODELS_DIR, league_id)
    os.makedirs(out_dir, exist_ok=True)
    metrics = {"league_id": league_id, "league_name": league_name, "n_matches": n, "markets": {}}

    # Time-based split: last 20% as test
    split_idx = int(n * 0.80)
    df_sorted = df_league.sort_values("mst_utc")
    df_train = df_sorted.iloc[:split_idx]
    df_test = df_sorted.iloc[split_idx:]
    saved_feature_cols = False

    for market, cfg in MARKETS.items():
        label_col = cfg["label"]
        num_class = cfg["num_class"]
        min_samp = cfg["min_samples"]
        if label_col not in df_league.columns:
            continue

        valid_train = df_train[feature_cols + [label_col]].dropna()
        valid_test = df_test[feature_cols + [label_col]].dropna()
        if len(valid_train) < min_samp or len(valid_test) < 30:
            continue

        X_train = valid_train[feature_cols].fillna(0).values
        y_train = valid_train[label_col].values.astype(int)
        X_test = valid_test[feature_cols].fillna(0).values
        y_test = valid_test[label_col].values.astype(int)
        mkt_metrics = {"n_train": len(X_train), "n_test": len(X_test)}

        if full_model:
            try:
                model, acc, ll = train_xgb_market(X_train, y_train, X_test, y_test, num_class, feature_cols)
                model_path = os.path.join(out_dir, f"xgb_{market.lower()}.json")
                model.save_model(model_path)
                mkt_metrics.update({"accuracy": round(acc, 4), "logloss": round(ll, 4), "model": "xgb"})
                if not saved_feature_cols:
                    with open(os.path.join(out_dir, "feature_cols.json"), "w") as f:
                        json.dump(feature_cols, f)
                    saved_feature_cols = True
                # Isotonic calibration from own model predictions
                dtest_xgb = xgb.DMatrix(X_test, feature_names=feature_cols)
                raw = model.predict(dtest_xgb)
                _save_isotonic_calibrators(out_dir, market, raw, y_test, num_class)
            except Exception as e:
                mkt_metrics["error"] = str(e)
        else:
            # Calibration only: use general V25 model
            try:
                all_valid = df_league[feature_cols + [label_col]].dropna()
                if len(all_valid) < min_samp:
                    continue
                X_all = all_valid[feature_cols].fillna(0).values
                y_all = all_valid[label_col].values.astype(int)

                from models.v25_ensemble import get_v25_predictor
                v25 = get_v25_predictor()
                if not v25._loaded:
                    v25.load_models()
                mkey = _V25_MARKET_KEYS.get(market)
                if not mkey or not v25.has_market(mkey):
                    continue

                df_feat = pd.DataFrame(X_all, columns=feature_cols)
                dmat = xgb.DMatrix(df_feat)
                models_v25 = v25.models.get(mkey, {})
                if "xgb" not in models_v25:
                    continue
                raw = models_v25["xgb"].predict(dmat)
                _save_isotonic_calibrators(out_dir, market, raw, y_all, num_class)
                mkt_metrics.update({"n_train": len(X_all), "model": "cal_only"})
            except Exception as e:
                mkt_metrics["error"] = str(e)

        metrics["markets"][market] = mkt_metrics

    # Save metrics
    with open(os.path.join(out_dir, "metrics.json"), "w") as f:
        json.dump(metrics, f, indent=2)
    return metrics
def main():
    """
    Train league-specific models for every qualified league.

    Leagues with at least ``--min-samples`` rows get full XGBoost models plus
    calibration; leagues with between ``--cal-min`` and ``--min-samples`` rows
    get calibration only; smaller leagues are skipped. Per-league progress is
    printed, followed by a table of the 30 leagues with the best MS accuracy,
    and a master report is written to REPORTS_DIR/league_models_report.json.

    League names are looked up in the database on a best-effort basis; when
    the lookup fails, a truncated league id is shown instead.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--min-samples", type=int, default=500, help="Min matches for full model")
    parser.add_argument("--cal-min", type=int, default=100, help="Min matches for calibration")
    # NOTE(review): --colab is parsed but not read anywhere in this function.
    parser.add_argument("--colab", action="store_true", help="Colab-friendly verbose output")
    args = parser.parse_args()

    start_total = time.time()
    df = load_data()
    feature_cols = get_feature_cols(df)
    print(f"Feature columns: {len(feature_cols)}")

    qualified = load_qualified_leagues()
    if not qualified:
        # No qualified list available: fall back to every league in the data.
        qualified = df["league_id"].unique().tolist()
    print(f"Qualified leagues: {len(qualified)}")

    # Get league names (optional: training works without them)
    league_names = {}
    try:
        import psycopg2
        from data.db import get_clean_dsn
        conn = psycopg2.connect(get_clean_dsn())
        try:
            cur = conn.cursor()
            cur.execute("SELECT id, name FROM leagues WHERE id = ANY(%s)", (qualified,))
            league_names = {r[0]: r[1] for r in cur.fetchall()}
        finally:
            # Close the connection even when the query fails.
            conn.close()
    except Exception as e:
        print(f"[WARN] League name lookup failed, falling back to ids: {e!r}")

    # Filter to qualified leagues with enough data
    counts = df[df["league_id"].isin(qualified)].groupby("league_id").size()
    full_model_ids = counts[counts >= args.min_samples].index.tolist()
    cal_only_ids = counts[(counts >= args.cal_min) & (counts < args.min_samples)].index.tolist()
    selected_ids = set(full_model_ids) | set(cal_only_ids)
    skipped_count = sum(1 for lid in qualified if lid not in selected_ids)
    print(f"\nTam model ({args.min_samples}+ maç): {len(full_model_ids)} lig")
    print(f"Kalibrasyon ({args.cal_min}-{args.min_samples-1} maç): {len(cal_only_ids)} lig")
    print(f"Atlandı (<{args.cal_min} maç): {skipped_count} lig")
    print()

    all_results = []
    total = len(full_model_ids) + len(cal_only_ids)
    done = 0
    for league_id, full_model in (
        [(lid, True) for lid in full_model_ids] +
        [(lid, False) for lid in cal_only_ids]
    ):
        t0 = time.time()
        df_league = df[df["league_id"] == league_id].copy()
        n = len(df_league)
        # str(): the fallback must not fail if league ids are not strings.
        name = league_names.get(league_id, str(league_id)[:12])
        tier = "FULL" if full_model else "CAL"
        try:
            result = process_league(league_id, df_league, feature_cols, full_model, name)
            done += 1
            elapsed = time.time() - t0
            # Build accuracy string for key markets
            acc_parts = []
            for mkt in ["MS", "OU15", "OU25", "BTTS"]:
                m = result["markets"].get(mkt, {})
                if "accuracy" in m:
                    acc_parts.append(f"{mkt}={m['accuracy']*100:.1f}%")
            acc_str = " | ".join(acc_parts) if acc_parts else "(cal only)"
            print(f"[{done:>3}/{total}] [{tier}] {name:<35} {n:>6,} maç | {acc_str} | {elapsed:.1f}s")
            all_results.append(result)
        except Exception as e:
            done += 1
            print(f"[{done:>3}/{total}] [{tier}] {name:<35} ERROR: {e}")
        if done % 10 == 0:
            elapsed_total = time.time() - start_total
            remaining = (elapsed_total / done) * (total - done)
            print(f" ── {done}/{total} tamamlandı | geçen: {elapsed_total/60:.1f}dk | kalan tahmini: {remaining/60:.1f}dk ──")

    # Final report
    total_elapsed = time.time() - start_total
    print(f"\n{'='*70}")
    print(f"TAMAMLANDI: {len(all_results)}/{total} lig | Süre: {total_elapsed/60:.1f} dakika")
    print(f"{'='*70}")

    # Top 30 by MS accuracy
    printable = [(r["league_name"], r["n_matches"], r["markets"]) for r in all_results
                 if "MS" in r["markets"] and "accuracy" in r["markets"]["MS"]]
    printable.sort(key=lambda x: x[2]["MS"].get("accuracy", 0), reverse=True)
    print(f"\n{'Liga':<35} {'Maç':>6} {'MS':>7} {'OU15':>7} {'OU25':>7} {'BTTS':>7}")
    print("-" * 70)
    for name, n, mkts in printable[:30]:
        ms = mkts.get("MS", {}).get("accuracy", 0) * 100
        ou15 = mkts.get("OU15", {}).get("accuracy", 0) * 100
        ou25 = mkts.get("OU25", {}).get("accuracy", 0) * 100
        btts = mkts.get("BTTS", {}).get("accuracy", 0) * 100
        print(f"{name:<35} {n:>6,} {ms:>6.1f}% {ou15:>6.1f}% {ou25:>6.1f}% {btts:>6.1f}%")

    # Save master report
    report = {
        "generated_at": datetime.now().isoformat(),
        "total_leagues": len(all_results),
        "elapsed_minutes": round(total_elapsed / 60, 1),
        "results": all_results,
    }
    report_path = os.path.join(REPORTS_DIR, "league_models_report.json")
    with open(report_path, "w") as f:
        json.dump(report, f, indent=2)
    print(f"\nRapor kaydedildi: {report_path}")
if __name__ == "__main__":
main()
@@ -0,0 +1,259 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# League-Specific Model Trainer \u2014 Google Colab\n",
"164 lig i\u00e7in XGBoost + isotonic kalibrasyon. 12 market.\n",
"Modeller Drive'a kaydedilir, `models/league_specific/` klas\u00f6r\u00fcne kopyalan\u0131r.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Mount Drive\n",
"from google.colab import drive\n",
"drive.mount('/content/drive')\n",
"\n",
"DRIVE_DIR = '/content/drive/MyDrive/iddaai'\n",
"import os\n",
"os.makedirs(DRIVE_DIR, exist_ok=True)\n",
"print('Drive mounted:', DRIVE_DIR)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# training_data.csv zaten Drive da: /content/drive/MyDrive/iddaai/training_data.csv\n",
"# Sadece qualified_leagues.json upload et (iddaai-be/ klas\u00f6r\u00fcnden)\n",
"from google.colab import files\n",
"import shutil\n",
"print(\"qualified_leagues.json dosyasini upload edin\")\n",
"uploaded = files.upload()\n",
"for fname in uploaded:\n",
" shutil.copy(fname, f\"{DRIVE_DIR}/{fname}\")\n",
" print(f\"Kaydedildi: {DRIVE_DIR}/{fname}\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Upload training_data.csv from the local machine (skip this cell if the file is already in Drive; qualified_leagues.json is uploaded by the previous cell)\n",
"from google.colab import files\n",
"print('training_data.csv upload edin (ai-engine/data/training_data.csv)')\n",
"uploaded = files.upload()\n",
"import shutil\n",
"for fname in uploaded:\n",
" shutil.copy(fname, f'{DRIVE_DIR}/{fname}')\n",
" print(f'Saved: {DRIVE_DIR}/{fname}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os, json, pickle, time, warnings\n",
"import numpy as np\n",
"import pandas as pd\n",
"import xgboost as xgb\n",
"from sklearn.isotonic import IsotonicRegression\n",
"from sklearn.metrics import accuracy_score, log_loss\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"DRIVE_DIR = '/content/drive/MyDrive/iddaai'\n",
"DATA_PATH = f'{DRIVE_DIR}/training_data.csv'\n",
"QL_PATH = f'{DRIVE_DIR}/qualified_leagues.json'\n",
"MODELS_DIR = f'{DRIVE_DIR}/league_specific'\n",
"os.makedirs(MODELS_DIR, exist_ok=True)\n",
"\n",
"MARKETS = {\n",
" 'MS': {'label': 'label_ms', 'num_class': 3, 'min_samples': 200},\n",
" 'OU15': {'label': 'label_ou15', 'num_class': 2, 'min_samples': 150},\n",
" 'OU25': {'label': 'label_ou25', 'num_class': 2, 'min_samples': 150},\n",
" 'OU35': {'label': 'label_ou35', 'num_class': 2, 'min_samples': 150},\n",
" 'BTTS': {'label': 'label_btts', 'num_class': 2, 'min_samples': 150},\n",
" 'HT': {'label': 'label_ht_result', 'num_class': 3, 'min_samples': 150},\n",
" 'HT_OU05': {'label': 'label_ht_ou05', 'num_class': 2, 'min_samples': 150},\n",
" 'HT_OU15': {'label': 'label_ht_ou15', 'num_class': 2, 'min_samples': 150},\n",
" 'HTFT': {'label': 'label_ht_ft', 'num_class': 9, 'min_samples': 300},\n",
" 'OE': {'label': 'label_odd_even', 'num_class': 2, 'min_samples': 150},\n",
" 'CARDS': {'label': 'label_cards_ou45', 'num_class': 2, 'min_samples': 150},\n",
" 'HANDICAP': {'label': 'label_handicap_ms', 'num_class': 3, 'min_samples': 200},\n",
"}\n",
"\n",
"SKIP_COLS = {\n",
" 'match_id','home_team_id','away_team_id','league_id','mst_utc',\n",
" 'score_home','score_away','total_goals','ht_score_home','ht_score_away','ht_total_goals',\n",
" 'label_ms','label_ou05','label_ou15','label_ou25','label_ou35','label_btts',\n",
" 'label_ht_result','label_ht_ou05','label_ht_ou15','label_ht_ft',\n",
" 'label_odd_even','label_yellow_cards','label_cards_ou45','label_handicap_ms',\n",
"}\n",
"\n",
"XGB_BASE = {\n",
" 'max_depth': 4, 'eta': 0.05, 'subsample': 0.8,\n",
" 'colsample_bytree': 0.8, 'min_child_weight': 5,\n",
" 'gamma': 0.1, 'reg_lambda': 1.0, 'verbosity': 0, 'seed': 42,\n",
" 'nthread': -1,\n",
"}\n",
"\n",
"df = pd.read_csv(DATA_PATH, low_memory=False)\n",
"feature_cols = [c for c in df.columns if c not in SKIP_COLS]\n",
"print(f'Y\u00fcklendi: {len(df):,} sat\u0131r | {len(feature_cols)} feature')\n",
"\n",
"qualified = json.load(open(QL_PATH)) if os.path.exists(QL_PATH) else df['league_id'].unique().tolist()\n",
"counts = df[df['league_id'].isin(qualified)].groupby('league_id').size()\n",
"full_ids = counts[counts >= 500].index.tolist()\n",
"cal_ids = counts[(counts >= 100) & (counts < 500)].index.tolist()\n",
"print(f'Tam model: {len(full_ids)} | Kalibrasyon: {len(cal_ids)} | Toplam: {len(full_ids)+len(cal_ids)}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def train_one_league(league_id, df_league, feature_cols, full_model):\n",
" n = len(df_league)\n",
" out_dir = f'{MODELS_DIR}/{league_id}'\n",
" os.makedirs(out_dir, exist_ok=True)\n",
" metrics = {}\n",
"\n",
" df_sorted = df_league.sort_values('mst_utc')\n",
" split = int(n * 0.80)\n",
" df_tr, df_te = df_sorted.iloc[:split], df_sorted.iloc[split:]\n",
"\n",
" saved_fc = False\n",
"\n",
" for market, cfg in MARKETS.items():\n",
" lbl, nc, ms = cfg['label'], cfg['num_class'], cfg['min_samples']\n",
" if lbl not in df_league.columns:\n",
" continue\n",
"\n",
" if full_model:\n",
" vtr = df_tr[feature_cols + [lbl]].dropna()\n",
" vte = df_te[feature_cols + [lbl]].dropna()\n",
" if len(vtr) < ms or len(vte) < 30:\n",
" continue\n",
" Xtr, ytr = vtr[feature_cols].fillna(0).values, vtr[lbl].values.astype(int)\n",
" Xte, yte = vte[feature_cols].fillna(0).values, vte[lbl].values.astype(int)\n",
"\n",
" params = {**XGB_BASE, 'objective': 'multi:softprob' if nc > 2 else 'binary:logistic',\n",
" 'eval_metric': 'mlogloss' if nc > 2 else 'logloss'}\n",
" if nc > 2: params['num_class'] = nc\n",
"\n",
" dtr = xgb.DMatrix(Xtr, label=ytr, feature_names=feature_cols)\n",
" dte = xgb.DMatrix(Xte, label=yte, feature_names=feature_cols)\n",
" model = xgb.train(params, dtr, 300, [(dte,'v')], early_stopping_rounds=30, verbose_eval=False)\n",
" model.save_model(f'{out_dir}/xgb_{market.lower()}.json')\n",
"\n",
" if not saved_fc:\n",
" json.dump(feature_cols, open(f'{out_dir}/feature_cols.json','w'))\n",
" saved_fc = True\n",
"\n",
" raw = model.predict(dte)\n",
" if nc > 2:\n",
" raw = raw.reshape(-1, nc)\n",
" acc = accuracy_score(yte, np.argmax(raw, axis=1))\n",
" for ci in range(nc):\n",
" iso = IsotonicRegression(out_of_bounds='clip').fit(raw[:,ci], (yte==ci).astype(int))\n",
" pickle.dump(iso, open(f'{out_dir}/cal_{market.lower()}_{ci}.pkl','wb'))\n",
" else:\n",
" acc = accuracy_score(yte, (raw>=0.5).astype(int))\n",
" iso = IsotonicRegression(out_of_bounds='clip').fit(raw, yte)\n",
" pickle.dump(iso, open(f'{out_dir}/cal_{market.lower()}.pkl','wb'))\n",
"\n",
" metrics[market] = {'accuracy': round(float(acc),4), 'n_train': len(Xtr)}\n",
" else:\n",
" # Cal only \u2014 store empty placeholder so prediction knows to use general V25\n",
" metrics[market] = {'model': 'cal_only', 'n': n}\n",
"\n",
" json.dump({'league_id': league_id, 'n': n, 'markets': metrics},\n",
" open(f'{out_dir}/metrics.json','w'), indent=2)\n",
" return metrics\n",
"\n",
"start = time.time()\n",
"all_ids = [(lid, True) for lid in full_ids] + [(lid, False) for lid in cal_ids]\n",
"results = []\n",
"\n",
"for i, (lid, full) in enumerate(all_ids, 1):\n",
" dfl = df[df['league_id'] == lid].copy()\n",
" t0 = time.time()\n",
" try:\n",
" mkt_res = train_one_league(lid, dfl, feature_cols, full)\n",
" ms_acc = mkt_res.get('MS', {}).get('accuracy', '-')\n",
" results.append((lid, len(dfl), mkt_res))\n",
" print(f'[{i:>3}/{len(all_ids)}] {lid[:20]:<20} n={len(dfl):>5,} MS={ms_acc} {time.time()-t0:.1f}s')\n",
" except Exception as e:\n",
" print(f'[{i:>3}/{len(all_ids)}] {lid[:20]:<20} ERROR: {e}')\n",
"\n",
" if i % 20 == 0:\n",
" el = time.time()-start\n",
" print(f' \u2500\u2500 {i}/{len(all_ids)} done | {el/60:.1f}min elapsed | ~{el/i*(len(all_ids)-i)/60:.1f}min left \u2500\u2500')\n",
"\n",
"print(f'\\nBitti! {len(results)} lig | {(time.time()-start)/60:.1f} dakika')\n",
"print(f'Modeller: {MODELS_DIR}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sonu\u00e7lar\u0131 g\u00f6ster \u2014 MS accuracy s\u0131ralamas\u0131\n",
"printable = [(lid, n, m) for lid, n, m in results if 'MS' in m and 'accuracy' in m['MS']]\n",
"printable.sort(key=lambda x: x[2]['MS']['accuracy'], reverse=True)\n",
"print(f'{\"Liga ID\":<30} {\"Ma\u00e7\":>6} {\"MS\":>7} {\"OU15\":>7} {\"OU25\":>7} {\"BTTS\":>7}')\n",
"print('-'*70)\n",
"for lid, n, m in printable[:30]:\n",
" ms = m.get('MS', {}).get('accuracy', 0)*100\n",
" ou15 = m.get('OU15',{}).get('accuracy', 0)*100\n",
" ou25 = m.get('OU25',{}).get('accuracy', 0)*100\n",
" btts = m.get('BTTS',{}).get('accuracy', 0)*100\n",
" print(f'{lid:<30} {n:>6,} {ms:>6.1f}% {ou15:>6.1f}% {ou25:>6.1f}% {btts:>6.1f}%')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Zip ve indir\n",
"import shutil\n",
"zip_path = f'{DRIVE_DIR}/league_specific_models.zip'\n",
"shutil.make_archive(zip_path.replace('.zip',''), 'zip', MODELS_DIR)\n",
"print(f'Zip: {zip_path}')\n",
"# \u0130ndirmek i\u00e7in:\n",
"# from google.colab import files\n",
"# files.download(zip_path)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.10.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
+108
View File
@@ -0,0 +1,108 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# HÜCRE 1 — Paketler\n",
"!pip install xgboost lightgbm optuna scikit-learn pandas numpy -q\n",
"print('Hazır')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# HÜCRE 2 — Drive bağla + CSV çek\n",
"from google.colab import drive\n",
"import os, shutil\n",
"drive.mount('/content/drive')\n",
"\n",
"# training_data.csv'yi Drive'ın iddaai klasöründen kopyala\n",
"shutil.copy('/content/drive/MyDrive/iddaai/training_data.csv', '/content/training_data.csv')\n",
"print('CSV hazır:', os.path.getsize('/content/training_data.csv') // 1024 // 1024, 'MB')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# HÜCRE 3 — iddaai_colab3.zip upload et (ai-engine kodları)\n",
"from google.colab import files\n",
"import zipfile\n",
"print('iddaai_colab3.zip dosyasını seç:')\n",
"uploaded = files.upload()\n",
"with zipfile.ZipFile('iddaai_colab3.zip') as z:\n",
" z.extractall('/content')\n",
"print('Kod hazır')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# HÜCRE 4 — training_data.csv'yi script'in beklediği yere koy\n",
"import os, shutil\n",
"os.makedirs('/content/ai-engine/data', exist_ok=True)\n",
"shutil.copy('/content/training_data.csv', '/content/ai-engine/data/training_data.csv')\n",
"print('Yerleştirildi')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# HÜCRE 5 — Eğitimi başlat (her 5 trial'da bir ilerleme gösterir)\n",
"import subprocess, os\n",
"\n",
"proc = subprocess.Popen(\n",
" ['python', 'scripts/train_v25_pro.py'],\n",
" stdout=subprocess.PIPE,\n",
" stderr=subprocess.STDOUT,\n",
" text=True,\n",
" cwd='/content/ai-engine',\n",
" env={**os.environ, 'PYTHONPATH': '/content/ai-engine'}\n",
")\n",
"\n",
"for line in proc.stdout:\n",
" print(line, end='', flush=True)\n",
"\n",
"proc.wait()\n",
"print('\\nEĞİTİM BİTTİ!')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# HÜCRE 6 — Modelleri Drive'a kaydet\n",
"import shutil, os\n",
"os.makedirs('/content/drive/MyDrive/iddaai/models_v25', exist_ok=True)\n",
"shutil.copytree(\n",
" '/content/ai-engine/models/v25',\n",
" '/content/drive/MyDrive/iddaai/models_v25',\n",
" dirs_exist_ok=True\n",
")\n",
"print('Modeller Drive a kaydedildi: MyDrive/iddaai/models_v25/')"
]
}
],
"metadata": {
"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
"language_info": {"name": "python", "version": "3.10.0"}
},
"nbformat": 4,
"nbformat_minor": 4
}
+33
View File
@@ -101,6 +101,32 @@ FEATURES = [
"home_top_scorer_form", "away_top_scorer_form",
"home_avg_player_exp", "away_avg_player_exp",
"home_goals_diversity", "away_goals_diversity",
# V27 H2H Expanded (4)
"h2h_home_goals_avg", "h2h_away_goals_avg",
"h2h_recent_trend", "h2h_venue_advantage",
# V27 Rolling Stats (12)
"home_rolling5_goals", "home_rolling5_conceded",
"home_rolling10_goals", "home_rolling10_conceded",
"home_rolling20_goals", "home_rolling20_conceded",
"away_rolling5_goals", "away_rolling5_conceded",
"away_rolling10_goals", "away_rolling10_conceded",
"home_rolling5_cs", "away_rolling5_cs",
# V27 Venue Stats (4)
"home_venue_goals", "home_venue_conceded",
"away_venue_goals", "away_venue_conceded",
# V27 Goal Trend (2)
"home_goal_trend", "away_goal_trend",
# V27 Calendar (5)
"home_days_rest", "away_days_rest",
"match_month", "is_season_start", "is_season_end",
# V27 Interaction (6)
"attack_vs_defense_home", "attack_vs_defense_away",
"xg_diff", "form_momentum_interaction",
"elo_form_consistency", "upset_x_elo_gap",
# V27 League Expanded (5)
"league_home_win_rate", "league_draw_rate",
"league_btts_rate", "league_ou25_rate",
"league_reliability_score",
]
MARKET_CONFIGS = [
@@ -295,12 +321,18 @@ def train_market(df, target_col, market_name, num_class, n_trials):
print(f"[INFO] Split: train={len(X_train)} val={len(X_val)} cal={len(X_cal)} test={len(X_test)}")
def _cb(study, trial):
if trial.number % 5 == 0 or trial.number == n_trials - 1:
best = study.best_value if study.best_trial else float('inf')
print(f" [{trial.number+1:>3}/{n_trials}] loss={trial.value:.4f} | best={best:.4f}", flush=True)
# ── Phase 1: Optuna XGBoost ──────────────────────────────────
print(f"\n[OPTUNA] XGBoost tuning ({n_trials} trials)...")
xgb_study = optuna.create_study(direction="minimize", sampler=TPESampler(seed=42))
xgb_study.optimize(
lambda trial: xgb_objective(trial, X_train, y_train, X_val, y_val, num_class),
n_trials=n_trials,
callbacks=[_cb],
)
xgb_best = xgb_study.best_params
print(f"[OK] XGB best logloss: {xgb_study.best_value:.4f}")
@@ -311,6 +343,7 @@ def train_market(df, target_col, market_name, num_class, n_trials):
lgb_study.optimize(
lambda trial: lgb_objective(trial, X_train, y_train, X_val, y_val, num_class),
n_trials=n_trials,
callbacks=[_cb],
)
lgb_best = lgb_study.best_params
print(f"[OK] LGB best logloss: {lgb_study.best_value:.4f}")
File diff suppressed because one or more lines are too long
+49 -10
View File
@@ -19,11 +19,26 @@ class BettingBrain:
SOFT_DIVERGENCE = 0.14
EXTREME_MODEL_PROB = 0.85
EXTREME_GAP = 0.30
# Vetoes that is_value_sniper bypasses (does NOT bypass odds_below_minimum)
SNIPER_BYPASSABLE_VETOES = {"calibrated_confidence_too_low", "play_score_too_low"}
# Trap market: market implied probability massively exceeds historical band hit rate
SNIPER_BYPASSABLE_VETOES = {"play_score_too_low"}
TRAP_MARKET_GAP = 0.10
MARKET_MIN_CONFIDENCE = {
"MS": 45.0,
"DC": 55.0,
"OU25": 48.0,
"OU15": 55.0,
"OU35": 42.0,
"BTTS": 48.0,
"HT": 55.0,
"HTFT": 65.0,
"OE": 55.0,
"CARDS": 50.0,
"HT_OU05": 55.0,
"HT_OU15": 50.0,
}
SNIPER_BLOCKED_MARKETS = {"HT", "HTFT", "OE", "CARDS", "HT_OU05", "HT_OU15"}
MARKET_PRIORS = {
"DC": 4.0,
"OU15": 3.0,
@@ -31,10 +46,10 @@ class BettingBrain:
"BTTS": 0.0,
"MS": -2.0,
"OU35": -2.0,
"HT": -6.0,
"HTFT": -12.0,
"CARDS": -5.0,
"OE": -8.0,
"HT": -10.0,
"HTFT": -18.0,
"CARDS": -8.0,
"OE": -12.0,
}
def judge(self, package: Dict[str, Any]) -> Dict[str, Any]:
@@ -182,8 +197,10 @@ class BettingBrain:
issues.append("base_model_not_playable")
is_value_sniper = bool(row.get("is_value_sniper"))
if market in self.SNIPER_BLOCKED_MARKETS:
is_value_sniper = False
if is_value_sniper:
score += 35.0
score += 20.0
positives.append("value_sniper_override")
score += max(0.0, min(20.0, calibrated_conf * 0.22))
@@ -197,9 +214,31 @@ class BettingBrain:
risk = str((package.get("risk") or {}).get("level") or "MEDIUM").upper()
score += {"LOW": 5.0, "MEDIUM": 0.0, "HIGH": -12.0, "EXTREME": -22.0}.get(risk, -4.0)
# League reliability penalty: weak leagues produce unreliable raw probabilities.
# odds_reliability is pre-computed per-league from historical Brier score analysis.
odds_rel = self._safe_float(row.get("odds_reliability"), 0.35) or 0.35
if odds_rel < 0.30:
score -= 22.0
issues.append("very_low_reliability_league")
if market in {"MS", "DC", "OU25", "BTTS"} and not is_value_sniper:
vetoes.append("low_reliability_league_hard_block")
elif odds_rel < 0.45:
score -= 12.0
issues.append("low_reliability_league")
elif odds_rel < 0.55:
score -= 5.0
# Inferred features penalty: when ELO/form/H2H come from live enrichment
# (not pre-computed table), statistical quality is unknown — penalise hard.
dq_flags = list(data_quality.get("flags") or [])
if "ai_features_inferred_from_history" in dq_flags:
score -= 18.0
issues.append("inferred_statistical_features")
if odds < self.MIN_ODDS:
vetoes.append("odds_below_minimum")
if calibrated_conf < 38.0 and not is_value_sniper:
min_conf = self.MARKET_MIN_CONFIDENCE.get(market, 45.0)
if calibrated_conf < min_conf:
vetoes.append("calibrated_confidence_too_low")
if play_score < 50.0 and not is_value_sniper:
vetoes.append("play_score_too_low")
@@ -270,7 +309,7 @@ class BettingBrain:
score -= 24.0
vetoes.append("extreme_probability_without_evidence")
if market in {"HT", "HTFT", "OE"} and score < 86.0 and not is_value_sniper:
if market in {"HT", "HTFT", "OE"} and score < 86.0:
vetoes.append("volatile_market_requires_exceptional_evidence")
# Sniper override: bypass eligible vetoes when value sniper triggered
+7 -1
View File
@@ -62,7 +62,7 @@ def generate_match_commentary(package: Dict[str, Any]) -> Dict[str, Any]:
)
# ── Quick notes ───────────────────────────────────────────────
notes = _build_notes(market_board, v27_engine, score_pred, risk, home, away)
notes = _build_notes(market_board, v27_engine, score_pred, risk, home, away, league_name=match_info.get("league", ""))
# ── Contradiction detection ───────────────────────────────────
contradictions = _detect_contradictions(market_board, v27_engine, package)
@@ -206,11 +206,17 @@ def _build_notes(
risk: Dict[str, Any],
home: str,
away: str,
league_name: str = "",
) -> List[str]:
notes: List[str] = []
triple_value = v27_engine.get("triple_value") or {}
odds_band = v27_engine.get("odds_band") or {}
# Cup game note — model uses league statistics; cup dynamics differ
_cup_kws = ("kupa", "cup", "coupe", "copa", "pokal", "ziraat", "trophy", "shield", "super cup", "süper kupa")
if any(kw in (league_name or "").lower() for kw in _cup_kws):
notes.append("⚠️ Kupa maçı: ev avantajı zayıf, rotasyon ve düşük motivasyon riski var")
# MS note
ms = market_board.get("MS") or {}
ms_conf = float(ms.get("confidence", 0) or 0)
@@ -0,0 +1,28 @@
"""Orchestrator package — mixin modules split from the original 5786-line
monolithic SingleMatchOrchestrator. Behaviour is identical to the pre-refactor
version; only file layout has changed.
"""
from services.orchestrator.data_loader import DataLoaderMixin
from services.orchestrator.feature_builder import FeatureBuilderMixin
from services.orchestrator.prediction import PredictionMixin
from services.orchestrator.basketball import BasketballMixin
from services.orchestrator.upper_brain import UpperBrainMixin
from services.orchestrator.htms import HtmsMixin
from services.orchestrator.coupon import CouponMixin
from services.orchestrator.reversal import ReversalMixin
from services.orchestrator.market_board import MarketBoardMixin
from services.orchestrator.utils import UtilsMixin
__all__ = [
"DataLoaderMixin",
"FeatureBuilderMixin",
"PredictionMixin",
"BasketballMixin",
"UpperBrainMixin",
"HtmsMixin",
"CouponMixin",
"ReversalMixin",
"MarketBoardMixin",
"UtilsMixin",
]
@@ -0,0 +1,538 @@
"""Basketball Mixin — basketball-specific market construction.
Auto-extracted mixin module split from services/single_match_orchestrator.py.
All methods here are composed into SingleMatchOrchestrator via inheritance.
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
initialised in the main __init__.
"""
from __future__ import annotations
import json
import re
import time
import math
import os
import pickle
from collections import defaultdict
from typing import Any, Dict, List, Optional, Set, Tuple, overload
import pandas as pd
import numpy as np
import psycopg2
from psycopg2.extras import RealDictCursor
from data.db import get_clean_dsn
from schemas.prediction import FullMatchPrediction
from schemas.match_data import MatchData
from models.v25_ensemble import V25Predictor, get_v25_predictor
# Optional dependency: when models.v27_predictor cannot be imported, fall back
# to inert stand-ins so this module still imports. The stand-ins load no
# models and return empty results from every call.
try:
    from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
except ImportError:
    class V27Predictor: # type: ignore[no-redef]
        # No-op replacement: no models, load_models() reports failure,
        # predict_all() yields an empty mapping.
        def __init__(self): self.models = {}
        def load_models(self): return False
        def predict_all(self, features): return {}
    # Fallback: accepts any arguments and returns an empty dict.
    def compute_divergence(*args, **kwargs):
        return {}
    # Fallback: accepts any arguments and returns an empty dict.
    def compute_value_edge(*args, **kwargs):
        return {}
from features.odds_band_analyzer import OddsBandAnalyzer
# Optional dependency: the basketball predictor module may be absent.
try:
    from models.basketball_v25 import (
        BasketballMatchPrediction,
        get_basketball_v25_predictor,
    )
except ImportError:
    # Keep the type name usable in annotations when the real class is missing.
    BasketballMatchPrediction = Any # type: ignore[misc]
    # Fallback factory: defers the failure to call time so importing this
    # module succeeds; raises ImportError when actually invoked.
    def get_basketball_v25_predictor() -> Any:
        raise ImportError("Basketball predictor is not available")
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
from services.feature_enrichment import FeatureEnrichmentService
from services.betting_brain import BettingBrain
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
from services.match_commentary import generate_match_commentary
from utils.top_leagues import load_top_league_ids
from utils.league_reliability import load_league_reliability
from config.config_loader import build_threshold_dict, get_threshold_default
from models.calibration import get_calibrator
class BasketballMixin:
def _build_basketball_prediction_package(
self,
data: MatchData,
prediction: Dict[str, Any],
) -> Dict[str, Any]:
quality = self._compute_data_quality(data)
raw_market_rows = self._build_basketball_market_rows(data, prediction)
market_rows = [
self._decorate_basketball_market_row(data, prediction, quality, row)
for row in raw_market_rows
]
market_rows.sort(
key=lambda row: (
1 if row.get("playable") else 0,
float(row.get("play_score", 0.0)),
),
reverse=True,
)
playable_rows = [row for row in market_rows if row.get("playable")]
MIN_ODDS = 1.30
playable_with_odds = [
row for row in playable_rows
if float(row.get("odds", 0.0)) >= MIN_ODDS
]
if playable_with_odds:
playable_with_odds.sort(
key=lambda r: (
float(r.get("ev_edge", 0.0)),
float(r.get("play_score", 0.0)),
),
reverse=True,
)
main_pick = playable_with_odds[0]
main_pick["is_guaranteed"] = False
main_pick["pick_reason"] = "positive_ev_pick"
else:
fallback_with_odds = [r for r in market_rows if float(r.get("odds", 0.0)) > 1.0]
fallback_with_odds.sort(key=lambda r: float(r.get("play_score", 0.0)), reverse=True)
main_pick = fallback_with_odds[0] if fallback_with_odds else (market_rows[0] if market_rows else None)
if main_pick:
main_pick["is_guaranteed"] = False
main_pick["playable"] = False
main_pick["stake_units"] = 0.0
main_pick["bet_grade"] = "PASS"
main_pick["pick_reason"] = "no_playable_value_found"
supporting: List[Dict[str, Any]] = []
for row in market_rows:
if main_pick and row["market"] == main_pick["market"] and row["pick"] == main_pick["pick"]:
continue
supporting.append(row)
supporting = supporting[:5]
bet_summary = [self._to_bet_summary_item(row) for row in market_rows]
scenarios = self._build_basketball_scenarios(prediction)
reasons = self._build_basketball_reasoning_factors(data, prediction, quality)
aggressive_pick: Optional[Dict[str, Any]] = None
risk_level = prediction.get("risk_level", "MEDIUM")
risk_score = float(prediction.get("risk_score", 50.0) or 50.0)
# Build aggressive pick if available from Spreak in market_board
board = prediction.get("market_board", {})
if risk_level in ("LOW", "MEDIUM") and "Spread" in board:
spr_data = board["Spread"]
probs = list(spr_data.values())
keys = list(spr_data.keys())
if len(probs) >= 2:
prob_a = float(str(probs[0]).replace('%', '')) / 100.0
prob_h = float(str(probs[1]).replace('%', '')) / 100.0
max_prob = max(prob_a, prob_h)
spr_pick = "Home" if prob_h >= prob_a else "Away"
conf = 50.0
line_str = "Spread"
for b in prediction.get("bet_summary", []):
if b["market"] == "Spread":
conf = float(b["confidence"])
line_str = b["pick"]
aggressive_pick = {
"market": "SPREAD",
"pick": line_str,
"probability": round(max_prob, 4),
"confidence": round(conf, 1),
"odds": round(
float(
data.odds_data.get(
"spread_h" if spr_pick == "Home" else "spread_a", 0.0
)
),
2,
),
}
scores = prediction.get("score_prediction", {})
home_score = scores.get("home_expected", 80.0)
away_score = scores.get("away_expected", 80.0)
total_score = scores.get("total_expected", 160.0)
mb_out = {
"PLAYER_TOP": board.get("PLAYER_TOP", []),
}
if "ML" in board:
ml_data = board["ML"]
keys = list(ml_data.keys())
if len(keys) >= 2:
mb_out["ML"] = {
"pick": prediction.get("main_pick", ""),
"confidence": 60.0,
"probs": {
"1": round(float(str(ml_data[keys[0]]).replace('%', '')) / 100.0, 4),
"2": round(float(str(ml_data[keys[1]]).replace('%', '')) / 100.0, 4),
},
}
if "Totals" in board:
tot_data = board["Totals"]
keys = list(tot_data.keys())
if len(keys) >= 2:
mb_out["TOTAL"] = {
"line": 160.5,
"pick": prediction.get("main_pick", ""),
"confidence": 60.0,
"probs": {
"under": round(float(str(tot_data[keys[0]]).replace('%', '')) / 100.0, 4),
"over": round(float(str(tot_data[keys[1]]).replace('%', '')) / 100.0, 4),
},
}
if "Spread" in board:
spr_data = board["Spread"]
keys = list(spr_data.keys())
if len(keys) >= 2:
mb_out["SPREAD"] = {
"line_home": 0.0,
"pick": prediction.get("main_pick", ""),
"confidence": 60.0,
"probs": {
"away_cover": round(float(str(spr_data[keys[0]]).replace('%', '')) / 100.0, 4),
"home_cover": round(float(str(spr_data[keys[1]]).replace('%', '')) / 100.0, 4),
},
}
return {
"model_version": str(prediction.get("engine_version") or "v28.main.basketball"),
"match_info": {
"match_id": data.match_id,
"match_name": f"{data.home_team_name} vs {data.away_team_name}",
"home_team": data.home_team_name,
"away_team": data.away_team_name,
"league": data.league_name,
"match_date_ms": data.match_date_ms,
"sport": data.sport,
},
"data_quality": quality,
"risk": {
"level": risk_level,
"score": round(risk_score, 1),
"is_surprise_risk": False,
"surprise_type": "",
"warnings": [],
},
"engine_breakdown": prediction.get("engine_breakdown")
or {
"team": 60.0,
"player": 60.0,
"odds": 80.0,
"referee": 50.0,
},
"main_pick": main_pick,
"bet_advice": {
"playable": bool(main_pick and main_pick.get("playable")),
"suggested_stake_units": float(main_pick.get("stake_units", 0.0))
if (main_pick and main_pick.get("playable"))
else 0.0,
"reason": "playable_pick_found"
if (main_pick and main_pick.get("playable"))
else "no_bet_conditions_met",
},
"bet_summary": bet_summary,
"supporting_picks": supporting,
"aggressive_pick": aggressive_pick,
"scenario_top5": scenarios,
"score_prediction": {
"ft": f"{int(round(home_score))}-{int(round(away_score))}",
"ht": f"{int(round(home_score * 0.52))}-{int(round(away_score * 0.52))}",
"xg_home": round(float(home_score), 2),
"xg_away": round(float(away_score), 2),
"xg_total": round(float(total_score), 2),
},
"market_board": mb_out,
"reasoning_factors": reasons,
}
def _build_basketball_market_rows(
self,
data: MatchData,
pred: Dict[str, Any],
) -> List[Dict[str, Any]]:
odds = data.odds_data
market_board = pred.get("market_board", {})
# 1. Moneyline
ml_row = None
if "ML" in market_board:
ml_data = market_board["ML"]
# To get specific pick (MS 1 or MS 2), look at the probability values
probs = list(ml_data.values())
keys = list(ml_data.keys())
if len(probs) >= 2:
prob_1 = float(str(probs[0]).replace('%', '')) / 100.0
prob_2 = float(str(probs[1]).replace('%', '')) / 100.0
max_prob = max(prob_1, prob_2)
# Derive pick string
ml_pick_val = keys[0] if prob_1 >= prob_2 else keys[1]
ml_pick = "1" if "1" in ml_pick_val else "2"
ml_odd_key = "ml_h" if ml_pick == "1" else "ml_a"
# Find confidence from bet summary
conf = 50.0
for b in pred.get("bet_summary", []):
if b["market"] == "Moneyline": conf = float(b["confidence"])
ml_row = {
"market": "ML",
"pick": ml_pick,
"probability": round(max_prob, 4),
"confidence": round(conf, 1),
"odds": round(float(odds.get(ml_odd_key, 0.0)), 2),
}
# 2. Totals
tot_row = None
if "Totals" in market_board:
tot_data = market_board["Totals"]
probs = list(tot_data.values())
keys = list(tot_data.keys())
if len(probs) >= 2:
prob_u = float(str(probs[0]).replace('%', '')) / 100.0
prob_o = float(str(probs[1]).replace('%', '')) / 100.0
max_prob = max(prob_u, prob_o)
pick_str = keys[1] if prob_o >= prob_u else keys[0]
tot_pick = "Over" if "Over" in pick_str else "Under"
line_val = pick_str.replace("Over", "").replace("Under", "").strip()
conf = 50.0
for b in pred.get("bet_summary", []):
if b["market"] == "Totals": conf = float(b["confidence"])
tot_row = {
"market": "TOTAL",
"pick": f"{tot_pick} {line_val}",
"probability": round(max_prob, 4),
"confidence": round(conf, 1),
"odds": round(float(odds.get("tot_o" if tot_pick == "Over" else "tot_u", 0.0)), 2),
}
# 3. Spread
spr_row = None
if "Spread" in market_board:
spr_data = market_board["Spread"]
probs = list(spr_data.values())
keys = list(spr_data.keys())
if len(probs) >= 2:
prob_a = float(str(probs[0]).replace('%', '')) / 100.0
prob_h = float(str(probs[1]).replace('%', '')) / 100.0
max_prob = max(prob_a, prob_h)
spr_pick = "Home" if prob_h >= prob_a else "Away"
conf = 50.0
line_str = ""
for b in pred.get("bet_summary", []):
if b["market"] == "Spread":
conf = float(b["confidence"])
line_str = b["pick"]
spr_row = {
"market": "SPREAD",
"pick": spr_pick + " " + line_str,
"probability": round(max_prob, 4),
"confidence": round(conf, 1),
"odds": round(float(odds.get("spread_h" if spr_pick == "Home" else "spread_a", 0.0)), 2),
}
# Return valid rows
rows = []
if ml_row: rows.append(ml_row)
if tot_row: rows.append(tot_row)
if spr_row: rows.append(spr_row)
return rows
def _decorate_basketball_market_row(
self,
data: MatchData,
prediction: Dict[str, Any],
quality: Dict[str, Any],
row: Dict[str, Any],
) -> Dict[str, Any]:
market = str(row.get("market") or "")
raw_conf = float(row.get("confidence") or 0.0)
prob = float(row.get("probability") or 0.0)
odd = float(row.get("odds") or 0.0)
calibration = {"ML": 0.90, "TOTAL": 0.88, "SPREAD": 0.86}.get(market, 0.88)
min_conf = {"ML": 55.0, "TOTAL": 56.0, "SPREAD": 55.0}.get(market, 55.0)
calibrated_conf = max(1.0, min(99.0, raw_conf * calibration))
implied_prob = (1.0 / odd) if odd > 1.0 else 0.0
edge = prob - implied_prob if implied_prob > 0 else 0.0
risk_level = str(prediction.get("risk_level", "MEDIUM")).upper()
risk_penalty = {"LOW": 0.0, "MEDIUM": 3.0, "HIGH": 8.0, "EXTREME": 12.0}.get(
risk_level,
4.0,
)
quality_label = str(quality.get("label") or "MEDIUM").upper()
quality_penalty = {"HIGH": 0.0, "MEDIUM": 2.0, "LOW": 6.0}.get(
quality_label,
4.0,
)
base_score = calibrated_conf + (edge * 100.0)
play_score = max(0.0, min(100.0, base_score - risk_penalty - quality_penalty))
reasons: List[str] = []
playable = True
min_play_score = self.market_min_play_score.get(market, 68.0)
min_edge = self.market_min_edge.get(market, 0.02)
if calibrated_conf < min_conf:
playable = False
reasons.append("below_calibrated_conf_threshold")
if market in self.ODDS_REQUIRED_MARKETS and odd <= 1.01:
playable = False
reasons.append("market_odds_missing")
if risk_level in ("HIGH", "EXTREME") and quality_label == "LOW":
playable = False
reasons.append("high_risk_low_data_quality")
if odd > 1.0 and edge < -0.05:
playable = False
reasons.append("negative_model_edge")
if not reasons:
reasons.append("market_passed_all_gates")
if not playable:
grade = "PASS"
stake_units = 0.0
elif play_score >= 72:
grade = "A"
stake_units = 1.0
elif play_score >= 61:
grade = "B"
stake_units = 0.5
else:
grade = "C"
stake_units = 0.25
out = dict(row)
out.update(
{
"raw_confidence": round(raw_conf, 1),
"calibrated_confidence": round(calibrated_conf, 1),
"min_required_confidence": round(min_conf, 1),
"edge": round(edge, 4),
"play_score": round(play_score, 1),
"playable": playable,
"bet_grade": grade,
"stake_units": stake_units,
"decision_reasons": reasons[:3],
},
)
return out
def _build_basketball_scenarios(
self,
prediction: Dict[str, Any],
) -> List[Dict[str, Any]]:
scores = prediction.get("score_prediction", {})
home = float(scores.get("home_expected", 80.0))
away = float(scores.get("away_expected", 80.0))
templates = [
(0.00, 0.23),
(+3.5, 0.20),
(-3.5, 0.19),
(+6.0, 0.16),
(-6.0, 0.14),
]
out: List[Dict[str, Any]] = []
for delta, prob in templates:
h = int(round(home + delta))
a = int(round(away - delta))
out.append({"score": f"{h}-{a}", "prob": prob})
return out
def _build_basketball_reasoning_factors(
self,
data: MatchData,
prediction: Dict[str, Any],
quality: Dict[str, Any],
) -> List[str]:
factors: List[str] = []
# XGBoost models are odds-aware, weight it heavily
factors.append("market_signal_dominant")
if quality.get("label") in ("HIGH", "MEDIUM"):
factors.append("player_form_signal_strong")
else:
factors.append("player_form_signal_limited")
if prediction.get("is_surprise_risk"):
factors.append("upset_risk_detected")
if quality.get("label") == "LOW":
factors.append("limited_data_confidence")
factors.append("basketball_points_model")
return factors
def _compute_basketball_data_quality(self, data: MatchData) -> Dict[str, Any]:
flags: List[str] = []
has_ml = float(data.odds_data.get("ml_h", 0.0)) > 1.0 and float(data.odds_data.get("ml_a", 0.0)) > 1.0
has_total = (
float(data.odds_data.get("tot_line", 0.0)) > 0.0
and float(data.odds_data.get("tot_o", 0.0)) > 1.0
and float(data.odds_data.get("tot_u", 0.0)) > 1.0
)
has_spread = (
"spread_home_line" in data.odds_data
and float(data.odds_data.get("spread_h", 0.0)) > 1.0
and float(data.odds_data.get("spread_a", 0.0)) > 1.0
)
odds_components = [has_ml, has_total, has_spread]
odds_score = sum(1.0 for x in odds_components if x) / 3.0
if not has_ml:
flags.append("missing_moneyline_odds")
if not has_total:
flags.append("missing_total_odds")
if not has_spread:
flags.append("missing_spread_odds")
# Basketball live lineup/referee coverage is structurally lower in this project.
# Keep neutral baseline and rely mostly on odds depth.
lineup_score = 0.7
ref_score = 0.7
total_score = (odds_score * 0.75) + (lineup_score * 0.15) + (ref_score * 0.10)
if total_score >= 0.75:
label = "HIGH"
elif total_score >= 0.52:
label = "MEDIUM"
else:
label = "LOW"
return {
"label": label,
"score": round(total_score, 3),
"home_lineup_count": len(data.home_lineup or []),
"away_lineup_count": len(data.away_lineup or []),
"lineup_source": data.lineup_source,
"flags": flags,
}
+444
View File
@@ -0,0 +1,444 @@
"""Coupon Mixin — multi-match coupon builder + daily bankers.
Auto-extracted mixin module split from services/single_match_orchestrator.py.
All methods here are composed into SingleMatchOrchestrator via inheritance.
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
initialised in the main __init__.
"""
from __future__ import annotations
import json
import re
import time
import math
import os
import pickle
from collections import defaultdict
from typing import Any, Dict, List, Optional, Set, Tuple, overload
import pandas as pd
import numpy as np
import psycopg2
from psycopg2.extras import RealDictCursor
from data.db import get_clean_dsn
from schemas.prediction import FullMatchPrediction
from schemas.match_data import MatchData
from models.v25_ensemble import V25Predictor, get_v25_predictor
try:
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
except ImportError:
class V27Predictor: # type: ignore[no-redef]
def __init__(self): self.models = {}
def load_models(self): return False
def predict_all(self, features): return {}
def compute_divergence(*args, **kwargs):
return {}
def compute_value_edge(*args, **kwargs):
return {}
from features.odds_band_analyzer import OddsBandAnalyzer
try:
from models.basketball_v25 import (
BasketballMatchPrediction,
get_basketball_v25_predictor,
)
except ImportError:
BasketballMatchPrediction = Any # type: ignore[misc]
def get_basketball_v25_predictor() -> Any:
raise ImportError("Basketball predictor is not available")
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
from services.feature_enrichment import FeatureEnrichmentService
from services.betting_brain import BettingBrain
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
from services.match_commentary import generate_match_commentary
from utils.top_leagues import load_top_league_ids
from utils.league_reliability import load_league_reliability
from config.config_loader import build_threshold_dict, get_threshold_default
from models.calibration import get_calibrator
class CouponMixin:
    """Multi-match coupon builder and daily "banker" selection.

    Designed to be mixed into the orchestrator class, which is expected to
    provide ``self.dsn``, ``self.analyze_match`` and ``self._load_match_data``.
    """

    # Default coupon size and minimum calibrated confidence per strategy.
    _COUPON_STRATEGIES = {
        "SAFE": {"max_matches": 4, "min_conf": 66.0},
        "BALANCED": {"max_matches": 5, "min_conf": 58.0},
        "AGGRESSIVE": {"max_matches": 8, "min_conf": 52.0},
        "VALUE": {"max_matches": 8, "min_conf": 48.0},
        "MIRACLE": {"max_matches": 10, "min_conf": 44.0},
    }

    @staticmethod
    def _coupon_sort_key(item: Dict[str, Any]) -> Tuple[float, float, float]:
        """Ranking key: strategy score, then confidence, then EV edge."""
        return (
            float(item.get("strategy_score", 0.0)),
            float(item.get("confidence", 0.0)),
            float(item.get("ev_edge", 0.0)),
        )

    @staticmethod
    def _score_coupon_candidate(
        strategy_name: str,
        *,
        strict_candidate: bool,
        calibrated_conf: float,
        min_conf: float,
        odds: float,
        play_score: float,
        ev_edge: float,
        risk_level: str,
        data_quality: str,
    ) -> Optional[Tuple[str, float]]:
        """Grade one pick against a strategy's strict, soft and fallback gates.

        Returns:
            ``(selection_mode, strategy_score)`` for the first gate the pick
            clears (``"strict"``, then ``"soft"``, then ``"fallback"``), or
            ``None`` when it clears none of them.
        """
        extreme = risk_level == "EXTREME"
        low_quality = data_quality == "LOW"
        soft_floor = min_conf - 10.0

        if strategy_name == "SAFE":
            strict_fail = (
                odds > 2.35
                or play_score < 60.0
                or risk_level in {"HIGH", "EXTREME"}
                or low_quality
                or ev_edge < 0.01
            )
            strict_score = (
                calibrated_conf * 1.10
                + play_score * 0.90
                + (ev_edge * 180.0)
                - abs(odds - 1.55) * 12.0
            )
            soft_pass = (
                calibrated_conf >= max(soft_floor, 56.0)
                and odds <= 2.70
                and play_score >= 50.0
                and not extreme
                and not low_quality
                and ev_edge >= -0.01
            )
            soft_score = (
                calibrated_conf
                + play_score * 0.85
                + (ev_edge * 140.0)
                - abs(odds - 1.65) * 9.0
            )
        elif strategy_name == "BALANCED":
            strict_fail = (
                odds > 3.40 or play_score < 52.0 or extreme or ev_edge < 0.0
            )
            strict_score = (
                calibrated_conf
                + play_score
                + (ev_edge * 220.0)
                + min(odds, 3.0) * 3.0
            )
            soft_pass = (
                calibrated_conf >= max(soft_floor, 48.0)
                and odds <= 4.20
                and play_score >= 44.0
                and not extreme
                and ev_edge >= -0.015
            )
            soft_score = (
                calibrated_conf * 0.95
                + play_score * 0.90
                + (ev_edge * 180.0)
                + min(odds, 3.5) * 3.5
            )
        elif strategy_name == "AGGRESSIVE":
            strict_fail = (
                odds < 1.35 or odds > 7.50 or play_score < 46.0 or extreme
            )
            strict_score = (
                calibrated_conf * 0.85
                + play_score * 0.75
                + (ev_edge * 260.0)
                + min(odds, 6.0) * 7.0
            )
            soft_pass = (
                calibrated_conf >= max(soft_floor, 42.0)
                and 1.25 <= odds <= 8.50
                and play_score >= 40.0
                and not extreme
                and ev_edge >= -0.02
            )
            soft_score = (
                calibrated_conf * 0.80
                + play_score * 0.70
                + (ev_edge * 210.0)
                + min(odds, 7.0) * 7.5
            )
        elif strategy_name == "VALUE":
            strict_fail = (
                odds < 1.55
                or play_score < 48.0
                or ev_edge < 0.03
                or extreme
                or low_quality
            )
            strict_score = (
                calibrated_conf * 0.75
                + play_score * 0.85
                + (ev_edge * 320.0)
                + min(odds, 6.5) * 8.0
            )
            soft_pass = (
                calibrated_conf >= max(soft_floor, 40.0)
                and odds >= 1.35
                and play_score >= 40.0
                and not extreme
                and not low_quality
                and ev_edge >= 0.0
            )
            soft_score = (
                calibrated_conf * 0.70
                + play_score * 0.80
                + (ev_edge * 260.0)
                + min(odds, 7.0) * 7.0
            )
        else:  # MIRACLE
            strict_fail = (
                odds < 2.10 or play_score < 40.0 or ev_edge < 0.01 or extreme
            )
            strict_score = (
                calibrated_conf * 0.55
                + play_score * 0.60
                + (ev_edge * 260.0)
                + min(odds, 10.0) * 10.0
            )
            soft_pass = (
                calibrated_conf >= max(soft_floor, 36.0)
                and odds >= 1.60
                and play_score >= 34.0
                and not extreme
                and ev_edge >= -0.02
            )
            soft_score = (
                calibrated_conf * 0.50
                + play_score * 0.55
                + (ev_edge * 200.0)
                + min(odds, 10.0) * 9.0
            )

        if strict_candidate and not strict_fail:
            return "strict", strict_score
        if soft_pass:
            return "soft", soft_score

        # Last-resort gate shared by every strategy.
        fallback_pass = (
            calibrated_conf >= max(min_conf - 14.0, 34.0)
            and odds >= 1.20
            and play_score >= 32.0
            and not extreme
        )
        if fallback_pass:
            fallback_score = (
                calibrated_conf * 0.60
                + play_score * 0.65
                + (ev_edge * 120.0)
                + min(odds, 6.0) * 4.0
            )
            return "fallback", fallback_score
        return None

    def _best_coupon_pick(
        self,
        package: Dict[str, Any],
        strategy_name: str,
        min_conf: float,
    ) -> Optional[Dict[str, Any]]:
        """Return the highest-ranked pick of one analysed match, or ``None``.

        Picks are gathered from ``main_pick``, ``value_pick``,
        ``supporting_picks`` and ``bet_summary`` (in that order), de-duplicated
        on ``(market, pick)`` and graded by ``_score_coupon_candidate``.
        """
        risk_level = str(package.get("risk", {}).get("level", "MEDIUM")).upper()
        data_quality = str(package.get("data_quality", {}).get("label", "MEDIUM")).upper()

        raw_picks = [
            candidate
            for candidate in (
                package.get("main_pick"),
                package.get("value_pick"),
                *(package.get("supporting_picks") or []),
                *(package.get("bet_summary") or []),
            )
            if isinstance(candidate, dict)
        ]

        seen_keys: Set[Tuple[str, str]] = set()
        match_candidates: List[Dict[str, Any]] = []
        for candidate in raw_picks:
            market = str(candidate.get("market") or "")
            pick = str(candidate.get("pick") or "")
            if not market or not pick:
                continue
            dedupe_key = (market, pick)
            if dedupe_key in seen_keys:
                continue
            seen_keys.add(dedupe_key)

            calibrated_conf = float(
                candidate.get("calibrated_confidence", candidate.get("confidence", 0.0))
                or 0.0
            )
            odds = float(candidate.get("odds", 0.0) or 0.0)
            probability = float(candidate.get("probability", 0.0) or 0.0)
            play_score = float(candidate.get("play_score", 0.0) or 0.0)
            ev_edge = float(
                candidate.get("ev_edge", candidate.get("edge", 0.0)) or 0.0
            )
            playable = bool(candidate.get("playable"))
            bet_grade = str(candidate.get("bet_grade", "PASS")).upper()
            if odds <= 1.01:
                # No usable market price: cannot be part of a coupon.
                continue

            verdict = self._score_coupon_candidate(
                strategy_name,
                strict_candidate=(
                    playable and calibrated_conf >= min_conf and bet_grade != "PASS"
                ),
                calibrated_conf=calibrated_conf,
                min_conf=min_conf,
                odds=odds,
                play_score=play_score,
                ev_edge=ev_edge,
                risk_level=risk_level,
                data_quality=data_quality,
            )
            if verdict is None:
                continue
            selection_mode, strategy_score = verdict
            match_candidates.append(
                {
                    "match_id": package["match_info"]["match_id"],
                    "match_name": package["match_info"]["match_name"],
                    "market": market,
                    "pick": pick,
                    "probability": probability,
                    "confidence": calibrated_conf,
                    "odds": odds,
                    "risk_level": risk_level,
                    "data_quality": data_quality,
                    "bet_grade": bet_grade,
                    "playable": playable,
                    "play_score": round(play_score, 1),
                    "ev_edge": round(ev_edge, 4),
                    "selection_mode": selection_mode,
                    "strategy_score": round(strategy_score, 3),
                }
            )

        if not match_candidates:
            return None
        # max() keeps the first of equally ranked picks, exactly like a stable
        # descending sort followed by taking element 0.
        return max(match_candidates, key=self._coupon_sort_key)

    def _coupon_fetch_rows(
        self,
        sql: str,
        params: Optional[Tuple[Any, ...]] = None,
    ) -> List[Dict[str, Any]]:
        """Run a read query and return all rows as dicts.

        psycopg2's connection context manager only ends the transaction; it
        does not close the connection, so it is closed explicitly here.
        """
        conn = psycopg2.connect(self.dsn)
        try:
            with conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
                cur.execute(sql, params)
                return cur.fetchall()
        finally:
            conn.close()

    def build_coupon(
        self,
        match_ids: List[str],
        strategy: str = "BALANCED",
        max_matches: Optional[int] = None,
        min_confidence: Optional[float] = None,
    ) -> Dict[str, Any]:
        """Build a multi-match coupon with at most one pick per match.

        Args:
            match_ids: Matches to analyse via ``self.analyze_match``.
            strategy: SAFE, BALANCED, AGGRESSIVE, VALUE or MIRACLE
                (case-insensitive).  Unknown names are treated as BALANCED so
                that limits and scoring rules always belong to the same
                strategy.
            max_matches: Overrides the strategy's coupon size (minimum 1).
            min_confidence: Overrides the strategy's confidence threshold.

        Returns:
            Dict with the effective ``strategy``, ``generated_at`` (UTC ISO
            timestamp with a ``Z`` suffix), selected ``bets``, combined
            ``total_odds``, ``expected_win_rate`` (product of pick
            probabilities; 0.0 for an empty coupon) and ``rejected_matches``.
        """
        from datetime import datetime, timezone

        strategy_name = (strategy or "BALANCED").upper()
        if strategy_name not in self._COUPON_STRATEGIES:
            strategy_name = "BALANCED"
        cfg = self._COUPON_STRATEGIES[strategy_name]
        max_allowed = max_matches if max_matches is not None else cfg["max_matches"]
        min_conf = min_confidence if min_confidence is not None else cfg["min_conf"]

        candidates: List[Dict[str, Any]] = []
        rejected: List[Dict[str, Any]] = []
        for match_id in match_ids:
            package = self.analyze_match(match_id)
            if not package:
                rejected.append({"match_id": match_id, "reason": "match_not_found"})
                continue
            best = self._best_coupon_pick(package, strategy_name, min_conf)
            if best is None:
                rejected.append(
                    {
                        "match_id": match_id,
                        "reason": "no_strategy_fit",
                        "threshold": min_conf,
                    }
                )
                continue
            candidates.append(best)

        candidates.sort(key=self._coupon_sort_key, reverse=True)
        selected = candidates[: max(1, max_allowed)]

        total_odds = 1.0
        # An empty coupon cannot win: start from 0.0 instead of the neutral
        # product 1.0 so the payload never advertises a 100% win rate.
        win_probability = 1.0 if selected else 0.0
        for pick in selected:
            odd = float(pick.get("odds") or 1.0)
            prob = float(pick.get("probability") or 0.0)
            total_odds *= odd if odd > 1.0 else 1.0
            win_probability *= prob

        generated_at = (
            datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
        )
        return {
            "strategy": strategy_name,
            "generated_at": generated_at,
            "match_count": len(selected),
            "bets": selected,
            "total_odds": round(total_odds, 2),
            "expected_win_rate": round(win_probability, 4),
            "rejected_matches": rejected,
        }

    def get_daily_bankers_live(self, count: int = 3) -> List[Dict[str, Any]]:
        """Return up to ``count`` SAFE-strategy picks from ``live_matches``.

        Considers the first 60 matches kicking off within the next 24 hours
        and builds a SAFE coupon with a 78.0 minimum confidence.
        """
        rows = self._coupon_fetch_rows(
            """
            SELECT id
            FROM live_matches
            WHERE mst_utc > EXTRACT(EPOCH FROM NOW()) * 1000
            AND mst_utc < EXTRACT(EPOCH FROM NOW() + INTERVAL '24 hours') * 1000
            ORDER BY mst_utc ASC
            LIMIT 60
            """
        )
        ids = [row["id"] for row in rows]
        if not ids:
            return []
        wanted = max(1, count)
        coupon = self.build_coupon(
            match_ids=ids,
            strategy="SAFE",
            max_matches=wanted,
            min_confidence=78.0,
        )
        return coupon.get("bets", [])[:wanted]

    def get_daily_bankers(self, count: int = 3) -> List[Dict[str, Any]]:
        """
        Identifies the safest, highest value bets for the next 24 hours.

        Scans up to 50 not-started matches that have odds, keeps main picks
        with confidence >= 75 and odds >= 1.30, and ranks them by
        ``confidence * (odds - 1)``.  A match whose analysis fails is skipped
        with a printed warning so one bad match cannot hide the rest.
        """
        now_ms = int(time.time() * 1000)
        horizon_ms = now_ms + (24 * 60 * 60 * 1000)
        matches = self._coupon_fetch_rows(
            """
            SELECT m.id, m.match_name, m.mst_utc
            FROM matches m
            WHERE m.mst_utc >= %s AND m.mst_utc <= %s
            AND m.status = 'NS'
            AND EXISTS (SELECT 1 FROM odd_categories oc WHERE oc.match_id = m.id)
            ORDER BY m.mst_utc ASC
            LIMIT 50
            """,
            (now_ms, horizon_ms),
        )

        potential_bankers: List[Dict[str, Any]] = []
        print(f"🔍 Scanning {len(matches)} upcoming matches for Bankers...")
        for match in matches:
            match_id = match['id']
            try:
                data = self._load_match_data(match_id)
                if data is None:
                    continue
                result = self.analyze_match(match_id)
                pick = result.get('main_pick') if result else None
                if not isinstance(pick, dict):
                    continue
                conf = float(
                    pick.get('calibrated_confidence', pick.get('confidence', 0)) or 0.0
                )
                odds = float(pick.get('odds', 0) or 0.0)
                # Banker criteria: high confidence (>= 75) AND decent odds (>= 1.30).
                if conf < 75.0 or odds < 1.30:
                    continue
                potential_bankers.append({
                    "match_id": match_id,
                    "match_name": match['match_name'] or f"{data.home_team_name} vs {data.away_team_name}",
                    "league": data.league_name,
                    "pick": f"{pick.get('market', '')} - {pick.get('pick', '')}",
                    "confidence": conf,
                    "odds": odds,
                    "value_score": conf * (odds - 1.0),
                })
            except Exception as exc:
                # Best effort per match, but never silently.
                print(f"⚠️ Banker scan failed for match {match_id}: {exc}")

        potential_bankers.sort(key=lambda x: x['value_score'], reverse=True)
        return potential_bankers[:count]
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,498 @@
"""Feature Builder Mixin — V25/V28 feature vector assembly.
Auto-extracted mixin module split from services/single_match_orchestrator.py.
All methods here are composed into SingleMatchOrchestrator via inheritance.
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
initialised in the main __init__.
"""
from __future__ import annotations
import json
import re
import time
import math
import os
import pickle
from collections import defaultdict
from typing import Any, Dict, List, Optional, Set, Tuple, overload
import pandas as pd
import numpy as np
import psycopg2
from psycopg2.extras import RealDictCursor
from data.db import get_clean_dsn
from schemas.prediction import FullMatchPrediction
from schemas.match_data import MatchData
from models.v25_ensemble import V25Predictor, get_v25_predictor
try:
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
except ImportError:
class V27Predictor: # type: ignore[no-redef]
def __init__(self): self.models = {}
def load_models(self): return False
def predict_all(self, features): return {}
def compute_divergence(*args, **kwargs):
return {}
def compute_value_edge(*args, **kwargs):
return {}
from features.odds_band_analyzer import OddsBandAnalyzer
try:
from models.basketball_v25 import (
BasketballMatchPrediction,
get_basketball_v25_predictor,
)
except ImportError:
BasketballMatchPrediction = Any # type: ignore[misc]
def get_basketball_v25_predictor() -> Any:
raise ImportError("Basketball predictor is not available")
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
from features.upset_engine import get_upset_engine
from services.feature_enrichment import FeatureEnrichmentService
from services.betting_brain import BettingBrain
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
from services.match_commentary import generate_match_commentary
from utils.top_leagues import load_top_league_ids
from utils.league_reliability import load_league_reliability
from config.config_loader import build_threshold_dict, get_threshold_default
from models.calibration import get_calibrator
class FeatureBuilderMixin:
def _build_v25_features(self, data: MatchData) -> Dict[str, float]:
    """
    Build the single authoritative V25 pre-match feature vector.

    Every enrichment step is best effort: a failing step keeps its default
    values, its error is collected and printed, and the transaction is rolled
    back so the remaining steps can still query on the shared cursor.

    Side effects on ``data``: sets ``feature_source`` (``football_ai_features``,
    ``live_prematch_enrichment`` or ``fallback_defaults``) and
    ``odds_band_features``.

    Returns:
        Flat feature-name -> value mapping (insertion order is kept stable).
    """
    import datetime

    odds = self._sanitize_v25_odds(data.odds_data or {})

    def _odd(key: str) -> float:
        # Missing, None and 0 quotes all collapse to 0.0 ("not offered").
        return float(odds.get(key) or 0)

    ms_h = _odd('ms_h')
    ms_d = _odd('ms_d')
    ms_a = _odd('ms_a')

    # Implied probabilities (vig-normalised)
    implied_home, implied_draw, implied_away = 0.33, 0.33, 0.33
    if ms_h > 0 and ms_d > 0 and ms_a > 0:
        raw_sum = 1 / ms_h + 1 / ms_d + 1 / ms_a
        implied_home = (1 / ms_h) / raw_sum
        implied_draw = (1 / ms_d) / raw_sum
        implied_away = (1 / ms_a) / raw_sum
    upset_potential = max(
        0.0,
        min(
            1.0,
            1.0 - abs(implied_home - implied_away) + (implied_draw * 0.35),
        ),
    )

    # All enrichment queries share a single DB connection
    home_elo, away_elo = 1500.0, 1500.0
    home_venue_elo, away_venue_elo = 1500.0, 1500.0
    home_form_elo_val, away_form_elo_val = 1500.0, 1500.0
    enr = self.enrichment

    # Defaults — overridden by successful queries
    home_stats = dict(enr._DEFAULT_TEAM_STATS)
    away_stats = dict(enr._DEFAULT_TEAM_STATS)
    h2h = dict(enr._DEFAULT_H2H)
    home_form = dict(enr._DEFAULT_FORM)
    away_form = dict(enr._DEFAULT_FORM)
    ref = dict(enr._DEFAULT_REFEREE)
    league = dict(enr._DEFAULT_LEAGUE)
    home_momentum, away_momentum = 0.0, 0.0
    home_rolling = dict(enr._DEFAULT_ROLLING)
    away_rolling = dict(enr._DEFAULT_ROLLING)
    home_venue = dict(enr._DEFAULT_VENUE)
    away_venue = dict(enr._DEFAULT_VENUE)
    home_rest, away_rest = 7.0, 7.0
    odds_band_features = {}
    enrichment_failures = []

    try:
        conn = psycopg2.connect(self.dsn)
        try:
            def _record_failure(label: str, exc: Exception) -> None:
                # A failed statement leaves the PostgreSQL transaction
                # aborted; without a rollback every later enrichment query
                # on this connection would fail as well.
                enrichment_failures.append(f"{label}:{exc}")
                try:
                    conn.rollback()
                except Exception as rollback_exc:
                    enrichment_failures.append(f"rollback:{rollback_exc}")

            with conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
                # ELO
                try:
                    cur.execute(
                        "SELECT home_elo, away_elo, "
                        " home_home_elo, away_away_elo, "
                        " home_form_elo, away_form_elo "
                        "FROM football_ai_features "
                        "WHERE match_id = %s LIMIT 1",
                        (data.match_id,),
                    )
                    elo_row = cur.fetchone()
                    if elo_row:
                        home_elo = float(elo_row.get('home_elo') or 1500.0)
                        away_elo = float(elo_row.get('away_elo') or 1500.0)
                        home_venue_elo = float(elo_row.get('home_home_elo') or home_elo)
                        away_venue_elo = float(elo_row.get('away_away_elo') or away_elo)
                        home_form_elo_val = float(elo_row.get('home_form_elo') or home_elo)
                        away_form_elo_val = float(elo_row.get('away_form_elo') or away_elo)
                    else:
                        # No precomputed row: fall back to per-team ratings.
                        cur.execute(
                            "SELECT team_id, overall_elo, home_elo, away_elo, form_elo "
                            "FROM team_elo_ratings WHERE team_id IN (%s, %s)",
                            (data.home_team_id, data.away_team_id),
                        )
                        by_team = {str(r.get("team_id")): r for r in cur.fetchall()}
                        home_row = by_team.get(str(data.home_team_id))
                        away_row = by_team.get(str(data.away_team_id))
                        if home_row:
                            home_elo = float(home_row.get("overall_elo") or 1500.0)
                            home_venue_elo = float(home_row.get("home_elo") or home_elo)
                            home_form_elo_val = float(home_row.get("form_elo") or home_elo)
                        if away_row:
                            away_elo = float(away_row.get("overall_elo") or 1500.0)
                            away_venue_elo = float(away_row.get("away_elo") or away_elo)
                            away_form_elo_val = float(away_row.get("form_elo") or away_elo)
                    setattr(data, "feature_source", "football_ai_features" if elo_row else "live_prematch_enrichment")
                    # Staleness check: both teams at exact 1500 → ELO was never computed
                    if home_elo == 1500.0 and away_elo == 1500.0:
                        enrichment_failures.append("elo_stale:both_teams_at_default_1500")
                except Exception as e:
                    _record_failure("elo", e)
                    setattr(data, "feature_source", "fallback_defaults")

                # Team stats
                try:
                    home_stats = enr.compute_team_stats(cur, data.home_team_id, data.match_date_ms)
                    away_stats = enr.compute_team_stats(cur, data.away_team_id, data.match_date_ms)
                except Exception as e:
                    _record_failure("team_stats", e)

                # H2H
                try:
                    h2h = enr.compute_h2h(cur, data.home_team_id, data.away_team_id, data.match_date_ms)
                except Exception as e:
                    _record_failure("h2h", e)

                # Form
                try:
                    home_form = enr.compute_form_streaks(cur, data.home_team_id, data.match_date_ms)
                    away_form = enr.compute_form_streaks(cur, data.away_team_id, data.match_date_ms)
                except Exception as e:
                    _record_failure("form", e)

                # Referee
                try:
                    ref = enr.compute_referee_stats(cur, data.referee_name, data.match_date_ms)
                except Exception as e:
                    _record_failure("referee", e)

                # League
                try:
                    league = enr.compute_league_averages(cur, data.league_id, data.match_date_ms)
                except Exception as e:
                    _record_failure("league", e)

                # Momentum
                try:
                    home_momentum = enr.compute_momentum(cur, data.home_team_id, data.match_date_ms)
                    away_momentum = enr.compute_momentum(cur, data.away_team_id, data.match_date_ms)
                except Exception as e:
                    _record_failure("momentum", e)

                # V27 Rolling + Venue + Rest
                try:
                    home_rolling = enr.compute_rolling_stats(cur, data.home_team_id, data.match_date_ms)
                    away_rolling = enr.compute_rolling_stats(cur, data.away_team_id, data.match_date_ms)
                    home_venue = enr.compute_venue_stats(cur, data.home_team_id, data.match_date_ms, is_home=True)
                    away_venue = enr.compute_venue_stats(cur, data.away_team_id, data.match_date_ms, is_home=False)
                    home_rest = enr.compute_days_rest(cur, data.home_team_id, data.match_date_ms)
                    away_rest = enr.compute_days_rest(cur, data.away_team_id, data.match_date_ms)
                except Exception as e:
                    _record_failure("rolling/venue", e)

                # V28 Odds-Band
                try:
                    odds_band_features = self.odds_band_analyzer.compute_all(
                        cur=cur,
                        home_team_id=data.home_team_id,
                        away_team_id=data.away_team_id,
                        league_id=data.league_id,
                        odds=odds,
                        before_ts=data.match_date_ms,
                        referee_name=data.referee_name,
                    )
                except Exception as e:
                    _record_failure("odds_band", e)
        finally:
            # psycopg2's ``with conn`` only ends the transaction; the
            # connection itself has to be closed explicitly.
            conn.close()
    except Exception as e:
        enrichment_failures.append(f"db_connection:{e}")
        setattr(data, "feature_source", "fallback_defaults")

    setattr(data, "odds_band_features", odds_band_features)
    if enrichment_failures:
        print(f"⚠️ Enrichment partial failures for {data.match_id}: {', '.join(enrichment_failures)}")

    # Upset engine features
    upset_atmosphere, upset_motivation, upset_fatigue = 0.0, 0.0, 0.0
    try:
        upset_engine = get_upset_engine()
        upset_feats = upset_engine.get_features(
            home_team_name=getattr(data, 'home_team_name', '') or '',
            home_team_id=data.home_team_id,
            away_team_name=getattr(data, 'away_team_name', '') or '',
            league_name=getattr(data, 'league_name', '') or '',
            home_position=10,
            away_position=10,
            match_date_ms=data.match_date_ms,
            home_days_rest=int(home_rest),
            away_days_rest=int(away_rest),
        )
        upset_atmosphere = upset_feats.get('upset_atmosphere', 0.0)
        upset_motivation = upset_feats.get('upset_motivation', 0.0)
        upset_fatigue = upset_feats.get('upset_fatigue', 0.0)
    except Exception as e:
        print(f"⚠️ Upset engine failed: {e}")

    # (feature name, key in the sanitised odds dict) for every non-1X2 market.
    secondary_market_keys = (
        ('odds_ht_ms_h', 'ht_h'),
        ('odds_ht_ms_d', 'ht_d'),
        ('odds_ht_ms_a', 'ht_a'),
        ('odds_ou05_o', 'ou05_o'),
        ('odds_ou05_u', 'ou05_u'),
        ('odds_ou15_o', 'ou15_o'),
        ('odds_ou15_u', 'ou15_u'),
        ('odds_ou25_o', 'ou25_o'),
        ('odds_ou25_u', 'ou25_u'),
        ('odds_ou35_o', 'ou35_o'),
        ('odds_ou35_u', 'ou35_u'),
        ('odds_ht_ou05_o', 'ht_ou05_o'),
        ('odds_ht_ou05_u', 'ht_ou05_u'),
        ('odds_ht_ou15_o', 'ht_ou15_o'),
        ('odds_ht_ou15_u', 'ht_ou15_u'),
        ('odds_btts_y', 'btts_y'),
        ('odds_btts_n', 'btts_n'),
    )
    secondary_odds = {feature: _odd(key) for feature, key in secondary_market_keys}
    # One presence flag per quote: 1.0 when a real price (> 1.01) is offered.
    odds_presence = {
        f'{feature}_present': 1.0 if value > 1.01 else 0.0
        for feature, value in {
            'odds_ms_h': ms_h,
            'odds_ms_d': ms_d,
            'odds_ms_a': ms_a,
            **secondary_odds,
        }.items()
    }

    # ── Calendar features (V27) ──
    match_dt = datetime.datetime.fromtimestamp(
        data.match_date_ms / 1000, tz=datetime.timezone.utc
    )
    match_month = match_dt.month
    is_season_start = 1.0 if match_month in (7, 8, 9) else 0.0
    is_season_end = 1.0 if match_month in (5, 6) else 0.0

    # ── Cup game detection: dampen home advantage in feature space ──
    _league_name = (getattr(data, 'league_name', '') or '').lower()
    _cup_keywords = ("kupa", "cup", "coupe", "copa", "coppa", "pokal",
                     "trophy", "shield", "ziraat", "süper kupa", "super cup")
    _is_cup = any(kw in _league_name for kw in _cup_keywords)

    # ── Derived / Interaction features (V27) ──
    # Cup games: home ELO advantage is ~30% weaker (rotation, lower motivation)
    elo_diff = (home_elo - away_elo) * (0.70 if _is_cup else 1.0)
    form_elo_diff = home_form_elo_val - away_form_elo_val
    attack_vs_defense_home = data.home_goals_avg - data.away_conceded_avg
    attack_vs_defense_away = data.away_goals_avg - data.home_conceded_avg
    xga_home = data.home_conceded_avg
    xga_away = data.away_conceded_avg
    # NOTE(review): 'xg_diff' is computed from the conceded averages (xGA);
    # kept as-is — confirm it matches the training pipeline before changing.
    xg_diff = xga_home - xga_away
    mom_diff = home_momentum - away_momentum
    form_momentum_interaction = mom_diff * form_elo_diff / 1000.0
    elo_form_consistency = 1.0 - abs(elo_diff - form_elo_diff) / max(abs(elo_diff), 100.0)
    upset_x_elo_gap = upset_potential * abs(elo_diff) / 500.0

    return {
        # META (1)
        'mst_utc': float(data.match_date_ms),
        # ELO (8)
        'home_overall_elo': home_elo,
        'away_overall_elo': away_elo,
        'elo_diff': elo_diff,
        'home_home_elo': home_venue_elo,
        'away_away_elo': away_venue_elo,
        'home_form_elo': home_form_elo_val,
        'away_form_elo': away_form_elo_val,
        'form_elo_diff': form_elo_diff,
        # Form (12)
        'home_goals_avg': data.home_goals_avg,
        'home_conceded_avg': data.home_conceded_avg,
        'away_goals_avg': data.away_goals_avg,
        'away_conceded_avg': data.away_conceded_avg,
        'home_clean_sheet_rate': home_form['clean_sheet_rate'],
        'away_clean_sheet_rate': away_form['clean_sheet_rate'],
        'home_scoring_rate': home_form['scoring_rate'],
        'away_scoring_rate': away_form['scoring_rate'],
        'home_winning_streak': home_form['winning_streak'],
        'away_winning_streak': away_form['winning_streak'],
        'home_unbeaten_streak': home_form['unbeaten_streak'],
        'away_unbeaten_streak': away_form['unbeaten_streak'],
        # H2H (10 — original 6 + V27 expanded 4)
        'h2h_total_matches': h2h['total_matches'],
        'h2h_home_win_rate': h2h['home_win_rate'],
        'h2h_draw_rate': h2h['draw_rate'],
        'h2h_avg_goals': h2h['avg_goals'],
        'h2h_btts_rate': h2h['btts_rate'],
        'h2h_over25_rate': h2h['over25_rate'],
        'h2h_home_goals_avg': h2h['home_goals_avg'],
        'h2h_away_goals_avg': h2h['away_goals_avg'],
        'h2h_recent_trend': h2h['recent_trend'],
        'h2h_venue_advantage': h2h['venue_advantage'],
        # Stats (8)
        'home_avg_possession': home_stats['avg_possession'],
        'away_avg_possession': away_stats['avg_possession'],
        'home_avg_shots_on_target': home_stats['avg_shots_on_target'],
        'away_avg_shots_on_target': away_stats['avg_shots_on_target'],
        'home_shot_conversion': home_stats['shot_conversion'],
        'away_shot_conversion': away_stats['shot_conversion'],
        'home_avg_corners': home_stats['avg_corners'],
        'away_avg_corners': away_stats['avg_corners'],
        # Odds (23 values: 3 match-result + 3 implied + 17 other markets)
        'odds_ms_h': ms_h,
        'odds_ms_d': ms_d,
        'odds_ms_a': ms_a,
        'implied_home': implied_home,
        'implied_draw': implied_draw,
        'implied_away': implied_away,
        **secondary_odds,
        # Odds presence flags (20)
        **odds_presence,
        # League (9 — original 2 + V27 expanded 5 + xga 2)
        'home_xga': xga_home,
        'away_xga': xga_away,
        'league_avg_goals': league['avg_goals'],
        'league_zero_goal_rate': league['zero_goal_rate'],
        'league_home_win_rate': league['home_win_rate'],
        'league_draw_rate': league['draw_rate'],
        'league_btts_rate': league['btts_rate'],
        'league_ou25_rate': league['ou25_rate'],
        'league_reliability_score': league['reliability_score'],
        # Upset (4)
        'upset_atmosphere': upset_atmosphere,
        'upset_motivation': upset_motivation,
        'upset_fatigue': upset_fatigue,
        'upset_potential': upset_potential,
        # Referee (5)
        'referee_home_bias': ref['home_bias'],
        'referee_avg_goals': ref['avg_goals'],
        'referee_cards_total': ref['cards_total'],
        'referee_avg_yellow': ref['avg_yellow'],
        'referee_experience': ref['experience'],
        # Momentum (3)
        'home_momentum_score': home_momentum,
        'away_momentum_score': away_momentum,
        'momentum_diff': mom_diff,
        # ── V27 Rolling Stats (12) ──
        'home_rolling5_goals': home_rolling['rolling5_goals'],
        'home_rolling5_conceded': home_rolling['rolling5_conceded'],
        'home_rolling10_goals': home_rolling['rolling10_goals'],
        'home_rolling10_conceded': home_rolling['rolling10_conceded'],
        'home_rolling20_goals': home_rolling['rolling20_goals'],
        'home_rolling20_conceded': home_rolling['rolling20_conceded'],
        'away_rolling5_goals': away_rolling['rolling5_goals'],
        'away_rolling5_conceded': away_rolling['rolling5_conceded'],
        'away_rolling10_goals': away_rolling['rolling10_goals'],
        'away_rolling10_conceded': away_rolling['rolling10_conceded'],
        'home_rolling5_cs': home_rolling['rolling5_cs'],
        'away_rolling5_cs': away_rolling['rolling5_cs'],
        # ── V27 Venue Stats (4) ──
        'home_venue_goals': home_venue['venue_goals'],
        'home_venue_conceded': home_venue['venue_conceded'],
        'away_venue_goals': away_venue['venue_goals'],
        'away_venue_conceded': away_venue['venue_conceded'],
        # ── V27 Goal Trend (2) ──
        'home_goal_trend': home_rolling['rolling5_goals'] - home_rolling['rolling10_goals'],
        'away_goal_trend': away_rolling['rolling5_goals'] - away_rolling['rolling10_goals'],
        # ── V27 Calendar (5) ──
        'home_days_rest': home_rest,
        'away_days_rest': away_rest,
        'match_month': float(match_month),
        'is_season_start': is_season_start,
        'is_season_end': is_season_end,
        # ── V27 Interaction (6) ──
        'attack_vs_defense_home': attack_vs_defense_home,
        'attack_vs_defense_away': attack_vs_defense_away,
        'xg_diff': xg_diff,
        'form_momentum_interaction': form_momentum_interaction,
        'elo_form_consistency': elo_form_consistency,
        'upset_x_elo_gap': upset_x_elo_gap,
        # Squad features — produced by self._get_squad_features
        **self._get_squad_features(data),
        # V28 Odds-Band Historical Performance Features
        **odds_band_features,
    }
def _get_squad_features(self, data: MatchData) -> Dict[str, float]:
"""Non-fatal squad analysis with 12 player-level features."""
defaults = {
'home_squad_quality': 12.0, 'away_squad_quality': 12.0, 'squad_diff': 0.0,
'home_key_players': 3.0, 'away_key_players': 3.0,
'home_missing_impact': 0.0, 'away_missing_impact': 0.0,
'home_goals_form': 1.3, 'away_goals_form': 1.3,
'home_lineup_goals_per90': 0.0, 'away_lineup_goals_per90': 0.0,
'home_lineup_assists_per90': 0.0, 'away_lineup_assists_per90': 0.0,
'home_squad_continuity': 0.5, 'away_squad_continuity': 0.5,
'home_top_scorer_form': 0.0, 'away_top_scorer_form': 0.0,
'home_avg_player_exp': 0.0, 'away_avg_player_exp': 0.0,
'home_goals_diversity': 0.0, 'away_goals_diversity': 0.0,
}
try:
engine = get_player_predictor()
pred = engine.predict(
match_id=data.match_id,
home_team_id=data.home_team_id,
away_team_id=data.away_team_id,
home_lineup=data.home_lineup,
away_lineup=data.away_lineup,
sidelined_data=data.sidelined_data,
)
result = {
'home_squad_quality': float(pred.home_squad_quality or 0.0),
'away_squad_quality': float(pred.away_squad_quality or 0.0),
'squad_diff': float(pred.squad_diff or 0.0),
'home_key_players': float(pred.home_key_players or 0),
'away_key_players': float(pred.away_key_players or 0),
'home_missing_impact': float(pred.home_missing_impact or 0.0),
'away_missing_impact': float(pred.away_missing_impact or 0.0),
'home_goals_form': float(pred.home_goals_form or 0.0),
'away_goals_form': float(pred.away_goals_form or 0.0),
'home_lineup_goals_per90': float(pred.home_lineup_goals_per90 or 0.0),
'away_lineup_goals_per90': float(pred.away_lineup_goals_per90 or 0.0),
'home_lineup_assists_per90': float(pred.home_lineup_assists_per90 or 0.0),
'away_lineup_assists_per90': float(pred.away_lineup_assists_per90 or 0.0),
'home_squad_continuity': float(pred.home_squad_continuity or 0.5),
'away_squad_continuity': float(pred.away_squad_continuity or 0.5),
'home_top_scorer_form': float(pred.home_top_scorer_form or 0),
'away_top_scorer_form': float(pred.away_top_scorer_form or 0),
'home_avg_player_exp': float(pred.home_avg_player_exp or 0.0),
'away_avg_player_exp': float(pred.away_avg_player_exp or 0.0),
'home_goals_diversity': float(pred.home_goals_diversity or 0.0),
'away_goals_diversity': float(pred.away_goals_diversity or 0.0),
}
for side in ('home', 'away'):
sq = result[f'{side}_squad_quality']
if sq > 50 or sq < 0:
print(f"🚨 SCALE MISMATCH: {side}_squad_quality={sq:.1f} "
f"(expected 3-36). Check player_predictor formula!")
return result
except Exception as e:
print(f"⚠️ Squad features failed: {e}")
return defaults
def _sanitize_v25_odds(self, odds_data: Dict[str, Any]) -> Dict[str, float]:
sanitized: Dict[str, float] = {}
for key in self.V25_ODDS_FEATURE_KEYS:
sanitized[key] = self._real_market_odds(odds_data, key)
for key in ("dc_1x", "dc_x2", "dc_12", "oe_odd", "oe_even", "cards_o", "cards_u", "hcap_h", "hcap_d", "hcap_a"):
if key in odds_data:
sanitized[key] = self._real_market_odds(odds_data, key)
return sanitized
+231
View File
@@ -0,0 +1,231 @@
"""HT/MS Mixin — analyze_match_htms endpoint and helpers.
Auto-extracted mixin module split from services/single_match_orchestrator.py.
All methods here are composed into SingleMatchOrchestrator via inheritance.
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
initialised in the main __init__.
"""
from __future__ import annotations
import json
import re
import time
import math
import os
import pickle
from collections import defaultdict
from typing import Any, Dict, List, Optional, Set, Tuple, overload
import pandas as pd
import numpy as np
import psycopg2
from psycopg2.extras import RealDictCursor
from data.db import get_clean_dsn
from schemas.prediction import FullMatchPrediction
from schemas.match_data import MatchData
from models.v25_ensemble import V25Predictor, get_v25_predictor
try:
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
except ImportError:
class V27Predictor: # type: ignore[no-redef]
def __init__(self): self.models = {}
def load_models(self): return False
def predict_all(self, features): return {}
def compute_divergence(*args, **kwargs):
return {}
def compute_value_edge(*args, **kwargs):
return {}
from features.odds_band_analyzer import OddsBandAnalyzer
try:
from models.basketball_v25 import (
BasketballMatchPrediction,
get_basketball_v25_predictor,
)
except ImportError:
BasketballMatchPrediction = Any # type: ignore[misc]
def get_basketball_v25_predictor() -> Any:
raise ImportError("Basketball predictor is not available")
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
from services.feature_enrichment import FeatureEnrichmentService
from services.betting_brain import BettingBrain
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
from services.match_commentary import generate_match_commentary
from utils.top_leagues import load_top_league_ids
from utils.league_reliability import load_league_reliability
from config.config_loader import build_threshold_dict, get_threshold_default
from models.calibration import get_calibrator
class HtmsMixin:
    """Mixin providing the HT/MS (half-time / match-result) analysis endpoint.

    The methods rely on attributes and helpers supplied by the composing
    class: ``self._load_match_data``, ``self.analyze_match``,
    ``self.top_league_ids`` and ``self._to_float``.
    """

    def analyze_match_htms(self, match_id: str) -> Optional[Dict[str, Any]]:
        """
        HT/MS focused response for upset-hunting workflows.

        This endpoint is intentionally additive and does not mutate the
        standard /v20plus/analyze package contract.

        Returns:
            ``None`` when the match cannot be loaded or the base analysis
            yields nothing; a ``{"status": "skip", ...}`` dict when the match
            is not football, is outside the top-league scope, or lacks MS/HT
            odds; otherwise a ``{"status": "ok", ...}`` package with the
            ``htms_core`` and ``surprise_hunter`` sections.
        """
        data = self._load_match_data(match_id)
        if data is None:
            return None

        if str(data.sport or "").lower() != "football":
            return {
                "status": "skip",
                "match_id": match_id,
                "reason": "unsupported_sport",
                "engine_used": "htms_router",
            }

        is_top_league = self._is_top_league(data.league_id)
        engine_used = "v20plus_top_htms"

        # Hard gate: HT/MS upset model is trained on top leagues only.
        if not is_top_league:
            return {
                "status": "skip",
                "match_id": match_id,
                "reason": "out_of_training_scope",
                "engine_used": engine_used,
                "data_quality": {
                    "label": "LOW",
                    "flags": ["league_out_of_scope"],
                },
            }

        missing_requirements = self._missing_htms_requirements(data)
        if missing_requirements:
            return {
                "status": "skip",
                "match_id": match_id,
                "reason": "missing_critical_data",
                "missing": missing_requirements,
                "engine_used": engine_used,
                "data_quality": {
                    "label": "LOW",
                    "flags": [f"missing_{item}" for item in missing_requirements],
                },
            }

        base_package = self.analyze_match(match_id)
        if not base_package:
            return None

        data_quality = base_package.get("data_quality", {})
        market_board = base_package.get("market_board", {})
        ms_market = market_board.get("MS", {})
        ht_market = market_board.get("HT", {})
        htft_probs = market_board.get("HTFT", {}).get("probs", {})

        # The four HT/FT outcomes tracked as "reversals".
        reversal_probs = {
            "1/2": float(htft_probs.get("1/2", 0.0)),
            "2/1": float(htft_probs.get("2/1", 0.0)),
            "X/1": float(htft_probs.get("X/1", 0.0)),
            "X/2": float(htft_probs.get("X/2", 0.0)),
        }
        top_reversal = max(reversal_probs.items(), key=lambda item: item[1])

        ms_conf = float(ms_market.get("confidence", 0.0))
        ht_conf = float(ht_market.get("confidence", 0.0))
        base_conf = (ms_conf + ht_conf) / 2.0

        # Each data weakness lowers the ceiling on the combined confidence.
        confidence_cap = 100.0
        penalties: List[str] = []
        if data.lineup_source == "probable_xi":
            confidence_cap = min(confidence_cap, 72.0)
            penalties.append("lineup_probable_xi")
        if data.lineup_source == "none":
            confidence_cap = min(confidence_cap, 58.0)
            penalties.append("lineup_unavailable")
        if str(data_quality.get("label", "LOW")).upper() == "LOW":
            confidence_cap = min(confidence_cap, 55.0)
            penalties.append("low_data_quality")
        final_conf = min(base_conf, confidence_cap)

        upset_score = self._compute_htms_upset_score(
            reversal_probs=reversal_probs,
            odds_data=data.odds_data,
            is_top_league=is_top_league,
        )
        upset_threshold = 58.0 if is_top_league else 54.0
        upset_playable = (
            upset_score >= upset_threshold
            and top_reversal[1] >= 0.045
            and final_conf >= 45.0
            and "low_data_quality" not in penalties
        )

        return {
            "status": "ok",
            "engine_used": engine_used,
            "match_info": base_package.get("match_info", {}),
            "data_quality": data_quality,
            "htms_core": {
                "ms_pick": ms_market.get("pick"),
                "ms_confidence": round(ms_conf, 1),
                "ht_pick": ht_market.get("pick"),
                "ht_confidence": round(ht_conf, 1),
                "combined_confidence": round(final_conf, 1),
                "confidence_cap": round(confidence_cap, 1),
                "penalties": penalties,
            },
            "surprise_hunter": {
                "upset_score": round(upset_score, 1),
                "threshold": upset_threshold,
                "playable": upset_playable,
                "top_reversal_pick": top_reversal[0],
                "top_reversal_prob": round(top_reversal[1], 4),
                "reversal_probs": {
                    key: round(value, 4) for key, value in reversal_probs.items()
                },
            },
            "risk": base_package.get("risk", {}),
            "reasoning_factors": base_package.get("reasoning_factors", []),
        }

    def _is_top_league(self, league_id: Optional[str]) -> bool:
        """Return True when ``league_id`` (compared as a string) is in ``self.top_league_ids``."""
        if not league_id:
            return False
        return str(league_id) in self.top_league_ids

    def _missing_htms_requirements(self, data: MatchData) -> List[str]:
        """List the odds groups the HT/MS analysis needs but the match lacks.

        A group counts as present only when all three of its prices are
        numeric and greater than 1.0. A missing odds mapping or an
        unparsable price (e.g. ``"-"``) is reported as missing data
        instead of raising.

        Returns:
            A list containing ``"ms_odds"`` and/or ``"ht_odds"``; empty when
            both groups are complete.
        """
        odds = getattr(data, "odds_data", None) or {}

        def _is_priced(key: str) -> bool:
            try:
                return float(odds.get(key, 0.0) or 0.0) > 1.0
            except (TypeError, ValueError):
                # Non-numeric quote: treat the market as not offered.
                return False

        required_groups = (
            ("ms_odds", ("ms_h", "ms_d", "ms_a")),
            ("ht_odds", ("ht_h", "ht_d", "ht_a")),
        )
        return [
            group_name
            for group_name, keys in required_groups
            if not all(_is_priced(key) for key in keys)
        ]

    def _compute_htms_upset_score(
        self,
        reversal_probs: Dict[str, float],
        odds_data: Dict[str, float],
        is_top_league: bool,
    ) -> float:
        """Score (0-100) how strong the HT/FT reversal signal is.

        The score blends the largest reversal probability (60%), the sum of
        all reversal probabilities (25%) and the home/away odds gap (15%,
        saturating at a gap of 2.0). Non-top leagues are scaled by 0.92.
        """
        ms_h = self._to_float(odds_data.get("ms_h"), 0.0)
        ms_a = self._to_float(odds_data.get("ms_a"), 0.0)
        if ms_h <= 1.0 or ms_a <= 1.0:
            # Without two valid prices the gap carries no information.
            favorite_gap = 0.0
        else:
            favorite_gap = abs(ms_h - ms_a)

        reversal_max = max(reversal_probs.values()) if reversal_probs else 0.0
        reversal_sum = sum(reversal_probs.values())

        # Strong favorite + reversal probability is the core upset signal.
        gap_factor = min(1.0, favorite_gap / 2.0)
        score = (
            (reversal_max * 100.0 * 0.60)
            + (reversal_sum * 100.0 * 0.25)
            + (gap_factor * 100.0 * 0.15)
        )
        if not is_top_league:
            # Non-top leagues are noisier; keep it slightly conservative.
            score *= 0.92
        return max(0.0, min(100.0, score))
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,662 @@
"""Prediction Mixin — V25 signal extraction and prediction building.
Auto-extracted mixin module split from services/single_match_orchestrator.py.
All methods here are composed into SingleMatchOrchestrator via inheritance.
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
initialised in the main __init__.
"""
from __future__ import annotations
import json
import re
import time
import math
import os
import pickle
from collections import defaultdict
from typing import Any, Dict, List, Optional, Set, Tuple, overload
import pandas as pd
import numpy as np
import psycopg2
from psycopg2.extras import RealDictCursor
from data.db import get_clean_dsn
from schemas.prediction import FullMatchPrediction
from schemas.match_data import MatchData
from models.v25_ensemble import V25Predictor, get_v25_predictor
try:
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
except ImportError:
class V27Predictor: # type: ignore[no-redef]
def __init__(self): self.models = {}
def load_models(self): return False
def predict_all(self, features): return {}
def compute_divergence(*args, **kwargs):
return {}
def compute_value_edge(*args, **kwargs):
return {}
from features.odds_band_analyzer import OddsBandAnalyzer
try:
from models.basketball_v25 import (
BasketballMatchPrediction,
get_basketball_v25_predictor,
)
except ImportError:
BasketballMatchPrediction = Any # type: ignore[misc]
def get_basketball_v25_predictor() -> Any:
raise ImportError("Basketball predictor is not available")
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
from services.feature_enrichment import FeatureEnrichmentService
from services.betting_brain import BettingBrain
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
from services.match_commentary import generate_match_commentary
from utils.top_leagues import load_top_league_ids
from utils.league_reliability import load_league_reliability
from config.config_loader import build_threshold_dict, get_threshold_default, get_config
from models.calibration import get_calibrator
from models.league_model import get_league_model_loader, FILE_TO_SIGNAL
class PredictionMixin:
def _get_score_model(self) -> Optional[Dict]:
"""Load XGBoost score prediction model (non-fatal)."""
if hasattr(self, "_score_model_cache"):
return self._score_model_cache
score_model_path = os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
"models", "xgb_score.pkl",
)
try:
if os.path.exists(score_model_path):
with open(score_model_path, "rb") as f:
model_data = pickle.load(f)
if all(k in model_data for k in ("home_model", "away_model", "ht_home_model", "ht_away_model", "features")):
self._score_model_cache = model_data
print(f"[SCORE] ✅ Score model loaded ({len(model_data['features'])} features)")
return self._score_model_cache
except Exception as e:
print(f"[SCORE] ⚠ Load failed (non-fatal, using heuristic): {e}")
self._score_model_cache = None
return None
def _predict_score_with_model(self, features: Dict[str, float]) -> Optional[Dict[str, float]]:
"""Predict FT/HT scores using XGBoost score model."""
score_model = self._get_score_model()
if score_model is None:
return None
try:
import pandas as _pd
model_features = score_model["features"]
row = {f: float(features.get(f, 0)) for f in model_features}
df = _pd.DataFrame([row])
ft_home = max(0.0, float(score_model["home_model"].predict(df)[0]))
ft_away = max(0.0, float(score_model["away_model"].predict(df)[0]))
ht_home = max(0.0, float(score_model["ht_home_model"].predict(df)[0]))
ht_away = max(0.0, float(score_model["ht_away_model"].predict(df)[0]))
return {
"ft_home": round(ft_home, 2),
"ft_away": round(ft_away, 2),
"ht_home": round(ht_home, 2),
"ht_away": round(ht_away, 2),
}
except Exception as e:
print(f"[SCORE] ⚠ Prediction error (fallback to heuristic): {e}")
return None
# Maps V25 model keys (lowercase) to the UPPERCASE market names used as keys
# of the signal dict built by _get_v25_signal. Model keys absent from this
# map fall back to their upper-cased form there.
_V25_KEY_MAP = {
    "ms": "MS",
    "ou15": "OU15",
    "ou25": "OU25",
    "ou35": "OU35",
    "btts": "BTTS",
    "ht_result": "HT",
    "ht_ou05": "HT_OU05",
    "ht_ou15": "HT_OU15",
    "htft": "HTFT",
    "cards_ou45": "CARDS",
    "handicap_ms": "HCAP",
    "odd_even": "OE",
}
def _get_v25_signal(
    self,
    data: MatchData,
    features: Optional[Dict[str, float]] = None,
) -> Dict[str, Any]:
    """
    Get V25 ensemble predictions for all available markets.

    Returns a dict keyed by UPPERCASE market name (MS, OU25, BTTS, etc.)
    each with a 'probs' sub-dict that _prob_map can consume.

    CRITICAL: Keys MUST be uppercase to match _build_v25_prediction lookups.

    Markets predicted by a league-specific model (when one is available for
    ``data.league_id``) are used as-is; every other market is taken from the
    general V25 predictor and passed through temperature scaling and
    per-outcome calibration.

    Raises:
        RuntimeError: if no market at all could be predicted.
    """
    v25 = self._get_v25_predictor()
    feature_row = features or self._build_v25_features(data)
    signal: Dict[str, Any] = {}

    # ── League-specific model override ─────────────────────────────────
    league_id = getattr(data, "league_id", None)
    league_model = None
    if league_id:
        try:
            league_model = get_league_model_loader().get(league_id)
        except Exception as exc:
            # Best-effort: fall back to the general V25 model, but say why.
            print(f" [LEAGUE-MODEL] {league_id}: load failed, using general V25 ({exc})")
            league_model = None

    if league_model:
        # Predict all available markets with league-specific XGBoost
        for mkey, sig_key in FILE_TO_SIGNAL.items():
            probs = league_model.predict_market(mkey, feature_row)
            if probs:
                best_label = max(probs, key=probs.__getitem__)
                signal[sig_key] = {
                    "probs": probs,
                    "raw_probs": probs,
                    "pick": best_label,
                    "probability": float(probs[best_label]),
                    "confidence": round(float(probs[best_label]) * 100.0, 1),
                    "source": "league_specific",
                }
        if signal:
            print(f" [LEAGUE-MODEL] {league_id}: {len(signal)} markets predicted")
        # Markets the league model did not cover are filled from the
        # general V25 predictor below.

    def _temperature_scale(probs_dict: Dict[str, float], temperature: float = 1.5) -> Dict[str, float]:
        """
        Apply temperature scaling to soften overconfident model outputs.

        LightGBM often produces extreme probabilities (e.g., 0.999 / 0.001).
        Each probability is clipped, its logarithm divided by an effective
        temperature T, and the result re-normalised with a softmax.
        Larger T gives softer probabilities. T depends on the number of
        outcomes and has a floor per market size (1.5 for binary, 1.2 for
        3-way, 1.0 for larger markets such as the 9-way HTFT).

        Standard approach for post-hoc model calibration (Guo et al., 2017).

        V34: Reduced from 2.5 to 1.5 because the V25 model is already
        calibrated via odds-aware training. Excessive flattening was
        destroying signal.
        """
        eps = 1e-7  # numerical stability
        n = len(probs_dict)

        # V34: Reduced temperature — odds-aware model is already calibrated
        # Binary markets (2-class) tend to be more overconfident in LGB
        if n <= 2:
            T = max(temperature, 1.5)  # was 2.0
        elif n == 3:
            T = max(temperature * 0.8, 1.2)  # was 1.5 — 3-way slightly less aggressive
        else:
            T = max(temperature * 0.6, 1.0)  # was 1.3 — 9-way (HTFT) already spread

        # Scale the clipped log-probabilities by the temperature
        labels = list(probs_dict.keys())
        log_odds = []
        for label in labels:
            p = max(eps, min(1.0 - eps, float(probs_dict[label])))
            log_odds.append(math.log(p) / T)

        # Softmax re-normalization (max-shifted for numerical stability)
        max_lo = max(log_odds)
        exp_vals = [math.exp(lo - max_lo) for lo in log_odds]
        total = sum(exp_vals)
        return {label: exp_vals[i] / total for i, label in enumerate(labels)}

    calibrator = get_calibrator()
    _temperature = float(get_config().get('model_ensemble.temperature', 1.5))

    # Map "<market_key>_<label>" → calibrator market key.
    # The half-time result market is enriched under the model key
    # "ht_result", so its generated keys are "ht_result_1/x/2"; the short
    # "ht_*" spellings are kept as aliases.
    _CAL_KEY_MAP: Dict[str, str] = {
        "ms_1": "ms_home", "ms_x": "ms_draw", "ms_2": "ms_away",
        "ou15_over": "ou15", "ou15_under": "ou15",
        "ou25_over": "ou25", "ou25_under": "ou25",
        "ou35_over": "ou35", "ou35_under": "ou35",
        "btts_yes": "btts", "btts_no": "btts",
        "ht_1": "ht_home", "ht_x": "ht_draw", "ht_2": "ht_away",
        "ht_result_1": "ht_home", "ht_result_x": "ht_draw", "ht_result_2": "ht_away",
    }

    def _enrich_signal_entry(probs_dict: Dict[str, float], market_key: str = "") -> Dict[str, Any]:
        """Temperature scaling + Isotonic calibration pipeline."""
        scaled_probs = _temperature_scale(probs_dict, temperature=_temperature)

        # Isotonic calibration per outcome (if trained models exist)
        if market_key:
            calibrated = {}
            for label, prob in scaled_probs.items():
                raw_key = f"{market_key}_{label}".lower().replace(" ", "_")
                cal_key = _CAL_KEY_MAP.get(raw_key, raw_key)
                calibrated[label] = calibrator.calibrate(cal_key, prob)
            total = sum(calibrated.values())
            if total > 0:
                # Re-normalise; if calibration zeroed every outcome, keep
                # the temperature-scaled probabilities instead.
                scaled_probs = {k: v / total for k, v in calibrated.items()}

        best_label = max(scaled_probs, key=scaled_probs.__getitem__)
        best_prob = float(scaled_probs[best_label])
        return {
            "probs": scaled_probs,
            "raw_probs": probs_dict,
            "pick": best_label,
            "probability": best_prob,
            "confidence": round(best_prob * 100.0, 1),
        }

    # Core markets using dedicated methods (skip if league model already covered them)
    if "MS" not in signal:
        h, d, a = v25.predict_ms(feature_row)
        signal["MS"] = _enrich_signal_entry({"1": h, "X": d, "2": a}, "ms")
        print(f" [V25-SIGNAL] MS → H={h:.4f} D={d:.4f} A={a:.4f}")
    else:
        print(f" [LEAGUE-MODEL] MS → {signal['MS']['probs']}")

    if "OU25" not in signal:
        over25, under25 = v25.predict_ou25(feature_row)
        signal["OU25"] = _enrich_signal_entry({"Over": over25, "Under": under25}, "ou25")
        print(f" [V25-SIGNAL] OU25 → O={over25:.4f} U={under25:.4f}")

    if "BTTS" not in signal:
        btts_y, btts_n = v25.predict_btts(feature_row)
        signal["BTTS"] = _enrich_signal_entry({"Yes": btts_y, "No": btts_n}, "btts")
        print(f" [V25-SIGNAL] BTTS → Y={btts_y:.4f} N={btts_n:.4f}")

    # Additional markets via generic predict_market (skip if league model covered them).
    # Only the keys (outcome labels, in model output order) and the size of
    # each label map are used; a None map marks the 9-way HTFT market.
    for model_key, label_map in [
        ("ou15", {"Over": 0, "Under": None}),
        ("ou35", {"Over": 0, "Under": None}),
        ("ht_result", {"1": 0, "X": 1, "2": 2}),
        ("ht_ou05", {"Over": 0, "Under": None}),
        ("ht_ou15", {"Over": 0, "Under": None}),
        ("htft", None),
        ("cards_ou45", {"Over": 0, "Under": None}),
        ("handicap_ms", {"1": 0, "X": 1, "2": 2}),
        ("odd_even", {"Odd": 0, "Even": None}),
    ]:
        out_key = str(self._V25_KEY_MAP.get(model_key, model_key.upper()))
        if out_key in signal:
            continue  # already predicted by league-specific model
        if not v25.has_market(model_key):
            continue
        raw = v25.predict_market(model_key, feature_row)
        if raw is None:
            continue

        if label_map is None:
            # HTFT — 9 combinations; outputs beyond the model's length become 0.0
            htft_labels = ["1/1", "1/X", "1/2", "X/1", "X/X", "X/2", "2/1", "2/X", "2/2"]
            probs_dict = {}
            for i, label in enumerate(htft_labels):
                probs_dict[label] = float(raw[i]) if i < len(raw) else 0.0
            signal[out_key] = _enrich_signal_entry(probs_dict, model_key)
        elif len(label_map) == 2:
            # Binary market: raw[0] is the probability of the first label
            labels = list(label_map.keys())
            p = float(raw[0]) if len(raw) >= 1 else None
            if p is None:
                print(f" [V25-SIGNAL] {out_key} → EMPTY raw output, skipped")
                continue
            signal[out_key] = _enrich_signal_entry({labels[0]: p, labels[1]: 1.0 - p}, model_key)
        elif len(label_map) == 3:
            # 3-class market: only accepted when all three outputs are present
            labels = list(label_map.keys())
            probs_dict = {}
            for i, label in enumerate(labels):
                if i >= len(raw):
                    print(f" [V25-SIGNAL] {out_key} → insufficient probabilities in raw output")
                    break
                probs_dict[label] = float(raw[i])
            else:
                signal[out_key] = _enrich_signal_entry(probs_dict, model_key)

        if out_key in signal:
            print(f" [V25-SIGNAL] {out_key} → {signal[out_key]['probs']}")

    print(f" [V25-SIGNAL] Total markets with real predictions: {len(signal)}")
    if not signal:
        raise RuntimeError("V25 model produced ZERO market predictions — cannot continue")
    return signal
@staticmethod
def _prob_map(signal: Optional[Dict[str, Any]], market: str, defaults: Dict[str, float]) -> Dict[str, float]:
"""Extract normalised probabilities from signal.
If the signal contains real model output for this market, use it.
If the market is missing from the signal, log a warning and return
the defaults as a LAST RESORT (so the pipeline doesn't crash).
The defaults are ONLY used for non-core / secondary markets that
may not have a trained model yet (e.g. CARDS, HCAP, OE).
"""
market_payload = signal.get(market, {}) if isinstance(signal, dict) else {}
probs = market_payload.get("probs", {}) if isinstance(market_payload, dict) else {}
if not isinstance(probs, dict) or not probs:
print(f" ⚠️ [PROB_MAP] Market '{market}' NOT found in V25 signal — model output missing")
return dict(defaults)
out = {key: float(probs.get(key, value)) for key, value in defaults.items()}
total = sum(out.values())
if total <= 0:
print(f" ⚠️ [PROB_MAP] Market '{market}' has zero total probability")
return dict(defaults)
return {key: value / total for key, value in out.items()}
@staticmethod
def _is_cup_game(league_name: str) -> bool:
"""Detect cup/knockout competitions where home advantage is significantly weaker."""
name = (league_name or "").lower()
cup_keywords = (
"kupa", "cup", "coupe", "copa", "coppa", "pokal",
"trophy", "shield", "challenge",
"ziraat", "süper kupa", "super cup",
)
return any(kw in name for kw in cup_keywords)
@staticmethod
def _best_prob_pick(prob_map: Dict[str, float]) -> Tuple[str, float]:
if not prob_map:
return "", 0.0
pick = max(prob_map, key=prob_map.__getitem__)
return pick, float(prob_map[pick])
@staticmethod
def _poisson_score_top5(home_xg: float, away_xg: float, max_goals: int = 5) -> List[Dict[str, Any]]:
def poisson_p(lmbda: float, k: int) -> float:
return math.exp(-lmbda) * (lmbda ** k) / math.factorial(k)
scores: List[Tuple[str, float]] = []
for home_goals in range(max_goals + 1):
for away_goals in range(max_goals + 1):
prob = poisson_p(home_xg, home_goals) * poisson_p(away_xg, away_goals)
scores.append((f"{home_goals}-{away_goals}", prob))
scores.sort(key=lambda item: item[1], reverse=True)
return [
{"score": score, "prob": round(prob, 4)}
for score, prob in scores[:5]
]
def _build_v25_prediction(
self,
data: MatchData,
features: Dict[str, float],
v25_signal: Dict[str, Any],
) -> FullMatchPrediction:
prediction = FullMatchPrediction(
match_id=data.match_id,
home_team=data.home_team_name,
away_team=data.away_team_name,
)
ms_probs = self._prob_map(v25_signal, "MS", {"1": 0.33, "X": 0.34, "2": 0.33})
ou15_probs = self._prob_map(v25_signal, "OU15", {"Under": 0.5, "Over": 0.5})
ou25_probs = self._prob_map(v25_signal, "OU25", {"Under": 0.5, "Over": 0.5})
ou35_probs = self._prob_map(v25_signal, "OU35", {"Under": 0.5, "Over": 0.5})
btts_probs = self._prob_map(v25_signal, "BTTS", {"No": 0.5, "Yes": 0.5})
ht_probs = self._prob_map(v25_signal, "HT", {"1": 0.33, "X": 0.34, "2": 0.33})
ht_ou05_probs = self._prob_map(v25_signal, "HT_OU05", {"Under": 0.5, "Over": 0.5})
ht_ou15_probs = self._prob_map(v25_signal, "HT_OU15", {"Under": 0.5, "Over": 0.5})
htft_probs = self._prob_map(
v25_signal,
"HTFT",
{"1/1": 1 / 9, "1/X": 1 / 9, "1/2": 1 / 9, "X/1": 1 / 9, "X/X": 1 / 9, "X/2": 1 / 9, "2/1": 1 / 9, "2/X": 1 / 9, "2/2": 1 / 9},
)
oe_probs = self._prob_map(v25_signal, "OE", {"Even": 0.5, "Odd": 0.5})
cards_probs = self._prob_map(v25_signal, "CARDS", {"Under": 0.5, "Over": 0.5})
hcap_probs = self._prob_map(v25_signal, "HCAP", {"1": 0.33, "X": 0.34, "2": 0.33})
# Cup game: dampen home advantage — model trained on league data overestimates home edge
is_cup = self._is_cup_game(getattr(data, "league_name", "") or "")
if is_cup:
# Shift 8% of home probability toward away and draw (rotation, neutral venue effect)
cup_transfer = ms_probs["1"] * 0.08
ms_probs = {
"1": ms_probs["1"] - cup_transfer,
"X": ms_probs["X"] + cup_transfer * 0.4,
"2": ms_probs["2"] + cup_transfer * 0.6,
}
total = sum(ms_probs.values())
ms_probs = {k: v / total for k, v in ms_probs.items()}
prediction.ms_home_prob = ms_probs["1"]
prediction.ms_draw_prob = ms_probs["X"]
prediction.ms_away_prob = ms_probs["2"]
prediction.ms_pick, ms_top = self._best_prob_pick(ms_probs)
prediction.ms_confidence = ms_top * 100.0
prediction.dc_1x_prob = prediction.ms_home_prob + prediction.ms_draw_prob
prediction.dc_x2_prob = prediction.ms_draw_prob + prediction.ms_away_prob
prediction.dc_12_prob = prediction.ms_home_prob + prediction.ms_away_prob
dc_probs = {"1X": prediction.dc_1x_prob, "X2": prediction.dc_x2_prob, "12": prediction.dc_12_prob}
prediction.dc_pick, dc_top = self._best_prob_pick(dc_probs)
prediction.dc_confidence = dc_top * 100.0
prediction.over_15_prob = ou15_probs["Over"]
prediction.under_15_prob = ou15_probs["Under"]
prediction.ou15_pick = "1.5 Üst" if prediction.over_15_prob >= prediction.under_15_prob else "1.5 Alt"
prediction.ou15_confidence = max(prediction.over_15_prob, prediction.under_15_prob) * 100.0
prediction.over_25_prob = ou25_probs["Over"]
prediction.under_25_prob = ou25_probs["Under"]
prediction.ou25_pick = "2.5 Üst" if prediction.over_25_prob >= prediction.under_25_prob else "2.5 Alt"
prediction.ou25_confidence = max(prediction.over_25_prob, prediction.under_25_prob) * 100.0
prediction.over_35_prob = ou35_probs["Over"]
prediction.under_35_prob = ou35_probs["Under"]
prediction.ou35_pick = "3.5 Üst" if prediction.over_35_prob >= prediction.under_35_prob else "3.5 Alt"
prediction.ou35_confidence = max(prediction.over_35_prob, prediction.under_35_prob) * 100.0
prediction.btts_yes_prob = btts_probs["Yes"]
prediction.btts_no_prob = btts_probs["No"]
prediction.btts_pick = "KG Var" if prediction.btts_yes_prob >= prediction.btts_no_prob else "KG Yok"
prediction.btts_confidence = max(prediction.btts_yes_prob, prediction.btts_no_prob) * 100.0
prediction.ht_home_prob = ht_probs["1"]
prediction.ht_draw_prob = ht_probs["X"]
prediction.ht_away_prob = ht_probs["2"]
prediction.ht_pick, ht_top = self._best_prob_pick(ht_probs)
prediction.ht_confidence = ht_top * 100.0
prediction.ht_over_05_prob = ht_ou05_probs["Over"]
prediction.ht_under_05_prob = ht_ou05_probs["Under"]
prediction.ht_ou_pick = "İY 0.5 Üst" if prediction.ht_over_05_prob >= prediction.ht_under_05_prob else "İY 0.5 Alt"
prediction.ht_over_15_prob = ht_ou15_probs["Over"]
prediction.ht_under_15_prob = ht_ou15_probs["Under"]
prediction.ht_ou15_pick = "İY 1.5 Üst" if prediction.ht_over_15_prob >= prediction.ht_under_15_prob else "İY 1.5 Alt"
prediction.ht_ft_probs = htft_probs
prediction.odd_prob = oe_probs["Odd"]
prediction.even_prob = oe_probs["Even"]
prediction.odd_even_pick = "Tek" if prediction.odd_prob >= prediction.even_prob else "Çift"
prediction.cards_over_prob = cards_probs["Over"]
prediction.cards_under_prob = cards_probs["Under"]
prediction.card_pick = "4.5 Üst" if prediction.cards_over_prob >= prediction.cards_under_prob else "4.5 Alt"
prediction.cards_confidence = max(prediction.cards_over_prob, prediction.cards_under_prob) * 100.0
prediction.handicap_home_prob = hcap_probs["1"]
prediction.handicap_draw_prob = hcap_probs["X"]
prediction.handicap_away_prob = hcap_probs["2"]
prediction.handicap_pick, hcap_top = self._best_prob_pick(hcap_probs)
prediction.handicap_confidence = hcap_top * 100.0
# ── Score Prediction: Model-first, heuristic fallback ──────────
ms_edge = prediction.ms_home_prob - prediction.ms_away_prob
score_result = self._predict_score_with_model(features)
if score_result is not None:
# ML model predicted scores
prediction.home_xg = score_result["ft_home"]
prediction.away_xg = score_result["ft_away"]
prediction.total_xg = round(prediction.home_xg + prediction.away_xg, 2)
ht_home_xg = score_result["ht_home"]
ht_away_xg = score_result["ht_away"]
prediction.predicted_ft_score = f"{int(round(prediction.home_xg))}-{int(round(prediction.away_xg))}"
prediction.predicted_ht_score = f"{int(round(ht_home_xg))}-{int(round(ht_away_xg))}"
else:
# Heuristic fallback (original formula)
base_home_xg = max(0.25, (float(data.home_goals_avg or 1.3) + float(features.get("away_xga", data.away_conceded_avg) or 1.2)) / 2.0)
base_away_xg = max(0.25, (float(data.away_goals_avg or 1.3) + float(features.get("home_xga", data.home_conceded_avg) or 1.2)) / 2.0)
# ms_edge already computed above
total_target = max(
1.4,
min(
4.8,
(float(features.get("league_avg_goals", 2.7)) * 0.55)
+ ((float(data.home_goals_avg or 1.3) + float(data.away_goals_avg or 1.3)) * 0.45)
+ ((prediction.over_25_prob - prediction.under_25_prob) * 1.15),
),
)
home_xg = max(0.2, base_home_xg + (ms_edge * 0.55) + ((prediction.btts_yes_prob - 0.5) * 0.18))
away_xg = max(0.2, base_away_xg - (ms_edge * 0.55) + ((prediction.btts_yes_prob - 0.5) * 0.18))
scale = total_target / max(home_xg + away_xg, 0.1)
prediction.home_xg = round(home_xg * scale, 2)
prediction.away_xg = round(away_xg * scale, 2)
prediction.total_xg = round(prediction.home_xg + prediction.away_xg, 2)
# Cup game: reduce xG by 20% — rotation + lower motivation + defensive tactics
if is_cup:
prediction.home_xg = round(prediction.home_xg * 0.80, 2)
prediction.away_xg = round(prediction.away_xg * 0.80, 2)
prediction.total_xg = round(prediction.home_xg + prediction.away_xg, 2)
prediction.predicted_ft_score = f"{int(round(prediction.home_xg))}-{int(round(prediction.away_xg))}"
prediction.predicted_ht_score = f"{int(round(prediction.home_xg * 0.45))}-{int(round(prediction.away_xg * 0.45))}"
prediction.ft_scores_top5 = self._poisson_score_top5(prediction.home_xg, prediction.away_xg)
# Score prediction: find the most likely scoreline consistent with the MS pick
# Instead of just rounding xG (misleading), filter Poisson top scores by result direction
ms_pick = prediction.ms_pick # "1", "X", or "2"
top5 = prediction.ft_scores_top5
if top5 and ms_pick in ("1", "X", "2"):
def _result_of(score_str: str) -> str:
try:
h, a = map(int, score_str.split("-"))
if h > a: return "1"
if h < a: return "2"
return "X"
except Exception:
return "?"
# Filter to scorelines matching the predicted result
matching = [s for s in top5 if _result_of(s["score"]) == ms_pick]
if matching:
best = matching[0] # already sorted by probability desc
h_str, a_str = best["score"].split("-")
prediction.predicted_ft_score = best["score"]
# Recalculate HT score proportionally from the FT pick
h_val, a_val = int(h_str), int(a_str)
prediction.predicted_ht_score = f"{int(round(h_val * 0.45))}-{int(round(a_val * 0.45))}"
max_market_conf = max(
prediction.ms_confidence,
prediction.ou15_confidence,
prediction.ou25_confidence,
prediction.ou35_confidence,
prediction.btts_confidence,
prediction.ht_confidence,
prediction.cards_confidence,
prediction.handicap_confidence,
)
lineup_conf = max(0.0, min(1.0, float(getattr(data, "lineup_confidence", 0.0) or 0.0)))
lineup_penalty = 12.0 if data.lineup_source == "none" else max(1.5, (1.0 - lineup_conf) * 8.0) if data.lineup_source == "probable_xi" else 0.0
referee_penalty = 6.0 if not data.referee_name else 0.0
parity_penalty = 8.0 if abs(ms_edge) < 0.08 else 0.0
# Cup game penalty: model trained on league data has lower reliability for cup matches
cup_penalty = 10.0 if is_cup else 0.0
# Bookmaker margin penalty: high margin signals that even the market is uncertain
bm_margin = 0.0
odds_data = getattr(data, "odds_data", {}) or {}
_h, _d, _a = float(odds_data.get("ms_h") or 0), float(odds_data.get("ms_d") or 0), float(odds_data.get("ms_a") or 0)
if _h > 1.01 and _d > 1.01 and _a > 1.01:
bm_margin = (1 / _h + 1 / _d + 1 / _a) - 1
bookmaker_penalty = 12.0 if bm_margin > 0.20 else 6.0 if bm_margin > 0.15 else 0.0
prediction.risk_score = round(min(100.0, max(10.0, 100.0 - max_market_conf + lineup_penalty + referee_penalty + parity_penalty + cup_penalty + bookmaker_penalty)), 1)
if prediction.risk_score >= 78:
prediction.risk_level = "EXTREME"
elif prediction.risk_score >= 62:
prediction.risk_level = "HIGH"
elif prediction.risk_score >= 40:
prediction.risk_level = "MEDIUM"
else:
prediction.risk_level = "LOW"
prediction.is_surprise_risk = prediction.risk_level in {"HIGH", "EXTREME"} or prediction.ms_draw_prob >= 0.30
prediction.surprise_type = "balanced_match_risk" if abs(ms_edge) < 0.08 else "draw_pressure" if prediction.ms_draw_prob >= 0.30 else ""
prediction.risk_warnings = []
if is_cup:
prediction.risk_warnings.append("cup_game_home_advantage_reduced")
if bookmaker_penalty > 0:
prediction.risk_warnings.append(f"bookmaker_margin_high_{bm_margin*100:.0f}pct")
if data.lineup_source == "probable_xi":
prediction.risk_warnings.append("lineup_probable_not_confirmed")
if lineup_conf < 0.65:
prediction.risk_warnings.append("lineup_projection_low_confidence")
if data.lineup_source == "none":
prediction.risk_warnings.append("lineup_unavailable")
if not data.referee_name:
prediction.risk_warnings.append("missing_referee")
if prediction.ms_draw_prob >= 0.30:
prediction.risk_warnings.append("draw_probability_elevated")
prediction.upset_score = int(round(max(0.0, min(100.0, (prediction.ms_draw_prob + min(prediction.ms_home_prob, prediction.ms_away_prob)) * 100.0))))
prediction.upset_level = "HIGH" if prediction.upset_score >= 65 else "MEDIUM" if prediction.upset_score >= 45 else "LOW"
prediction.upset_reasons = [prediction.surprise_type] if prediction.surprise_type else []
surprise = self._build_surprise_profile(data, prediction)
prediction.surprise_score = surprise["score"]
prediction.surprise_comment = surprise["comment"]
prediction.surprise_reasons = surprise["reasons"]
prediction.surprise_breakdown = surprise.get("breakdown", [])
# Auto-flag is_surprise_risk when score crosses 45 even if other paths didn't fire
if surprise["score"] >= 45.0:
prediction.is_surprise_risk = True
prediction.team_confidence = round(max(35.0, min(95.0, 45.0 + (abs(ms_edge) * 85.0) + (abs(float(features.get("form_elo_diff", 0.0))) / 40.0))), 1)
prediction.player_confidence = round(max(20.0, min(95.0, 38.0 + (float(features.get("home_key_players", 0.0)) + float(features.get("away_key_players", 0.0))) * 2.0 - (float(features.get("home_missing_impact", 0.0)) + float(features.get("away_missing_impact", 0.0))) * 22.0)), 1)
prediction.odds_confidence = round(max(30.0, min(95.0, float(np.mean([prediction.ms_confidence, prediction.ou25_confidence, prediction.btts_confidence])))), 1)
prediction.referee_confidence = 62.0 if data.referee_name else 35.0
prediction.total_cards_pred = 4.8 if prediction.cards_over_prob >= prediction.cards_under_prob else 4.1
prediction.total_corners_pred = round(8.8 + (prediction.over_25_prob - 0.5) * 2.5, 1)
prediction.corner_pick = "9.5 Üst" if prediction.total_corners_pred >= 9.5 else "9.5 Alt"
prediction.analysis_details = {
"primary_model": "v25",
"features_source": "v25.pre_match",
"market_count": len([key for key in v25_signal.keys() if key != "value_bets"]),
"lineup_source": data.lineup_source,
}
return prediction
def _build_engine_breakdown(self, prediction: FullMatchPrediction) -> Dict[str, Any]:
"""
Engine breakdown with backward-compatible flat scores + rich detail siblings.
Shape:
{
team: 74.1, player: 55.7, odds: 55.2, referee: 62.0, # legacy flat scores
detail: { team: {score, label, ...}, player: {...}, ... }
}
"""
components = {
"team": ("Takım modeli", float(prediction.team_confidence)),
"player": ("Oyuncu / kadro modeli", float(prediction.player_confidence)),
"odds": ("Oran piyasası", float(prediction.odds_confidence)),
"referee": ("Hakem etkisi", float(prediction.referee_confidence)),
}
flat: Dict[str, Any] = {}
detail: Dict[str, Any] = {}
for key, (display, raw) in components.items():
score = round(raw, 1)
label, interpretation = self._confidence_label(score)
flat[key] = score
detail[key] = {
"score": score,
"label": label,
"display_name": display,
"interpretation": interpretation,
}
flat["detail"] = detail
return flat
+469
View File
@@ -0,0 +1,469 @@
"""Reversal Mixin — HT/FT reversal watchlist and cycle metrics.
Auto-extracted mixin module split from services/single_match_orchestrator.py.
All methods here are composed into SingleMatchOrchestrator via inheritance.
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
initialised in the main __init__.
"""
from __future__ import annotations
import json
import re
import time
import math
import os
import pickle
from collections import defaultdict
from typing import Any, Dict, List, Optional, Set, Tuple, overload
import pandas as pd
import numpy as np
import psycopg2
from psycopg2.extras import RealDictCursor
from data.db import get_clean_dsn
from schemas.prediction import FullMatchPrediction
from schemas.match_data import MatchData
from models.v25_ensemble import V25Predictor, get_v25_predictor
try:
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
except ImportError:
class V27Predictor: # type: ignore[no-redef]
def __init__(self): self.models = {}
def load_models(self): return False
def predict_all(self, features): return {}
def compute_divergence(*args, **kwargs):
return {}
def compute_value_edge(*args, **kwargs):
return {}
from features.odds_band_analyzer import OddsBandAnalyzer
try:
from models.basketball_v25 import (
BasketballMatchPrediction,
get_basketball_v25_predictor,
)
except ImportError:
BasketballMatchPrediction = Any # type: ignore[misc]
def get_basketball_v25_predictor() -> Any:
raise ImportError("Basketball predictor is not available")
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
from services.feature_enrichment import FeatureEnrichmentService
from services.betting_brain import BettingBrain
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
from services.match_commentary import generate_match_commentary
from utils.top_leagues import load_top_league_ids
from utils.league_reliability import load_league_reliability
from config.config_loader import build_threshold_dict, get_threshold_default
from models.calibration import get_calibrator
class ReversalMixin:
def get_reversal_watchlist(
    self,
    count: int = 20,
    horizon_hours: int = 72,
    min_score: float = 45.0,
    top_leagues_only: bool = False,
) -> Dict[str, Any]:
    """Build a ranked watchlist of upcoming football matches with HT/FT reversal potential.

    A reversal is an HT/FT outcome of "1/2" or "2/1". Upcoming rows are read
    from ``live_matches``, each one is run through ``self.analyze_match`` and
    the model's reversal probabilities are combined with odds balance,
    favourite strength, per-team reversal cycles, the last head-to-head result
    and the over-2.5 price into a 0-100 ``watch_score``.

    Args:
        count: Maximum number of watchlist entries; clamped to [1, 100].
        horizon_hours: Look-ahead window in hours; clamped to [6, 168].
        min_score: Minimum ``watch_score`` to be listed; clamped to [0, 100].
        top_leagues_only: Keep only candidates accepted by ``self._is_top_league``.

    Returns:
        A dict with request metadata, scan counters, ``watchlist`` (entries at or
        above ``min_score``, best first, at most ``count``) and
        ``top_candidates_preview`` (the five best entries regardless of score).
    """
    # Function-scope imports keep this block self-contained.
    from contextlib import closing
    from datetime import datetime, timezone

    safe_count = max(1, min(100, int(count)))
    safe_horizon = max(6, min(168, int(horizon_hours)))
    safe_min_score = max(0.0, min(100.0, float(min_score)))
    now_ms = int(time.time() * 1000)
    horizon_ms = now_ms + (safe_horizon * 60 * 60 * 1000)

    # psycopg2's connection context manager only commits/rolls back the
    # transaction; closing() is what actually releases the connection.
    with closing(psycopg2.connect(self.dsn)) as conn:
        with conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(
                """
                SELECT
                lm.id,
                lm.home_team_id,
                lm.away_team_id,
                lm.league_id,
                lm.mst_utc
                FROM live_matches lm
                WHERE lm.sport = 'football'
                AND lm.mst_utc >= %s
                AND lm.mst_utc <= %s
                ORDER BY lm.mst_utc ASC
                LIMIT 200
                """,
                (now_ms, horizon_ms),
            )
            raw_candidates = cur.fetchall()
            candidates = [
                row
                for row in raw_candidates
                if row.get("home_team_id") and row.get("away_team_id")
            ]
            if top_leagues_only:
                candidates = [
                    row for row in candidates if self._is_top_league(row.get("league_id"))
                ]

            team_ids: Set[str] = set()
            pair_keys: Set[Tuple[str, str]] = set()
            for row in candidates:
                home_id = str(row["home_team_id"])
                away_id = str(row["away_team_id"])
                team_ids.add(home_id)
                team_ids.add(away_id)
                h, a = sorted((home_id, away_id))
                pair_keys.add((h, a))

            team_cycle = self._fetch_team_reversal_cycle_metrics(cur, team_ids, now_ms)
            h2h_ctx = self._fetch_h2h_reversal_context(cur, pair_keys, now_ms)

    # The scoring loop never touches the cursor, so it runs after the
    # connection has been released.
    watch_items_all: List[Dict[str, Any]] = []
    scanned = 0
    for row in candidates:
        match_id = str(row["id"])
        data = self._load_match_data(match_id)
        if data is None:
            continue
        package = self.analyze_match(match_id)
        if not package:
            continue
        scanned += 1

        htft_probs = package.get("market_board", {}).get("HTFT", {}).get("probs", {})
        prob_12 = float(htft_probs.get("1/2", 0.0))
        prob_21 = float(htft_probs.get("2/1", 0.0))
        if prob_12 <= 0.0 and prob_21 <= 0.0:
            continue

        overall_htft_pick = None
        overall_htft_prob = 0.0
        if htft_probs:
            overall_htft_pick, overall_htft_prob = max(
                htft_probs.items(),
                key=lambda item: float(item[1]),
            )

        reversal_sum = prob_12 + prob_21
        reversal_max = max(prob_12, prob_21)
        top_pick = "2/1" if prob_21 >= prob_12 else "1/2"
        top_prob = prob_21 if top_pick == "2/1" else prob_12

        ms_h = self._to_float(data.odds_data.get("ms_h"), 0.0)
        ms_a = self._to_float(data.odds_data.get("ms_a"), 0.0)
        odds_valid = ms_h > 1.0 and ms_a > 1.0
        gap = abs(ms_h - ms_a) if odds_valid else 0.0
        favorite_odd = min(ms_h, ms_a) if odds_valid else 0.0

        # Reversal events are rare (~5% baseline), so convert raw probs to a more useful
        # watchlist scale where p in [0.02, 0.08] becomes meaningfully separable.
        base_score = (reversal_max * 100.0 * 8.0) + (reversal_sum * 100.0 * 4.0)

        balance_bonus = 0.0
        if gap > 0.0:
            balance_bonus = max(0.0, (1.0 - min(gap, 1.2) / 1.2) * 7.0)
        elif odds_valid:
            # NOTE(review): identical home/away odds (gap == 0) land here and earn
            # only 2.0, less than a near-zero gap earns above — confirm intended.
            balance_bonus = 2.0

        favorite_bonus = 0.0
        if favorite_odd > 0.0 and favorite_odd <= 1.70 and reversal_max >= 0.02:
            favorite_bonus = min(8.0, (1.70 - favorite_odd) * 12.0)

        # Both lookup tables are keyed by str(team_id); normalise before lookup
        # so non-string ids on `data` cannot silently miss.
        home_key = str(data.home_team_id)
        away_key = str(data.away_team_id)
        home_metrics = team_cycle.get(home_key, {})
        away_metrics = team_cycle.get(away_key, {})
        cycle_pressure = max(
            float(home_metrics.get("cycle_pressure", 0.0)),
            float(away_metrics.get("cycle_pressure", 0.0)),
        )
        cycle_bonus = cycle_pressure * 10.0

        h, a = sorted((home_key, away_key))
        pair_ctx = h2h_ctx.get((h, a), {})
        blowout_bonus = 0.0
        last_diff = int(pair_ctx.get("goal_diff", 0))
        if abs(last_diff) >= 3:
            blowout_bonus = 6.0
        if abs(last_diff) >= 5:
            blowout_bonus += 3.0

        ou25_o = self._to_float(data.odds_data.get("ou25_o"), 0.0)
        tempo_bonus = 0.0
        if ou25_o > 1.0 and ou25_o <= 1.72:
            tempo_bonus = 2.5

        watch_score = max(
            0.0,
            min(
                100.0,
                base_score + balance_bonus + favorite_bonus + cycle_bonus + blowout_bonus + tempo_bonus,
            ),
        )

        reason_codes: List[str] = []
        if top_prob >= 0.045:
            reason_codes.append("reversal_prob_hot")
        elif top_prob >= 0.030:
            reason_codes.append("reversal_prob_warm")
        if gap > 0.0 and gap <= 0.80:
            reason_codes.append("balanced_matchup")
        if favorite_bonus > 0.0:
            reason_codes.append("strong_favorite_reversal_window")
        if cycle_pressure >= 0.55:
            reason_codes.append("team_reversal_cycle_pressure")
        if blowout_bonus > 0.0:
            reason_codes.append("h2h_blowout_rematch")
        if tempo_bonus > 0.0:
            reason_codes.append("high_tempo_profile")
        if not reason_codes:
            reason_codes.append("model_signal_only")

        watch_items_all.append(
            {
                "match_id": data.match_id,
                "match_name": f"{data.home_team_name} vs {data.away_team_name}",
                "match_date_ms": data.match_date_ms,
                "league_id": data.league_id,
                "league": data.league_name,
                "risk_band": self._watchlist_risk_band(watch_score),
                "watch_score": round(watch_score, 2),
                "top_pick": top_pick,
                "top_pick_prob": round(top_prob, 4),
                "top_pick_scope": "reversal_only",
                "overall_htft_pick": overall_htft_pick,
                "overall_htft_pick_prob": round(float(overall_htft_prob), 4),
                "reversal_probs": {
                    "1/2": round(prob_12, 4),
                    "2/1": round(prob_21, 4),
                },
                "odds_snapshot": {
                    "ms_h": round(ms_h, 2) if ms_h > 0 else None,
                    "ms_a": round(ms_a, 2) if ms_a > 0 else None,
                    "ms_gap": round(gap, 3),
                    "favorite_odd": round(favorite_odd, 2) if favorite_odd > 0 else None,
                },
                "pattern_signals": {
                    "home_cycle_pressure": round(float(home_metrics.get("cycle_pressure", 0.0)), 3),
                    "away_cycle_pressure": round(float(away_metrics.get("cycle_pressure", 0.0)), 3),
                    "home_matches_since_last_reversal": int(home_metrics.get("matches_since_last_reversal", 99)),
                    "away_matches_since_last_reversal": int(away_metrics.get("matches_since_last_reversal", 99)),
                    "h2h_last_goal_diff": last_diff if pair_ctx else None,
                    "h2h_last_result": pair_ctx.get("result"),
                },
                "reason_codes": reason_codes,
            }
        )

    # Best first: watch score, then the stronger reversal probability.
    watch_items_all.sort(
        key=lambda item: (
            float(item.get("watch_score", 0.0)),
            float(item.get("top_pick_prob", 0.0)),
        ),
        reverse=True,
    )
    selected = [
        item for item in watch_items_all if float(item.get("watch_score", 0.0)) >= safe_min_score
    ][:safe_count]
    preview = watch_items_all[: min(5, len(watch_items_all))]

    # Same naive-UTC "…Z" timestamp format as before, without the deprecated utcnow().
    generated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
    return {
        "engine": "v28.main",
        "generated_at": generated_at,
        "horizon_hours": safe_horizon,
        "min_score": round(safe_min_score, 2),
        "top_leagues_only": bool(top_leagues_only),
        "scanned_matches": scanned,
        "candidate_matches": len(candidates),
        "listed_matches": len(selected),
        "watchlist": selected,
        "top_candidates_preview": preview,
    }
def _fetch_team_reversal_cycle_metrics(
self,
cur: RealDictCursor,
team_ids: Set[str],
now_ms: int,
) -> Dict[str, Dict[str, float]]:
if not team_ids:
return {}
cur.execute(
"""
WITH team_matches AS (
SELECT
m.home_team_id AS team_id,
m.mst_utc,
CASE
WHEN m.ht_score_home > m.ht_score_away THEN 'L'
WHEN m.ht_score_home < m.ht_score_away THEN 'T'
ELSE 'D'
END AS ht_state,
CASE
WHEN m.score_home > m.score_away THEN 'W'
WHEN m.score_home < m.score_away THEN 'L'
ELSE 'D'
END AS ft_state
FROM matches m
WHERE m.status = 'FT'
AND m.score_home IS NOT NULL
AND m.score_away IS NOT NULL
AND m.ht_score_home IS NOT NULL
AND m.ht_score_away IS NOT NULL
AND m.home_team_id = ANY(%s)
AND m.mst_utc < %s
UNION ALL
SELECT
m.away_team_id AS team_id,
m.mst_utc,
CASE
WHEN m.ht_score_away > m.ht_score_home THEN 'L'
WHEN m.ht_score_away < m.ht_score_home THEN 'T'
ELSE 'D'
END AS ht_state,
CASE
WHEN m.score_away > m.score_home THEN 'W'
WHEN m.score_away < m.score_home THEN 'L'
ELSE 'D'
END AS ft_state
FROM matches m
WHERE m.status = 'FT'
AND m.score_home IS NOT NULL
AND m.score_away IS NOT NULL
AND m.ht_score_home IS NOT NULL
AND m.ht_score_away IS NOT NULL
AND m.away_team_id = ANY(%s)
AND m.mst_utc < %s
),
ranked AS (
SELECT
team_id,
mst_utc,
ht_state,
ft_state,
ROW_NUMBER() OVER (PARTITION BY team_id ORDER BY mst_utc DESC) AS rn
FROM team_matches
)
SELECT team_id, mst_utc, ht_state, ft_state
FROM ranked
WHERE rn <= 80
ORDER BY team_id ASC, mst_utc DESC
""",
(list(team_ids), now_ms, list(team_ids), now_ms),
)
rows = cur.fetchall()
by_team: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
for row in rows:
by_team[str(row["team_id"])].append(row)
out: Dict[str, Dict[str, float]] = {}
for team_id in team_ids:
team_rows = by_team.get(str(team_id), [])
if not team_rows:
out[str(team_id)] = {
"recent_reversal_rate": 0.0,
"matches_since_last_reversal": 99.0,
"avg_gap_matches": 12.0,
"cycle_pressure": 0.0,
}
continue
reversal_indexes: List[int] = []
recent_reversal = 0
recent_n = min(15, len(team_rows))
for idx, row in enumerate(team_rows, start=1):
ht_state = str(row.get("ht_state") or "")
ft_state = str(row.get("ft_state") or "")
is_reversal = (ht_state == "L" and ft_state == "L") or (ht_state == "T" and ft_state == "W")
if idx <= recent_n and is_reversal:
recent_reversal += 1
if is_reversal:
reversal_indexes.append(idx)
recent_rate = (recent_reversal / recent_n) if recent_n > 0 else 0.0
since_last = float(reversal_indexes[0]) if reversal_indexes else 99.0
gaps: List[float] = []
if len(reversal_indexes) >= 2:
for i in range(1, len(reversal_indexes)):
gaps.append(float(reversal_indexes[i] - reversal_indexes[i - 1]))
avg_gap = (sum(gaps) / len(gaps)) if gaps else 12.0
if avg_gap <= 0:
avg_gap = 12.0
cycle_pressure = 0.0
if reversal_indexes:
tolerance = max(3.0, avg_gap * 0.7)
diff = abs(since_last - avg_gap)
cycle_pressure = max(0.0, 1.0 - (diff / tolerance))
out[str(team_id)] = {
"recent_reversal_rate": round(recent_rate, 4),
"matches_since_last_reversal": round(since_last, 2),
"avg_gap_matches": round(avg_gap, 2),
"cycle_pressure": round(cycle_pressure, 4),
}
return out
def _fetch_h2h_reversal_context(
self,
cur: RealDictCursor,
pair_keys: Set[Tuple[str, str]],
now_ms: int,
) -> Dict[Tuple[str, str], Dict[str, Any]]:
if not pair_keys:
return {}
team_ids = sorted({team_id for pair in pair_keys for team_id in pair})
cur.execute(
"""
SELECT
m.home_team_id,
m.away_team_id,
m.score_home,
m.score_away,
m.ht_score_home,
m.ht_score_away,
m.mst_utc
FROM matches m
WHERE m.status = 'FT'
AND m.score_home IS NOT NULL
AND m.score_away IS NOT NULL
AND m.home_team_id = ANY(%s)
AND m.away_team_id = ANY(%s)
AND m.mst_utc < %s
ORDER BY m.mst_utc DESC
LIMIT 4000
""",
(team_ids, team_ids, now_ms),
)
rows = cur.fetchall()
out: Dict[Tuple[str, str], Dict[str, Any]] = {}
for row in rows:
home_id = str(row["home_team_id"])
away_id = str(row["away_team_id"])
h, a = sorted((home_id, away_id))
key = (h, a)
if key not in pair_keys or key in out:
continue
score_home = int(row["score_home"])
score_away = int(row["score_away"])
goal_diff = score_home - score_away
out[key] = {
"goal_diff": goal_diff,
"result": f"{score_home}-{score_away}",
"match_date_ms": int(row["mst_utc"] or 0),
}
if len(out) >= len(pair_keys):
break
return out
@staticmethod
def _watchlist_risk_band(score: float) -> str:
if score >= 68.0:
return "HIGH"
if score >= 54.0:
return "MEDIUM"
return "LOW"
@@ -0,0 +1,350 @@
"""Upper Brain Mixin — V27 cross-check guards and assessments.
Auto-extracted mixin module split from services/single_match_orchestrator.py.
All methods here are composed into SingleMatchOrchestrator via inheritance.
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
initialised in the main __init__.
"""
from __future__ import annotations
import json
import re
import time
import math
import os
import pickle
from collections import defaultdict
from typing import Any, Dict, List, Optional, Set, Tuple, overload
import pandas as pd
import numpy as np
import psycopg2
from psycopg2.extras import RealDictCursor
from data.db import get_clean_dsn
from schemas.prediction import FullMatchPrediction
from schemas.match_data import MatchData
from models.v25_ensemble import V25Predictor, get_v25_predictor
try:
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
except ImportError:
class V27Predictor: # type: ignore[no-redef]
def __init__(self): self.models = {}
def load_models(self): return False
def predict_all(self, features): return {}
def compute_divergence(*args, **kwargs):
return {}
def compute_value_edge(*args, **kwargs):
return {}
from features.odds_band_analyzer import OddsBandAnalyzer
try:
from models.basketball_v25 import (
BasketballMatchPrediction,
get_basketball_v25_predictor,
)
except ImportError:
BasketballMatchPrediction = Any # type: ignore[misc]
def get_basketball_v25_predictor() -> Any:
raise ImportError("Basketball predictor is not available")
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
from services.feature_enrichment import FeatureEnrichmentService
from services.betting_brain import BettingBrain
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
from services.match_commentary import generate_match_commentary
from utils.top_leagues import load_top_league_ids
from utils.league_reliability import load_league_reliability
from config.config_loader import build_threshold_dict, get_threshold_default
from models.calibration import get_calibrator
class UpperBrainMixin:
def _apply_upper_brain_guards(self, package: Dict[str, Any]) -> Dict[str, Any]:
    """Return the prediction package as judged by ``BettingBrain``.

    The method is a thin delegate: a fresh ``BettingBrain`` is created per call
    and its ``judge`` result is returned unchanged.

    An older in-method V27 cross-check pipeline used to follow the ``return``
    below. It could never execute, so it has been dropped; the helper methods
    it relied on (``_upper_brain_assessment`` and friends) are left in place.

    Args:
        package: Prediction package to be judged.

    Returns:
        Whatever ``BettingBrain().judge(package)`` returns.
    """
    return BettingBrain().judge(package)
def _upper_brain_assessment(
self,
item: Dict[str, Any],
package: Dict[str, Any],
) -> Dict[str, Any]:
market = str(item.get("market") or "")
pick = str(item.get("pick") or "")
if not market or not pick:
return {"applies": False}
v27_engine = package.get("v27_engine") or {}
triple_value = v27_engine.get("triple_value") or {}
model_prob = self._upper_brain_market_probability(item, package)
v27_prob = self._upper_brain_v27_probability(market, pick, v27_engine)
triple_key = self._upper_brain_triple_key(market, pick)
triple = triple_value.get(triple_key) if triple_key else None
veto = False
downgrade = False
reasons: List[str] = []
divergence = None
if model_prob is not None and v27_prob is not None:
divergence = abs(float(model_prob) - float(v27_prob))
if divergence >= 0.18:
veto = True
reasons.append("upper_brain_v25_v27_divergence")
elif divergence >= 0.12:
downgrade = True
reasons.append("upper_brain_v25_v27_warning")
if isinstance(triple, dict):
band_sample = int(float(triple.get("band_sample", 0) or 0))
is_value = bool(triple.get("is_value"))
if market == "DC":
if band_sample < 8:
veto = True
reasons.append("upper_brain_band_sample_too_low")
elif not is_value:
veto = True
reasons.append("upper_brain_triple_value_rejected")
elif market in {"MS", "OU25"} and band_sample > 0 and band_sample < 8:
downgrade = True
reasons.append("upper_brain_band_sample_thin")
elif market in {"OU15", "HT_OU05"} and band_sample < 8:
downgrade = True
reasons.append("upper_brain_band_sample_thin")
consensus = str(v27_engine.get("consensus") or "").upper()
if consensus == "DISAGREE" and market in {"MS", "DC"} and not veto:
downgrade = True
reasons.append("upper_brain_consensus_disagree")
applies = bool(reasons or triple is not None or v27_prob is not None)
return {
"applies": applies,
"veto": veto,
"downgrade": downgrade,
"reason_codes": reasons,
"model_prob": round(float(model_prob), 4) if model_prob is not None else None,
"v27_prob": round(float(v27_prob), 4) if v27_prob is not None else None,
"divergence": round(float(divergence), 4) if divergence is not None else None,
"triple_key": triple_key,
"triple_value": triple,
}
def _upper_brain_market_probability(
self,
item: Dict[str, Any],
package: Dict[str, Any],
) -> Optional[float]:
raw_prob = item.get("probability")
if raw_prob is not None:
try:
return float(raw_prob)
except (TypeError, ValueError):
pass
market = str(item.get("market") or "")
pick = str(item.get("pick") or "")
board = package.get("market_board") or {}
payload = board.get(market) if isinstance(board, dict) else None
probs = payload.get("probs") if isinstance(payload, dict) else None
if not isinstance(probs, dict):
return None
prob_key = self._upper_brain_prob_key(market, pick)
if prob_key is None:
return None
return self._safe_float(probs.get(prob_key))
def _upper_brain_v27_probability(
self,
market: str,
pick: str,
v27_engine: Dict[str, Any],
) -> Optional[float]:
predictions = v27_engine.get("predictions") or {}
ms = predictions.get("ms") or {}
ou25 = predictions.get("ou25") or {}
if market == "MS":
ms_key = {"1": "home", "X": "draw", "2": "away"}.get(pick or "")
return self._safe_float(ms.get(ms_key), 0.0) if ms_key else 0.0
if market == "DC":
if pick == "1X":
return self._safe_float(ms.get("home"), 0.0) + self._safe_float(ms.get("draw"), 0.0)
if pick == "X2":
return self._safe_float(ms.get("draw"), 0.0) + self._safe_float(ms.get("away"), 0.0)
if pick == "12":
return self._safe_float(ms.get("home"), 0.0) + self._safe_float(ms.get("away"), 0.0)
if market == "OU25":
prob_key = self._upper_brain_prob_key(market, pick)
return self._safe_float(ou25.get(prob_key), 0.0) if prob_key else 0.0
return 0.0
@staticmethod
def _upper_brain_prob_key(market: str, pick: str) -> Optional[str]:
pick_norm = str(pick or "").strip().casefold()
if market in {"MS", "HT", "HCAP"}:
return pick if pick in {"1", "X", "2"} else None
if market == "DC":
return pick.upper() if pick.upper() in {"1X", "X2", "12"} else None
if market in {"OU15", "OU25", "OU35", "HT_OU05", "HT_OU15", "CARDS"}:
if "over" in pick_norm or "st" in pick_norm:
return "over"
if "under" in pick_norm or "alt" in pick_norm:
return "under"
if market == "BTTS":
if "yes" in pick_norm or "var" in pick_norm:
return "yes"
if "no" in pick_norm or "yok" in pick_norm:
return "no"
if market == "OE":
if "odd" in pick_norm or "tek" in pick_norm:
return "odd"
if "even" in pick_norm or "ift" in pick_norm:
return "even"
if market == "HTFT" and "/" in pick:
return pick
return None
def _upper_brain_triple_key(self, market: str, pick: str) -> Optional[str]:
prob_key = self._upper_brain_prob_key(market, pick)
if market == "MS":
return {"1": "home", "2": "away"}.get(pick)
if market == "DC":
return f"dc_{pick.lower()}" if pick.upper() in {"1X", "X2", "12"} else None
if market in {"OU15", "OU25", "OU35"} and prob_key == "over":
return f"{market.lower()}_over"
if market == "BTTS" and prob_key == "yes":
return "btts_yes"
if market == "HT":
return {"1": "ht_home", "2": "ht_away"}.get(pick)
if market in {"HT_OU05", "HT_OU15"} and prob_key == "over":
return f"{market.lower()}_over"
if market == "OE" and prob_key == "odd":
return "oe_odd"
if market == "CARDS" and prob_key == "over":
return "cards_over"
if market == "HTFT" and "/" in pick:
return f"htft_{pick.replace('/', '').lower()}"
return None
+174
View File
@@ -0,0 +1,174 @@
"""Utility Mixin — generic helpers (safe_float, label normalisation, JSON parsing).
Auto-extracted mixin module split from services/single_match_orchestrator.py.
All methods here are composed into SingleMatchOrchestrator via inheritance.
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
initialised in the main __init__.
"""
from __future__ import annotations
import json
import re
import time
import math
import os
import pickle
from collections import defaultdict
from typing import Any, Dict, List, Optional, Set, Tuple, overload
import pandas as pd
import numpy as np
import psycopg2
from psycopg2.extras import RealDictCursor
from data.db import get_clean_dsn
from schemas.prediction import FullMatchPrediction
from schemas.match_data import MatchData
from models.v25_ensemble import V25Predictor, get_v25_predictor
try:
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
except ImportError:
class V27Predictor: # type: ignore[no-redef]
def __init__(self): self.models = {}
def load_models(self): return False
def predict_all(self, features): return {}
def compute_divergence(*args, **kwargs):
return {}
def compute_value_edge(*args, **kwargs):
return {}
from features.odds_band_analyzer import OddsBandAnalyzer
try:
from models.basketball_v25 import (
BasketballMatchPrediction,
get_basketball_v25_predictor,
)
except ImportError:
BasketballMatchPrediction = Any # type: ignore[misc]
def get_basketball_v25_predictor() -> Any:
raise ImportError("Basketball predictor is not available")
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
from services.feature_enrichment import FeatureEnrichmentService
from services.betting_brain import BettingBrain
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
from services.match_commentary import generate_match_commentary
from utils.top_leagues import load_top_league_ids
from utils.league_reliability import load_league_reliability
from config.config_loader import build_threshold_dict, get_threshold_default
from models.calibration import get_calibrator
class UtilsMixin:
    """Generic helpers: float coercion, label normalisation, JSON parsing.

    The methods here read no instance state; the ones declared without
    ``@staticmethod`` only use ``self`` to reach their sibling helpers.
    """

    @staticmethod
    @overload
    def _safe_float(value: Any, default: float = ...) -> float: ...

    @staticmethod
    @overload
    def _safe_float(value: Any, default: None) -> Optional[float]: ...

    @staticmethod
    def _safe_float(value: Any, default: Optional[float] = 0.0) -> Optional[float]:
        """Convert *value* to ``float``, returning *default* when that fails.

        This helper used to be defined twice in the class body; the later
        definition (``default=0.0``) shadowed the earlier one, so ``0.0`` is
        the fallback callers have actually been getting. That behaviour is
        kept, and the overload stubs now describe it.

        Args:
            value: Anything ``float()`` may accept (number, numeric string,
                ``Decimal``, ...).
            default: Returned when conversion fails. Pass ``None`` explicitly
                to receive ``None`` for unparseable input.

        Returns:
            The converted float, or *default*.
        """
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: integers too large for a float, e.g. 10 ** 400.
            return default

    @staticmethod
    def _calibrator_key(market: str, pick: str) -> Optional[str]:
        """Map (market, pick) → trained-calibrator key in models/calibration.

        Args:
            market: Market code, matched case-insensitively ("MS", "DC",
                "OU15", "OU25", "OU35", "BTTS", "HT", "HTFT").
            pick: Selection label; surrounding whitespace and case are ignored.

        Returns:
            The calibrator key, or ``None`` when the pair is not mapped
            (unknown market, unknown 1/X/2 pick, a non-"over" total-goals
            pick, or a non-"yes" BTTS pick).
        """
        m = (market or "").upper()
        p = (pick or "").strip().casefold()

        if m in ("MS", "HT"):
            # "0" is accepted as an alternative spelling of the draw pick.
            side = {"1": "home", "x": "draw", "0": "draw", "2": "away"}.get(p)
            return f"{m.lower()}_{side}" if side else None
        if m == "DC":
            return "dc"
        if m in ("OU15", "OU25", "OU35"):
            # Only the "over" side has a key; Turkish "üst"/"ust" also count.
            is_over = any(token in p for token in ("over", "üst", "ust"))
            return m.lower() if is_over else None
        if m == "BTTS":
            # Only the "yes" side has a key; Turkish "var" also counts.
            return "btts" if ("yes" in p or "var" in p) else None
        if m == "HTFT":
            return "ht_ft"
        return None

    @staticmethod
    def _confidence_label(score: float) -> Tuple[str, str]:
        """Turkish UX label + interpretation for a 0-100 confidence score.

        Returns:
            ``(label, interpretation)``; thresholds are 75, 60 and 45, and
            anything below 45 falls into the lowest band.
        """
        if score >= 75:
            return "YUKSEK", "Bu sinyal güçlü ve güvenilir"
        if score >= 60:
            return "ORTA", "Sinyal makul, çelişen veri yok"
        if score >= 45:
            return "DUSUK", "Sinyal zayıf, dikkatli yorumla"
        return "COK_DUSUK", "Veri yetersiz veya çelişkili — bu motoru bu maç için ihmal et"

    @staticmethod
    def _to_float(value: Any, default: float) -> float:
        """Convert *value* to ``float``; ``None`` or any failure yields *default*."""
        if value is None:
            return default
        try:
            return float(value)
        except Exception:
            # Deliberately broad: this is a best-effort coercion of arbitrary
            # payload values and must never raise.
            return default

    @staticmethod
    def _normalize_text(value: Any) -> str:
        """Return *value* as case-folded text with whitespace runs collapsed.

        ``None`` and other falsy values normalise to the empty string.
        """
        # str.casefold() maps Turkish "İ" to "i" + U+0307 (combining dot
        # above). Drop the combining mark so such labels compare equal to
        # plain-ASCII aliases. The sequence is written as an escape so it
        # cannot be lost by editors or encoders.
        text = str(value or "").casefold().replace("i\u0307", "i")
        return " ".join(text.split())

    def _selection_value(
        self,
        selections: Dict[str, Any],
        aliases: Tuple[str, ...],
        default: float,
    ) -> float:
        """Look up a numeric selection value by any of several label aliases.

        Keys and aliases are compared after ``_normalize_text``. An exact key
        match anywhere in the mapping wins over a substring match.

        Args:
            selections: Mapping of selection label → raw value.
            aliases: Acceptable labels for the wanted selection.
            default: Returned when *selections* is not a dict, nothing
                matches, or the matched value cannot be converted.

        Returns:
            The matched value as a float, or *default*.
        """
        if not isinstance(selections, dict):
            return default

        wanted = {self._normalize_text(alias) for alias in aliases}
        # Normalise every key once; both passes below reuse the result.
        normalized_items = [
            (self._normalize_text(key), value) for key, value in selections.items()
        ]

        for key_norm, value in normalized_items:
            if key_norm in wanted:
                return self._to_float(value, default)

        # Secondary match for entries like "2,5 Üst" or "Toplam Alt"
        for key_norm, value in normalized_items:
            if any(alias in key_norm for alias in wanted):
                return self._to_float(value, default)

        return default

    def _parse_json_dict(self, payload: Any) -> Optional[Dict[str, Any]]:
        """Return *payload* as a dict, decoding it from JSON text if needed.

        Returns:
            The dict, or ``None`` when *payload* is invalid JSON or does not
            represent a JSON object.
        """
        if isinstance(payload, str):
            try:
                payload = json.loads(payload)
            except (ValueError, RecursionError):
                # ValueError covers json.JSONDecodeError; RecursionError
                # covers pathologically nested documents.
                return None
        return payload if isinstance(payload, dict) else None
File diff suppressed because it is too large Load Diff
@@ -1,75 +0,0 @@
import sys
import unittest
from decimal import Decimal
from pathlib import Path
from unittest.mock import MagicMock
AI_ENGINE_ROOT = Path(__file__).resolve().parents[1]
if str(AI_ENGINE_ROOT) not in sys.path:
sys.path.insert(0, str(AI_ENGINE_ROOT))
from core.engines.odds_predictor import OddsPredictorEngine
from features.sidelined_analyzer import SidelinedAnalyzer
class EngineNullSafetyTests(unittest.TestCase):
    """Checks that the engines accept Decimal and non-numeric inputs."""

    def test_odds_predictor_accepts_decimal_inputs_without_crashing(self):
        """Decimal odds must produce strictly positive 1/X/2 market probabilities."""
        decimal_odds = {
            market: Decimal(price)
            for market, price in (
                ("ms_h", "2.10"),
                ("ms_d", "3.25"),
                ("ms_a", "3.60"),
                ("ou25_o", "1.90"),
            )
        }

        prediction = OddsPredictorEngine().predict(odds_data=decimal_odds)

        for field in ("market_home_prob", "market_draw_prob", "market_away_prob"):
            self.assertGreater(getattr(prediction, field), 0.0)

    def test_sidelined_analyzer_handles_non_numeric_fields(self):
        """Non-numeric ``matchesMissed`` / ``average`` values must not break analyze()."""
        # __new__ bypasses SidelinedAnalyzer.__init__, so every attribute the
        # test relies on is assigned by hand below.
        analyzer = SidelinedAnalyzer.__new__(SidelinedAnalyzer)
        settings = {
            "position_weights": {"K": 0.35, "D": 0.20, "O": 0.25, "F": 0.30},
            "max_rating": 10,
            "adaptation_threshold": 10,
            "adaptation_discount": 0.5,
            "goalkeeper_penalty": 0.15,
            "confidence_boost": 10,
            "max_impact": 0.85,
            "key_player_threshold": 3,
            "recent_matches_lookback": 15,
        }
        for attr_name, attr_value in settings.items():
            setattr(analyzer, attr_name, attr_value)
        analyzer._fetch_player_stats = MagicMock(return_value={})

        unparseable_player = {
            "playerId": "p1",
            "playerName": "Player One",
            "positionShort": "O",
            "matchesMissed": "N/A",
            "average": "?",
            "type": "injury",
        }
        numeric_string_player = {
            "playerId": "p2",
            "playerName": "Player Two",
            "positionShort": "K",
            "matchesMissed": "12",
            "average": "6.7",
            "type": "suspension",
        }
        payload = {
            "totalSidelined": 2,
            "players": [unparseable_player, numeric_string_player],
        }

        result = analyzer.analyze(payload)

        self.assertEqual(result.total_sidelined, 2)
        self.assertGreaterEqual(result.impact_score, 0.0)
        self.assertGreaterEqual(len(result.player_details), 2)


if __name__ == "__main__":
    unittest.main()
@@ -8,9 +8,10 @@ AI_ENGINE_ROOT = Path(__file__).resolve().parents[1]
if str(AI_ENGINE_ROOT) not in sys.path:
sys.path.insert(0, str(AI_ENGINE_ROOT))
from models.v20_ensemble import FullMatchPrediction
from schemas.prediction import FullMatchPrediction
from schemas.match_data import MatchData
from models.basketball_v25 import BasketballMatchPrediction
from services.single_match_orchestrator import MatchData, SingleMatchOrchestrator
from services.single_match_orchestrator import SingleMatchOrchestrator
class _CursorContext: