Merge branch 'main' of https://gitea.bilgich.com/fahricansecer/iddaai-be
Deploy Iddaai Backend / build-and-deploy (push) Successful in 54s
Deploy Iddaai Backend / build-and-deploy (push) Successful in 54s
This commit is contained in:
@@ -11,13 +11,27 @@ jobs:
|
||||
- name: Kodu Cek
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Docker Build
|
||||
- name: Docker Build (Backend)
|
||||
run: docker build -t iddaai-be:latest .
|
||||
|
||||
- name: Eski Konteyneri Sil
|
||||
run: docker rm -f iddaai-be || true
|
||||
- name: Docker Build (AI Engine)
|
||||
run: docker build -t iddaai-ai-engine:latest ./ai-engine
|
||||
|
||||
- name: Yeni Versiyonu Baslat
|
||||
- name: Eski Konteynerleri Sil
|
||||
run: |
|
||||
docker rm -f iddaai-be || true
|
||||
docker rm -f iddaai-ai-engine || true
|
||||
|
||||
- name: AI Engine'i Baslat
|
||||
run: |
|
||||
docker run -d \
|
||||
--name iddaai-ai-engine \
|
||||
--restart unless-stopped \
|
||||
--network iddaai_iddaai-network \
|
||||
-e DATABASE_URL='${{ secrets.DATABASE_URL }}' \
|
||||
iddaai-ai-engine:latest
|
||||
|
||||
- name: Backend'i Baslat
|
||||
run: |
|
||||
docker run -d \
|
||||
--name iddaai-be \
|
||||
@@ -29,7 +43,17 @@ jobs:
|
||||
-e REDIS_HOST='${{ secrets.REDIS_HOST }}' \
|
||||
-e REDIS_PORT='${{ secrets.REDIS_PORT }}' \
|
||||
-e REDIS_PASSWORD='${{ secrets.REDIS_PASSWORD }}' \
|
||||
-e AI_ENGINE_URL='${{ secrets.AI_ENGINE_URL }}' \
|
||||
-e AI_ENGINE_URL='http://iddaai-ai-engine:8000' \
|
||||
-e JWT_SECRET='${{ secrets.JWT_SECRET }}' \
|
||||
-e JWT_ACCESS_EXPIRATION='1d' \
|
||||
iddaai-be:latest /bin/sh -c "npx prisma migrate deploy && node dist/src/main.js"
|
||||
|
||||
- name: Saglik Kontrolu
|
||||
run: |
|
||||
sleep 10
|
||||
echo "=== AI Engine logs ==="
|
||||
docker logs --tail 30 iddaai-ai-engine || true
|
||||
echo "=== Backend logs ==="
|
||||
docker logs --tail 30 iddaai-be || true
|
||||
echo "=== AI Engine health ==="
|
||||
docker exec iddaai-ai-engine python -c "import urllib.request; print(urllib.request.urlopen('http://127.0.0.1:8000/health').read().decode())" || echo "AI engine health check failed"
|
||||
@@ -47,6 +47,7 @@ public/uploads/
|
||||
# Large Datasets and ML Models
|
||||
ai-engine/models/*
|
||||
!ai-engine/models/*.py
|
||||
!ai-engine/models/v25/
|
||||
models/*
|
||||
!models/*.py
|
||||
colab_export/
|
||||
|
||||
@@ -1,3 +1,14 @@
|
||||
model_ensemble:
|
||||
xgb_weight: 0.50
|
||||
lgb_weight: 0.50
|
||||
temperature: 1.5
|
||||
default_ms_odds:
|
||||
home: 2.65
|
||||
draw: 3.20
|
||||
away: 2.65
|
||||
elo_staleness_days: 14
|
||||
odds_staleness_hours: 48
|
||||
|
||||
engine_weights:
|
||||
team: 0.30
|
||||
player: 0.25
|
||||
|
||||
@@ -1,16 +1,10 @@
|
||||
# ai-engine/core/engines/__init__.py
|
||||
"""
|
||||
V20 Ensemble Prediction Engines
|
||||
Prediction Engines
|
||||
"""
|
||||
|
||||
from .team_predictor import TeamPredictorEngine, get_team_predictor
|
||||
from .player_predictor import PlayerPredictorEngine, get_player_predictor
|
||||
from .odds_predictor import OddsPredictorEngine, get_odds_predictor
|
||||
from .referee_predictor import RefereePredictorEngine, get_referee_predictor
|
||||
|
||||
__all__ = [
|
||||
"TeamPredictorEngine", "get_team_predictor",
|
||||
"PlayerPredictorEngine", "get_player_predictor",
|
||||
"OddsPredictorEngine", "get_odds_predictor",
|
||||
"RefereePredictorEngine", "get_referee_predictor"
|
||||
]
|
||||
|
||||
@@ -1,237 +0,0 @@
|
||||
"""
|
||||
Odds Predictor Engine - V20 Ensemble Component
|
||||
Uses market odds and Poisson mathematics for predictions.
|
||||
|
||||
Weight: 30% in ensemble
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from features.poisson_engine import get_poisson_engine
|
||||
from features.value_calculator import get_value_calculator
|
||||
|
||||
|
||||
@dataclass
class OddsPrediction:
    """Result container returned by the odds engine.

    Probability fields hold fractions; ``to_dict`` reports them as
    percentages rounded to one decimal place.
    """

    # Market-implied 1X2 probabilities
    market_home_prob: float = 0.33
    market_draw_prob: float = 0.33
    market_away_prob: float = 0.33

    # Expected goals from the Poisson model
    poisson_home_xg: float = 1.3
    poisson_away_xg: float = 1.1

    # Total-goals "over" probabilities
    over_15_prob: float = 0.75
    over_25_prob: float = 0.55
    over_35_prob: float = 0.30

    # Both teams to score
    btts_yes_prob: float = 0.50

    # Three most probable scorelines, best first
    most_likely_score: str = "1-1"
    second_likely_score: str = "1-0"
    third_likely_score: str = "2-1"

    # Detected value bets; None is swapped for a fresh list in __post_init__
    value_bets: Optional[list] = None

    confidence: float = 0.0

    def __post_init__(self):
        # A None default keeps instances from sharing one mutable list.
        self.value_bets = [] if self.value_bets is None else self.value_bets

    def to_dict(self) -> dict:
        """Return a plain dict with probabilities expressed as percentages."""

        def as_percent(fraction):
            return round(fraction * 100, 1)

        return dict(
            market_home_prob=as_percent(self.market_home_prob),
            market_draw_prob=as_percent(self.market_draw_prob),
            market_away_prob=as_percent(self.market_away_prob),
            poisson_home_xg=round(self.poisson_home_xg, 2),
            poisson_away_xg=round(self.poisson_away_xg, 2),
            over_15_prob=as_percent(self.over_15_prob),
            over_25_prob=as_percent(self.over_25_prob),
            over_35_prob=as_percent(self.over_35_prob),
            btts_yes_prob=as_percent(self.btts_yes_prob),
            most_likely_score=self.most_likely_score,
            second_likely_score=self.second_likely_score,
            third_likely_score=self.third_likely_score,
            value_bets=self.value_bets,
            confidence=round(self.confidence, 1),
        )
|
||||
|
||||
|
||||
class OddsPredictorEngine:
    """
    Odds-based prediction engine.

    Uses:
    - Market odds to extract implied probabilities
    - Poisson distribution for mathematical xG
    - Value calculator for EV+ opportunities
    """

    def __init__(self):
        self.poisson_engine = get_poisson_engine()
        try:
            self.value_calc = get_value_calculator()
        except Exception:
            # Best effort: predict() below never touches value_calc, so a
            # failing calculator must not prevent the engine from starting.
            self.value_calc = None  # type: ignore[assignment]
        # Fallback 1X2 odds used when the caller supplies none (or junk).
        self.default_ms_h = 2.65
        self.default_ms_d = 3.20
        self.default_ms_a = 2.65
        print("✅ OddsPredictorEngine initialized")

    def _odds_to_prob(self, odds: float) -> float:
        """Convert decimal odds to probability.

        Returns 0.0 for non-numeric input and for odds <= 1.0.
        """
        try:
            odds = float(odds)
        except (TypeError, ValueError):
            return 0.0
        if odds <= 1.0:
            return 0.0
        return 1.0 / odds

    def _usable_odds(self, value, default: float) -> float:
        """Return ``value`` as a float when it is valid decimal odds, else ``default``.

        "Valid" means ``_odds_to_prob`` yields a non-zero probability.
        Without this fallback a present-but-invalid entry (None, "", 0, 1.0)
        becomes a market probability of 0 and every model probability then
        looks like a huge edge.
        """
        if self._odds_to_prob(value) > 0.0:
            return float(value)
        return default

    @staticmethod
    def _match_result_value_bets(model_home: float, model_away: float,
                                 market_home: float, market_away: float) -> list:
        """Return the 1X2 value bet (if any) as a list with zero or one entry.

        A bet is only considered when model and market disagree on the home
        win by more than 10 percentage points. If the model is higher, the
        home side ("MS 1") is flagged. Otherwise the away side ("MS 2") is
        flagged, but only when the model really rates the away win above
        the market; a lower home probability can equally come from a
        higher draw probability, which must not yield a negative-edge bet.
        """
        if abs(model_home - market_home) <= 0.10:
            return []
        if model_home > market_home:
            return [{
                "market": "MS 1",
                "edge": round((model_home - market_home) * 100, 1),
                "confidence": "medium"
            }]
        away_edge = model_away - market_away
        if away_edge <= 0:
            return []
        return [{
            "market": "MS 2",
            "edge": round(away_edge * 100, 1),
            "confidence": "medium"
        }]

    def predict(self,
                odds_data: Dict[str, float],
                home_goals_avg: float = 1.5,
                home_conceded_avg: float = 1.2,
                away_goals_avg: float = 1.2,
                away_conceded_avg: float = 1.4) -> "OddsPrediction":
        """
        Generate odds-based prediction.

        Args:
            odds_data: Dict with keys like 'ms_h', 'ms_d', 'ms_a', 'ou25_o', 'btts_y'.
                Missing or invalid 1X2 / over-2.5 odds fall back to defaults.
            home_goals_avg: Home team's average goals scored
            home_conceded_avg: Home team's average goals conceded
            away_goals_avg: Away team's average goals scored
            away_conceded_avg: Away team's average goals conceded

        Returns:
            OddsPrediction with market and Poisson analysis
        """
        # 1. Market probabilities, normalised so the three outcomes sum to 1
        #    (this removes the bookmaker margin).
        ms_h = self._usable_odds(odds_data.get("ms_h"), self.default_ms_h)
        ms_d = self._usable_odds(odds_data.get("ms_d"), self.default_ms_d)
        ms_a = self._usable_odds(odds_data.get("ms_a"), self.default_ms_a)
        raw_probs = [self._odds_to_prob(o) for o in (ms_h, ms_d, ms_a)]
        total = sum(raw_probs) or 1  # guard against division by zero

        market_home = raw_probs[0] / total
        market_draw = raw_probs[1] / total
        market_away = raw_probs[2] / total

        # 2. Poisson prediction
        poisson_pred = self.poisson_engine.predict(
            home_goals_avg, home_conceded_avg,
            away_goals_avg, away_conceded_avg
        )

        # 3. Three most likely scores, with fixed fallbacks when fewer exist
        likely_scores = poisson_pred.most_likely_scores[:3] if poisson_pred.most_likely_scores else []
        score_1 = likely_scores[0]["score"] if len(likely_scores) > 0 else "1-1"
        score_2 = likely_scores[1]["score"] if len(likely_scores) > 1 else "1-0"
        score_3 = likely_scores[2]["score"] if len(likely_scores) > 2 else "2-1"

        # 4. Value bet detection: 1X2 first, then over/under 2.5
        value_bets = self._match_result_value_bets(
            poisson_pred.home_win_prob, poisson_pred.away_win_prob,
            market_home, market_away
        )

        # The over-2.5 market probability is the raw implied probability;
        # no "under" price is read here, so the margin is not removed.
        ou25_o = self._usable_odds(odds_data.get("ou25_o"), 1.9)
        market_over25 = self._odds_to_prob(ou25_o)
        if abs(poisson_pred.over_25_prob - market_over25) > 0.08:
            pick = "2.5 Üst" if poisson_pred.over_25_prob > market_over25 else "2.5 Alt"
            edge = abs(poisson_pred.over_25_prob - market_over25) * 100
            value_bets.append({
                "market": pick,
                "edge": round(edge, 1),
                "confidence": "high" if edge > 10 else "medium"
            })

        # 5. Confidence: base 50, up to +40 for home-win agreement between
        #    model and market, +5 per detected value bet, capped at 99.9.
        agreement = 1.0 - abs(poisson_pred.home_win_prob - market_home)
        confidence = 50.0 + (agreement * 40) + (len(value_bets) * 5)

        return OddsPrediction(
            market_home_prob=market_home,
            market_draw_prob=market_draw,
            market_away_prob=market_away,
            poisson_home_xg=poisson_pred.home_xg,
            poisson_away_xg=poisson_pred.away_xg,
            over_15_prob=poisson_pred.over_15_prob,
            over_25_prob=poisson_pred.over_25_prob,
            over_35_prob=poisson_pred.over_35_prob,
            btts_yes_prob=poisson_pred.btts_yes_prob,
            most_likely_score=score_1,
            second_likely_score=score_2,
            third_likely_score=score_3,
            value_bets=value_bets,
            confidence=min(99.9, confidence)
        )
|
||||
|
||||
|
||||
# Module-wide engine instance, created lazily on first request
_engine: Optional["OddsPredictorEngine"] = None


def get_odds_predictor() -> "OddsPredictorEngine":
    """Return the shared OddsPredictorEngine, constructing it on first call."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = OddsPredictorEngine()
    return _engine
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: run one prediction with sample odds and print it.
    predictor = get_odds_predictor()

    print("\n🧪 Odds Predictor Engine Test")
    print("=" * 50)

    sample_odds = {
        "ms_h": 1.85,
        "ms_d": 3.40,
        "ms_a": 4.20,
        "ou25_o": 1.90
    }
    result = predictor.predict(
        odds_data=sample_odds,
        home_goals_avg=1.8,
        home_conceded_avg=1.0,
        away_goals_avg=1.2,
        away_conceded_avg=1.5
    )

    print("\n📊 Prediction:")
    for key, value in result.to_dict().items():
        print(f" {key}: {value}")
|
||||
@@ -24,33 +24,30 @@ class PlayerPrediction:
|
||||
extract_training_data.py so that inference values match the
|
||||
distribution the model was trained on (~3-36 range).
|
||||
"""
|
||||
home_squad_quality: float = 12.0 # training-scale composite (~3-36)
|
||||
home_squad_quality: float = 12.0
|
||||
away_squad_quality: float = 12.0
|
||||
squad_diff: float = 0.0 # home - away (training scale)
|
||||
squad_diff: float = 0.0
|
||||
home_key_players: int = 0
|
||||
away_key_players: int = 0
|
||||
home_missing_impact: float = 0.0 # 0-1, how much weaker due to missing players
|
||||
home_missing_impact: float = 0.0
|
||||
away_missing_impact: float = 0.0
|
||||
home_goals_form: int = 0 # Goals in last 5 matches
|
||||
home_goals_form: int = 0
|
||||
away_goals_form: int = 0
|
||||
home_lineup_goals_per90: float = 0.0
|
||||
away_lineup_goals_per90: float = 0.0
|
||||
home_lineup_assists_per90: float = 0.0
|
||||
away_lineup_assists_per90: float = 0.0
|
||||
home_squad_continuity: float = 0.5
|
||||
away_squad_continuity: float = 0.5
|
||||
home_top_scorer_form: int = 0
|
||||
away_top_scorer_form: int = 0
|
||||
home_avg_player_exp: float = 0.0
|
||||
away_avg_player_exp: float = 0.0
|
||||
home_goals_diversity: float = 0.0
|
||||
away_goals_diversity: float = 0.0
|
||||
lineup_available: bool = False
|
||||
confidence: float = 0.0
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"home_squad_quality": round(self.home_squad_quality, 1),
|
||||
"away_squad_quality": round(self.away_squad_quality, 1),
|
||||
"squad_diff": round(self.squad_diff, 1),
|
||||
"home_key_players": self.home_key_players,
|
||||
"away_key_players": self.away_key_players,
|
||||
"home_missing_impact": round(self.home_missing_impact, 2),
|
||||
"away_missing_impact": round(self.away_missing_impact, 2),
|
||||
"home_goals_form": self.home_goals_form,
|
||||
"away_goals_form": self.away_goals_form,
|
||||
"lineup_available": self.lineup_available,
|
||||
"confidence": round(self.confidence, 1)
|
||||
}
|
||||
|
||||
|
||||
class PlayerPredictorEngine:
|
||||
"""
|
||||
@@ -90,8 +87,9 @@ class PlayerPredictorEngine:
|
||||
"""
|
||||
|
||||
# Get squad features
|
||||
home_analysis = None
|
||||
away_analysis = None
|
||||
if home_lineup and away_lineup:
|
||||
# Use provided lineups (for live matches)
|
||||
home_analysis = self.squad_engine.analyze_squad_from_list(
|
||||
home_lineup, home_team_id
|
||||
)
|
||||
@@ -99,7 +97,6 @@ class PlayerPredictorEngine:
|
||||
away_lineup, away_team_id
|
||||
)
|
||||
lineup_available = True
|
||||
# Build features dict from analysis objects
|
||||
features = {
|
||||
"home_starting_11": home_analysis.starting_count or 11,
|
||||
"home_goals_last_5": home_analysis.total_goals_last_5,
|
||||
@@ -113,7 +110,6 @@ class PlayerPredictorEngine:
|
||||
"away_forwards": away_analysis.forward_count or 2,
|
||||
}
|
||||
elif match_id:
|
||||
# Try to get from database
|
||||
try:
|
||||
features = self.squad_engine.get_features(
|
||||
match_id, home_team_id, away_team_id
|
||||
@@ -133,49 +129,27 @@ class PlayerPredictorEngine:
|
||||
)
|
||||
lineup_available = False
|
||||
|
||||
# Extract features
|
||||
home_goals = int(features.get("home_goals_last_5", 0))
|
||||
away_goals = int(features.get("away_goals_last_5", 0))
|
||||
home_key = int(features.get("home_key_players", 0))
|
||||
away_key = int(features.get("away_key_players", 0))
|
||||
home_assists = features.get("home_assists_last_5", 0)
|
||||
away_assists = features.get("away_assists_last_5", 0)
|
||||
home_starting = features.get("home_starting_11", 11)
|
||||
away_starting = features.get("away_starting_11", 11)
|
||||
home_fwd = features.get("home_forwards", 2)
|
||||
away_fwd = features.get("away_forwards", 2)
|
||||
|
||||
# Calculate squad quality — MUST match extract_training_data.py formula
|
||||
# Formula: starting_count * 0.3 + goals * 2.0 + assists * 1.0
|
||||
# + key_players * 3.0 + fwd_count * 1.5
|
||||
# Typical range: ~3 – 36 (model trained on this distribution)
|
||||
home_quality = (
|
||||
home_starting * 0.3 +
|
||||
home_goals * 2.0 +
|
||||
home_assists * 1.0 +
|
||||
home_key * 3.0 +
|
||||
home_fwd * 1.5
|
||||
)
|
||||
away_quality = (
|
||||
away_starting * 0.3 +
|
||||
away_goals * 2.0 +
|
||||
away_assists * 1.0 +
|
||||
away_key * 3.0 +
|
||||
away_fwd * 1.5
|
||||
)
|
||||
|
||||
# Squad difference
|
||||
# Squad quality — matches V25 extract_training_data.py:579
|
||||
home_quality = home_starting * 0.3 + home_key * 3.0 + home_fwd * 1.5
|
||||
away_quality = away_starting * 0.3 + away_key * 3.0 + away_fwd * 1.5
|
||||
squad_diff = home_quality - away_quality
|
||||
|
||||
# Missing player impact
|
||||
# Priority: sidelined data (position-weighted) > lineup count (basic)
|
||||
if sidelined_data:
|
||||
home_impact, away_impact = self.sidelined_analyzer.analyze_match(sidelined_data)
|
||||
home_missing = min(1.0, max(0.0, home_impact.impact_score))
|
||||
away_missing = min(1.0, max(0.0, away_impact.impact_score))
|
||||
sidelined_available = True
|
||||
else:
|
||||
# Fallback: basic lineup count method
|
||||
expected_xi = 11
|
||||
actual_home_xi = features.get("home_starting_11", 11)
|
||||
actual_away_xi = features.get("away_starting_11", 11)
|
||||
@@ -183,7 +157,13 @@ class PlayerPredictorEngine:
|
||||
away_missing = (expected_xi - actual_away_xi) / expected_xi if actual_away_xi < expected_xi else 0
|
||||
sidelined_available = False
|
||||
|
||||
# Confidence: more data sources = higher confidence
|
||||
# Player-level features (matches extract_training_data.py:594-650)
|
||||
player_feats = self._compute_player_level_features(
|
||||
home_lineup or [], away_lineup or [],
|
||||
home_team_id, away_team_id,
|
||||
home_analysis, away_analysis,
|
||||
)
|
||||
|
||||
confidence = 70.0 if lineup_available else 35.0
|
||||
if home_goals + away_goals > 10:
|
||||
confidence += 15
|
||||
@@ -202,10 +182,138 @@ class PlayerPredictorEngine:
|
||||
away_missing_impact=away_missing,
|
||||
home_goals_form=home_goals,
|
||||
away_goals_form=away_goals,
|
||||
home_lineup_goals_per90=player_feats['home_lineup_goals_per90'],
|
||||
away_lineup_goals_per90=player_feats['away_lineup_goals_per90'],
|
||||
home_lineup_assists_per90=player_feats['home_lineup_assists_per90'],
|
||||
away_lineup_assists_per90=player_feats['away_lineup_assists_per90'],
|
||||
home_squad_continuity=player_feats['home_squad_continuity'],
|
||||
away_squad_continuity=player_feats['away_squad_continuity'],
|
||||
home_top_scorer_form=player_feats['home_top_scorer_form'],
|
||||
away_top_scorer_form=player_feats['away_top_scorer_form'],
|
||||
home_avg_player_exp=player_feats['home_avg_player_exp'],
|
||||
away_avg_player_exp=player_feats['away_avg_player_exp'],
|
||||
home_goals_diversity=player_feats['home_goals_diversity'],
|
||||
away_goals_diversity=player_feats['away_goals_diversity'],
|
||||
lineup_available=lineup_available,
|
||||
confidence=max(5.0, confidence)
|
||||
)
|
||||
|
||||
def _compute_player_level_features(
    self,
    home_lineup: List[str],
    away_lineup: List[str],
    home_team_id: str,
    away_team_id: str,
    home_analysis,
    away_analysis,
) -> Dict[str, float]:
    """Compute per-lineup player features for both teams from the database.

    For each side with a non-empty lineup this produces:
    - ``lineup_goals_per90`` / ``lineup_assists_per90``: sum over the lineup
      of each player's goals (resp. assists) divided by his ``starts`` count
    - ``squad_continuity``: share of the lineup found among the team's most
      recent 11 starter rows from finished ('FT') matches (0.5 if none)
    - ``top_scorer_form``: goals of the lineup's best scorer within the
      matches returned by the "last 5 starts" subquery
    - ``avg_player_exp``: total ``starts`` divided by lineup size
    - ``goals_diversity``: share of lineup players with at least one goal

    Args:
        home_lineup: Player ids of the home lineup (may be empty).
        away_lineup: Player ids of the away lineup (may be empty).
        home_team_id: Home team id, used for the continuity query.
        away_team_id: Away team id, used for the continuity query.
        home_analysis: Unused here; kept for interface compatibility.
        away_analysis: Unused here; kept for interface compatibility.

    Returns:
        Dict with the six features above prefixed by ``home_`` / ``away_``.
        Neutral defaults are returned for a side without a lineup, and for
        both sides when no connection is available or any query fails.
    """
    defaults = {
        'home_lineup_goals_per90': 0.0, 'away_lineup_goals_per90': 0.0,
        'home_lineup_assists_per90': 0.0, 'away_lineup_assists_per90': 0.0,
        'home_squad_continuity': 0.5, 'away_squad_continuity': 0.5,
        'home_top_scorer_form': 0, 'away_top_scorer_form': 0,
        'home_avg_player_exp': 0.0, 'away_avg_player_exp': 0.0,
        'home_goals_diversity': 0.0, 'away_goals_diversity': 0.0,
    }
    feature_names = (
        'lineup_goals_per90', 'lineup_assists_per90',
        'squad_continuity', 'top_scorer_form',
        'avg_player_exp', 'goals_diversity',
    )
    conn = self.squad_engine.get_conn()
    if conn is None:
        return defaults

    # A missing driver is not a database error: bail out before touching
    # the connection so no rollback is issued for it.
    try:
        from psycopg2.extras import RealDictCursor
    except ImportError as e:
        print(f"[PlayerPredictor] Player-level features failed: {e}")
        return defaults

    try:
        result = {}
        for prefix, lineup, team_id in (
            ('home', home_lineup, home_team_id),
            ('away', away_lineup, away_team_id),
        ):
            if not lineup:
                for name in feature_names:
                    result[f'{prefix}_{name}'] = defaults[f'{prefix}_{name}']
                continue

            g90, a90, total_exp = 0.0, 0.0, 0
            best_scorer_total, best_scorer_id = 0, None
            scorers_in_lineup = 0
            n_st = len(lineup) or 1

            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                for pid in lineup:
                    # NOTE(review): COUNT(*) counts joined rows, so a start
                    # with several events is counted several times and
                    # "starts" is inflated. Left as is because the query is
                    # said to mirror the training extraction; confirm there
                    # before switching to COUNT(DISTINCT mpp.match_id).
                    # The assists subquery counts every event where the
                    # player is the assister, not only those in his starts.
                    cur.execute("""
                        SELECT
                        COUNT(*) as starts,
                        COALESCE(SUM(CASE WHEN e.event_type = 'goal'
                        AND (e.event_subtype IS NULL OR e.event_subtype NOT ILIKE '%%penaltı kaçırma%%')
                        THEN 1 ELSE 0 END), 0) as goals,
                        COALESCE((SELECT COUNT(*) FROM match_player_events
                        WHERE assist_player_id = %s), 0) as assists
                        FROM match_player_participation mpp
                        LEFT JOIN match_player_events e
                        ON e.match_id = mpp.match_id AND e.player_id = mpp.player_id
                        WHERE mpp.player_id = %s AND mpp.is_starting = true
                    """, (pid, pid))
                    row = cur.fetchone()
                    if not row or not row['starts']:
                        # Player has never started: contributes nothing.
                        continue
                    starts = row['starts']
                    goals = row['goals'] or 0
                    assists = row['assists'] or 0
                    g90 += goals / starts
                    a90 += assists / starts
                    total_exp += starts
                    if goals > 0:
                        scorers_in_lineup += 1
                    if goals > best_scorer_total:
                        best_scorer_total = goals
                        best_scorer_id = pid

                # Top scorer recent form (goals in last 5 starts)
                top_scorer_form = 0
                if best_scorer_id is not None:
                    cur.execute("""
                        SELECT COUNT(*) as goals
                        FROM match_player_events mpe
                        WHERE mpe.player_id = %s AND mpe.event_type = 'goal'
                        AND mpe.match_id IN (
                        SELECT match_id FROM match_player_participation
                        WHERE player_id = %s AND is_starting = true
                        ORDER BY match_id DESC LIMIT 5
                        )
                    """, (best_scorer_id, best_scorer_id))
                    tsf_row = cur.fetchone()
                    if tsf_row:
                        top_scorer_form = tsf_row['goals'] or 0

                # Squad continuity (overlap with previous match lineup)
                squad_continuity = 0.5
                cur.execute("""
                    SELECT mpp.player_id
                    FROM match_player_participation mpp
                    JOIN matches m ON mpp.match_id = m.id
                    WHERE mpp.team_id = %s AND mpp.is_starting = true
                    AND m.status = 'FT'
                    ORDER BY m.mst_utc DESC
                    LIMIT 11
                """, (team_id,))
                prev_starters = {r['player_id'] for r in cur.fetchall()}
                if prev_starters:
                    overlap = len(set(lineup) & prev_starters)
                    squad_continuity = overlap / n_st

            result[f'{prefix}_lineup_goals_per90'] = round(g90, 3)
            result[f'{prefix}_lineup_assists_per90'] = round(a90, 3)
            result[f'{prefix}_squad_continuity'] = round(squad_continuity, 3)
            result[f'{prefix}_top_scorer_form'] = top_scorer_form
            result[f'{prefix}_avg_player_exp'] = round(total_exp / n_st, 1)
            result[f'{prefix}_goals_diversity'] = round(scorers_in_lineup / n_st, 3)

        return result
    except Exception as e:
        print(f"[PlayerPredictor] Player-level features failed: {e}")
        # After a failed statement PostgreSQL rejects further commands on
        # the connection until the transaction is rolled back. The
        # connection comes from squad_engine and is presumably reused by
        # later queries, so reset it here.
        try:
            conn.rollback()
        except Exception as rollback_error:
            print(f"[PlayerPredictor] Rollback failed: {rollback_error}")
        return defaults
|
||||
|
||||
def get_1x2_modifier(self, prediction: PlayerPrediction) -> Dict[str, float]:
|
||||
"""
|
||||
Calculate 1X2 probability modifiers based on squad analysis.
|
||||
|
||||
@@ -1,188 +0,0 @@
|
||||
"""
|
||||
Referee Predictor Engine - V20 Ensemble Component
|
||||
Analyzes referee patterns for cards, goals, and home bias.
|
||||
|
||||
Weight: 15% in ensemble
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from features.referee_engine import get_referee_engine
|
||||
|
||||
|
||||
@dataclass
class RefereePrediction:
    """Result container returned by the referee engine."""

    referee_name: str = ""
    matches_officiated: int = 0

    # Cards
    avg_yellow_cards: float = 4.0
    avg_red_cards: float = 0.2
    is_card_heavy: bool = False  # True when card output is above average

    # Goals
    avg_goals_per_match: float = 2.5
    over_25_rate: float = 0.50
    is_high_scoring: bool = False  # True when goals are above average

    # Home side tendency
    home_win_rate: float = 0.45
    home_bias: float = 0.0  # -1 to +1; positive values favour the home side

    # Penalties
    penalty_rate: float = 0.15

    confidence: float = 0.0

    def to_dict(self) -> dict:
        """Return a plain dict; rate fields are converted to percentages."""

        def as_percent(fraction):
            return round(fraction * 100, 1)

        return dict(
            referee_name=self.referee_name,
            matches_officiated=self.matches_officiated,
            avg_yellow_cards=round(self.avg_yellow_cards, 1),
            avg_red_cards=round(self.avg_red_cards, 2),
            is_card_heavy=self.is_card_heavy,
            avg_goals_per_match=round(self.avg_goals_per_match, 2),
            over_25_rate=as_percent(self.over_25_rate),
            is_high_scoring=self.is_high_scoring,
            home_win_rate=as_percent(self.home_win_rate),
            home_bias=round(self.home_bias, 2),
            penalty_rate=as_percent(self.penalty_rate),
            confidence=round(self.confidence, 1),
        )
|
||||
|
||||
|
||||
class RefereePredictorEngine:
    """
    Referee-based prediction engine.

    Analyzes:
    - Card tendency (average yellow/red cards)
    - Goal tendency (goals per match, over-2.5 rate)
    - Home bias (rate of decisions in favour of the home side)
    - Penalty tendency (rate of penalties awarded)
    """

    # League average benchmarks
    LEAGUE_AVG_GOALS = 2.65
    LEAGUE_AVG_YELLOW = 4.0
    LEAGUE_HOME_WIN_RATE = 0.45

    def __init__(self):
        self.referee_engine = get_referee_engine()
        print("✅ RefereePredictorEngine initialized")

    @staticmethod
    def _feature(features, key, default):
        """Return ``features[key]``, or ``default`` when missing or None.

        ``dict.get(key, default)`` still hands back None when the key exists
        with a None value, which would crash the arithmetic below.
        """
        value = features.get(key)
        return default if value is None else value

    def predict(self,
                match_id: Optional[str] = None,
                referee_name: Optional[str] = None,
                league_id: Optional[str] = None) -> "RefereePrediction":
        """
        Generate referee-based prediction.

        Args:
            match_id: Match ID to find referee
            referee_name: Or provide referee name directly
            league_id: League ID to scope stats (prevents name collisions)

        Returns:
            RefereePrediction with referee analysis. Confidence is 10 when
            neither a match nor a name is given, 20 when the referee has
            fewer than 5 matches, otherwise 30 + 2 per match capped at 90.
        """
        # Get referee features
        if match_id:
            features = self.referee_engine.get_features(match_id, league_id=league_id or "")
            # Live flows may already have referee_name while match_officials table is sparse.
            # Prefer the richer profile if direct-name lookup has more history.
            if referee_name:
                name_features = self.referee_engine.get_features_by_name(referee_name, league_id=league_id or "")
                if self._feature(name_features, "referee_matches", 0) > self._feature(features, "referee_matches", 0):
                    features = name_features
        elif referee_name:
            features = self.referee_engine.get_features_by_name(referee_name, league_id=league_id or "")
        else:
            # Nothing to look up: return the default profile
            return RefereePrediction(confidence=10.0)

        ref_name = str(self._feature(features, "referee_name", "Unknown"))
        matches = int(self._feature(features, "referee_matches", 0))

        if matches < 5:
            # Not enough data
            return RefereePrediction(
                referee_name=ref_name,
                matches_officiated=matches,
                confidence=20.0
            )

        # Extract features, treating None like a missing value
        avg_yellow = self._feature(features, "referee_avg_yellow", 4.0)
        avg_red = self._feature(features, "referee_avg_red", 0.2)
        avg_goals = self._feature(features, "referee_avg_goals", 2.5)
        over25_rate = self._feature(features, "referee_over25_rate", 0.5)
        home_win_rate = self._feature(features, "referee_home_win_rate", self.LEAGUE_HOME_WIN_RATE)
        home_bias = self._feature(features, "referee_home_bias", 0.0)
        penalty_rate = self._feature(features, "referee_penalty_rate", 0.15)

        # Determine tendencies; a red card weighs as four yellows
        is_card_heavy = (avg_yellow + avg_red * 4) > (self.LEAGUE_AVG_YELLOW + 1)
        is_high_scoring = avg_goals > self.LEAGUE_AVG_GOALS

        # Confidence based on matches officiated
        confidence = min(90.0, 30.0 + matches * 2)

        return RefereePrediction(
            referee_name=ref_name,
            matches_officiated=matches,
            avg_yellow_cards=avg_yellow,
            avg_red_cards=avg_red,
            is_card_heavy=is_card_heavy,
            avg_goals_per_match=avg_goals,
            over_25_rate=over25_rate,
            is_high_scoring=is_high_scoring,
            home_win_rate=home_win_rate,
            home_bias=home_bias,
            penalty_rate=penalty_rate,
            confidence=confidence
        )

    def get_modifiers(self, prediction: "RefereePrediction") -> Dict[str, float]:
        """
        Get modifiers to apply to other predictions based on referee profile.

        Each modifier is a multiplier centred on 1.0.
        """
        return {
            # Home team gets slight boost if referee has home bias
            "home_modifier": 1.0 + (prediction.home_bias * 0.05),
            # O/U modifier
            "over_25_modifier": 1.0 + (prediction.avg_goals_per_match - self.LEAGUE_AVG_GOALS) * 0.1,
            # Card modifier for card markets
            "cards_modifier": 1.0 + (prediction.avg_yellow_cards - self.LEAGUE_AVG_YELLOW) * 0.05
        }
|
||||
|
||||
|
||||
# Module-wide engine instance, created lazily on first request
_engine: Optional["RefereePredictorEngine"] = None


def get_referee_predictor() -> "RefereePredictorEngine":
    """Return the shared RefereePredictorEngine, constructing it on first call."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = RefereePredictorEngine()
    return _engine
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: look up one referee by name and print the profile.
    predictor = get_referee_predictor()

    print("\n🧪 Referee Predictor Engine Test")
    print("=" * 50)

    result = predictor.predict(referee_name="Cüneyt Çakır")

    print("\n📊 Prediction:")
    for key, value in result.to_dict().items():
        print(f" {key}: {value}")
|
||||
@@ -1,286 +0,0 @@
|
||||
"""
|
||||
Team Predictor Engine - V20 Ensemble Component
|
||||
Combines ELO ratings, form stats, H2H records and team statistics.
|
||||
|
||||
Weight: 30% in ensemble
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, Optional, Tuple, Any
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
# Add parent to path
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from features.elo_system import get_elo_system
|
||||
from features.h2h_engine import get_h2h_engine
|
||||
from features.momentum_engine import get_momentum_engine, MomentumData
|
||||
from features.team_stats_engine import get_team_stats_engine
|
||||
|
||||
|
||||
@dataclass
class TeamPrediction:
    """Result container returned by the team engine."""

    # 1X2 probabilities as fractions
    home_win_prob: float = 0.33
    draw_prob: float = 0.33
    away_win_prob: float = 0.33

    # Expected goals
    home_xg: float = 1.3
    away_xg: float = 1.1

    form_advantage: float = 0.0  # -1 to +1; positive values favour the home side
    h2h_advantage: float = 0.0  # -1 to +1
    elo_diff: float = 0.0
    confidence: float = 0.0

    # Extra payload; not included in to_dict()
    raw_features: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return a plain dict; win/draw probabilities become percentages."""

        def as_percent(fraction):
            return round(fraction * 100, 1)

        return dict(
            home_win_prob=as_percent(self.home_win_prob),
            draw_prob=as_percent(self.draw_prob),
            away_win_prob=as_percent(self.away_win_prob),
            home_xg=round(self.home_xg, 2),
            away_xg=round(self.away_xg, 2),
            form_advantage=round(self.form_advantage, 2),
            h2h_advantage=round(self.h2h_advantage, 2),
            elo_diff=round(self.elo_diff, 0),
            confidence=round(self.confidence, 1),
        )
|
||||
|
||||
|
||||
class TeamPredictorEngine:
    """
    Team-based prediction engine.

    Uses:
    - ELO Rating System (venue-adjusted, league-weighted)
    - H2H Engine (head-to-head history)
    - Momentum Engine (recent form)
    - Team Stats Engine (possession, shots, corners)
    """

    def __init__(self):
        self.elo_system = get_elo_system()
        self.h2h_engine = get_h2h_engine()
        self.momentum_engine = get_momentum_engine()
        self.team_stats_engine = get_team_stats_engine()

        print("✅ TeamPredictorEngine initialized")

    @staticmethod
    def _xg_penalty(momentum) -> float:
        """Return the xG deduction for a team that underperforms its xG.

        Only applied when ``xg_underperformance`` exceeds 0.2; the deduction is
        half of that value, capped at 0.5. A missing attribute counts as 0.
        """
        under = getattr(momentum, "xg_underperformance", 0.0) or 0.0
        return min(0.5, under * 0.5) if under > 0.2 else 0.0

    def predict(self,
                home_team_id: str,
                away_team_id: str,
                match_date_ms: int,
                home_team_name: str = "",
                away_team_name: str = "") -> TeamPrediction:
        """
        Generate team-based prediction.

        Args:
            home_team_id: Home team ID
            away_team_id: Away team ID
            match_date_ms: Match date in milliseconds
            home_team_name: Home team name (accepted for interface
                compatibility; not read by this method)
            away_team_name: Away team name (accepted for interface
                compatibility; not read by this method)

        Returns:
            TeamPrediction with 1X2 probabilities and xG
        """
        # 1. Get ELO predictions
        elo_pred = self.elo_system.predict_match(home_team_id, away_team_id)
        elo_features = self.elo_system.get_match_features(home_team_id, away_team_id)

        # 2. Get H2H features (best effort: fall back to neutral values, but
        #    report the failure the same way the momentum step does)
        try:
            h2h_features = self.h2h_engine.get_features(
                home_team_id, away_team_id, match_date_ms
            )
        except Exception as e:
            print(f"⚠️ H2HEngine error: {e}")
            h2h_features = {
                "h2h_home_win_rate": 0.5,
                "h2h_away_win_rate": 0.5,
                "h2h_avg_goals": 2.5,
                "h2h_btts_rate": 0.5,
            }

        # 3. Get Momentum/Form features
        try:
            # form_score is momentum_score (-1 to 1) mapped onto 0-1
            home_mom_data = self.momentum_engine.calculate_momentum(home_team_id, match_date_ms)
            away_mom_data = self.momentum_engine.calculate_momentum(away_team_id, match_date_ms)

            home_form_score = (home_mom_data.momentum_score + 1) / 2
            away_form_score = (away_mom_data.momentum_score + 1) / 2
        except Exception as e:
            print(f"⚠️ MomentumEngine error: {e}")
            home_mom_data = MomentumData()
            away_mom_data = MomentumData()
            home_form_score = 0.5
            away_form_score = 0.5

        # 4. Get Team Stats
        home_stats = self.team_stats_engine.get_features(home_team_id, match_date_ms)
        away_stats = self.team_stats_engine.get_features(away_team_id, match_date_ms)

        # 5. Combine predictions
        # ELO-based 1X2 (60% weight)
        elo_home = elo_pred.get("home_win_prob", 0.33)
        elo_draw = elo_pred.get("draw_prob", 0.33)
        elo_away = elo_pred.get("away_win_prob", 0.33)

        # H2H (20% weight)
        h2h_home_rate = h2h_features.get("h2h_home_win_rate", 0.5)
        h2h_away_rate = h2h_features.get("h2h_away_win_rate", 0.5)

        # Form (20% weight)
        form_diff = home_form_score - away_form_score  # -1 to +1

        # Weighted combination
        final_home = elo_home * 0.6 + h2h_home_rate * 0.2 + (0.5 + form_diff * 0.3) * 0.2
        final_away = elo_away * 0.6 + h2h_away_rate * 0.2 + (0.5 - form_diff * 0.3) * 0.2
        # The draw is the remainder. It can only dip below zero when the inputs
        # are inconsistent (e.g. H2H rates summing to more than 1); clamp so a
        # negative probability is never emitted, then renormalise.
        final_draw = max(0.0, 1.0 - final_home - final_away)

        # Normalize
        total = final_home + final_draw + final_away
        if total > 0:
            final_home /= total
            final_draw /= total
            final_away /= total

        # Calculate xG based on stats and form (conservative base)
        home_conversion = home_stats.get("shot_conversion_rate", 0.1)
        away_conversion = away_stats.get("shot_conversion_rate", 0.1)

        base_home_xg = 1.35 + (home_conversion * 3.0)
        base_away_xg = 1.10 + (away_conversion * 2.5)

        # Defense weakness factor.
        # NOTE(review): the value read here is each team's own "shot_accuracy",
        # used as a proxy for the opponent-facing defensive weakness.
        away_def_weakness = away_stats.get("shot_accuracy", 0.35)
        home_def_weakness = home_stats.get("shot_accuracy", 0.35)

        home_xg = base_home_xg * (1 + form_diff * 0.15) * (0.8 + away_def_weakness * 0.6)
        away_xg = base_away_xg * (1 - form_diff * 0.15) * (0.8 + home_def_weakness * 0.6)

        # Apply xG underperformance penalty directly to the calculated xG
        home_xg -= self._xg_penalty(home_mom_data)
        away_xg -= self._xg_penalty(away_mom_data)

        # H2H adjustment (more conservative)
        h2h_avg_goals = h2h_features.get("h2h_avg_goals", 2.5)
        if h2h_avg_goals > 3.0:
            home_xg *= 1.05
            away_xg *= 1.05
        elif h2h_avg_goals < 2.0:
            home_xg *= 0.95
            away_xg *= 0.95

        # Clamp xG to reasonable range
        home_xg = max(0.5, min(3.5, home_xg))
        away_xg = max(0.3, min(3.0, away_xg))

        # Calculate confidence: higher when ELO, H2H, and Form all agree
        if elo_home > max(elo_draw, elo_away):
            elo_winner = "H"
        elif elo_away > elo_draw:
            elo_winner = "A"
        else:
            elo_winner = "D"

        # Equal H2H rates (including the 0.5/0.5 fallback) carry no lean, so
        # they must not count as agreement with an away pick.
        if h2h_home_rate > h2h_away_rate:
            h2h_winner = "H"
        elif h2h_away_rate > h2h_home_rate:
            h2h_winner = "A"
        else:
            h2h_winner = "D"

        if form_diff > 0.1:
            form_winner = "H"
        elif form_diff < -0.1:
            form_winner = "A"
        else:
            form_winner = "D"

        agreement = sum([
            elo_winner == h2h_winner,
            elo_winner == form_winner,
            h2h_winner == form_winner,
        ])

        max_prob = max(final_home, final_draw, final_away)
        confidence = max_prob * 100 * (0.7 + agreement * 0.1)

        # Collect raw features for the downstream model.
        # Key order matters for overrides: later entries win over earlier ones.
        raw_features = {
            **elo_features,

            # Form features (trend values used as proxies around a 1.5 baseline)
            "home_goals_avg": 1.5 + home_mom_data.goals_trend,
            "home_conceded_avg": 1.5 - home_mom_data.conceded_trend,
            "away_goals_avg": 1.5 + away_mom_data.goals_trend,
            "away_conceded_avg": 1.5 - away_mom_data.conceded_trend,

            # Fixed placeholders: not available from the momentum data
            "home_clean_sheet_rate": 0.2,
            "away_clean_sheet_rate": 0.2,
            "home_scoring_rate": 0.8,
            "away_scoring_rate": 0.8,

            "home_winning_streak": home_mom_data.winning_streak,
            "away_winning_streak": away_mom_data.winning_streak,
            "home_unbeaten_streak": home_mom_data.unbeaten_streak,
            "away_unbeaten_streak": away_mom_data.unbeaten_streak,

            # H2H features
            **h2h_features,

            # Team stats
            "home_avg_possession": home_stats.get("avg_possession", 0.5),
            "away_avg_possession": away_stats.get("avg_possession", 0.5),
            "home_avg_shots_on_target": home_stats.get("avg_shots_on_target", 3.5),
            "away_avg_shots_on_target": away_stats.get("avg_shots_on_target", 3.5),
            "home_shot_conversion": home_stats.get("shot_conversion_rate", 0.1),
            "away_shot_conversion": away_stats.get("shot_conversion_rate", 0.1),
            "home_avg_corners": home_stats.get("avg_corners", 4.5),
            "away_avg_corners": away_stats.get("avg_corners", 4.5),

            # Derived (conceded trend reused as an xGA proxy)
            "home_xga": 1.5 - home_mom_data.conceded_trend,
            "away_xga": 1.5 - away_mom_data.conceded_trend,
        }

        return TeamPrediction(
            home_win_prob=final_home,
            draw_prob=final_draw,
            away_win_prob=final_away,
            home_xg=home_xg,
            away_xg=away_xg,
            form_advantage=form_diff,
            h2h_advantage=h2h_home_rate - h2h_away_rate,
            elo_diff=elo_features.get("elo_diff", 0),
            confidence=confidence,
            raw_features=raw_features,
        )
|
||||
|
||||
|
||||
# Singleton
|
||||
_engine: Optional[TeamPredictorEngine] = None


def get_team_predictor() -> TeamPredictorEngine:
    """Return the module-level TeamPredictorEngine, creating it on first call."""
    global _engine
    if _engine is not None:
        return _engine
    _engine = TeamPredictorEngine()
    return _engine
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke run: build the shared engine and print one prediction
    # for placeholder team IDs.
    predictor = get_team_predictor()

    print("\n🧪 Team Predictor Engine Test")
    print("=" * 50)

    sample = predictor.predict(
        home_team_id="test_home",
        away_team_id="test_away",
        match_date_ms=1707393600000,
    )

    print(f"\n📊 Prediction:")
    for name, value in sample.to_dict().items():
        print(f" {name}: {value}")
|
||||
@@ -15,13 +15,9 @@ Orijinal Faktörler:
|
||||
- Tarihsel upset pattern
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, Any, Optional, Tuple, List
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
try:
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
+171
-16
@@ -7,11 +7,14 @@ import time
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Any
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import uvicorn
|
||||
from dotenv import load_dotenv
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
import subprocess
|
||||
from pydantic import BaseModel
|
||||
|
||||
try:
|
||||
@@ -21,6 +24,7 @@ except ImportError:
|
||||
HAS_BASKETBALL = False
|
||||
from services.single_match_orchestrator import get_single_match_orchestrator
|
||||
from services.v26_shadow_engine import get_v26_shadow_engine
|
||||
from models.league_model import get_league_model_loader
|
||||
|
||||
load_dotenv()
|
||||
|
||||
@@ -37,6 +41,23 @@ class CouponRequest(BaseModel):
|
||||
min_confidence: float | None = None
|
||||
|
||||
|
||||
class RetrainRequest(BaseModel):
    """Request body for the admin retrain endpoint."""

    # Free-text reason, echoed back in the trigger response.
    reason: str | None = "manual"
    # Comma-separated market list, e.g. "MS,OU25,BTTS"; None means all markets.
    markets: str | None = None
    # Trial count forwarded to the training script.
    trials: int | None = 50
|
||||
|
||||
|
||||
# ─── Retrain state tracking ──────────────────────────────────
|
||||
_retrain_state: dict[str, Any] = {
|
||||
"running": False,
|
||||
"last_started": None,
|
||||
"last_completed": None,
|
||||
"last_status": None,
|
||||
"last_error": None,
|
||||
"pid": None,
|
||||
}
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(_: FastAPI):
|
||||
try:
|
||||
@@ -114,6 +135,8 @@ def read_root() -> dict[str, Any]:
|
||||
"GET /v20plus/reversal-watchlist",
|
||||
"POST /v20plus/coupon",
|
||||
"GET /v20plus/daily-banker",
|
||||
"POST /v1/admin/retrain",
|
||||
"GET /v1/admin/retrain/status",
|
||||
],
|
||||
}
|
||||
|
||||
@@ -124,6 +147,14 @@ def health_check() -> dict[str, Any]:
|
||||
orchestrator = get_single_match_orchestrator()
|
||||
shadow_engine = get_v26_shadow_engine()
|
||||
|
||||
# Per-market V25 model status
|
||||
v25_readiness: dict[str, Any] = {"fully_loaded": False}
|
||||
try:
|
||||
v25_predictor = orchestrator._get_v25_predictor()
|
||||
v25_readiness = v25_predictor.readiness_summary()
|
||||
except Exception as v25_err:
|
||||
v25_readiness = {"fully_loaded": False, "error": str(v25_err)}
|
||||
|
||||
if HAS_BASKETBALL:
|
||||
basketball_predictor = get_basketball_v25_predictor()
|
||||
basketball_readiness = basketball_predictor.readiness_summary()
|
||||
@@ -132,34 +163,51 @@ def health_check() -> dict[str, Any]:
|
||||
basketball_readiness = {"fully_loaded": False, "error": "Basketball module not found"}
|
||||
ready = True
|
||||
|
||||
league_readiness = get_league_model_loader().readiness_summary()
|
||||
overall_ready = ready and v25_readiness.get("fully_loaded", False)
|
||||
return {
|
||||
"status": "healthy" if ready else "degraded",
|
||||
"status": "healthy" if overall_ready else "degraded",
|
||||
"engine": "v28.main",
|
||||
"mode": os.getenv("AI_ENGINE_MODE", "v28"),
|
||||
"ready": ready,
|
||||
"ready": overall_ready,
|
||||
"v25_football": v25_readiness,
|
||||
"league_specific": league_readiness,
|
||||
"basketball_v25": basketball_readiness,
|
||||
"v26_shadow": shadow_engine.readiness_summary(),
|
||||
"prediction_service_ready": True,
|
||||
"model_loaded": ready,
|
||||
"model_loaded": overall_ready,
|
||||
"orchestrator_mode": getattr(orchestrator, "engine_mode", "v28"),
|
||||
}
|
||||
except Exception as error:
|
||||
return {"status": "unhealthy", "ready": False, "error": str(error)}
|
||||
|
||||
|
||||
_REQUIRED_RESPONSE_FIELDS = ("match_info", "market_board", "main_pick", "bet_summary", "data_quality")


@app.post("/v20plus/analyze/{match_id}")
async def analyze_match_v20plus(match_id: str) -> dict[str, Any]:
    """Analyze one match and return the orchestrator's result plus timing.

    The analysis is executed exactly once, in a worker thread, so the
    (synchronous) orchestrator call does not block the event loop.

    Raises:
        HTTPException: 404 when the orchestrator returns nothing for the ID.
    """
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments.
    started_at = time.perf_counter()
    orchestrator = get_single_match_orchestrator()
    result = await asyncio.to_thread(orchestrator.analyze_match, match_id)
    elapsed_ms = int((time.perf_counter() - started_at) * 1000)

    if not result:
        raise HTTPException(status_code=404, detail=f"Match not found: {match_id}")

    # Response validation: log missing required fields (non-fatal)
    missing_fields = [f for f in _REQUIRED_RESPONSE_FIELDS if f not in result]
    if missing_fields:
        print(f"⚠️ [API] analyze/{match_id} response missing fields: {missing_fields} ({elapsed_ms}ms)")

    result["timing_ms"] = elapsed_ms
    return result
|
||||
|
||||
|
||||
@app.get("/v20plus/analyze-htms/{match_id}")
async def analyze_match_htms_v20plus(match_id: str) -> dict[str, Any]:
    """Return the orchestrator's HT/MS analysis for one match.

    The orchestrator call is synchronous, so it is executed once in a worker
    thread rather than on the event loop.

    Raises:
        HTTPException: 404 when the orchestrator returns nothing for the ID.
    """
    orchestrator = get_single_match_orchestrator()
    result = await asyncio.to_thread(orchestrator.analyze_match_htms, match_id)
    if not result:
        raise HTTPException(status_code=404, detail=f"Match not found: {match_id}")
    return result
|
||||
@@ -230,11 +278,12 @@ async def analyze_match_htft_v20plus(match_id: str, timeout_sec: int = 30) -> di
|
||||
@app.post("/v20plus/coupon")
async def generate_coupon_v20plus(request: CouponRequest) -> dict[str, Any]:
    """Build a coupon from the requested matches in a worker thread.

    Arguments are forwarded by keyword (``asyncio.to_thread`` passes
    ``**kwargs`` through), so the call does not depend on the positional
    order of ``build_coupon``'s parameters. A missing strategy defaults to
    "BALANCED".
    """
    orchestrator = get_single_match_orchestrator()
    return await asyncio.to_thread(
        orchestrator.build_coupon,
        match_ids=request.match_ids,
        strategy=request.strategy or "BALANCED",
        max_matches=request.max_matches,
        min_confidence=request.min_confidence,
    )
|
||||
|
||||
|
||||
@@ -244,7 +293,7 @@ async def get_daily_banker_v20plus(count: int = 3) -> dict[str, Any]:
|
||||
raise HTTPException(status_code=400, detail="count must be >= 1")
|
||||
|
||||
orchestrator = get_single_match_orchestrator()
|
||||
bankers = orchestrator.get_daily_bankers(count=count)
|
||||
bankers = await asyncio.to_thread(orchestrator.get_daily_bankers, count)
|
||||
return {"count": len(bankers), "bankers": bankers}
|
||||
|
||||
@app.get("/v20plus/reversal-watchlist")
|
||||
@@ -262,14 +311,120 @@ async def get_reversal_watchlist_v20plus(
|
||||
raise HTTPException(status_code=400, detail="min_score must be between 0 and 100")
|
||||
|
||||
orchestrator = get_single_match_orchestrator()
|
||||
return orchestrator.get_reversal_watchlist(
|
||||
count=count,
|
||||
horizon_hours=horizon_hours,
|
||||
min_score=min_score,
|
||||
top_leagues_only=top_leagues_only,
|
||||
return await asyncio.to_thread(
|
||||
orchestrator.get_reversal_watchlist,
|
||||
count,
|
||||
horizon_hours,
|
||||
min_score,
|
||||
top_leagues_only,
|
||||
)
|
||||
|
||||
|
||||
# ─── ADMIN: Retrain Pipeline ─────────────────────────────────
|
||||
|
||||
def _run_retrain_step(cmd: list[str], cwd: str, timeout: int, failure_label: str) -> None:
    """Run one pipeline subprocess; raise RuntimeError with the stderr tail on a non-zero exit."""
    result = subprocess.run(
        cmd, capture_output=True, text=True, timeout=timeout, cwd=cwd,
    )
    if result.returncode != 0:
        raise RuntimeError(f"{failure_label} failed:\n{result.stderr[-500:]}")


def _run_retrain_pipeline(markets: str | None, trials: int):
    """Background function: extract data → train model → reload.

    Args:
        markets: Comma-separated market list forwarded as ``--markets``;
            omitted from the command when falsy.
        trials: Value forwarded as ``--trials``.

    Every step, including path setup, runs inside the ``try`` so that any
    failure resets ``_retrain_state["running"]``; otherwise a crash before the
    first step would leave the pipeline permanently reported as running.
    A failed in-memory reload is logged but does not fail the pipeline.
    """
    # Function-scope import so the interpreter fallback below cannot hit an
    # unbound name regardless of the module's import block.
    import sys

    try:
        ai_dir = os.path.dirname(os.path.abspath(__file__))
        scripts_dir = os.path.join(ai_dir, "scripts")
        python = os.path.join(ai_dir, "venv", "bin", "python3")
        if not os.path.exists(python):
            python = sys.executable  # fallback

        # Step 1: Extract training data
        print("🔄 [RETRAIN] Step 1/3: Extracting training data...", flush=True)
        _run_retrain_step(
            [python, os.path.join(scripts_dir, "extract_training_data.py")],
            cwd=ai_dir,
            timeout=600,
            failure_label="Extract",
        )
        print(f"✅ [RETRAIN] Extract done", flush=True)

        # Step 2: Train V25 Pro
        print("🔄 [RETRAIN] Step 2/3: Training V25 Pro model...", flush=True)
        train_cmd = [python, os.path.join(scripts_dir, "train_v25_pro.py")]
        if markets:
            train_cmd += ["--markets", markets]
        train_cmd += ["--trials", str(trials)]
        _run_retrain_step(train_cmd, cwd=ai_dir, timeout=3600, failure_label="Training")
        print(f"✅ [RETRAIN] Training done", flush=True)

        # Step 3: Reload models in memory (best effort)
        print("🔄 [RETRAIN] Step 3/3: Reloading models...", flush=True)
        try:
            orchestrator = get_single_match_orchestrator()
            v25 = orchestrator._get_v25_predictor()
            v25._loaded = False  # force load_models() to read from disk again
            v25.load_models()
            print("✅ [RETRAIN] Models reloaded in memory", flush=True)
        except Exception as reload_err:
            print(f"⚠️ [RETRAIN] Hot reload failed (restart needed): {reload_err}", flush=True)

        _retrain_state.update({
            "running": False,
            "last_completed": datetime.now().isoformat(),
            "last_status": "success",
            "last_error": None,
        })
        print("🎉 [RETRAIN] Pipeline complete!", flush=True)

    except Exception as err:
        # Includes subprocess.TimeoutExpired from either step.
        _retrain_state.update({
            "running": False,
            "last_completed": datetime.now().isoformat(),
            "last_status": "failed",
            "last_error": str(err),
        })
        print(f"❌ [RETRAIN] Pipeline failed: {err}", flush=True)
|
||||
|
||||
|
||||
@app.post("/v1/admin/retrain")
async def admin_retrain(request: RetrainRequest) -> dict[str, Any]:
    """Trigger full retrain pipeline: extract → train → reload.

    Returns an "already_running" payload when a run is in progress, otherwise
    starts the pipeline in a daemon thread and returns a "triggered" payload.

    Raises:
        HTTPException: 400 when ``trials`` is negative.

    NOTE(review): no authentication is visible on this handler — confirm the
    route is protected elsewhere (middleware / network policy).
    """
    if _retrain_state["running"]:
        return {
            "status": "already_running",
            "message": f"Retrain in progress since {_retrain_state['last_started']}",
        }

    # A negative value would be handed straight to the training script.
    # 0 / None keep their previous meaning: fall back to the default of 50.
    if request.trials is not None and request.trials < 0:
        raise HTTPException(status_code=400, detail="trials must be a positive integer")
    trials = request.trials or 50

    # No await between the check above and this update, so two requests on the
    # same event loop cannot both pass the "running" check.
    _retrain_state.update({
        "running": True,
        "last_started": datetime.now().isoformat(),
        "last_status": "running",
        "last_error": None,
    })

    # Run in background thread
    import threading
    thread = threading.Thread(
        target=_run_retrain_pipeline,
        args=(request.markets, trials),
        daemon=True,
    )
    try:
        thread.start()
    except Exception as start_err:
        # Without this reset a failed start would block all future retrains.
        _retrain_state.update({
            "running": False,
            "last_status": "failed",
            "last_error": str(start_err),
        })
        raise

    return {
        "status": "triggered",
        "message": "Retrain pipeline started in background",
        "reason": request.reason,
        "markets": request.markets or "all",
        "trials": trials,
    }
|
||||
|
||||
|
||||
@app.get("/v1/admin/retrain/status")
async def admin_retrain_status() -> dict[str, Any]:
    """Report the retrain pipeline state as a shallow copy of the tracker."""
    snapshot = dict(_retrain_state)
    return snapshot
|
||||
|
||||
|
||||
if __name__ == "__main__":
    port = int(os.getenv("PORT", "8000"))
    # Auto-reload stays enabled by default (previous behaviour). Set
    # UVICORN_RELOAD to 0/false/no/off to run without the file watcher,
    # e.g. inside a container.
    reload_enabled = os.getenv("UVICORN_RELOAD", "1").strip().lower() not in ("0", "false", "no", "off")
    uvicorn.run("main:app", host="0.0.0.0", port=port, reload=reload_enabled)
|
||||
|
||||
@@ -46,6 +46,9 @@ SUPPORTED_MARKETS = [
|
||||
"ht_ft", # Half-Time/Full-Time
|
||||
"dc", # Double Chance
|
||||
"ht", # Half-Time Result
|
||||
"ht_home", # Half-Time Home win
|
||||
"ht_draw", # Half-Time Draw
|
||||
"ht_away", # Half-Time Away win
|
||||
]
|
||||
|
||||
|
||||
@@ -111,6 +114,9 @@ class Calibrator:
|
||||
"ht_ft": 0.92,
|
||||
"dc": 0.97,
|
||||
"ht": 0.92,
|
||||
"ht_home": 0.92,
|
||||
"ht_draw": 0.92,
|
||||
"ht_away": 0.92,
|
||||
}
|
||||
self._load_calibrators()
|
||||
|
||||
|
||||
@@ -0,0 +1,191 @@
|
||||
"""
|
||||
League-Specific Model Loader
|
||||
=============================
|
||||
Loads per-league XGBoost models + isotonic calibrators trained by
|
||||
scripts/train_league_models.py and provides a unified prediction interface.
|
||||
|
||||
Falls back to general V25 for any market/league without a dedicated model.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import pickle
|
||||
from functools import lru_cache
|
||||
from typing import Dict, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import xgboost as xgb
|
||||
|
||||
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
LEAGUE_MODEL_DIR = os.path.join(AI_ENGINE_DIR, "models", "league_specific")
|
||||
|
||||
# Market file name → (num_class, label_list)
|
||||
MARKET_META: Dict[str, Tuple[int, list]] = {
|
||||
"ms": (3, ["1", "X", "2"]),
|
||||
"ou15": (2, ["Over", "Under"]),
|
||||
"ou25": (2, ["Over", "Under"]),
|
||||
"ou35": (2, ["Over", "Under"]),
|
||||
"btts": (2, ["Yes", "No"]),
|
||||
"ht": (3, ["1", "X", "2"]),
|
||||
"ht_ou05": (2, ["Over", "Under"]),
|
||||
"ht_ou15": (2, ["Over", "Under"]),
|
||||
"htft": (9, ["1/1","1/X","1/2","X/1","X/X","X/2","2/1","2/X","2/2"]),
|
||||
"oe": (2, ["Odd", "Even"]),
|
||||
"cards": (2, ["Over", "Under"]),
|
||||
"handicap": (3, ["1", "X", "2"]),
|
||||
}
|
||||
|
||||
# Signal key map (file key → uppercase signal key used in _get_v25_signal)
|
||||
FILE_TO_SIGNAL = {
|
||||
"ms": "MS", "ou15": "OU15", "ou25": "OU25", "ou35": "OU35",
|
||||
"btts": "BTTS", "ht": "HT", "ht_ou05": "HT_OU05", "ht_ou15": "HT_OU15",
|
||||
"htft": "HTFT", "oe": "OE", "cards": "CARDS", "handicap": "HCAP",
|
||||
}
|
||||
|
||||
|
||||
class LeagueModel:
    """Holds XGBoost models + isotonic calibrators for one league."""

    def __init__(self, league_id: str):
        self.league_id = league_id
        self.league_dir = os.path.join(LEAGUE_MODEL_DIR, league_id)
        self.models: Dict[str, xgb.Booster] = {}       # market_key → booster
        self.calibrators: Dict[str, object] = {}       # cal_key → calibrator
        self.feature_cols: Optional[list] = None
        self._loaded = False

    def load(self) -> bool:
        """Load feature columns, boosters and calibrators from the league directory.

        Returns:
            True when at least one booster or calibrator was loaded; False when
            the directory is missing, empty of usable files, or loading failed.

        Everything is read into local containers and committed only at the
        end, so a failure part-way through never leaves a half-populated model.
        """
        if not os.path.isdir(self.league_dir):
            return False
        try:
            feature_cols = self.feature_cols
            fc_path = os.path.join(self.league_dir, "feature_cols.json")
            if os.path.exists(fc_path):
                with open(fc_path, encoding="utf-8") as f:
                    feature_cols = json.load(f)

            models: Dict[str, xgb.Booster] = {}
            for mkey in MARKET_META:
                xgb_path = os.path.join(self.league_dir, f"xgb_{mkey}.json")
                # Files of 100 bytes or less are skipped rather than loaded.
                if os.path.exists(xgb_path) and os.path.getsize(xgb_path) > 100:
                    b = xgb.Booster()
                    b.load_model(xgb_path)
                    models[mkey] = b

            calibrators: Dict[str, object] = {}
            for fname in os.listdir(self.league_dir):
                if fname.startswith("cal_") and fname.endswith(".pkl"):
                    cal_key = fname[4:-4]  # strip cal_ and .pkl
                    # NOTE(security): pickle.load executes arbitrary code from
                    # the file; these files must only ever come from the
                    # project's own training output.
                    with open(os.path.join(self.league_dir, fname), "rb") as f:
                        calibrators[cal_key] = pickle.load(f)

            self.feature_cols = feature_cols
            self.models = models
            self.calibrators = calibrators
            self._loaded = bool(self.models or self.calibrators)
            return self._loaded
        except Exception as e:
            print(f"[LeagueModel] Load failed for {self.league_id}: {e}")
            return False

    def has_market(self, mkey: str) -> bool:
        """Return True when a booster is loaded for this market key."""
        return mkey in self.models

    def predict_market(
        self,
        mkey: str,
        feature_row: Dict[str, float],
    ) -> Optional[Dict[str, float]]:
        """
        Predict one market using league-specific XGBoost + isotonic calibration.

        Args:
            mkey: Market key (a key of MARKET_META).
            feature_row: Feature name → value; missing features default to 0.0.

        Returns:
            {label: prob} dict, or None if no model is available or the
            prediction fails.
        """
        if mkey not in self.models:
            return None

        num_class, labels = MARKET_META[mkey]
        fc = self.feature_cols
        if fc is None:
            # Fallback to whatever the booster expects (it knows its feature names)
            fc = list(self.models[mkey].feature_names or [])

        try:
            X = pd.DataFrame([{col: feature_row.get(col, 0.0) for col in fc}])
            dmat = xgb.DMatrix(X)
            raw = self.models[mkey].predict(dmat)

            if num_class > 2:
                probs_arr = raw.reshape(-1, num_class)[0]
                probs = {labels[i]: float(probs_arr[i]) for i in range(num_class)}

                # Apply isotonic calibration per class where a calibrator exists
                calibrated_any = False
                for i, label in enumerate(labels):
                    cal_key = f"{mkey}_{i}"
                    if cal_key in self.calibrators:
                        p_cal = float(self.calibrators[cal_key].predict([probs_arr[i]])[0])
                        probs[label] = max(0.01, min(0.99, p_cal))
                        calibrated_any = True

                if calibrated_any:
                    # Renormalise over ALL classes. Dividing by the sum of only
                    # the calibrated classes would break the distribution when
                    # some classes have no calibrator.
                    total = sum(probs.values())
                    if total > 0:
                        probs = {k: v / total for k, v in probs.items()}
            else:
                # Binary market: the booster output is the probability of labels[0]
                p = float(raw[0])
                cal_key = mkey
                if cal_key in self.calibrators:
                    p = float(self.calibrators[cal_key].predict([p])[0])
                    p = max(0.01, min(0.99, p))
                probs = {labels[0]: p, labels[1]: 1.0 - p}

            return probs
        except Exception as e:
            print(f"[LeagueModel] predict_market({mkey}) failed for {self.league_id}: {e}")
            return None
|
||||
|
||||
|
||||
class LeagueModelLoader:
    """
    In-memory cache for league-specific models.

    All cache access is serialized with a lock, so one loader can be shared by
    the worker threads that request handlers dispatch to. Eviction is
    first-in-first-out by insertion order, and failed loads are cached as None
    so a missing league is not re-read from disk on every request.
    """

    def __init__(self, max_cached: int = 80):
        # Function-scope import keeps this class self-contained.
        import threading

        self._cache: Dict[str, Optional[LeagueModel]] = {}
        self._max_cached = max_cached
        self._lock = threading.Lock()

    def get(self, league_id: str) -> Optional["LeagueModel"]:
        """Return loaded LeagueModel for this league, or None if unavailable."""
        with self._lock:
            if league_id in self._cache:
                return self._cache[league_id]

            # Evict the oldest entry if the cache is full. The emptiness guard
            # keeps max_cached <= 0 from raising StopIteration on an empty cache.
            if self._cache and len(self._cache) >= self._max_cached:
                oldest = next(iter(self._cache))
                del self._cache[oldest]

            model = LeagueModel(league_id)
            loaded = model.load()
            self._cache[league_id] = model if loaded else None
            if loaded:
                n_models = len(model.models)
                n_cals = len(model.calibrators)
                print(f"[LeagueModel] Loaded {league_id}: {n_models} XGB models, {n_cals} calibrators")
            return self._cache[league_id]

    def available_leagues(self) -> list:
        """List the sub-directory names found under LEAGUE_MODEL_DIR."""
        if not os.path.isdir(LEAGUE_MODEL_DIR):
            return []
        return [d for d in os.listdir(LEAGUE_MODEL_DIR)
                if os.path.isdir(os.path.join(LEAGUE_MODEL_DIR, d))]

    def readiness_summary(self) -> dict:
        """Summarize the model directory, league count, and successfully cached models."""
        leagues = self.available_leagues()
        with self._lock:
            cached = len([v for v in self._cache.values() if v is not None])
        return {
            "league_specific_dir": LEAGUE_MODEL_DIR,
            "available_leagues": len(leagues),
            "cached": cached,
        }
|
||||
|
||||
|
||||
# ── Singleton ──────────────────────────────────────────────────────
|
||||
_loader: Optional[LeagueModelLoader] = None


def get_league_model_loader() -> LeagueModelLoader:
    """Return the module-level LeagueModelLoader, creating it on first call."""
    global _loader
    if _loader is not None:
        return _loader
    _loader = LeagueModelLoader()
    return _loader
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,154 @@
|
||||
[
|
||||
"home_overall_elo",
|
||||
"away_overall_elo",
|
||||
"elo_diff",
|
||||
"home_home_elo",
|
||||
"away_away_elo",
|
||||
"home_form_elo",
|
||||
"away_form_elo",
|
||||
"form_elo_diff",
|
||||
"home_goals_avg",
|
||||
"home_conceded_avg",
|
||||
"away_goals_avg",
|
||||
"away_conceded_avg",
|
||||
"home_clean_sheet_rate",
|
||||
"away_clean_sheet_rate",
|
||||
"home_scoring_rate",
|
||||
"away_scoring_rate",
|
||||
"home_winning_streak",
|
||||
"away_winning_streak",
|
||||
"home_unbeaten_streak",
|
||||
"away_unbeaten_streak",
|
||||
"h2h_total_matches",
|
||||
"h2h_home_win_rate",
|
||||
"h2h_draw_rate",
|
||||
"h2h_avg_goals",
|
||||
"h2h_btts_rate",
|
||||
"h2h_over25_rate",
|
||||
"home_avg_possession",
|
||||
"away_avg_possession",
|
||||
"home_avg_shots_on_target",
|
||||
"away_avg_shots_on_target",
|
||||
"home_shot_conversion",
|
||||
"away_shot_conversion",
|
||||
"home_avg_corners",
|
||||
"away_avg_corners",
|
||||
"odds_ms_h",
|
||||
"odds_ms_d",
|
||||
"odds_ms_a",
|
||||
"implied_home",
|
||||
"implied_draw",
|
||||
"implied_away",
|
||||
"odds_ht_ms_h",
|
||||
"odds_ht_ms_d",
|
||||
"odds_ht_ms_a",
|
||||
"odds_ou05_o",
|
||||
"odds_ou05_u",
|
||||
"odds_ou15_o",
|
||||
"odds_ou15_u",
|
||||
"odds_ou25_o",
|
||||
"odds_ou25_u",
|
||||
"odds_ou35_o",
|
||||
"odds_ou35_u",
|
||||
"odds_ht_ou05_o",
|
||||
"odds_ht_ou05_u",
|
||||
"odds_ht_ou15_o",
|
||||
"odds_ht_ou15_u",
|
||||
"odds_btts_y",
|
||||
"odds_btts_n",
|
||||
"odds_ms_h_present",
|
||||
"odds_ms_d_present",
|
||||
"odds_ms_a_present",
|
||||
"odds_ht_ms_h_present",
|
||||
"odds_ht_ms_d_present",
|
||||
"odds_ht_ms_a_present",
|
||||
"odds_ou05_o_present",
|
||||
"odds_ou05_u_present",
|
||||
"odds_ou15_o_present",
|
||||
"odds_ou15_u_present",
|
||||
"odds_ou25_o_present",
|
||||
"odds_ou25_u_present",
|
||||
"odds_ou35_o_present",
|
||||
"odds_ou35_u_present",
|
||||
"odds_ht_ou05_o_present",
|
||||
"odds_ht_ou05_u_present",
|
||||
"odds_ht_ou15_o_present",
|
||||
"odds_ht_ou15_u_present",
|
||||
"odds_btts_y_present",
|
||||
"odds_btts_n_present",
|
||||
"home_xga",
|
||||
"away_xga",
|
||||
"league_avg_goals",
|
||||
"league_zero_goal_rate",
|
||||
"upset_atmosphere",
|
||||
"upset_motivation",
|
||||
"upset_fatigue",
|
||||
"upset_potential",
|
||||
"referee_home_bias",
|
||||
"referee_avg_goals",
|
||||
"referee_cards_total",
|
||||
"referee_avg_yellow",
|
||||
"referee_experience",
|
||||
"home_momentum_score",
|
||||
"away_momentum_score",
|
||||
"momentum_diff",
|
||||
"home_squad_quality",
|
||||
"away_squad_quality",
|
||||
"squad_diff",
|
||||
"home_key_players",
|
||||
"away_key_players",
|
||||
"home_missing_impact",
|
||||
"away_missing_impact",
|
||||
"home_goals_form",
|
||||
"away_goals_form",
|
||||
"home_lineup_goals_per90",
|
||||
"away_lineup_goals_per90",
|
||||
"home_lineup_assists_per90",
|
||||
"away_lineup_assists_per90",
|
||||
"home_squad_continuity",
|
||||
"away_squad_continuity",
|
||||
"home_top_scorer_form",
|
||||
"away_top_scorer_form",
|
||||
"home_avg_player_exp",
|
||||
"away_avg_player_exp",
|
||||
"home_goals_diversity",
|
||||
"away_goals_diversity",
|
||||
"h2h_home_goals_avg",
|
||||
"h2h_away_goals_avg",
|
||||
"h2h_recent_trend",
|
||||
"h2h_venue_advantage",
|
||||
"home_rolling5_goals",
|
||||
"home_rolling5_conceded",
|
||||
"home_rolling10_goals",
|
||||
"home_rolling10_conceded",
|
||||
"home_rolling20_goals",
|
||||
"home_rolling20_conceded",
|
||||
"away_rolling5_goals",
|
||||
"away_rolling5_conceded",
|
||||
"away_rolling10_goals",
|
||||
"away_rolling10_conceded",
|
||||
"home_rolling5_cs",
|
||||
"away_rolling5_cs",
|
||||
"home_venue_goals",
|
||||
"home_venue_conceded",
|
||||
"away_venue_goals",
|
||||
"away_venue_conceded",
|
||||
"home_goal_trend",
|
||||
"away_goal_trend",
|
||||
"home_days_rest",
|
||||
"away_days_rest",
|
||||
"match_month",
|
||||
"is_season_start",
|
||||
"is_season_end",
|
||||
"attack_vs_defense_home",
|
||||
"attack_vs_defense_away",
|
||||
"xg_diff",
|
||||
"form_momentum_interaction",
|
||||
"elo_form_consistency",
|
||||
"upset_x_elo_gap",
|
||||
"league_home_win_rate",
|
||||
"league_draw_rate",
|
||||
"league_btts_rate",
|
||||
"league_ou25_rate",
|
||||
"league_reliability_score"
|
||||
]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,891 @@
|
||||
tree
|
||||
version=v4
|
||||
num_class=1
|
||||
num_tree_per_iteration=1
|
||||
label_index=0
|
||||
max_feature_idx=151
|
||||
objective=binary sigmoid:1
|
||||
feature_names=Column_0 Column_1 Column_2 Column_3 Column_4 Column_5 Column_6 Column_7 Column_8 Column_9 Column_10 Column_11 Column_12 Column_13 Column_14 Column_15 Column_16 Column_17 Column_18 Column_19 Column_20 Column_21 Column_22 Column_23 Column_24 Column_25 Column_26 Column_27 Column_28 Column_29 Column_30 Column_31 Column_32 Column_33 Column_34 Column_35 Column_36 Column_37 Column_38 Column_39 Column_40 Column_41 Column_42 Column_43 Column_44 Column_45 Column_46 Column_47 Column_48 Column_49 Column_50 Column_51 Column_52 Column_53 Column_54 Column_55 Column_56 Column_57 Column_58 Column_59 Column_60 Column_61 Column_62 Column_63 Column_64 Column_65 Column_66 Column_67 Column_68 Column_69 Column_70 Column_71 Column_72 Column_73 Column_74 Column_75 Column_76 Column_77 Column_78 Column_79 Column_80 Column_81 Column_82 Column_83 Column_84 Column_85 Column_86 Column_87 Column_88 Column_89 Column_90 Column_91 Column_92 Column_93 Column_94 Column_95 Column_96 Column_97 Column_98 Column_99 Column_100 Column_101 Column_102 Column_103 Column_104 Column_105 Column_106 Column_107 Column_108 Column_109 Column_110 Column_111 Column_112 Column_113 Column_114 Column_115 Column_116 Column_117 Column_118 Column_119 Column_120 Column_121 Column_122 Column_123 Column_124 Column_125 Column_126 Column_127 Column_128 Column_129 Column_130 Column_131 Column_132 Column_133 Column_134 Column_135 Column_136 Column_137 Column_138 Column_139 Column_140 Column_141 Column_142 Column_143 Column_144 Column_145 Column_146 Column_147 Column_148 Column_149 Column_150 Column_151
|
||||
feature_infos=[1150.3663761896189:1903.4781806887747] [1158.5088961211511:1916.84579108047] [-496.81477567713546:573.10259120534784] [1159.8767670517543:1884.5959848901657] [1151.7894548779084:1919.4116678360419] [1426.1496448360797:1585.9817930954068] [1427.9817118206745:1588.9895054335384] [-113.02538114266532:114.69704651598067] [0:5.9333333333333336] [0:6.2666666666666666] [0:6.0666666666666664] [0:5.2000000000000002] [0:1] [0:1] [0:1] [0:1] [0:5] [0:5] [0:5] [0:5] [0:8] [0:1] [0:1] [0:11] [0:1] [0:1] [0.20999999999999999:0.81000000000000005] [0.22500000000000001:0.76000000000000001] [0:13] [0:13] [0:6.333333333333333] [0:6.666666666666667] [0:4.5] [0:4.5] [0:22.550000000000001] [0:17.5] [0:35.5] [0.065285302506677897:0.80962131918207236] [0.1044438556117265:0.4288719106684401] [0.056651501780225801:0.79902050363656307] [0:26] [0:5.0899999999999999] [0:26.5] [0:1.0900000000000001] [0:13.050000000000001] [0:1.76] [0:13.25] [0:3.7400000000000002] [0:7.6500000000000004] [0:8.5600000000000005] [0:3.5299999999999998] [0:1.72] [0:7.3300000000000001] [0:5.0199999999999996] [0:2.5800000000000001] [0:3.77] [0:4.1299999999999999] [0:1] [0:1] [0:1] [0:1] [0:1] [0:1] [0:1] [0:1] [0:1] [0:1] [0:1] [0:1] [0:1] [0:1] [0:1] [0:1] [0:1] [0:1] [0:1] [0:1] [0:6.2666666666666666] [0:5.2000000000000002] [2.0046118370484245:4.1840324763193504] [0.018042399639151999:0.1552651806302843] [0:0.45000000000000001] none none [0:0.1575] [-0.92000000000000004:1] [0:7] none none [0:1] [-1:0.61250000000000004] [-1:0.59166666666666667] [-1.2908333333333333:1.3799999999999999] [0:40.799999999999997] [0:36.299999999999997] [-29.999999999999996:31.499999999999996] [0:10] [0:10] [0:1] [0:1] [0:5.7999999999999998] [0:5] [0:5] [0:6.25] [0:4] [0:5] [0:1] [0:1] [0:10] [0:11] [0:42.100000000000001] [0:41.799999999999997] [0:1] [0:1] [0:8] [0:8] [-1:1] [0:1] [0:5.7999999999999998] [0:6.5] [0:5.2000000000000002] [0:6.5] [0:4.4119999999999999] [0:6.5] [0:5.3330000000000002] [0:4.7999999999999998] 
[0:5.3330000000000002] [0:4.5] [0:1] [0:1] [0:7] [0:8] [0:6] [0:7] [-1.8:1.8] [-2.2000000000000002:2] [1:30] [1.2:30] [1:12] [0:1] [0:1] [-4.4669999999999996:4.5999999999999996] [-5.2000000000000002:5] [-4.133:5.3330000000000002] [-0.045499999999999999:0.088099999999999998] [0.063600000000000004:1] [0:0.088800000000000004] [0.35623409669211198:0.51556156968876865] [0.1051136363636363:0.32744043043812449] [0.40430438124519602:0.64185836716283262] [0.32129131437355879:0.77537212449255755] [0.70399999999999996:1]
|
||||
tree_sizes=946 1005 993 986 1007 997 994 1005 992 1009 984 1007 997 684 1002 999 897 992 991 990 1001 905 1003 995 896 1010 908 1005 1007 1014 1012 1005 1005 691 688
|
||||
|
||||
Tree=0
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=45 54 46 96 4 3 16
|
||||
split_gain=174.584 35.5641 24.9346 16.8552 14.3832 11.0732 10.8094
|
||||
threshold=1.155 1.405 2.5850000000000004 4.5000000000000009 1704.4687685416313 1441.3975280143418 1.0000000180025095e-35
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 3 5 -1 -4 -2 -3
|
||||
right_child=2 6 4 -5 -6 -7 -8
|
||||
leaf_value=0.87683759709368503 0.84782001969484888 0.90404985782878589 0.85793883408869098 0.90097077069702014 0.73127673195863474 0.81008235044809496 0.93515098554525133
|
||||
leaf_weight=939.87957760691643 113.51603642106056 283.89385342597961 418.36988925933838 472.32632339000702 9.9611878395080549 330.17187193036079 212.09029108285904
|
||||
leaf_count=4529 547 1368 2016 2276 48 1591 1022
|
||||
internal_value=0.875686 0.893342 0.837052 0.884909 0.85499 0.819737 0.91735
|
||||
internal_weight=2780.21 1908.19 872.019 1412.21 428.331 443.688 495.984
|
||||
internal_count=13397 9195 4202 6805 2064 2138 2390
|
||||
is_linear=0
|
||||
shrinkage=1
|
||||
|
||||
|
||||
Tree=1
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=47 48 49 86 141 133 150
|
||||
split_gain=140.072 28.5634 26.9673 23.4898 19.0464 14.6224 10.7313
|
||||
threshold=1.6850000000000003 2.1650000000000005 3.3850000000000002 3.3279569892473124 1.2620000000000002 0.31650000000000006 0.48538789856891795
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 3 5 -1 -4 -2 -3
|
||||
right_child=2 6 4 -5 -6 -7 -8
|
||||
leaf_value=0.0068305934170468773 -0.15031955758812821 -0.031384052219233315 -0.052289652303345355 0.062911487393225371 0.031971580220150592 -0.011449057668411599 0.04709896679065162
|
||||
leaf_weight=1284.6507234424353 8.3659095764160138 19.989385187625885 501.71516834199429 86.587033972144127 30.929726287722588 495.32643516361713 353.17835873365402
|
||||
leaf_count=6221 40 98 2370 419 146 2373 1730
|
||||
internal_value=-0.000699774 0.0173296 -0.0310475 0.0103722 -0.0473963 -0.0137584 0.0428942
|
||||
internal_weight=2780.74 1744.41 1036.34 1371.24 532.645 503.692 373.168
|
||||
internal_count=13397 8468 4929 6640 2516 2413 1828
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=2
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=45 50 53 39 96 28 150
|
||||
split_gain=110.988 23.3948 18.1465 13.3271 12.0676 11.8858 11.7377
|
||||
threshold=1.155 1.4150000000000003 3.3250000000000006 0.36187197152743827 4.5000000000000009 3.9500000000000006 0.48538789856891795
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 4 5 -4 -1 -2 -3
|
||||
right_child=2 6 3 -5 -6 -7 -8
|
||||
leaf_value=0.0012888166907150367 -0.037269165059036741 -0.040251887739062138 -0.05134333704385069 -0.13896201271441588 0.020859848952961575 -0.011322757210899898 0.041785901483228478
|
||||
leaf_weight=991.85861267149448 395.00907251238823 20.026126876473427 106.85419258475304 22.895305529236794 522.50144763290882 372.75306884944439 348.38899676501751
|
||||
leaf_count=4802 1851 97 494 106 2555 1751 1741
|
||||
internal_value=-0.000605268 0.0137719 -0.0307651 -0.0668133 0.00804152 -0.0246723 0.0373256
|
||||
internal_weight=2780.29 1882.78 897.512 129.749 1514.36 767.762 368.415
|
||||
internal_count=13397 9195 4202 600 7357 3602 1838
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=3
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=45 48 86 11 53 144 104
|
||||
split_gain=91.2855 30.6398 20.2224 17.4942 14.4128 10.3549 10.2483
|
||||
threshold=1.155 2.2850000000000006 3.3279569892473124 0.30952380952380959 3.4950000000000006 0.0075500000000000003 1.0005000000000002
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 2 -1 -3 6 -6 -2
|
||||
right_child=4 3 -4 -5 5 -7 -8
|
||||
leaf_value=0.0040069597334026 -0.027086096902533729 -0.1715719381102207 0.053726932967164444 0.048773966775503941 -0.061900987427835959 -0.23396750965062757 0.016338062312498316
|
||||
leaf_weight=1510.6300098896027 782.38381478190422 3.9652889966964713 94.400425717234612 257.22131448984146 73.248439565300941 3.9991354644298545 63.84617380797863
|
||||
leaf_count=7369 3628 20 471 1306 331 18 297
|
||||
internal_value=-0.000943352 0.0123188 0.00693156 0.0454226 -0.0277441 -0.0708364 -0.0238097
|
||||
internal_weight=2789.69 1866.22 1605.03 261.187 923.478 77.2476 846.23
|
||||
internal_count=13440 9166 7840 1326 4274 349 3925
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=4
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=49 35 6 53 44 126 38
|
||||
split_gain=70.6071 28.8927 21.165 13.8246 12.9797 16.2351 8.96833
|
||||
threshold=2.8350000000000004 3.3550000000000004 1541.1282734213053 1.9650000000000001 6.5150000000000006 1.8450000000000002 0.27603289214361998
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 2 -1 -3 5 -2 -6
|
||||
right_child=4 3 -4 -5 6 -7 -8
|
||||
leaf_value=-0.0027517198054468608 -0.029319791370752923 0.043695199840511914 0.083541694807877556 0.015347179328011443 -0.028790250204289367 -0.099217570104222469 -0.0022072689999685109
|
||||
leaf_weight=937.29022094607353 417.85833202302456 320.03798474371433 31.915322333574295 449.0833810120821 218.10735833644867 39.50090055167675 374.63410261273384
|
||||
leaf_count=4582 1898 1642 157 2211 1027 181 1742
|
||||
internal_value=-0.000829778 0.0120602 9.06525e-05 0.0271434 -0.0221677 -0.0353586 -0.0119891
|
||||
internal_weight=2788.43 1738.33 969.206 769.121 1050.1 457.359 592.741
|
||||
internal_count=13440 8592 4739 3853 4848 2079 2769
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=5
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=51 50 35 143 44 147 11
|
||||
split_gain=63.712 19.0005 13.7227 11.8206 10.6018 10.4763 6.84163
|
||||
threshold=1.2550000000000001 1.4650000000000001 5.035000000000001 -1.7974999999999997 7.2550000000000008 0.47199397614381194 2.6904761904761911
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 2 -1 6 -5 -3 -2
|
||||
right_child=3 5 -4 4 -6 -7 -8
|
||||
leaf_value=0.0047544573525090221 -0.065940592898403594 0.046851958603355899 0.063639968219378742 -0.028402060075216558 -0.0037183287940315865 -0.010933729592520421 -0.2419531318087022
|
||||
leaf_weight=1547.0932241082191 6.1367039680480939 207.7856714874506 44.20663620531559 674.41967558860779 262.60369572043419 40.760551080107689 3.9397892653942108
|
||||
leaf_count=7584 28 1099 222 3053 1221 215 18
|
||||
internal_value=-0.000729499 0.0105766 0.00639068 -0.0226927 -0.0214845 0.0373753 -0.134921
|
||||
internal_weight=2786.95 1839.85 1591.3 947.1 937.023 248.546 10.0765
|
||||
internal_count=13440 9120 7806 4320 4274 1314 46
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=6
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=41 130 54 48 99 34 105
|
||||
split_gain=42.6482 15.4832 11.8244 11.5616 10.5481 8.08864 5.28375
|
||||
threshold=1.9950000000000003 2.1835000000000004 1.6950000000000001 2.0250000000000004 0.13392857142857142 2.1550000000000007 1.0765000000000002
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 3 4 -1 -2 -3 -4
|
||||
right_child=2 5 6 -5 -6 -7 -8
|
||||
leaf_value=-0.012930103603477631 -0.025837088447404372 0.009985288944886676 0.093598551064641447 0.0827918792790409 0.017739499999769957 0.067684395437451708 -0.011462427181900188
|
||||
leaf_weight=1452.7631230950356 63.837418377399445 116.17519751191139 30.260349631309509 13.827319353818892 1098.0612086355686 34.140560820698738 6.2758191227912894
|
||||
leaf_count=6763 321 560 163 70 5525 162 34
|
||||
internal_value=0.00228156 -0.00876197 0.0171813 -0.0120271 0.0153451 0.0230937 0.0755539
|
||||
internal_weight=2815.34 1616.91 1198.43 1466.59 1161.9 150.316 36.5362
|
||||
internal_count=13598 7555 6043 6833 5846 722 197
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=7
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=41 135 130 21 53 102 16
|
||||
split_gain=34.421 15.4189 13.9177 13.4109 11.0563 13.2179 9.32957
|
||||
threshold=1.9950000000000003 -0.78349999999999997 2.1340000000000003 0.4642857142857143 2.0250000000000004 2.1120000000000005 1.5000000000000002
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 3 -3 -1 6 -6 -2
|
||||
right_child=4 2 -4 -5 5 -7 -8
|
||||
leaf_value=-0.039029586512136658 0.021256889774663217 -0.0093694924607743823 0.024219953555788796 -0.18450744298912586 0.010656362766355395 -0.067587490182026186 0.059808579820180507
|
||||
leaf_weight=29.468837857246399 324.64188092947006 1435.6618839651346 147.77664601802826 8.9744612574577314 753.81818389892578 24.195777088403702 86.313260063529015
|
||||
leaf_count=138 1721 6659 716 42 3733 122 467
|
||||
internal_value=0.00205658 -0.00781819 -0.00623458 -0.0730278 0.0155268 0.00822224 0.0293551
|
||||
internal_weight=2810.85 1621.88 1583.44 38.4433 1188.97 778.014 410.955
|
||||
internal_count=13598 7555 7375 180 6043 3855 2188
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=8
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=86 41 30 92 147 8 85
|
||||
split_gain=36.407 25.9777 15.256 11.9724 10.8984 10.2187 7.72256
|
||||
threshold=3.2679487179487183 1.8750000000000002 0.46841755319148942 -0.76249999999999984 0.40846280364372473 1.1083333333333336 -0.021724137931034445
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 2 -1 -3 5 -2 -6
|
||||
right_child=4 3 -4 -5 6 -7 -8
|
||||
leaf_value=-0.012159457825842523 0.064307005786210347 -0.085506751396820332 -0.10592968794102031 0.0081406001340509019 0.027805156010338752 -0.1311253894779951 0.074917050140623179
|
||||
leaf_weight=1030.2416722476482 5.0686567574739438 14.960604444146155 19.18836584687233 1572.8768468499184 63.189212292432785 6.7883874922990799 94.08995346724987
|
||||
leaf_count=4724 25 75 92 7835 328 34 485
|
||||
internal_value=0.00185481 -0.00115146 -0.0138749 0.00725775 0.0487278 -0.0475971 0.055992
|
||||
internal_weight=2806.4 2637.27 1049.43 1587.84 169.136 11.857 157.279
|
||||
internal_count=13598 12726 4816 7910 872 59 813
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=9
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=49 150 52 133 30 91 30
|
||||
split_gain=26.3296 13.6848 11.0511 10.5586 10.2017 14.0051 13.9274
|
||||
threshold=2.7850000000000006 0.60942003008724055 2.9650000000000003 2.7320000000000007 0.10822072072072071 -0.0024999999999999497 0.12681311751103638
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 3 -3 -1 5 -2 -6
|
||||
right_child=4 2 -4 -5 6 -7 -8
|
||||
leaf_value=0.0056197323214787877 0.0092359089138059399 0.013793895571876906 0.055269467591862222 -0.056030521488072138 0.020769131467870516 -0.044189842169712931 -0.012941111969465513
|
||||
leaf_weight=1335.4049420952797 73.188210651278496 194.0794630497694 108.95535556972027 30.86374768614769 165.2346598058939 196.06340071558952 684.92604845762253
|
||||
leaf_count=6712 329 987 589 157 747 879 3130
|
||||
internal_value=0.000378046 0.00867093 0.0287076 0.00422669 -0.0119885 -0.0296678 -0.00638913
|
||||
internal_weight=2788.72 1669.3 303.035 1366.27 1119.41 269.252 850.161
|
||||
internal_count=13530 8445 1576 6869 5085 1208 3877
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=10
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=86 41 0 6 110 102 53
|
||||
split_gain=38.5149 20.3558 13.2957 10.1636 9.93058 14.9217 6.02783
|
||||
threshold=3.0790020790020796 1.925 1413.5914823844723 1484.8073996434571 2.9500000000000006 0.65050000000000019 2.2150000000000003
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 2 -1 -3 6 -6 -2
|
||||
right_child=4 3 -4 -5 5 -7 -8
|
||||
leaf_value=0.0140954684441122 0.085154679849317738 0.026410531501952182 -0.017501548280296479 0.0014621478522681489 -0.046636790759956186 0.037447698249607933 0.031794502065436499
|
||||
leaf_weight=167.52074213325977 60.752241030335426 211.7988056242466 1059.5275938510895 1090.8869259208441 27.780392900109291 130.96362222731113 37.02421498298645
|
||||
leaf_count=765 323 1076 4848 5512 140 685 181
|
||||
internal_value=0.000347101 -0.00355495 -0.0131877 0.00551857 0.0388267 0.0227309 0.0649542
|
||||
internal_weight=2786.25 2529.73 1227.05 1302.69 256.52 158.744 97.7765
|
||||
internal_count=13530 12201 5613 6588 1329 825 504
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=11
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=86 122 0 132 136 148 145
|
||||
split_gain=31.5628 19.7563 17.5348 15.2483 8.699 11.6526 8.53686
|
||||
threshold=3.0790020790020796 1.0250000000000001 1444.5595683182339 0.68350000000000011 11.950000000000001 0.25070180469638537 0.63955000000000006
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 2 -1 -3 6 -6 -2
|
||||
right_child=4 3 -4 -5 5 -7 -8
|
||||
leaf_value=-0.0029563172992086786 0.032749371502154893 -0.027078470578097272 -0.04142343191974563 0.004355282116605525 0.042836068356502124 -0.043229223026257771 0.082015555649337976
|
||||
leaf_weight=268.76511310040951 128.29142379760742 184.50364246964455 247.01660700142384 1831.7398000508547 37.845243006944656 31.138597756624222 54.413731098175049
|
||||
leaf_count=1259 680 882 1128 8932 205 158 286
|
||||
internal_value=0.000321355 -0.0031776 -0.0213793 0.00147871 0.0355201 0.00398648 0.0474254
|
||||
internal_weight=2783.71 2532.03 515.782 2016.24 251.689 68.9838 182.705
|
||||
internal_count=13530 12201 2387 9814 1329 363 966
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=12
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=86 41 94 4 100 132 111
|
||||
split_gain=16.7583 14.9692 11.4051 9.36529 9.15568 6.8883 6.86331
|
||||
threshold=3.2679487179487183 2.5250000000000004 19.050000000000004 1454.7040164610194 2.1000000000000005 2.5855000000000006 1.6500000000000001
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 2 -1 4 -2 -3 -5
|
||||
right_child=3 5 -4 6 -6 -7 -8
|
||||
leaf_value=-0.0077031612935488866 -0.037300665068035477 0.045270021664037818 0.0069515620157151858 0.08783804546768309 0.1145092464596736 -0.063294467679354607 0.032433508236858831
|
||||
leaf_weight=1642.7547204941511 28.476281866431236 95.693837076425552 889.01127083599567 33.729012683033943 5.0763863474130622 6.7911695241928092 86.767844557762146
|
||||
leaf_count=7824 156 540 4317 188 29 38 478
|
||||
internal_value=0.000977474 -0.000976496 -0.00255725 0.0343884 -0.0143067 0.0380708 0.0479478
|
||||
internal_weight=2788.3 2634.25 2531.77 154.05 33.5527 102.485 120.497
|
||||
internal_count=13570 12719 12141 851 185 578 666
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=13
|
||||
num_leaves=5
|
||||
num_cat=0
|
||||
split_feature=49 91 105 52
|
||||
split_gain=12.5399 11.9283 13.2239 11.4266
|
||||
threshold=6.7950000000000008 0.46333333333333332 0.66550000000000009 3.1450000000000005
|
||||
decision_type=2 2 2 2
|
||||
left_child=1 3 -3 -1
|
||||
right_child=-2 2 -4 -5
|
||||
leaf_value=-0.00096173862485333158 -0.18852641170062942 -0.26944729151735436 0.0083924991906715318 0.019698628791158867
|
||||
leaf_weight=2444.542382568121 3.7820855826139441 3.7145474255084983 3.7179097086191177 330.02280321717262
|
||||
leaf_count=11674 16 18 18 1844
|
||||
internal_value=0.000884212 0.00114235 -0.13063 0.00149578
|
||||
internal_weight=2785.78 2782 7.43246 2774.57
|
||||
internal_count=13570 13554 36 13518
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=14
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=35 136 1 3 120 119 137
|
||||
split_gain=12.9995 12.1396 9.67922 10.2069 8.87434 8.76233 7.82142
|
||||
threshold=3.4050000000000007 3.1500000000000008 1408.1043210310934 1436.2136975970254 1.4145000000000001 0.55000000000000016 12.850000000000003
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 5 3 -2 -3 -1 -4
|
||||
right_child=2 4 6 -5 -6 -7 -8
|
||||
leaf_value=-0.065982224130305259 -0.028897601514020019 -0.010800039930687538 0.014598348000302748 0.051779760894765055 0.0040416852382515198 0.035952394418601007 -0.011457004864636652
|
||||
leaf_weight=9.93890532851219 20.277691304683685 1238.979572609067 441.32176646590233 106.29307742416859 676.56466819345951 115.69292894005775 174.66667002439499
|
||||
leaf_count=46 107 5755 2341 568 3282 548 923
|
||||
internal_value=0.000796137 -0.00349957 0.0126042 0.0388534 -0.00555799 0.0278839 0.00721016
|
||||
internal_weight=2783.74 2041.18 742.559 126.571 1915.54 125.632 615.988
|
||||
internal_count=13570 9631 3939 675 9037 594 3264
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=15
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=94 53 50 125 104 49 120
|
||||
split_gain=15.421 19.7004 11.5702 10.8982 8.46618 7.68903 0.284565
|
||||
threshold=17.550000000000004 3.5150000000000001 2.0050000000000003 0.45000000000000007 0.45750000000000007 1.8950000000000002 2.4365000000000001
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=2 5 3 -1 -3 -2 -4
|
||||
right_child=1 4 6 -5 -6 -7 -8
|
||||
leaf_value=-0.040237205117941127 0.029193543717025244 -0.027898057080941927 0.13173751892789104 -0.0049749588986612815 -0.15821400423700094 0.0070086816806797644 0.089170490566897243
|
||||
leaf_weight=101.41680394113064 210.2422444075346 11.58236499130726 5.4282936453819257 1551.7522683441639 10.168696627020834 879.85487067699432 2.6328659802675247
|
||||
leaf_count=475 1114 49 33 7519 43 4276 17
|
||||
internal_value=-0.000172415 0.00932761 -0.00653072 -0.00713841 -0.0888634 0.0112876 0.117955
|
||||
internal_weight=2773.08 1111.85 1661.23 1653.17 21.7511 1090.1 8.06116
|
||||
internal_count=13526 5482 8044 7994 92 5390 50
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=16
|
||||
num_leaves=7
|
||||
num_cat=0
|
||||
split_feature=54 49 149 143 133 120
|
||||
split_gain=13.5064 12.5454 12.7609 9.52104 4.64479 6.69198
|
||||
threshold=1.6950000000000001 4.1950000000000012 0.53851874003189804 -2.5639999999999996 1.2110000000000001 3.0500000000000003
|
||||
decision_type=2 2 2 2 2 2
|
||||
left_child=1 3 -3 -1 -2 -6
|
||||
right_child=4 2 -4 -5 5 -7
|
||||
leaf_value=-0.11165723984306615 -0.022805170561939463 -0.034049153155287885 0.071318390164509041 0.0013999129723307169 0.11023664759120146 -0.064625521974505357
|
||||
leaf_weight=8.1074528247117978 4.7248373478651073 177.6567175835371 13.41981780529022 2544.4961924999952 20.898206591606144 2.6760432869195929
|
||||
leaf_count=41 29 758 58 12491 132 17
|
||||
internal_value=-0.000148839 -0.000887801 -0.0266458 0.00104042 0.0714722 0.0903692
|
||||
internal_weight=2771.98 2743.68 191.077 2552.6 28.2991 23.5742
|
||||
internal_count=13526 13348 816 12532 178 149
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=17
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=86 89 86 49 89 92 95
|
||||
split_gain=12.3529 16.6857 29.867 13.2745 12.903 19.095 7.28162
|
||||
threshold=2.9298029556650254 1.0000000180025095e-35 1.8603896103896107 3.3850000000000002 0.27000000000000007 -0.62124999999999986 8.25
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 3 -3 -1 6 -6 -2
|
||||
right_child=4 2 -4 -5 5 -7 -8
|
||||
leaf_value=0.0076740310795042049 0.058197093639699833 -0.094247277907232085 -0.010401082561217358 -0.014939641022089629 -0.10188514738956803 0.012348640807380216 -0.023647947189664047
|
||||
leaf_weight=1310.5064302459359 93.891582764685154 49.551189616322517 670.03426378965378 359.25059229135513 16.932199478149414 257.1773796081543 13.500689059495924
|
||||
leaf_count=6543 499 228 3182 1562 87 1350 75
|
||||
internal_value=-0.000128236 -0.00290891 -0.016176 0.00280861 0.0172867 0.00528897 0.0479069
|
||||
internal_weight=2770.84 2389.34 719.585 1669.76 381.502 274.11 107.392
|
||||
internal_count=13526 11515 3410 8105 2011 1437 574
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=18
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=149 56 148 55 148 53 92
|
||||
split_gain=15.7381 11.9383 11.3441 10.9087 9.49514 8.82734 5.7139
|
||||
threshold=0.53433908520280893 1.7550000000000001 0.26796584848230992 1.905 0.24185130950380487 2.1250000000000004 -0.0099999999999999482
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 4 -3 5 -1 -2 -5
|
||||
right_child=3 2 -4 6 -6 -7 -8
|
||||
leaf_value=-0.054435660661113758 0.0153280247340685 0.02819107947162831 -0.017585503842290271 -0.01840079708748131 -0.010724207322160621 -0.0015876644117591623 0.10180483948952145
|
||||
leaf_weight=56.825720146298409 617.56049958616495 165.87729875743389 91.08600726723671 5.4966678321361568 1074.6435647159815 732.60994145274162 19.602076262235641
|
||||
leaf_count=283 3334 829 448 27 4917 3565 97
|
||||
internal_value=-0.000486987 -0.00831442 0.0119646 0.00741537 -0.01292 0.00614953 0.0754826
|
||||
internal_weight=2763.7 1388.43 256.963 1375.27 1131.47 1350.17 25.0987
|
||||
internal_count=13500 6477 1277 7023 5200 6899 124
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=19
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=149 55 135 1 27 115 54
|
||||
split_gain=12.6285 9.13776 8.91602 12.9607 10.4883 7.46851 5.43256
|
||||
threshold=0.53433908520280893 1.905 0.30550000000000005 1494.2870431281676 0.50845238095238099 2.3665000000000007 1.3650000000000004
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=2 5 3 -1 -4 -2 -3
|
||||
right_child=1 6 4 -5 -6 -7 -8
|
||||
leaf_value=0.00092966247827827358 0.0028048803388890822 0.099877401856329082 -0.0069739024068449437 -0.021042388628781803 0.03967412519492558 0.027656530085268364 -0.011168774035797255
|
||||
leaf_weight=536.86709239333868 1198.4237125739455 17.76616628468037 128.78820982575417 638.33134125173092 88.204329490661621 147.51364935934544 6.5457842200994483
|
||||
leaf_count=2501 6137 84 598 2961 417 762 40
|
||||
internal_value=-0.000430022 0.0066729 -0.007421 -0.0110049 0.0119887 0.00552878 0.0699864
|
||||
internal_weight=2762.44 1370.25 1392.19 1175.2 216.993 1345.94 24.312
|
||||
internal_count=13500 7023 6477 5462 1015 6899 124
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=20
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=86 29 98 85 111 54 1
|
||||
split_gain=13.4852 11.6743 11.2269 10.3561 7.51975 9.02896 6.85752
|
||||
threshold=3.3717607973421928 3.8452380952380958 0.53589743589743599 -0.45393665158371038 1.7500000000000002 1.3250000000000004 1430.9302281483838
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 2 -1 -3 6 -6 -2
|
||||
right_child=4 3 -4 -5 5 -7 -8
|
||||
leaf_value=0.010161271773601144 -0.030831897558424644 -0.065476683381506764 -0.016436452248208454 0.004566632375329463 -0.017458853996767588 0.051773327327659352 0.10280712840651235
|
||||
leaf_weight=220.18627671897411 5.0186129659414318 23.250052616000175 793.86604422330856 1612.9528871029615 39.005949392914772 43.020874515175819 24.570655010640621
|
||||
leaf_count=1065 29 113 3791 7879 209 264 150
|
||||
internal_value=-0.000380007 -0.0018746 -0.0106611 0.00357096 0.0351053 0.0188533 0.0801381
|
||||
internal_weight=2761.87 2650.26 1014.05 1636.2 111.616 82.0268 29.5893
|
||||
internal_count=13500 12848 4856 7992 652 473 179
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=21
|
||||
num_leaves=7
|
||||
num_cat=0
|
||||
split_feature=96 132 136 141 123 120
|
||||
split_gain=13.2003 16.5388 14.2713 13.2653 11.1928 9.3524
|
||||
threshold=4.5000000000000009 0.64600000000000013 7.8500000000000005 -2.3269999999999995 0.73000000000000009 3.1835000000000004
|
||||
decision_type=2 2 2 2 2 2
|
||||
left_child=1 2 -1 -2 -3 -5
|
||||
right_child=3 4 -4 5 -6 -7
|
||||
leaf_value=-0.060960557629992744 -0.18516760103419116 0.033438636082733196 -0.0037890386549494811 0.011554321509567651 -0.0045660462097220139 -0.095189759824081052
|
||||
leaf_weight=104.0578725785017 3.772067964076995 88.859501846134663 87.140555322170258 867.02598301321268 1593.5797092542052 8.9989516139030439
|
||||
leaf_count=484 19 416 405 4401 7693 51
|
||||
internal_value=-0.000914735 -0.00585967 -0.0349056 0.00961608 -0.00255862 0.0104568
|
||||
internal_weight=2753.43 1873.64 191.198 879.797 1682.44 876.025
|
||||
internal_count=13469 8998 889 4471 8109 4452
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=22
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=86 96 132 53 17 141 14
|
||||
split_gain=11.7147 10.9619 13.1395 10.1877 3.25091 5.30827 0.81688
|
||||
threshold=3.9466666666666668 4.5000000000000009 0.64600000000000013 2.0250000000000004 1.0000000180025095e-35 0.45950000000000008 0.63333333333333341
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 2 -1 -3 5 -2 -6
|
||||
right_child=4 3 -4 -5 6 -7 -8
|
||||
leaf_value=-0.03177439277102298 0.091437039897303479 0.024857814401883283 -0.0029740838488449607 0.00058337476930965298 0.07050981561257999 -0.054418786094624118 0.13381725959363758
|
||||
leaf_weight=191.83759662508965 7.081265255808832 274.07831323891878 1669.0744777023792 596.54252921044827 3.1656967699527723 4.3793987929821014 7.6569998264312744
|
||||
leaf_count=880 41 1511 8020 2927 18 26 46
|
||||
internal_value=-0.000813155 -0.00142725 -0.00594319 0.00822535 0.0744319 0.0357031 0.115377
|
||||
internal_weight=2753.82 2731.53 1860.91 870.621 22.2834 11.4607 10.8227
|
||||
internal_count=13469 13338 8900 4438 131 67 64
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=23
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=54 86 91 40 130 51 10
|
||||
split_gain=12.222 10.6549 10.0211 7.99429 8.88031 6.65012 3.52018
|
||||
threshold=1.5750000000000004 3.9115384615384619 0.46333333333333332 1.4550000000000003 3.4720000000000004 1.0650000000000002 1.2583333333333335
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 2 -1 4 -2 -5 -3
|
||||
right_child=3 6 -4 5 -6 -7 -8
|
||||
leaf_value=-0.0022469666970611943 0.016170707495608474 0.01400989076087111 -0.12852821652405796 0.092762096676041614 -0.16668567356812367 0.014555154480083921 0.10191215472125625
|
||||
leaf_weight=2637.7453966140747 32.849811799824238 7.6601853668689754 6.834140643477439 32.402883179485798 3.1530902385711661 18.586706772446632 13.987728200852869
|
||||
leaf_count=12765 209 44 31 200 20 112 88
|
||||
internal_value=-0.000723185 -0.00197755 -0.00257376 0.0377177 0.000116917 0.0642621 0.0708273
|
||||
internal_weight=2753.22 2666.23 2644.58 86.9925 36.0029 50.9896 21.6479
|
||||
internal_count=13469 12928 12796 541 229 312 132
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=24
|
||||
num_leaves=7
|
||||
num_cat=0
|
||||
split_feature=49 137 127 10 121 40
|
||||
split_gain=12.6198 16.4964 12.4085 9.96103 9.53044 3.42942
|
||||
threshold=3.3850000000000002 2.8500000000000001 1.4365000000000003 4.3000000000000007 2.7750000000000004 2.7850000000000006
|
||||
decision_type=2 2 2 2 2 2
|
||||
left_child=3 5 -3 4 -1 -2
|
||||
right_child=1 2 -4 -5 -6 -7
|
||||
leaf_value=0.0033265147337535523 -0.2293764266460791 -0.0035570344953408817 -0.040820787979045939 -0.15680120567767367 0.077284263807364459 -0.095337183505289247
|
||||
leaf_weight=2143.9148783534765 3.9466704875230771 459.63925896584988 123.04180367290974 4.1842633336782447 19.085369817912579 4.4060309380292892
|
||||
leaf_count=10807 17 1977 532 22 100 19
|
||||
internal_value=-1.32262e-05 -0.0135124 -0.0114264 0.00366831 0.00397942 -0.158858
|
||||
internal_weight=2758.22 591.034 582.681 2167.18 2163 8.3527
|
||||
internal_count=13474 2545 2509 10929 10907 36
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=25
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=49 6 94 56 127 104 7
|
||||
split_gain=10.0943 10.6231 11.9381 10.4719 9.83816 10.0485 9.11168
|
||||
threshold=3.3850000000000002 1485.1782059621762 17.550000000000004 2.1050000000000004 1.4365000000000003 0.42950000000000005 3.0523414722088096
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 3 -3 -1 6 -6 -2
|
||||
right_child=4 2 -4 -5 5 -7 -8
|
||||
leaf_value=0.014478237478344286 -0.015463014973786505 -0.0073632514981839545 0.0096110031081186563 0.078530598053871026 -0.0086524748202200501 -0.067456935030922358 0.015332839885013407
|
||||
leaf_weight=308.73068219423294 309.28241093456745 1017.4353533536196 807.05070595443249 30.452116876840591 63.547168910503387 62.713204026222229 157.49458535015583
|
||||
leaf_count=1561 1327 5053 4143 172 271 269 678
|
||||
internal_value=-7.78723e-06 0.00329399 0.000145221 0.0202309 -0.012054 -0.0378634 -0.00507213
|
||||
internal_weight=2756.71 2163.67 1824.49 339.183 593.037 126.26 466.777
|
||||
internal_count=13474 10929 9196 1733 2545 540 2005
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=26
|
||||
num_leaves=7
|
||||
num_cat=0
|
||||
split_feature=86 78 86 86 106 45
|
||||
split_gain=11.0807 19.2113 13.2975 16.5366 9.93615 9.16974
|
||||
threshold=2.6909814323607431 3.2071428571428577 1.0000000180025095e-35 1.8603896103896107 0.59050000000000014 1.1950000000000001
|
||||
decision_type=2 2 2 2 2 2
|
||||
left_child=2 4 5 -4 -2 -1
|
||||
right_child=1 -3 3 -5 -6 -7
|
||||
leaf_value=0.0057285473288771786 0.034425187503696976 -0.17261249206353166 -0.072289780038289356 -0.011539802081962551 0.0051253977786566941 -0.012137044156937154
|
||||
leaf_weight=1235.6497117057443 188.36661138385534 5.9902653545141211 54.218099623918533 475.07240612059832 377.99835128337145 417.49042452871799
|
||||
leaf_count=6255 973 33 239 2206 1968 1800
|
||||
internal_value=-1.60817e-06 0.0129056 -0.00338664 -0.0177639 0.0148706 0.00121666
|
||||
internal_weight=2754.79 572.355 2182.43 529.291 566.365 1653.14
|
||||
internal_count=13474 2974 10500 2445 2941 8055
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=27
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=148 141 115 107 104 4 134
|
||||
split_gain=10.1298 10.8135 12.2053 9.7232 15.8813 8.97442 7.29505
|
||||
threshold=0.17399267399267399 -0.79299999999999993 1.2250000000000003 0.40950000000000003 0.41250000000000003 1460.7202799166059 -0.30549999999999994
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=3 5 -3 6 -5 -2 -1
|
||||
right_child=1 2 -4 4 -6 -7 -8
|
||||
leaf_value=-0.10259253098907423 -0.03269054108208802 -0.0021082423175915925 0.015308582355162925 -0.13734624687852556 0.036154293999346325 -0.0035202193944307817 0.042506627852751083
|
||||
leaf_weight=4.9914454817771938 183.7718341127038 1603.1649219617248 600.83727415651083 20.186357498168949 7.9962391257286063 304.1783049851656 15.20464117079973
|
||||
leaf_count=25 894 7830 2977 102 43 1483 78
|
||||
internal_value=-0.0013178 -0.00046824 0.00263984 -0.0485807 -0.0881234 -0.0145068 0.00661435
|
||||
internal_weight=2740.33 2691.95 2204 48.3787 28.1826 487.95 20.1961
|
||||
internal_count=13432 13184 10807 248 145 2377 103
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=28
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=86 134 41 106 29 91 3
|
||||
split_gain=10.2371 10.3112 10.5364 10.1993 11.3955 9.81442 8.99213
|
||||
threshold=2.864250614250615 -0.43849999999999995 2.1050000000000004 0.95450000000000013 1.3875000000000002 -0.0054166666666666486 1670.9226550788155
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=3 2 -2 5 -5 -1 -3
|
||||
right_child=1 6 -4 4 -6 -7 -8
|
||||
leaf_value=-0.015667034131642239 -0.063328583368563382 0.021380817595676553 0.036600112461665557 -0.21945686012767968 -0.026560041934415114 0.0014656039323343243 -0.12083840209161807
|
||||
leaf_weight=461.02843705564737 34.460375130176544 373.77398503571749 17.162777505815029 3.4011466801166526 135.53743974119425 1711.2739861980081 4.8830340132117263
|
||||
leaf_count=2173 176 1994 103 17 658 8282 29
|
||||
internal_value=-0.0011736 0.0135866 -0.0301054 -0.00392153 -0.0312966 -0.00217051 0.0195438
|
||||
internal_weight=2741.52 430.28 51.6232 2311.24 138.939 2172.3 378.657
|
||||
internal_count=13432 2302 279 11130 675 10455 2023
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=29
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=12 84 115 141 136 11 110
|
||||
split_gain=9.65816 11.3219 10.1518 11.5361 10.0734 9.44977 6.86501
|
||||
threshold=1.0000000180025095e-35 0.017970779220779203 1.2915000000000003 -0.60349999999999981 3.6500000000000008 2.1083333333333338 27.750000000000004
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 6 4 -4 -2 -3 -1
|
||||
right_child=2 5 3 -5 -6 -7 -8
|
||||
leaf_value=0.01695486197013276 0.018272032851518873 -0.047853351303007219 -0.020306195413698006 0.015936397489881831 -0.011004436565708025 0.099082325881264394 -0.086337197330871959
|
||||
leaf_weight=520.42061326652765 139.83185759186745 50.192012257874012 122.51054417341948 430.93686553835869 1465.7739738970995 5.2428159564733496 7.0750899761915198
|
||||
leaf_count=2585 694 257 602 2140 7090 29 35
|
||||
internal_value=-0.00104481 0.0108592 -0.00425885 0.00791357 -0.00845466 -0.0339422 0.0155679
|
||||
internal_weight=2741.98 582.931 2159.05 553.447 1605.61 55.4348 527.496
|
||||
internal_count=13432 2906 10526 2742 7784 286 2620
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=30
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=120 37 38 38 105 85 100
|
||||
split_gain=12.0364 15.2453 11.9342 13.3835 10.0958 10.0578 10.0546
|
||||
threshold=1.4145000000000001 0.42084520042422408 0.23906113829447367 0.14593794095256588 1.1225000000000003 -0.37141065830721004 1.3650000000000002
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=2 6 3 -1 -3 -4 -2
|
||||
right_child=1 4 5 -5 -6 -7 -8
|
||||
leaf_value=-0.12038760226397151 -0.0090962232080863334 0.0034107878159782353 -0.060847190395801561 0.022337300771774038 -0.044228760615057698 -0.0060312321358531873 0.030222541925148897
|
||||
leaf_weight=7.3577561527490607 84.787891268730164 592.17002998292446 37.370823763310909 228.1118380650878 52.602927520871162 1330.5593820437789 423.39983003586531
|
||||
leaf_count=43 418 3057 171 1205 280 6147 2165
|
||||
internal_value=0.00204154 0.0101633 -0.00379863 0.0178729 -0.00047641 -0.00752917 0.0236624
|
||||
internal_weight=2756.36 1152.96 1603.4 235.47 644.773 1367.93 508.188
|
||||
internal_count=13486 5920 7566 1248 3337 6318 2583
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=31
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=86 86 86 1 127 31 122
|
||||
split_gain=10.4188 10.4465 11.9432 10.3703 15.906 10.0392 7.58651
|
||||
threshold=2.6939799331103687 1.0000000180025095e-35 1.8603896103896107 1467.9553804385575 2.1340000000000003 0.22980739360049704 2.3515000000000006
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=1 6 -3 5 -5 -2 -1
|
||||
right_child=3 2 -4 4 -6 -7 -8
|
||||
leaf_value=0.0014256124570572787 0.0018941270616234518 -0.058645654322910731 -0.0086447877444392647 0.02733676763309234 -0.12590863729503893 -0.082423971345859881 0.042831761918242235
|
||||
leaf_weight=1608.7813730537891 166.28092505782843 58.273459360003471 473.13907171785831 372.2649027556181 7.4961845725774756 16.887378051877022 49.535993173718452
|
||||
leaf_count=7823 854 251 2181 1963 43 94 277
|
||||
internal_value=0.00183871 -0.00141235 -0.0141287 0.0144848 0.0243088 -0.0058835 0.0026627
|
||||
internal_weight=2752.66 2189.73 531.413 562.929 379.761 183.168 1658.32
|
||||
internal_count=13486 10532 2432 2954 2006 948 8100
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=32
|
||||
num_leaves=8
|
||||
num_cat=0
|
||||
split_feature=93 144 112 94 104 93 120
|
||||
split_gain=10.1721 16.1941 11.2364 13.1765 9.78364 8.96284 8.48134
|
||||
threshold=20.550000000000004 0.013450000000000002 0.22750000000000004 5.5500000000000007 1.2865000000000002 25.050000000000004 2.0500000000000003
|
||||
decision_type=2 2 2 2 2 2 2
|
||||
left_child=2 4 6 -4 -2 -3 -1
|
||||
right_child=1 5 3 -5 -6 -7 -8
|
||||
leaf_value=0.0034609471658556285 0.020050548549636428 -0.009082334773132807 -0.11004659616173273 -0.007594432301873808 -0.021486441758403186 -0.13291664093499478 0.038221762386595907
|
||||
leaf_weight=820.33589915931225 594.67776323109865 13.120999380946161 13.791079476475714 1142.1561977639794 68.70460732281208 12.293409973382948 84.046657353639603
|
||||
leaf_count=3903 3054 71 65 5495 382 70 446
|
||||
internal_value=0.0016558 0.0126202 -0.00200979 -0.00881763 0.0157484 -0.0690103 0.00669173
|
||||
internal_weight=2749.13 688.797 2060.33 1155.95 663.382 25.4144 904.383
|
||||
internal_count=13486 3577 9909 5560 3436 141 4349
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=33
|
||||
num_leaves=5
|
||||
num_cat=0
|
||||
split_feature=91 141 123 7
|
||||
split_gain=9.64033 9.54985 10.2025 8.96435
|
||||
threshold=0.46333333333333332 -0.77399999999999991 2.2565000000000004 33.104396552584838
|
||||
decision_type=2 2 2 2
|
||||
left_child=1 3 -3 -1
|
||||
right_child=-2 2 -4 -5
|
||||
leaf_value=-0.015732035707484552 -0.12133869515037762 0.0038526132779362976 0.057630556320633561 0.031446003016811871
|
||||
leaf_weight=450.80555958300829 6.9056807309389105 2188.2216669544578 38.992827542126179 48.440718069672585
|
||||
leaf_count=2162 31 10769 213 250
|
||||
internal_value=0.00156231 0.00187402 0.00479437 -0.011154
|
||||
internal_weight=2733.37 2726.46 2227.21 499.246
|
||||
internal_count=13425 13394 10982 2412
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
Tree=34
|
||||
num_leaves=5
|
||||
num_cat=0
|
||||
split_feature=45 98 80 123
|
||||
split_gain=8.19305 8.37131 12.6079 10.3403
|
||||
threshold=1.6150000000000004 0.58114035087719318 0.043836402184014855 2.1715000000000004
|
||||
decision_type=2 2 2 2
|
||||
left_child=1 3 -3 -1
|
||||
right_child=-2 2 -4 -5
|
||||
leaf_value=0.008356015193296689 -0.14816646007318571 -0.033602776052007143 0.00019618273212945178 0.070940775625300892
|
||||
leaf_weight=749.49989224970341 3.9628616422414771 128.32899699360132 1819.463518589735 29.807201541960239
|
||||
leaf_count=3713 16 704 8830 162
|
||||
internal_value=0.00140384 0.0016217 -0.00203079 0.0107506
|
||||
internal_weight=2731.06 2727.1 1947.79 779.307
|
||||
internal_count=13425 13409 9534 3875
|
||||
is_linear=0
|
||||
shrinkage=0.104222
|
||||
|
||||
|
||||
end of trees
|
||||
|
||||
feature_importances:
|
||||
Column_86=18
|
||||
Column_49=9
|
||||
Column_53=8
|
||||
Column_41=6
|
||||
Column_54=6
|
||||
Column_120=6
|
||||
Column_141=6
|
||||
Column_45=5
|
||||
Column_91=5
|
||||
Column_104=5
|
||||
Column_1=4
|
||||
Column_94=4
|
||||
Column_96=4
|
||||
Column_132=4
|
||||
Column_136=4
|
||||
Column_148=4
|
||||
Column_3=3
|
||||
Column_4=3
|
||||
Column_6=3
|
||||
Column_11=3
|
||||
Column_30=3
|
||||
Column_35=3
|
||||
Column_38=3
|
||||
Column_48=3
|
||||
Column_50=3
|
||||
Column_85=3
|
||||
Column_92=3
|
||||
Column_105=3
|
||||
Column_115=3
|
||||
Column_123=3
|
||||
Column_127=3
|
||||
Column_130=3
|
||||
Column_133=3
|
||||
Column_149=3
|
||||
Column_150=3
|
||||
Column_0=2
|
||||
Column_7=2
|
||||
Column_10=2
|
||||
Column_16=2
|
||||
Column_29=2
|
||||
Column_40=2
|
||||
Column_44=2
|
||||
Column_51=2
|
||||
Column_52=2
|
||||
Column_55=2
|
||||
Column_56=2
|
||||
Column_89=2
|
||||
Column_93=2
|
||||
Column_98=2
|
||||
Column_100=2
|
||||
Column_102=2
|
||||
Column_106=2
|
||||
Column_110=2
|
||||
Column_111=2
|
||||
Column_122=2
|
||||
Column_134=2
|
||||
Column_135=2
|
||||
Column_137=2
|
||||
Column_143=2
|
||||
Column_144=2
|
||||
Column_147=2
|
||||
Column_8=1
|
||||
Column_12=1
|
||||
Column_14=1
|
||||
Column_17=1
|
||||
Column_21=1
|
||||
Column_27=1
|
||||
Column_28=1
|
||||
Column_31=1
|
||||
Column_34=1
|
||||
Column_37=1
|
||||
Column_39=1
|
||||
Column_46=1
|
||||
Column_47=1
|
||||
Column_78=1
|
||||
Column_80=1
|
||||
Column_84=1
|
||||
Column_95=1
|
||||
Column_99=1
|
||||
Column_107=1
|
||||
Column_112=1
|
||||
Column_119=1
|
||||
Column_121=1
|
||||
Column_125=1
|
||||
Column_126=1
|
||||
Column_145=1
|
||||
|
||||
parameters:
|
||||
[boosting: gbdt]
|
||||
[objective: binary]
|
||||
[metric: binary_logloss]
|
||||
[tree_learner: serial]
|
||||
[device_type: cpu]
|
||||
[data_sample_strategy: bagging]
|
||||
[data: ]
|
||||
[valid: ]
|
||||
[num_iterations: 1500]
|
||||
[learning_rate: 0.104222]
|
||||
[num_leaves: 8]
|
||||
[num_threads: 4]
|
||||
[seed: 42]
|
||||
[deterministic: 0]
|
||||
[force_col_wise: 0]
|
||||
[force_row_wise: 0]
|
||||
[histogram_pool_size: -1]
|
||||
[max_depth: 3]
|
||||
[min_data_in_leaf: 17]
|
||||
[min_sum_hessian_in_leaf: 0.001]
|
||||
[bagging_fraction: 0.709098]
|
||||
[pos_bagging_fraction: 1]
|
||||
[neg_bagging_fraction: 1]
|
||||
[bagging_freq: 3]
|
||||
[bagging_seed: 400]
|
||||
[bagging_by_query: 0]
|
||||
[feature_fraction: 0.58888]
|
||||
[feature_fraction_bynode: 1]
|
||||
[feature_fraction_seed: 30056]
|
||||
[extra_trees: 0]
|
||||
[extra_seed: 12879]
|
||||
[early_stopping_round: 0]
|
||||
[early_stopping_min_delta: 0]
|
||||
[first_metric_only: 0]
|
||||
[max_delta_step: 0]
|
||||
[lambda_l1: 7.81041e-07]
|
||||
[lambda_l2: 0.00942891]
|
||||
[linear_lambda: 0]
|
||||
[min_gain_to_split: 0]
|
||||
[drop_rate: 0.1]
|
||||
[max_drop: 50]
|
||||
[skip_drop: 0.5]
|
||||
[xgboost_dart_mode: 0]
|
||||
[uniform_drop: 0]
|
||||
[drop_seed: 17869]
|
||||
[top_rate: 0.2]
|
||||
[other_rate: 0.1]
|
||||
[min_data_per_group: 100]
|
||||
[max_cat_threshold: 32]
|
||||
[cat_l2: 10]
|
||||
[cat_smooth: 10]
|
||||
[max_cat_to_onehot: 4]
|
||||
[top_k: 20]
|
||||
[monotone_constraints: ]
|
||||
[monotone_constraints_method: basic]
|
||||
[monotone_penalty: 0]
|
||||
[feature_contri: ]
|
||||
[forcedsplits_filename: ]
|
||||
[refit_decay_rate: 0.9]
|
||||
[cegb_tradeoff: 1]
|
||||
[cegb_penalty_split: 0]
|
||||
[cegb_penalty_feature_lazy: ]
|
||||
[cegb_penalty_feature_coupled: ]
|
||||
[path_smooth: 0]
|
||||
[interaction_constraints: ]
|
||||
[verbosity: -1]
|
||||
[saved_feature_importance_type: 0]
|
||||
[use_quantized_grad: 0]
|
||||
[num_grad_quant_bins: 4]
|
||||
[quant_train_renew_leaf: 0]
|
||||
[stochastic_rounding: 1]
|
||||
[linear_tree: 0]
|
||||
[max_bin: 255]
|
||||
[max_bin_by_feature: ]
|
||||
[min_data_in_bin: 3]
|
||||
[bin_construct_sample_cnt: 200000]
|
||||
[data_random_seed: 175]
|
||||
[is_enable_sparse: 1]
|
||||
[enable_bundle: 1]
|
||||
[use_missing: 1]
|
||||
[zero_as_missing: 0]
|
||||
[feature_pre_filter: 1]
|
||||
[pre_partition: 0]
|
||||
[two_round: 0]
|
||||
[header: 0]
|
||||
[label_column: ]
|
||||
[weight_column: ]
|
||||
[group_column: ]
|
||||
[ignore_column: ]
|
||||
[categorical_feature: ]
|
||||
[forcedbins_filename: ]
|
||||
[precise_float_parser: 0]
|
||||
[parser_config_file: ]
|
||||
[objective_seed: 16083]
|
||||
[num_class: 1]
|
||||
[is_unbalance: 0]
|
||||
[scale_pos_weight: 1]
|
||||
[sigmoid: 1]
|
||||
[boost_from_average: 1]
|
||||
[reg_sqrt: 0]
|
||||
[alpha: 0.9]
|
||||
[fair_c: 1]
|
||||
[poisson_max_delta_step: 0.7]
|
||||
[tweedie_variance_power: 1.5]
|
||||
[lambdarank_truncation_level: 30]
|
||||
[lambdarank_norm: 1]
|
||||
[label_gain: ]
|
||||
[lambdarank_position_bias_regularization: 0]
|
||||
[eval_at: ]
|
||||
[multi_error_top_k: 1]
|
||||
[auc_mu_weights: ]
|
||||
[num_machines: 1]
|
||||
[local_listen_port: 12400]
|
||||
[time_out: 120]
|
||||
[machine_list_filename: ]
|
||||
[machines: ]
|
||||
[gpu_platform_id: -1]
|
||||
[gpu_device_id: -1]
|
||||
[gpu_use_dp: 0]
|
||||
[num_gpu: 1]
|
||||
|
||||
end of parameters
|
||||
|
||||
pandas_categorical:null
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -20,6 +20,13 @@ from dataclasses import dataclass, field
|
||||
import xgboost as xgb
|
||||
import lightgbm as lgb
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
try:
|
||||
from config.config_loader import get_config as _get_cfg
|
||||
except ImportError:
|
||||
_get_cfg = None # type: ignore[assignment]
|
||||
|
||||
# CatBoost is optional
|
||||
try:
|
||||
from catboost import CatBoostClassifier
|
||||
@@ -228,7 +235,7 @@ class V25Predictor:
|
||||
print(f"[V25] Using fallback feature columns ({len(V25Predictor._FALLBACK_FEATURE_COLS)} features)")
|
||||
return V25Predictor._FALLBACK_FEATURE_COLS
|
||||
|
||||
# Model weights for ensemble
|
||||
# Model weights for ensemble (overridden from config in __init__)
|
||||
DEFAULT_WEIGHTS = {
|
||||
'xgb': 0.50,
|
||||
'lgb': 0.50,
|
||||
@@ -245,6 +252,16 @@ class V25Predictor:
|
||||
self.models = {} # market -> {'xgb': model, 'lgb': model}
|
||||
self._loaded = False
|
||||
self.FEATURE_COLS = self._load_feature_cols()
|
||||
# Load weights from config (falls back to class default 0.50/0.50)
|
||||
if _get_cfg is not None:
|
||||
try:
|
||||
cfg = _get_cfg()
|
||||
self.DEFAULT_WEIGHTS = {
|
||||
'xgb': float(cfg.get('model_ensemble.xgb_weight', 0.50)),
|
||||
'lgb': float(cfg.get('model_ensemble.lgb_weight', 0.50)),
|
||||
}
|
||||
except Exception:
|
||||
pass # keep class-level defaults
|
||||
|
||||
# All trained market models available in V25
|
||||
ALL_MARKETS = [
|
||||
@@ -275,16 +292,29 @@ class V25Predictor:
|
||||
xgb_content = f.read()
|
||||
booster = xgb.Booster()
|
||||
booster.load_model(bytearray(xgb_content, 'utf-8'))
|
||||
# Corruption detection: verify model can run a dummy prediction
|
||||
try:
|
||||
_dummy = pd.DataFrame([{col: 0.0 for col in self.FEATURE_COLS}])
|
||||
booster.predict(xgb.DMatrix(_dummy))
|
||||
self.models[market]['xgb'] = booster
|
||||
loaded_count += 1
|
||||
except Exception as _ce:
|
||||
print(f"[V25] ⚠️ XGB model for {market} failed integrity check: {_ce} — skipping")
|
||||
|
||||
# Load LightGBM (read content in Python to avoid non-ASCII path issues)
|
||||
lgb_path = os.path.join(self.models_dir, f'lgb_v25_{market}.txt')
|
||||
if os.path.exists(lgb_path) and os.path.getsize(lgb_path) > 0:
|
||||
with open(lgb_path, 'r', encoding='utf-8') as f:
|
||||
model_str = f.read()
|
||||
self.models[market]['lgb'] = lgb.Booster(model_str=model_str)
|
||||
lgb_model = lgb.Booster(model_str=model_str)
|
||||
# Corruption detection: verify model can run a dummy prediction
|
||||
try:
|
||||
_dummy = pd.DataFrame([{col: 0.0 for col in self.FEATURE_COLS}])
|
||||
lgb_model.predict(_dummy)
|
||||
self.models[market]['lgb'] = lgb_model
|
||||
loaded_count += 1
|
||||
except Exception as _ce:
|
||||
print(f"[V25] ⚠️ LGB model for {market} failed integrity check: {_ce} — skipping")
|
||||
|
||||
# Remove empty entries
|
||||
if not self.models[market]:
|
||||
@@ -306,6 +336,26 @@ class V25Predictor:
|
||||
if not self.load_models():
|
||||
raise RuntimeError("Failed to load V25 models")
|
||||
|
||||
def readiness_summary(self) -> Dict[str, Any]:
|
||||
"""Return per-market model status for health check endpoint."""
|
||||
if not self._loaded:
|
||||
self.load_models()
|
||||
market_status = {}
|
||||
for market in self.ALL_MARKETS:
|
||||
m = self.models.get(market, {})
|
||||
market_status[market] = {
|
||||
"xgb": "xgb" in m,
|
||||
"lgb": "lgb" in m,
|
||||
"ready": bool(m),
|
||||
}
|
||||
loaded_markets = [k for k, v in market_status.items() if v["ready"]]
|
||||
return {
|
||||
"fully_loaded": len(loaded_markets) == len(self.ALL_MARKETS),
|
||||
"loaded_markets": loaded_markets,
|
||||
"missing_markets": [m for m in self.ALL_MARKETS if m not in loaded_markets],
|
||||
"weights": self.DEFAULT_WEIGHTS,
|
||||
}
|
||||
|
||||
def _prepare_features(self, features: Dict[str, float]) -> pd.DataFrame:
|
||||
"""Prepare feature vector for prediction."""
|
||||
X = pd.DataFrame([{col: features.get(col, 0.0) for col in self.FEATURE_COLS}])
|
||||
@@ -563,13 +613,23 @@ class V25Predictor:
|
||||
) -> List[ValueBet]:
|
||||
"""Detect value bets based on model vs market odds."""
|
||||
value_bets = []
|
||||
min_edge = 0.05 # 5% minimum edge
|
||||
# Market-specific minimum edge thresholds
|
||||
# MS: higher variance → require more edge
|
||||
# OU/BTTS: binary markets → tighter edge acceptable
|
||||
EDGE_THRESHOLDS = {
|
||||
'MS': 0.06,
|
||||
'OU25': 0.04,
|
||||
'BTTS': 0.04,
|
||||
}
|
||||
ms_edge = EDGE_THRESHOLDS['MS']
|
||||
ou_edge = EDGE_THRESHOLDS['OU25']
|
||||
btts_edge = EDGE_THRESHOLDS['BTTS']
|
||||
|
||||
# MS value bets
|
||||
if 'ms_h' in odds and odds['ms_h'] > 0:
|
||||
implied = 1 / odds['ms_h']
|
||||
edge = home_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > ms_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='MS',
|
||||
pick='1',
|
||||
@@ -582,7 +642,7 @@ class V25Predictor:
|
||||
if 'ms_d' in odds and odds['ms_d'] > 0:
|
||||
implied = 1 / odds['ms_d']
|
||||
edge = draw_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > ms_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='MS',
|
||||
pick='X',
|
||||
@@ -595,7 +655,7 @@ class V25Predictor:
|
||||
if 'ms_a' in odds and odds['ms_a'] > 0:
|
||||
implied = 1 / odds['ms_a']
|
||||
edge = away_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > ms_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='MS',
|
||||
pick='2',
|
||||
@@ -609,7 +669,7 @@ class V25Predictor:
|
||||
if 'ou25_o' in odds and odds['ou25_o'] > 0:
|
||||
implied = 1 / odds['ou25_o']
|
||||
edge = over_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > ou_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='OU25',
|
||||
pick='Over',
|
||||
@@ -622,7 +682,7 @@ class V25Predictor:
|
||||
if 'ou25_u' in odds and odds['ou25_u'] > 0:
|
||||
implied = 1 / odds['ou25_u']
|
||||
edge = under_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > ou_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='OU25',
|
||||
pick='Under',
|
||||
@@ -636,7 +696,7 @@ class V25Predictor:
|
||||
if 'btts_y' in odds and odds['btts_y'] > 0:
|
||||
implied = 1 / odds['btts_y']
|
||||
edge = btts_yes_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > btts_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='BTTS',
|
||||
pick='Yes',
|
||||
@@ -649,7 +709,7 @@ class V25Predictor:
|
||||
if 'btts_n' in odds and odds['btts_n'] > 0:
|
||||
implied = 1 / odds['btts_n']
|
||||
edge = btts_no_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > btts_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='BTTS',
|
||||
pick='No',
|
||||
|
||||
@@ -0,0 +1,160 @@
|
||||
{
|
||||
"total_test": 23039,
|
||||
"thresholds": {
|
||||
"0.0": {
|
||||
"n_matches": 22227,
|
||||
"pct": 96.5,
|
||||
"markets": {
|
||||
"ms": {
|
||||
"hit_rate": 0.5363,
|
||||
"avg_roi": -0.0046,
|
||||
"total_roi": -103.02
|
||||
},
|
||||
"ou15": {
|
||||
"hit_rate": 0.7463,
|
||||
"avg_roi": 0.0144,
|
||||
"total_roi": 319.02
|
||||
},
|
||||
"ou25": {
|
||||
"hit_rate": 0.6111,
|
||||
"avg_roi": -0.006,
|
||||
"total_roi": -134.41
|
||||
},
|
||||
"ou35": {
|
||||
"hit_rate": 0.7302,
|
||||
"avg_roi": -0.014,
|
||||
"total_roi": -310.51
|
||||
},
|
||||
"btts": {
|
||||
"hit_rate": 0.5848,
|
||||
"avg_roi": 0.0031,
|
||||
"total_roi": 69.5
|
||||
}
|
||||
}
|
||||
},
|
||||
"0.1": {
|
||||
"n_matches": 23033,
|
||||
"pct": 100.0,
|
||||
"markets": {
|
||||
"ms": {
|
||||
"hit_rate": 0.546,
|
||||
"avg_roi": -0.0045,
|
||||
"total_roi": -104.38
|
||||
},
|
||||
"ou15": {
|
||||
"hit_rate": 0.7533,
|
||||
"avg_roi": 0.0145,
|
||||
"total_roi": 335.02
|
||||
},
|
||||
"ou25": {
|
||||
"hit_rate": 0.6193,
|
||||
"avg_roi": -0.0042,
|
||||
"total_roi": -96.97
|
||||
},
|
||||
"ou35": {
|
||||
"hit_rate": 0.7277,
|
||||
"avg_roi": -0.0147,
|
||||
"total_roi": -338.57
|
||||
},
|
||||
"btts": {
|
||||
"hit_rate": 0.5886,
|
||||
"avg_roi": 0.0025,
|
||||
"total_roi": 57.21
|
||||
}
|
||||
}
|
||||
},
|
||||
"0.2": {
|
||||
"n_matches": 23034,
|
||||
"pct": 100.0,
|
||||
"markets": {
|
||||
"ms": {
|
||||
"hit_rate": 0.5459,
|
||||
"avg_roi": -0.0046,
|
||||
"total_roi": -105.38
|
||||
},
|
||||
"ou15": {
|
||||
"hit_rate": 0.7533,
|
||||
"avg_roi": 0.0146,
|
||||
"total_roi": 335.26
|
||||
},
|
||||
"ou25": {
|
||||
"hit_rate": 0.6193,
|
||||
"avg_roi": -0.0043,
|
||||
"total_roi": -97.97
|
||||
},
|
||||
"ou35": {
|
||||
"hit_rate": 0.7276,
|
||||
"avg_roi": -0.0147,
|
||||
"total_roi": -339.57
|
||||
},
|
||||
"btts": {
|
||||
"hit_rate": 0.5887,
|
||||
"avg_roi": 0.0025,
|
||||
"total_roi": 57.62
|
||||
}
|
||||
}
|
||||
},
|
||||
"0.3": {
|
||||
"n_matches": 23039,
|
||||
"pct": 100.0,
|
||||
"markets": {
|
||||
"ms": {
|
||||
"hit_rate": 0.546,
|
||||
"avg_roi": -0.0045,
|
||||
"total_roi": -103.45
|
||||
},
|
||||
"ou15": {
|
||||
"hit_rate": 0.7534,
|
||||
"avg_roi": 0.0146,
|
||||
"total_roi": 335.6
|
||||
},
|
||||
"ou25": {
|
||||
"hit_rate": 0.6194,
|
||||
"avg_roi": -0.0042,
|
||||
"total_roi": -97.44
|
||||
},
|
||||
"ou35": {
|
||||
"hit_rate": 0.7277,
|
||||
"avg_roi": -0.0147,
|
||||
"total_roi": -339.26
|
||||
},
|
||||
"btts": {
|
||||
"hit_rate": 0.5887,
|
||||
"avg_roi": 0.0025,
|
||||
"total_roi": 58.61
|
||||
}
|
||||
}
|
||||
},
|
||||
"0.5": {
|
||||
"n_matches": 23039,
|
||||
"pct": 100.0,
|
||||
"markets": {
|
||||
"ms": {
|
||||
"hit_rate": 0.546,
|
||||
"avg_roi": -0.0045,
|
||||
"total_roi": -103.45
|
||||
},
|
||||
"ou15": {
|
||||
"hit_rate": 0.7534,
|
||||
"avg_roi": 0.0146,
|
||||
"total_roi": 335.6
|
||||
},
|
||||
"ou25": {
|
||||
"hit_rate": 0.6194,
|
||||
"avg_roi": -0.0042,
|
||||
"total_roi": -97.44
|
||||
},
|
||||
"ou35": {
|
||||
"hit_rate": 0.7277,
|
||||
"avg_roi": -0.0147,
|
||||
"total_roi": -339.26
|
||||
},
|
||||
"btts": {
|
||||
"hit_rate": 0.5887,
|
||||
"avg_roi": 0.0025,
|
||||
"total_roi": 58.61
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,5 @@
|
||||
[
|
||||
{
|
||||
"market": "MS-Ev",
|
||||
"min_edge": 0.02,
|
||||
"n":
|
||||
@@ -0,0 +1,267 @@
|
||||
{
|
||||
"generated_at": "2026-05-15T21:40:57.995899",
|
||||
"matches_processed": 3000,
|
||||
"matches_skipped": 0,
|
||||
"markets": {
|
||||
"MS": {
|
||||
"overall_accuracy": 54.97,
|
||||
"total_matches": 3000,
|
||||
"by_confidence_band": {
|
||||
"<50%": {
|
||||
"accuracy": 38.87,
|
||||
"count": 759,
|
||||
"mean_confidence": 45.58
|
||||
},
|
||||
"50-65%": {
|
||||
"accuracy": 52.62,
|
||||
"count": 1300,
|
||||
"mean_confidence": 57.19
|
||||
},
|
||||
"65-75%": {
|
||||
"accuracy": 66.99,
|
||||
"count": 624,
|
||||
"mean_confidence": 69.49
|
||||
},
|
||||
"75%+": {
|
||||
"accuracy": 79.5,
|
||||
"count": 317,
|
||||
"mean_confidence": 80.69
|
||||
}
|
||||
},
|
||||
"by_league": {
|
||||
"Bundesliga": {
|
||||
"accuracy": 46.77,
|
||||
"count": 62
|
||||
},
|
||||
"Ligue 1": {
|
||||
"accuracy": 58.73,
|
||||
"count": 63
|
||||
},
|
||||
"Serie A": {
|
||||
"accuracy": 56.25,
|
||||
"count": 64
|
||||
},
|
||||
"Other": {
|
||||
"accuracy": 55.03,
|
||||
"count": 2811
|
||||
}
|
||||
},
|
||||
"by_pick_direction": {
|
||||
"1": {
|
||||
"accuracy": 58.38,
|
||||
"count": 1946,
|
||||
"mean_confidence": 60.84
|
||||
},
|
||||
"2": {
|
||||
"accuracy": 48.72,
|
||||
"count": 1053,
|
||||
"mean_confidence": 56.44
|
||||
},
|
||||
"X": {
|
||||
"accuracy": 0.0,
|
||||
"count": 1,
|
||||
"mean_confidence": 56.07
|
||||
}
|
||||
}
|
||||
},
|
||||
"OU15": {
|
||||
"overall_accuracy": 74.4,
|
||||
"total_matches": 3000,
|
||||
"by_confidence_band": {
|
||||
"50-65%": {
|
||||
"accuracy": 70.97,
|
||||
"count": 62,
|
||||
"mean_confidence": 59.63
|
||||
},
|
||||
"65-75%": {
|
||||
"accuracy": 68.0,
|
||||
"count": 275,
|
||||
"mean_confidence": 71.1
|
||||
},
|
||||
"75%+": {
|
||||
"accuracy": 75.14,
|
||||
"count": 2663,
|
||||
"mean_confidence": 89.44
|
||||
}
|
||||
},
|
||||
"by_league": {
|
||||
"Bundesliga": {
|
||||
"accuracy": 67.74,
|
||||
"count": 62
|
||||
},
|
||||
"Ligue 1": {
|
||||
"accuracy": 76.19,
|
||||
"count": 63
|
||||
},
|
||||
"Serie A": {
|
||||
"accuracy": 70.31,
|
||||
"count": 64
|
||||
},
|
||||
"Other": {
|
||||
"accuracy": 74.6,
|
||||
"count": 2811
|
||||
}
|
||||
},
|
||||
"by_pick_direction": {
|
||||
"Over": {
|
||||
"accuracy": 74.4,
|
||||
"count": 3000,
|
||||
"mean_confidence": 87.14
|
||||
}
|
||||
}
|
||||
},
|
||||
"OU25": {
|
||||
"overall_accuracy": 51.77,
|
||||
"total_matches": 3000,
|
||||
"by_confidence_band": {
|
||||
"50-65%": {
|
||||
"accuracy": 49.33,
|
||||
"count": 1267,
|
||||
"mean_confidence": 57.13
|
||||
},
|
||||
"65-75%": {
|
||||
"accuracy": 54.53,
|
||||
"count": 453,
|
||||
"mean_confidence": 69.42
|
||||
},
|
||||
"75%+": {
|
||||
"accuracy": 53.2,
|
||||
"count": 1280,
|
||||
"mean_confidence": 90.2
|
||||
}
|
||||
},
|
||||
"by_league": {
|
||||
"Bundesliga": {
|
||||
"accuracy": 41.94,
|
||||
"count": 62
|
||||
},
|
||||
"Ligue 1": {
|
||||
"accuracy": 50.79,
|
||||
"count": 63
|
||||
},
|
||||
"Serie A": {
|
||||
"accuracy": 43.75,
|
||||
"count": 64
|
||||
},
|
||||
"Other": {
|
||||
"accuracy": 52.19,
|
||||
"count": 2811
|
||||
}
|
||||
},
|
||||
"by_pick_direction": {
|
||||
"Over": {
|
||||
"accuracy": 51.03,
|
||||
"count": 2432,
|
||||
"mean_confidence": 76.11
|
||||
},
|
||||
"Under": {
|
||||
"accuracy": 54.93,
|
||||
"count": 568,
|
||||
"mean_confidence": 60.17
|
||||
}
|
||||
}
|
||||
},
|
||||
"BTTS": {
|
||||
"overall_accuracy": 51.83,
|
||||
"total_matches": 3000,
|
||||
"by_confidence_band": {
|
||||
"50-65%": {
|
||||
"accuracy": 48.74,
|
||||
"count": 2214,
|
||||
"mean_confidence": 58.66
|
||||
},
|
||||
"65-75%": {
|
||||
"accuracy": 60.42,
|
||||
"count": 758,
|
||||
"mean_confidence": 68.19
|
||||
},
|
||||
"75%+": {
|
||||
"accuracy": 64.29,
|
||||
"count": 28,
|
||||
"mean_confidence": 77.44
|
||||
}
|
||||
},
|
||||
"by_league": {
|
||||
"Bundesliga": {
|
||||
"accuracy": 54.84,
|
||||
"count": 62
|
||||
},
|
||||
"Ligue 1": {
|
||||
"accuracy": 50.79,
|
||||
"count": 63
|
||||
},
|
||||
"Serie A": {
|
||||
"accuracy": 57.81,
|
||||
"count": 64
|
||||
},
|
||||
"Other": {
|
||||
"accuracy": 51.65,
|
||||
"count": 2811
|
||||
}
|
||||
},
|
||||
"by_pick_direction": {
|
||||
"No": {
|
||||
"accuracy": 50.26,
|
||||
"count": 2099,
|
||||
"mean_confidence": 61.56
|
||||
},
|
||||
"Yes": {
|
||||
"accuracy": 55.49,
|
||||
"count": 901,
|
||||
"mean_confidence": 60.51
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"calibration": {
|
||||
"ms_home": {
|
||||
"brier_score": 0.2054,
|
||||
"calibration_error": 0.0,
|
||||
"sample_count": 3000,
|
||||
"last_trained": "2026-05-15T21:40:58.026574",
|
||||
"mean_predicted": 0.4942,
|
||||
"mean_actual": 0.46
|
||||
},
|
||||
"ms_draw": {
|
||||
"brier_score": 0.1846,
|
||||
"calibration_error": 0.0,
|
||||
"sample_count": 3000,
|
||||
"last_trained": "2026-05-15T21:40:58.030886",
|
||||
"mean_predicted": 0.149,
|
||||
"mean_actual": 0.2493
|
||||
},
|
||||
"ms_away": {
|
||||
"brier_score": 0.1726,
|
||||
"calibration_error": 0.0,
|
||||
"sample_count": 3000,
|
||||
"last_trained": "2026-05-15T21:40:58.033980",
|
||||
"mean_predicted": 0.3567,
|
||||
"mean_actual": 0.2907
|
||||
},
|
||||
"ou15": {
|
||||
"brier_score": 0.1884,
|
||||
"calibration_error": 0.0,
|
||||
"sample_count": 3000,
|
||||
"last_trained": "2026-05-15T21:40:58.037204",
|
||||
"mean_predicted": 0.8714,
|
||||
"mean_actual": 0.744
|
||||
},
|
||||
"ou25": {
|
||||
"brier_score": 0.247,
|
||||
"calibration_error": 0.0,
|
||||
"sample_count": 3000,
|
||||
"last_trained": "2026-05-15T21:40:58.041152",
|
||||
"mean_predicted": 0.6924,
|
||||
"mean_actual": 0.499
|
||||
},
|
||||
"btts": {
|
||||
"brier_score": 0.2453,
|
||||
"calibration_error": 0.0,
|
||||
"sample_count": 3000,
|
||||
"last_trained": "2026-05-15T21:40:58.044344",
|
||||
"mean_predicted": 0.4506,
|
||||
"mean_actual": 0.5147
|
||||
}
|
||||
},
|
||||
"runtime_seconds": 94.1
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,40 @@
|
||||
"""
|
||||
MatchData dataclass — core data transfer object used throughout the engine.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
@dataclass
class MatchData:
    """Data-transfer object describing a single match for the engine.

    Every field up to and including ``lineup_source`` is required and must be
    supplied in declaration order (or by keyword). The remaining fields carry
    defaults and may be omitted.
    """

    # --- Identity -----------------------------------------------------------
    match_id: str
    home_team_id: str
    away_team_id: str
    home_team_name: str
    away_team_name: str
    match_date_ms: int  # presumably epoch milliseconds (from the name) — confirm with producer
    sport: str
    league_id: Optional[str]
    league_name: str
    referee_name: Optional[str]

    # --- Market and squad inputs --------------------------------------------
    odds_data: Dict[str, float]
    home_lineup: Optional[List[str]]
    away_lineup: Optional[List[str]]
    sidelined_data: Optional[Dict[str, Any]]

    # --- Pre-computed team statistics ---------------------------------------
    home_goals_avg: float
    home_conceded_avg: float
    away_goals_avg: float
    away_conceded_avg: float
    home_position: int
    away_position: int
    lineup_source: str

    # --- Optional match-state and provenance fields --------------------------
    status: str = ""
    state: Optional[str] = None
    substate: Optional[str] = None
    current_score_home: Optional[int] = None
    current_score_away: Optional[int] = None
    lineup_confidence: float = 0.0
    source_table: str = "matches"
|
||||
@@ -0,0 +1,292 @@
|
||||
"""
|
||||
Shared prediction dataclasses used across the AI engine.
|
||||
|
||||
These were originally defined in models/v20_ensemble.py and are extracted here
|
||||
so they can be used without importing the full V20 ensemble.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from core.calculators.score_calculator import ScorePrediction
|
||||
|
||||
|
||||
@dataclass
class MarketPrediction:
    """One pick on one betting market, together with its scoring metadata."""

    market_type: str
    pick: str
    probability: float
    confidence: float
    odds: float = 0.0
    is_recommended: bool = False
    is_value_bet: bool = False
    edge: float = 0.0  # expected edge over the market

    def to_dict(self) -> dict:
        """Return a plain-dict view of the prediction.

        ``probability`` is multiplied by 100 and rounded to one decimal;
        ``confidence`` and ``edge`` are rounded to one decimal without
        scaling. All other fields are passed through unchanged.
        """
        payload = {"market_type": self.market_type, "pick": self.pick}
        payload["probability"] = round(self.probability * 100, 1)
        payload["confidence"] = round(self.confidence, 1)
        payload["odds"] = self.odds
        payload["is_recommended"] = self.is_recommended
        payload["is_value_bet"] = self.is_value_bet
        payload["edge"] = round(self.edge, 1)
        return payload
|
||||
|
||||
|
||||
@dataclass
class FullMatchPrediction:
    """Aggregate prediction for one match across every supported market.

    Probability fields hold fractions; ``to_dict`` multiplies them by 100 and
    rounds to one decimal. Confidence fields are rounded to one decimal but
    not scaled. Only ``match_id``, ``home_team`` and ``away_team`` are
    required; everything else has a default.
    """

    match_id: str
    home_team: str
    away_team: str
    match_date: str = ""

    # --- Match result (1X2) ---
    ms_home_prob: float = 0.33
    ms_draw_prob: float = 0.33
    ms_away_prob: float = 0.33
    ms_pick: str = ""
    ms_confidence: float = 0.0

    # --- Double chance ---
    dc_1x_prob: float = 0.66
    dc_x2_prob: float = 0.66
    dc_12_prob: float = 0.66
    dc_pick: str = ""
    dc_confidence: float = 0.0

    # --- Over/under goals: 1.5 line ---
    over_15_prob: float = 0.70
    under_15_prob: float = 0.30
    ou15_pick: str = ""
    ou15_confidence: float = 0.0

    # --- Over/under goals: 2.5 line ---
    over_25_prob: float = 0.50
    under_25_prob: float = 0.50
    ou25_pick: str = ""
    ou25_confidence: float = 0.0

    # --- Over/under goals: 3.5 line ---
    over_35_prob: float = 0.30
    under_35_prob: float = 0.70
    ou35_pick: str = ""
    ou35_confidence: float = 0.0

    # --- Both teams to score (BTTS) ---
    btts_yes_prob: float = 0.50
    btts_no_prob: float = 0.50
    btts_pick: str = ""
    btts_confidence: float = 0.0

    # --- First-half result ---
    ht_home_prob: float = 0.30
    ht_draw_prob: float = 0.40
    ht_away_prob: float = 0.30
    ht_pick: str = ""
    ht_confidence: float = 0.0

    # --- Score predictions ---
    score: Optional[ScorePrediction] = None
    predicted_ft_score: str = "1-1"
    predicted_ht_score: str = "0-0"
    ft_scores_top5: List[Dict] = field(default_factory=list)

    # --- Expected goals (xG) ---
    home_xg: float = 1.3
    away_xg: float = 1.1
    total_xg: float = 2.4

    # --- Risk assessment ---
    risk_level: str = "MEDIUM"  # LOW, MEDIUM, HIGH, EXTREME
    risk_score: float = 0.0
    is_surprise_risk: bool = False
    surprise_type: str = ""
    risk_warnings: List[str] = field(default_factory=list)
    ht_ft_probs: Dict[str, float] = field(default_factory=dict)

    # --- GLM-5 upset score ---
    upset_score: int = 0  # upset score in the 0-100 range
    upset_level: str = "LOW"  # LOW, MEDIUM, HIGH, EXTREME
    upset_reasons: List[str] = field(default_factory=list)

    # --- Surprise profile (not emitted by to_dict) ---
    surprise_score: float = 0.0  # 0-100 overall surprise risk score
    surprise_comment: str = ""  # human-readable surprise commentary
    surprise_reasons: List[str] = field(default_factory=list)  # flagged risk reasons
    surprise_breakdown: List[Dict[str, Any]] = field(default_factory=list)  # per-factor {code, points, label}

    # --- Per-engine contributions ---
    team_confidence: float = 0.0
    player_confidence: float = 0.0
    odds_confidence: float = 0.0
    referee_confidence: float = 0.0

    # --- Corners, cards and other markets ---
    total_corners_pred: float = 9.5
    corner_pick: str = "9.5 Üst"

    total_cards_pred: float = 4.5
    card_pick: str = "4.5 Alt"
    cards_over_prob: float = 0.50
    cards_under_prob: float = 0.50
    cards_confidence: float = 0.0

    handicap_pick: str = ""
    handicap_home_prob: float = 0.33
    handicap_draw_prob: float = 0.34
    handicap_away_prob: float = 0.33
    handicap_confidence: float = 0.0

    ht_over_05_prob: float = 0.65
    ht_under_05_prob: float = 0.35
    ht_over_15_prob: float = 0.30
    ht_under_15_prob: float = 0.70
    ht_ou_pick: str = "İY 0.5 Üst"
    ht_ou15_pick: str = "İY 1.5 Alt"

    odd_even_pick: str = "Çift"
    odd_prob: float = 0.50  # probability of an odd total
    even_prob: float = 0.50  # probability of an even total

    # --- Recommendations ---
    best_bet: Optional[MarketPrediction] = None
    recommended_bets: List[MarketPrediction] = field(default_factory=list)
    alternative_bet: Optional[MarketPrediction] = None
    expert_recommendation: Dict[str, Any] = field(default_factory=dict)

    # --- Detailed analysis ---
    analysis_details: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialise the prediction into a nested, JSON-friendly dict.

        Top-level keys are ``match_info``, ``predictions``, ``risk``,
        ``upset_analysis``, ``engine_breakdown``, ``recommendations`` and
        ``analysis_details``. The ``score``, ``surprise_*`` and
        ``expert_recommendation`` fields are not part of the output.
        """

        def pct(fraction):
            # Fraction -> percentage with one decimal.
            return round(fraction * 100, 1)

        def one_dp(value):
            return round(value, 1)

        def goal_line(over, under, pick, confidence):
            return {
                "over": pct(over),
                "under": pct(under),
                "pick": pick,
                "confidence": one_dp(confidence),
            }

        match_info = {
            "match_id": self.match_id,
            "home_team": self.home_team,
            "away_team": self.away_team,
            "match_date": self.match_date,
        }

        match_result = {
            "1": pct(self.ms_home_prob),
            "X": pct(self.ms_draw_prob),
            "2": pct(self.ms_away_prob),
            "pick": self.ms_pick,
            "confidence": one_dp(self.ms_confidence),
        }

        double_chance = {
            "1X": pct(self.dc_1x_prob),
            "X2": pct(self.dc_x2_prob),
            "12": pct(self.dc_12_prob),
            "pick": self.dc_pick,
            "confidence": one_dp(self.dc_confidence),
        }

        over_under = {
            "1.5": goal_line(self.over_15_prob, self.under_15_prob,
                             self.ou15_pick, self.ou15_confidence),
            "2.5": goal_line(self.over_25_prob, self.under_25_prob,
                             self.ou25_pick, self.ou25_confidence),
            "3.5": goal_line(self.over_35_prob, self.under_35_prob,
                             self.ou35_pick, self.ou35_confidence),
        }

        btts = {
            "yes": pct(self.btts_yes_prob),
            "no": pct(self.btts_no_prob),
            "pick": self.btts_pick,
            "confidence": one_dp(self.btts_confidence),
        }

        first_half = {
            "1": pct(self.ht_home_prob),
            "X": pct(self.ht_draw_prob),
            "2": pct(self.ht_away_prob),
            "pick": self.ht_pick,
            "confidence": one_dp(self.ht_confidence),
            # The first-half goal lines carry no confidence value.
            "over_under_05": {
                "over": pct(self.ht_over_05_prob),
                "under": pct(self.ht_under_05_prob),
                "pick": self.ht_ou_pick,
            },
            "over_under_15": {
                "over": pct(self.ht_over_15_prob),
                "under": pct(self.ht_under_15_prob),
                "pick": self.ht_ou15_pick,
            },
        }

        scores = {
            "predicted_ft": self.predicted_ft_score,
            "predicted_ht": self.predicted_ht_score,
            "top_5_ft_scores": self.ft_scores_top5,
        }

        others = {
            "handicap": {
                "pick": self.handicap_pick,
                "confidence": one_dp(self.handicap_confidence),
                "home": pct(self.handicap_home_prob),
                "draw": pct(self.handicap_draw_prob),
                "away": pct(self.handicap_away_prob),
            },
            "corners": {
                "total": one_dp(self.total_corners_pred),
                "pick": self.corner_pick,
            },
            "cards": {
                "total": one_dp(self.total_cards_pred),
                "pick": self.card_pick,
                "confidence": one_dp(self.cards_confidence),
                "over": pct(self.cards_over_prob),
                "under": pct(self.cards_under_prob),
            },
            "odd_even": {
                "pick": self.odd_even_pick,
                "tek": pct(self.odd_prob),
                "cift": pct(self.even_prob),
            },
        }

        xg = {
            "home": round(self.home_xg, 2),
            "away": round(self.away_xg, 2),
            "total": round(self.total_xg, 2),
        }

        # Falsy containers (including None) serialise as empty collections.
        if self.ht_ft_probs:
            ht_ft = {combo: pct(prob) for combo, prob in self.ht_ft_probs.items()}
        else:
            ht_ft = {}

        if self.recommended_bets:
            all_recommended = [bet.to_dict() for bet in self.recommended_bets]
        else:
            all_recommended = []

        return {
            "match_info": match_info,
            "predictions": {
                "match_result": match_result,
                "double_chance": double_chance,
                "over_under": over_under,
                "btts": btts,
                "first_half": first_half,
                "scores": scores,
                "others": others,
                "xg": xg,
            },
            "risk": {
                "level": self.risk_level,
                "score": one_dp(self.risk_score),
                "is_surprise_risk": self.is_surprise_risk,
                "surprise_type": self.surprise_type,
                "ht_ft_probs": ht_ft,
                "warnings": self.risk_warnings,
            },
            "upset_analysis": {
                "score": self.upset_score,
                "level": self.upset_level,
                "reasons": self.upset_reasons,
            },
            "engine_breakdown": {
                "team_engine": one_dp(self.team_confidence),
                "player_engine": one_dp(self.player_confidence),
                "odds_engine": one_dp(self.odds_confidence),
                "referee_engine": one_dp(self.referee_confidence),
            },
            "recommendations": {
                "best_bet": self.best_bet.to_dict() if self.best_bet else None,
                "all_recommended": all_recommended,
                "alternative_bet": self.alternative_bet.to_dict() if self.alternative_bet else None,
            },
            "analysis_details": self.analysis_details,
        }
|
||||
@@ -0,0 +1,510 @@
|
||||
"""
|
||||
Calibration Backfill Script
|
||||
============================
|
||||
Runs V25 model against historical matches (using pre-computed ai_features + odds)
|
||||
to generate calibration training data, then trains isotonic calibration models.
|
||||
|
||||
Usage:
|
||||
python ai-engine/scripts/backfill_calibration.py
|
||||
python ai-engine/scripts/backfill_calibration.py --limit 5000
|
||||
python ai-engine/scripts/backfill_calibration.py --min-samples 50
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
from dotenv import load_dotenv
|
||||
|
||||
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.insert(0, AI_ENGINE_DIR)
|
||||
|
||||
from models.v25_ensemble import V25Predictor
|
||||
from models.calibration import get_calibrator
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
def _normalize_pick(pick) -> str:
    """Return *pick* as a stripped, case-folded string; falsy input gives ""."""
    if not pick:
        return ""
    text = str(pick)
    return text.strip().casefold()
|
||||
|
||||
|
||||
def resolve_actual(market, pick, score_home, score_away, ht_home, ht_away):
    """Decide whether *pick* on *market* came true for a finished match.

    Args:
        market: Market code (case-insensitive): MS, OU15, OU25, OU35, BTTS,
            HT, HTFT or DC.
        pick: Selection label; normalised with ``_normalize_pick``.
        score_home, score_away: Full-time goals.
        ht_home, ht_away: Half-time goals, or ``None`` when unknown.

    Returns:
        ``1`` if the pick won, ``0`` if it lost, or ``None`` when the outcome
        cannot be resolved (missing score, unknown market, unrecognised or
        malformed pick).
    """
    if score_home is None or score_away is None:
        return None

    market = (market or "").upper()
    p = _normalize_pick(pick)
    total = score_home + score_away

    if market == "MS":
        if p == "1":
            return int(score_home > score_away)
        if p in {"x", "0"}:
            return int(score_home == score_away)
        if p == "2":
            return int(score_away > score_home)
        return None

    if market in {"OU15", "OU25", "OU35"}:
        line = {"OU15": 1.5, "OU25": 2.5, "OU35": 3.5}[market]
        # "over" is tested first, so a label containing both words counts as over.
        if "over" in p or "üst" in p or "ust" in p:
            return int(total > line)
        if "under" in p or "alt" in p:
            return int(total < line)
        return None

    if market == "BTTS":
        both = score_home > 0 and score_away > 0
        if "yes" in p or "var" in p:
            return int(both)
        if "no" in p or "yok" in p:
            return int(not both)
        return None

    if market == "HT":
        if ht_home is None or ht_away is None:
            return None
        if p == "1":
            return int(ht_home > ht_away)
        if p in {"x", "0"}:
            return int(ht_home == ht_away)
        if p == "2":
            return int(ht_away > ht_home)
        return None

    if market == "HTFT":
        if ht_home is None or ht_away is None:
            return None
        parts = [part.strip() for part in p.split("/")]
        # Exactly "<ht>/<ft>" is required; anything else is unresolvable
        # rather than an unpacking error.
        if len(parts) != 2:
            return None
        # Accept "0" as a draw alias, consistent with the MS and HT markets.
        ht_p, ft_p = ("x" if part == "0" else part for part in parts)
        ht_actual = "1" if ht_home > ht_away else "2" if ht_away > ht_home else "x"
        ft_actual = "1" if score_home > score_away else "2" if score_away > score_home else "x"
        return int(ht_p == ht_actual and ft_p == ft_actual)

    if market == "DC":
        norm = p.replace("-", "").upper()
        if norm == "1X":
            return int(score_home >= score_away)
        if norm == "X2":
            return int(score_away >= score_home)
        if norm == "12":
            return int(score_home != score_away)
        return None

    return None
|
||||
|
||||
|
||||
def calibrator_key(market, pick):
    """Map a (market, pick) pair to the name of its calibration model.

    Only the "positive" side of binary markets has a calibrator: over for the
    OU lines and yes for BTTS. MS and HT have one calibrator per outcome; DC
    and HTFT share a single calibrator each.

    Args:
        market: Market code (case-insensitive).
        pick: Selection label; normalised with ``_normalize_pick``.

    Returns:
        The calibrator key, or ``None`` when the pair has no calibrator.
    """
    m = (market or "").upper()
    p = _normalize_pick(pick)

    if m in ("MS", "HT"):
        prefix = m.lower()
        if p == "1":
            return f"{prefix}_home"
        if p in {"x", "0"}:
            return f"{prefix}_draw"
        if p == "2":
            return f"{prefix}_away"
        return None

    if m == "DC":
        return "dc"

    if m in ("OU15", "OU25", "OU35"):
        # Same "over" spellings as resolve_actual, including ASCII "ust",
        # so every resolvable over-pick also gets a calibrator key.
        is_over = any(token in p for token in ("over", "üst", "ust"))
        return m.lower() if is_over else None

    if m == "BTTS":
        return "btts" if ("yes" in p or "var" in p) else None

    if m == "HTFT":
        return "ht_ft"

    return None
|
||||
|
||||
|
||||
def get_conn():
    """Open a psycopg2 connection to the database named by ``DATABASE_URL``.

    Everything from the first ``?schema=`` onward is cut off the URL before
    connecting. The connection is created with ``RealDictCursor`` as its
    cursor factory.

    Raises:
        ValueError: If ``DATABASE_URL`` is unset or empty after trimming.
    """
    # partition() keeps the text before the first marker, or the whole
    # string when the marker is absent.
    dsn = os.getenv("DATABASE_URL", "").partition("?schema=")[0]
    if dsn:
        return psycopg2.connect(dsn, cursor_factory=RealDictCursor)
    raise ValueError("DATABASE_URL not set")
|
||||
|
||||
|
||||
# Exact-match odds categories: lower-cased category name -> lower-cased
# selection name -> odds key. Both "0" and "x" denote the draw.
ODD_CAT_MAP = {
    category: {
        "1": f"{prefix}_h",
        "0": f"{prefix}_d",
        "x": f"{prefix}_d",
        "2": f"{prefix}_a",
    }
    for category, prefix in (
        ("maç sonucu", "ms"),
        ("1. yarı sonucu", "ht_ms"),
    )
}

# Keyword-match odds categories: lower-cased category name -> keyword looked
# up inside the selection name -> odds key.
ODD_CAT_KEYWORD_MAP = {
    "karşılıklı gol": {"var": "btts_y", "yok": "btts_n"},
    **{
        category: {"alt": f"{prefix}_u", "üst": f"{prefix}_o"}
        for category, prefix in (
            ("0,5 alt/üst", "ou05"),
            ("1,5 alt/üst", "ou15"),
            ("2,5 alt/üst", "ou25"),
            ("3,5 alt/üst", "ou35"),
            ("ilk yarı 0,5 alt/üst", "ht_ou05"),
            ("ilk yarı 1,5 alt/üst", "ht_ou15"),
        )
    },
}
|
||||
|
||||
|
||||
def load_matches(cur, limit: int) -> List[Dict]:
    """Fetch the most recent finished football matches that have AI features.

    Args:
        cur: Open database cursor.
        limit: Maximum number of rows to return.

    Returns:
        Whatever ``cur.fetchall()`` yields for the query: one row per match
        with its id and full-time / half-time scores, newest first.
    """
    query = """
        SELECT m.id, m.score_home, m.score_away,
               m.ht_score_home, m.ht_score_away
        FROM matches m
        JOIN football_ai_features f ON f.match_id = m.id
        WHERE m.status = 'FT'
          AND m.sport = 'football'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
        ORDER BY m.mst_utc DESC
        LIMIT %s
    """
    params = (limit,)
    cur.execute(query, params)
    rows = cur.fetchall()
    return rows
|
||||
|
||||
|
||||
def load_ai_features_batch(cur, match_ids: List[str]) -> Dict[str, Dict]:
    """Load stored AI features for *match_ids*, keyed by match id (as ``str``).

    Columns are aliased in SQL to the names the caller looks up. Feature
    names with no source column are selected as constant zeros, and
    ``h2h_draw_rate`` is derived as ``1.0 - h2h_home_win_rate - 0.33``.

    Args:
        cur: Open database cursor whose rows support ``row["col"]`` access.
        match_ids: Ids to fetch; an empty list short-circuits to ``{}``.

    Returns:
        ``{str(match_id): row_as_dict}`` for every row found.
    """
    if not match_ids:
        return {}

    # One "%s" per id; the ids themselves are passed as bound parameters.
    placeholders = ",".join("%s" for _ in match_ids)
    query = f"""
        SELECT match_id,
               home_elo AS home_overall_elo,
               away_elo AS away_overall_elo,
               elo_diff,
               home_home_elo, away_away_elo,
               home_form_elo, away_form_elo,
               (home_form_elo - away_form_elo) AS form_elo_diff,
               home_goals_avg_5 AS home_goals_avg,
               home_conceded_avg_5 AS home_conceded_avg,
               away_goals_avg_5 AS away_goals_avg,
               away_conceded_avg_5 AS away_conceded_avg,
               home_clean_sheet_rate, away_clean_sheet_rate,
               home_scoring_rate, away_scoring_rate,
               home_win_streak AS home_winning_streak,
               away_win_streak AS away_winning_streak,
               0 AS home_unbeaten_streak,
               0 AS away_unbeaten_streak,
               h2h_total AS h2h_total_matches,
               h2h_home_win_rate,
               (1.0 - h2h_home_win_rate - 0.33) AS h2h_draw_rate,
               h2h_avg_goals,
               h2h_btts_rate, h2h_over25_rate,
               home_avg_possession, away_avg_possession,
               home_avg_shots_on_target, away_avg_shots_on_target,
               home_shot_conversion, away_shot_conversion,
               0.0 AS home_avg_corners, 0.0 AS away_avg_corners,
               implied_home, implied_draw, implied_away,
               league_avg_goals,
               0.0 AS league_zero_goal_rate,
               0.0 AS home_xga, 0.0 AS away_xga,
               0.0 AS upset_atmosphere, 0.0 AS upset_motivation,
               0.0 AS upset_fatigue, 0.0 AS upset_potential,
               referee_home_bias, referee_avg_goals,
               referee_avg_cards AS referee_cards_total,
               0.0 AS referee_avg_yellow,
               0.0 AS referee_experience,
               0.0 AS home_momentum_score, 0.0 AS away_momentum_score,
               0.0 AS momentum_diff,
               0.0 AS home_squad_quality, 0.0 AS away_squad_quality,
               0.0 AS squad_diff,
               0 AS home_key_players, 0 AS away_key_players,
               missing_players_impact AS home_missing_impact,
               0.0 AS away_missing_impact,
               home_goals_avg_5 AS home_goals_form,
               away_goals_avg_5 AS away_goals_form
        FROM football_ai_features
        WHERE match_id IN ({placeholders})
    """
    cur.execute(query, match_ids)

    features_by_match: Dict[str, Dict] = {}
    for record in cur.fetchall():
        features_by_match[str(record["match_id"])] = dict(record)
    return features_by_match
|
||||
|
||||
|
||||
def _fold_label(text) -> str:
    """Lower-case and strip a category/selection label for map lookups.

    ``str.lower()`` turns the Turkish capital "İ" (U+0130) into "i" followed
    by a combining dot (U+0307). The combining dot is removed so labels such
    as "İlk Yarı ..." compare equal to the plain-"i" keys of the odds maps.
    """
    return (text or "").lower().replace("\u0307", "").strip()


def load_odds_batch(cur, match_ids: List[str]) -> Dict[str, Dict[str, float]]:
    """Load odds for *match_ids* and translate them to short odds keys.

    A category listed in ``ODD_CAT_MAP`` is resolved by exact selection name.
    A category listed in ``ODD_CAT_KEYWORD_MAP`` is resolved by the first
    keyword contained in the selection name. Rows with a missing or
    non-positive odd value are ignored.

    Args:
        cur: Open database cursor whose rows support ``row["col"]`` access.
        match_ids: Ids to fetch; an empty list short-circuits to ``{}``.

    Returns:
        ``{str(match_id): {odds_key: odd_value}}``. A match gets an entry,
        possibly empty, as soon as it has one row with a positive odd value.
    """
    if not match_ids:
        return {}

    # One "%s" per id; the ids themselves are passed as bound parameters.
    ph = ",".join(["%s"] * len(match_ids))
    cur.execute(f"""
        SELECT oc.match_id, oc.name AS cat_name,
               os.name AS sel_name, os.odd_value
        FROM odd_selections os
        JOIN odd_categories oc ON os.odd_category_db_id = oc.db_id
        WHERE oc.match_id IN ({ph})
    """, match_ids)

    odds: Dict[str, Dict[str, float]] = {}
    for row in cur.fetchall():
        val = float(row["odd_value"]) if row["odd_value"] else 0
        if val <= 0:
            continue

        match_odds = odds.setdefault(str(row["match_id"]), {})
        cat = _fold_label(row["cat_name"])
        sel = _fold_label(row["sel_name"])

        if cat in ODD_CAT_MAP:
            key = ODD_CAT_MAP[cat].get(sel)
            if key:
                match_odds[key] = val
            continue

        # Direct lookup instead of scanning every keyword category.
        for keyword, key in ODD_CAT_KEYWORD_MAP.get(cat, {}).items():
            if keyword in sel:
                match_odds[key] = val
                break
    return odds
|
||||
|
||||
|
||||
# Table of (market code, pick label, extractor) triples. Each extractor
# selects one element, by index, from the sequence it is given.
# NOTE(review): nothing in the visible part of this file reads this table;
# run_backfill carries its own market list — confirm whether it is still needed.
MARKETS_TO_PREDICT = [
    ("MS", "1", lambda p: p[0]),
    ("MS", "X", lambda p: p[1]),
    ("MS", "2", lambda p: p[2]),
    ("OU25", "Over 2.5", lambda p: p[0]),
    ("BTTS", "Yes", lambda p: p[0]),
    ("OU15", "Over 1.5", lambda p: p[0]),
    ("OU35", "Over 3.5", lambda p: p[0]),
    ("HT", "1", lambda p: p[0]),
    ("HT", "X", lambda p: p[1]),
    ("HT", "2", lambda p: p[2]),
]
|
||||
|
||||
|
||||
def _build_feature_dict(feature_cols, feat_row, odds_row):
    """Assemble the model input for one match from stored features and odds.

    For every column in *feature_cols*: a non-null stored feature wins;
    otherwise ``odds_*`` columns are filled from *odds_row* (0 when absent),
    ``*_present`` columns become 1.0/0.0 flags for the matching odds key, and
    anything else defaults to 0.0. Available 1X2 odds then overwrite the
    ``odds_ms_*`` entries, and the implied probabilities are recomputed from
    them with the bookmaker margin normalised out.
    """
    feat_dict = {}
    for col in feature_cols:
        if col in feat_row and feat_row[col] is not None:
            feat_dict[col] = float(feat_row[col])
        elif col.startswith("odds_") and not col.endswith("_present"):
            feat_dict[col] = float(odds_row.get(col.replace("odds_", ""), 0))
        elif col.endswith("_present"):
            odds_key = col.replace("_present", "").replace("odds_", "")
            feat_dict[col] = 1.0 if odds_row.get(odds_key, 0) > 0 else 0.0
        else:
            feat_dict[col] = 0.0

    for side in ("ms_h", "ms_d", "ms_a"):
        if odds_row.get(side, 0) > 0:
            feat_dict[f"odds_{side}"] = odds_row[side]

    ms_h = feat_dict.get("odds_ms_h", 0)
    ms_d = feat_dict.get("odds_ms_d", 0)
    ms_a = feat_dict.get("odds_ms_a", 0)
    if ms_h > 0 and ms_d > 0 and ms_a > 0:
        raw_sum = 1 / ms_h + 1 / ms_d + 1 / ms_a
        feat_dict["implied_home"] = (1 / ms_h) / raw_sum
        feat_dict["implied_draw"] = (1 / ms_d) / raw_sum
        feat_dict["implied_away"] = (1 / ms_a) / raw_sum
    return feat_dict


def _market_samples(mid, model_key, probs, sh, sa, ht_h, ht_a):
    """Convert one model's probabilities into calibration sample dicts.

    Returns a list of ``{match_id, market, pick, key, raw_prob, actual}``
    dicts. Picks whose outcome or calibrator key cannot be resolved are
    dropped.
    """
    if model_key in ("ms", "ht_result"):
        market = "MS" if model_key == "ms" else "HT"
        if market == "HT" and (ht_h is None or ht_a is None):
            return []
        candidates = [("1", probs[0]), ("X", probs[1]), ("2", probs[2])]
    elif model_key in ("ou25", "ou15", "ou35"):
        market = model_key.upper()
        # NOTE(review): assumes probs[0] is the "over" probability — confirm
        # against V25Predictor.predict_market.
        candidates = [("Over", probs[0] if len(probs) > 0 else 0.5)]
    elif model_key == "btts":
        market = "BTTS"
        # NOTE(review): assumes probs[0] is the "yes" probability — confirm
        # against V25Predictor.predict_market.
        candidates = [("Yes", probs[0] if len(probs) > 0 else 0.5)]
    else:
        return []

    rows = []
    for pick, prob in candidates:
        actual = resolve_actual(market, pick, sh, sa, ht_h, ht_a)
        key = calibrator_key(market, pick)
        if actual is not None and key:
            rows.append({
                "match_id": mid,
                "market": market,
                "pick": pick,
                "key": key,
                "raw_prob": float(prob),
                "actual": int(actual),
            })
    return rows


def _backfill_with_cursor(cur, args):
    """Run the load / predict / train / report pipeline on an open cursor."""
    t0 = time.time()
    print(f"Loading matches (limit={args.limit})...")
    matches = load_matches(cur, args.limit)
    print(f" Found {len(matches)} finished matches with ai_features")

    match_ids = [str(m["id"]) for m in matches]
    match_map = {str(m["id"]): m for m in matches}

    print("Loading ai_features...")
    features_map = load_ai_features_batch(cur, match_ids)
    print(f" Loaded features for {len(features_map)} matches")

    print("Loading odds...")
    odds_map = load_odds_batch(cur, match_ids)
    print(f" Loaded odds for {len(odds_map)} matches")

    print(f"Data loading: {time.time() - t0:.1f}s")

    print("\nLoading V25 model...")
    predictor = V25Predictor()
    predictor.load_models()
    feature_cols = predictor.FEATURE_COLS

    samples: List[Dict[str, Any]] = []
    skipped = 0
    errors = 0
    processed = 0
    model_keys = ("ms", "ou25", "btts", "ou15", "ou35", "ht_result")

    print(f"\nRunning predictions on {len(match_ids)} matches...")
    t1 = time.time()

    for i, mid in enumerate(match_ids):
        if mid not in features_map:
            skipped += 1
            continue

        match_row = match_map[mid]
        sh = match_row["score_home"]
        sa = match_row["score_away"]
        ht_h = match_row.get("ht_score_home")
        ht_a = match_row.get("ht_score_away")

        # Best effort per match: one bad row must not abort the whole run.
        try:
            feat_dict = _build_feature_dict(
                feature_cols, features_map[mid], odds_map.get(mid, {})
            )
            for model_key in model_keys:
                if model_key not in predictor.models:
                    continue
                probs = predictor.predict_market(model_key, feat_dict)
                if probs is None:
                    continue
                samples.extend(
                    _market_samples(mid, model_key, probs, sh, sa, ht_h, ht_a)
                )
            processed += 1
        except Exception as e:
            skipped += 1
            errors += 1
            # Gate on the error count, not on `skipped`, so missing-feature
            # skips cannot hide the first few real errors.
            if errors <= 5:
                print(f" Error on {mid}: {e}")

        if (i + 1) % 5000 == 0:
            elapsed = time.time() - t1
            rate = (i + 1) / elapsed
            print(f" Processed {i+1}/{len(match_ids)} ({rate:.0f} matches/s)")

    elapsed = time.time() - t1
    print(f"\nPrediction complete: {processed} matches, {skipped} skipped, {elapsed:.1f}s")

    if not samples:
        print("No calibration samples generated!")
        return

    df = pd.DataFrame(samples)
    print(f"\nTotal calibration samples: {len(df)}")
    print(f"Unique matches: {df['match_id'].nunique()}")
    print(f"\nPer-key counts:")
    for key, count in df["key"].value_counts().items():
        print(f" {key:<14} {count}")

    print(f"\nTraining isotonic calibration models (min_samples={args.min_samples})...")
    calibrator = get_calibrator()
    results: Dict[str, Any] = {}

    for key in sorted(df["key"].unique()):
        sub = df[df["key"] == key].copy()
        sub = sub.drop_duplicates(subset=["match_id", "key"], keep="first")
        sub = sub.dropna(subset=["raw_prob", "actual"])
        # Keep only probabilities strictly between 0 and 1.
        sub = sub[(sub["raw_prob"] > 0.0) & (sub["raw_prob"] < 1.0)]

        n = len(sub)
        if n < args.min_samples:
            results[key] = {"status": "skipped", "samples": n}
            continue

        metrics = calibrator.train_calibration(
            df=sub,
            market=key,
            prob_col="raw_prob",
            actual_col="actual",
            min_samples=args.min_samples,
            save=True,
        )
        results[key] = {
            "status": "trained",
            "samples": metrics.sample_count,
            "brier": round(metrics.brier_score, 4),
            "ece": round(metrics.calibration_error, 4),
            "mean_predicted": round(metrics.mean_predicted, 4),
            "mean_actual": round(metrics.mean_actual, 4),
        }

    print("\n" + "=" * 70)
    print("CALIBRATION RESULTS")
    print("=" * 70)
    print(f"{'market':<14} {'status':<10} {'n':<8} {'brier':<9} {'ece':<8} {'pred_avg':<9} {'actual_avg'}")
    print("-" * 70)
    for key, info in sorted(results.items()):
        if info["status"] == "trained":
            print(
                f"{key:<14} {'OK':<10} {info['samples']:<8} "
                f"{info['brier']:<9.4f} {info['ece']:<8.4f} "
                f"{info['mean_predicted']:<9.4f} {info['mean_actual']}"
            )
        else:
            print(f"{key:<14} {'SKIP':<10} {info['samples']:<8}")
    print("=" * 70)

    total_time = time.time() - t0
    print(f"\nTotal time: {total_time:.1f}s")
    print(f"Calibration models saved to: {os.path.join(AI_ENGINE_DIR, 'models', 'calibration')}/")


def run_backfill(args):
    """Backfill calibration data from historical matches and train calibrators.

    Loads finished matches with their stored features and odds, runs the V25
    models on each, pairs every predicted probability with the real outcome,
    trains one calibration model per key that has enough samples, and prints
    a summary table.

    Args:
        args: Parsed CLI namespace providing ``limit`` and ``min_samples``.
    """
    print("=" * 70)
    print("CALIBRATION BACKFILL")
    print("=" * 70)

    conn = get_conn()
    try:
        cur = conn.cursor(cursor_factory=RealDictCursor)
        try:
            _backfill_with_cursor(cur, args)
        finally:
            cur.close()
    finally:
        # Always release the connection, including when loading, prediction
        # or training raises.
        conn.close()
|
||||
|
||||
|
||||
def main():
    """Parse the command-line options and run the calibration backfill."""
    cli = argparse.ArgumentParser(description="Backfill calibration from historical matches")
    int_options = (
        ("--limit", 50000, "Max matches to process (default: 50000)"),
        ("--min-samples", 100, "Min samples per market for calibration (default: 100)"),
    )
    for flag, default, help_text in int_options:
        cli.add_argument(flag, type=int, default=default, help=help_text)
    run_backfill(cli.parse_args())


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,352 @@
|
||||
"""
|
||||
Tutarsızlık Bazlı Backtest
|
||||
============================
|
||||
Modeller arası tutarsızlığı ölçer, tutarlı maçlarda bahis açılsaydı
|
||||
ROI ne olurdu hesaplar.
|
||||
|
||||
Mantık:
|
||||
- Her maç için market'ler arası çelişkileri tespit et
|
||||
- Tutarsız maçları filtrele
|
||||
- Tutarlı maçlarda hit rate ve ROI hesapla
|
||||
|
||||
Usage:
|
||||
python scripts/backtest_consistency.py
|
||||
"""
|
||||
|
||||
import os, sys, json
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import xgboost as xgb
|
||||
from sklearn.metrics import accuracy_score
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# Directory two levels above this file; data/ and models/ live under it.
_ENGINE_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

DATA_PATH = os.path.join(_ENGINE_ROOT, 'data', 'training_data.csv')
MODELS_DIR = os.path.join(_ENGINE_ROOT, 'models', 'v25')

# Identifier, score and label column names, grouped by kind.
SKIP_COLS = {
    # identifiers and kickoff time
    'match_id', 'home_team_id', 'away_team_id', 'league_id', 'mst_utc',
    # full-time and half-time scores
    'score_home', 'score_away', 'total_goals',
    'ht_score_home', 'ht_score_away', 'ht_total_goals',
    # full-time labels
    'label_ms', 'label_ou05', 'label_ou15', 'label_ou25', 'label_ou35', 'label_btts',
    # half-time labels
    'label_ht_result', 'label_ht_ou05', 'label_ht_ou15', 'label_ht_ft',
    # other labels
    'label_odd_even', 'label_yellow_cards', 'label_cards_ou45', 'label_handicap_ms',
}
|
||||
|
||||
|
||||
def load_model(market: str):
    """Load the saved XGBoost booster for *market*.

    The model file is ``xgb_v25_<market>.json`` inside ``MODELS_DIR``.

    Returns:
        The loaded ``xgb.Booster``, or ``None`` when the file does not exist.
    """
    model_path = os.path.join(MODELS_DIR, f'xgb_v25_{market}.json')
    if os.path.exists(model_path):
        booster = xgb.Booster()
        booster.load_model(model_path)
        return booster
    return None
|
||||
|
||||
|
||||
def predict_proba(model, X: np.ndarray, feature_cols: list, n_class: int):
    """Run *model* on *X* and return per-class probabilities.

    *X* is wrapped in a DataFrame with *feature_cols* as column names before
    being handed to XGBoost. When ``n_class`` is greater than 2 the raw
    prediction is reshaped to ``(-1, n_class)``. Otherwise the raw prediction
    becomes the second of two columns and its complement the first.
    """
    frame = pd.DataFrame(X, columns=feature_cols)
    scores = model.predict(xgb.DMatrix(frame))
    if n_class <= 2:
        return np.column_stack([1 - scores, scores])
    return scores.reshape(-1, n_class)
|
||||
|
||||
|
||||
def consistency_score(probs: dict) -> tuple[float, list]:
    """Measure how strongly the per-market probabilities contradict each other.

    Args:
        probs: Mapping of probability name to value. Recognised keys are
            ``ou15_over``, ``ou25_over``, ``ou35_over``, ``ht_ou05_over``,
            ``ht_ou15_over``, ``ht_draw``, ``ms_home``, ``ms_away`` and
            ``btts_yes``. Any key may be absent (e.g. its model was not
            loaded); a rule is evaluated only when every probability it
            compares is present, so a missing market can never create a
            conflict on its own.

    Returns:
        ``(score, conflicts)`` where ``score`` is in ``[0, 1]``
        (0 = fully consistent, 1 = fully contradictory) and ``conflicts``
        is the list of human-readable messages for the rules that fired,
        in evaluation order.

    Rules checked:
        1. Over/under hierarchy: P(over 3.5) <= P(over 2.5) <= P(over 1.5),
           with a 0.05 tolerance. A violation is weighted by twice the gap.
        2. First-half over 0.5 above 70% while first-half draw is above 50%.
        3. First-half hierarchy: P(HT over 1.5) <= P(HT over 0.5), same
           tolerance and weighting as rule 1.
        4. A dominant side (> 65%) together with BTTS-yes > 65%.
        5. BTTS-yes > 65% while over 2.5 is below 45%.
        6. Over 3.5 above 45% while BTTS-yes is below 40%.
    """
    conflicts = []
    total_weight = 0
    total_conflict = 0.0

    # Missing markets stay None instead of being replaced by a "neutral"
    # default: a default such as 0.5 is itself enough to trip several rules.
    ou15_over = probs.get('ou15_over')
    ou25_over = probs.get('ou25_over')
    ou35_over = probs.get('ou35_over')
    ht_ou05_over = probs.get('ht_ou05_over')
    ht_ou15_over = probs.get('ht_ou15_over')
    ht_draw_prob = probs.get('ht_draw')
    btts_yes = probs.get('btts_yes')
    ms_sides = [p for p in (probs.get('ms_home'), probs.get('ms_away')) if p is not None]
    dominant = max(ms_sides) if ms_sides else None

    def known(*values) -> bool:
        """Return True when every probability needed by a rule is available."""
        return all(v is not None for v in values)

    # Rule 1: full-time over/under hierarchy (ou35 <= ou25 <= ou15).
    if known(ou15_over, ou25_over) and ou25_over > ou15_over + 0.05:
        gap = ou25_over - ou15_over
        conflicts.append(f'OU25>{ou25_over:.0%} > OU15>{ou15_over:.0%} (imkansız)')
        total_conflict += gap * 2
        total_weight += 1

    if known(ou25_over, ou35_over) and ou35_over > ou25_over + 0.05:
        gap = ou35_over - ou25_over
        conflicts.append(f'OU35>{ou35_over:.0%} > OU25>{ou25_over:.0%} (imkansız)')
        total_conflict += gap * 2
        total_weight += 1

    # Rule 2: a first-half goal is expected but so is a first-half draw.
    if known(ht_ou05_over, ht_draw_prob) and ht_ou05_over > 0.70 and ht_draw_prob > 0.50:
        conflict = min(ht_ou05_over - 0.5, ht_draw_prob - 0.4)
        conflicts.append(f'HT_OU05>{ht_ou05_over:.0%} ama HT_Draw>{ht_draw_prob:.0%}')
        total_conflict += conflict
        total_weight += 1

    # Rule 3: first-half over/under hierarchy.
    if known(ht_ou05_over, ht_ou15_over) and ht_ou15_over > ht_ou05_over + 0.05:
        gap = ht_ou15_over - ht_ou05_over
        conflicts.append(f'HT_OU15>{ht_ou15_over:.0%} > HT_OU05>{ht_ou05_over:.0%} (imkansız)')
        total_conflict += gap * 2
        total_weight += 1

    # Rule 4: one side is a strong favourite yet both teams are expected to
    # score. Treated as suspicious only, hence the halved contribution.
    if known(dominant, btts_yes) and dominant > 0.65 and btts_yes > 0.65:
        conflict = (dominant - 0.5) * (btts_yes - 0.5)
        conflicts.append(f'MS dominant>{dominant:.0%} ama BTTS_Yes>{btts_yes:.0%}')
        total_conflict += conflict * 0.5
        total_weight += 1

    # Rule 5: BTTS implies at least two goals, so over 2.5 should not be low.
    if known(btts_yes, ou25_over) and btts_yes > 0.65 and ou25_over < 0.45:
        conflict = btts_yes - ou25_over
        conflicts.append(f'BTTS_Yes>{btts_yes:.0%} ama OU25>{ou25_over:.0%} düşük')
        total_conflict += conflict
        total_weight += 1

    # Rule 6: many goals expected, but from one team only.
    if known(ou35_over, btts_yes) and ou35_over > 0.45 and btts_yes < 0.40:
        conflict = (ou35_over - 0.35) * (0.5 - btts_yes)
        conflicts.append(f'OU35>{ou35_over:.0%} ama BTTS_Yes<{btts_yes:.0%}')
        total_conflict += conflict
        total_weight += 1

    # Normalise by the number of fired rules (0.3 per rule) and clamp to 1.
    # The 0.1 floor keeps the division defined when nothing fired.
    score = min(1.0, total_conflict / max(total_weight * 0.3, 0.1))
    return score, conflicts
|
||||
|
||||
|
||||
def _label_to_int(value) -> int:
    """Return *value* as an int class label, or -1 when it is missing, NaN or non-numeric."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return -1
    if not np.isfinite(number):
        return -1
    return int(number)


def main():
    """Run the consistency backtest on the most recent 20% of matches.

    Steps:
        1. Load ``DATA_PATH``, sort by ``mst_utc`` and keep the last 20%.
        2. Predict every market whose model file could be loaded.
        3. For each match compute ``consistency_score`` and, per market,
           whether the arg-max class was correct and the resulting "ROI".
        4. Print hit rate / ROI per consistency threshold, the most frequent
           conflict types and the score distribution.
        5. Write the per-threshold summary to
           ``reports/backtest_consistency.json``.

    Note:
        The odds used for ROI are ``1 / predicted probability`` of the model
        itself, not bookmaker prices.
    """
    from collections import Counter
    import re

    markets = ['ms', 'ou15', 'ou25', 'ou35', 'btts']
    thresholds = [0.0, 0.1, 0.2, 0.3, 0.5]

    print('Loading data...')
    df = pd.read_csv(DATA_PATH, low_memory=False)

    # Last 20% = test set (chronological split).
    df = df.sort_values('mst_utc')
    n_test = int(len(df) * 0.20)
    df_test = df.tail(n_test).copy()
    print(f'Test seti: {len(df_test):,} maç')

    feature_cols = [c for c in df.columns if c not in SKIP_COLS]

    # Load the models; markets whose file is missing are simply left out.
    print('Modeller yükleniyor...')
    model_specs = [
        ('ms', 3), ('ou15', 2), ('ou25', 2), ('ou35', 2), ('btts', 2),
        ('ht_result', 3), ('ht_ou05', 2), ('ht_ou15', 2),
    ]
    models = {}
    for key, n_class in model_specs:
        booster = load_model(key)
        if booster is not None:
            models[key] = (booster, n_class)
    print(f'Yüklenen model: {list(models.keys())}')

    X = df_test[feature_cols].fillna(0).values

    # Batch-predict every loaded market once.
    print('Tahminler yapılıyor...')
    preds = {
        mkey: predict_proba(model, X, feature_cols, n_class)
        for mkey, (model, n_class) in models.items()
    }

    # Ground-truth labels per market; -1 marks "no usable label" (missing
    # column or NaN cell) so such rows are skipped instead of crashing int().
    n_rows = len(df_test)
    actual_by_market = {}
    for m in markets:
        col = f'label_{m}'
        if col in df_test.columns:
            actual_by_market[m] = [_label_to_int(v) for v in df_test[col].tolist()]
        else:
            actual_by_market[m] = [-1] * n_rows

    # (probability name, model key, class index) fed to consistency_score.
    prob_sources = [
        ('ms_home', 'ms', 0), ('ms_draw', 'ms', 1), ('ms_away', 'ms', 2),
        ('ou15_over', 'ou15', 1),
        ('ou25_over', 'ou25', 1),
        ('ou35_over', 'ou35', 1),
        ('btts_yes', 'btts', 1),
        ('ht_home', 'ht_result', 0), ('ht_draw', 'ht_result', 1), ('ht_away', 'ht_result', 2),
        ('ht_ou05_over', 'ht_ou05', 1),
        ('ht_ou15_over', 'ht_ou15', 1),
    ]

    # Per match: consistency score plus prediction outcome for each market.
    results = []
    for i in range(n_rows):
        probs = {
            name: preds[mkey][i][cls]
            for name, mkey, cls in prob_sources
            if mkey in preds
        }
        c_score, conflicts = consistency_score(probs)

        market_results = {}
        for mkt in markets:
            truth = actual_by_market[mkt][i]
            if mkt not in preds or truth < 0:
                continue
            row_probs = preds[mkt][i]
            pred_class = int(np.argmax(row_probs))
            correct = int(pred_class == truth)

            # Odds implied by the model's own probability (odds = 1 / prob).
            pred_prob = float(row_probs[pred_class])
            implied_odds = 1 / pred_prob if pred_prob > 0.01 else 10.0
            # One unit staked: win pays (odds - 1), loss costs 1.
            roi = (implied_odds - 1) * correct - (1 - correct)

            market_results[mkt] = {
                'pred': pred_class,
                'actual': truth,
                'correct': correct,
                'prob': pred_prob,
                'roi': roi,
            }

        results.append({
            'idx': i,
            'consistency_score': c_score,
            'conflicts': conflicts,
            'probs': probs,
            'market_results': market_results,
        })

    records = []
    for r in results:
        record = {
            'consistency_score': r['consistency_score'],
            'n_conflicts': len(r['conflicts']),
        }
        for m in markets:
            record[f'{m}_correct'] = r['market_results'].get(m, {}).get('correct')
        for m in markets:
            record[f'{m}_roi'] = r['market_results'].get(m, {}).get('roi')
        records.append(record)

    metric_cols = [f'{m}_correct' for m in markets] + [f'{m}_roi' for m in markets]
    df_results = pd.DataFrame(records, columns=['consistency_score', 'n_conflicts'] + metric_cols)
    # Rows without a result hold None; coerce to float/NaN so means are
    # well defined even when a whole market is missing.
    for col in metric_cols:
        df_results[col] = pd.to_numeric(df_results[col], errors='coerce')
    total = len(df_results)

    def pct_of_total(count: int) -> float:
        """Share of the test set in percent (0 for an empty test set)."""
        return count / total * 100 if total else 0.0

    # ── Analysis ────────────────────────────────────────────────────────
    print(f'\n{"="*70}')
    print('TUTARSIZLIK ANALİZİ')
    print(f'{"="*70}')

    for t in thresholds:
        mask = df_results['consistency_score'] <= t
        n = int(mask.sum())
        if n < 50:
            continue

        print(f'\n[Tutarsızlık <= {t:.1f}] → {n:,} maç ({pct_of_total(n):.0f}%)')
        print(f' {"Market":<8} {"HitRate":>8} {"ROI/bahis":>10} {"Toplam ROI":>12}')
        print(f' {"-"*42}')
        for m in markets:
            sub = df_results.loc[mask, f'{m}_correct'].dropna()
            roi_sub = df_results.loc[mask, f'{m}_roi'].dropna()
            if len(sub) < 20:
                continue
            hit = sub.mean()
            avg_roi = roi_sub.mean()
            total_roi = roi_sub.sum()
            print(f' {m:<8} {hit:>7.1%} {avg_roi:>+9.3f} {total_roi:>+11.1f}')

    # Breakdown by conflict type. The messages embed per-match percentages,
    # so those are masked first; otherwise identical rule violations with
    # different numbers would be counted as different conflicts.
    print(f'\n{"="*70}')
    print('EN SIK ÇELIŞKILER')
    print(f'{"="*70}')
    conflict_types = [
        re.sub(r'\d+%', 'N%', c)
        for r in results
        for c in r['conflicts']
    ]
    for conflict, cnt in Counter(conflict_types).most_common(10):
        print(f' {cnt:>5}x {conflict}')

    # Distribution of the consistency score.
    print(f'\n{"="*70}')
    print('TUTARSIZLIK DAĞILIMI')
    print(f'{"="*70}')
    buckets = [
        ('Tamamen tutarlı', 0.0, 0.05),
        ('Çok tutarlı', 0.05, 0.15),
        ('Orta', 0.15, 0.30),
        ('Tutarsız', 0.30, 0.50),
        ('Çok tutarsız', 0.50, 1.01),
    ]
    for label, lo, hi in buckets:
        mask = (df_results['consistency_score'] >= lo) & (df_results['consistency_score'] < hi)
        n = int(mask.sum())
        ou25_hit = df_results.loc[mask, 'ou25_correct'].mean()
        ms_hit = df_results.loc[mask, 'ms_correct'].mean()
        print(f' {label:<20} {n:>6,} maç ({pct_of_total(n):>4.0f}%) | '
              f'MS={ms_hit:.0%} OU25={ou25_hit:.0%}')

    # Save the report.
    report = {
        'total_test': total,
        'thresholds': {},
    }
    for t in thresholds:
        mask = df_results['consistency_score'] <= t
        n = int(mask.sum())
        entry = {
            'n_matches': n,
            'pct': round(pct_of_total(n), 1),
            'markets': {},
        }
        for m in markets:
            sub_c = df_results.loc[mask, f'{m}_correct'].dropna()
            sub_r = df_results.loc[mask, f'{m}_roi'].dropna()
            if len(sub_c) > 0:
                entry['markets'][m] = {
                    'hit_rate': round(float(sub_c.mean()), 4),
                    'avg_roi': round(float(sub_r.mean()), 4),
                    'total_roi': round(float(sub_r.sum()), 2),
                }
        report['thresholds'][str(t)] = entry

    reports_dir = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'reports')
    # The directory is not guaranteed to exist on a fresh checkout.
    os.makedirs(reports_dir, exist_ok=True)
    out_path = os.path.join(reports_dir, 'backtest_consistency.json')
    with open(out_path, 'w') as f:
        json.dump(report, f, indent=2)
    print(f'\nRapor: {out_path}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -0,0 +1,310 @@
|
||||
"""
|
||||
League Model Backtest — Son 100+ Maç
|
||||
======================================
|
||||
Her lig için en son 100-200 maçı (eğitim datasından bağımsız, test seti)
|
||||
lig bazlı modelle tahmin eder ve gerçek sonuçla karşılaştırır.
|
||||
|
||||
Usage:
|
||||
python scripts/backtest_league_models.py
|
||||
python scripts/backtest_league_models.py --min-matches 150
|
||||
"""
|
||||
|
||||
import os, sys, json, warnings, argparse
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import xgboost as xgb
|
||||
from sklearn.metrics import accuracy_score
|
||||
|
||||
warnings.filterwarnings("ignore")
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from models.league_model import get_league_model_loader, MARKET_META, FILE_TO_SIGNAL
|
||||
|
||||
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
DATA_PATH = os.path.join(AI_ENGINE_DIR, "data", "training_data.csv")
|
||||
REPORTS_DIR = os.path.join(AI_ENGINE_DIR, "reports")
|
||||
QL_PATH = os.path.join(os.path.dirname(AI_ENGINE_DIR), "qualified_leagues.json")
|
||||
|
||||
# Gerçek label kolonları (CSV'den)
|
||||
LABEL_COLS = {
|
||||
"MS": "label_ms",
|
||||
"OU15": "label_ou15",
|
||||
"OU25": "label_ou25",
|
||||
"OU35": "label_ou35",
|
||||
"BTTS": "label_btts",
|
||||
"HT": "label_ht_result",
|
||||
"HT_OU05": "label_ht_ou05",
|
||||
"HT_OU15": "label_ht_ou15",
|
||||
"HTFT": "label_ht_ft",
|
||||
"OE": "label_odd_even",
|
||||
"CARDS": "label_cards_ou45",
|
||||
"HCAP": "label_handicap_ms",
|
||||
}
|
||||
|
||||
# Model dosya adı → signal key eşlemesi
|
||||
SIGNAL_TO_FILE = {v: k for k, v in FILE_TO_SIGNAL.items()}
|
||||
|
||||
SKIP_COLS = {
|
||||
"match_id","home_team_id","away_team_id","league_id","mst_utc",
|
||||
"score_home","score_away","total_goals","ht_score_home","ht_score_away","ht_total_goals",
|
||||
"label_ms","label_ou05","label_ou15","label_ou25","label_ou35","label_btts",
|
||||
"label_ht_result","label_ht_ou05","label_ht_ou15","label_ht_ft",
|
||||
"label_odd_even","label_yellow_cards","label_cards_ou45","label_handicap_ms",
|
||||
}
|
||||
|
||||
|
||||
def backtest_league(
    league_id: str,
    df_league: pd.DataFrame,
    feature_cols: list,
    league_model,
    n_test: int,
) -> dict:
    """Backtest the last *n_test* matches of one league, market by market.

    Args:
        league_id: League identifier (not used by the computation itself).
        df_league: All rows of the league; must contain ``mst_utc``.
        feature_cols: Feature column names passed to the model.
        league_model: League-specific model exposing ``has_market`` and
            ``predict_market``; when falsy no market is evaluated.
        n_test: Number of most recent matches to evaluate.

    Returns:
        ``{signal_key: {"accuracy", "n", "source"}}`` for every market that
        has a label column, at least 30 labelled matches in the test slice
        and a league-specific model.
    """
    df_test = df_league.sort_values("mst_utc").tail(n_test)
    results = {}

    for sig_key, mfile_key in SIGNAL_TO_FILE.items():
        label_col = LABEL_COLS.get(sig_key)
        if not label_col or label_col not in df_test.columns:
            continue

        # Only rows with a known outcome can be scored.
        labelled = df_test[df_test[label_col].notna()]
        if len(labelled) < 30:
            continue

        if not (league_model and league_model.has_market(mfile_key)):
            continue

        # Labels and predictions are collected pairwise from the same row,
        # so a missing label or an empty prediction can never shift one
        # list against the other.
        y_valid = []
        preds = []
        for _, row in labelled.iterrows():
            feat = row[feature_cols].fillna(0).to_dict()
            probs = league_model.predict_market(mfile_key, feat)
            if not probs:
                continue
            best = max(probs, key=probs.__getitem__)
            class_labels = MARKET_META[mfile_key][1]
            preds.append(class_labels.index(best))
            y_valid.append(row[label_col])

        if not preds:
            continue

        acc = accuracy_score(y_valid, preds)
        results[sig_key] = {
            "accuracy": round(acc, 4),
            "n": len(preds),
            "source": "league_specific",
        }

    return results
|
||||
|
||||
|
||||
def backtest_with_general_v25(
    df_test: pd.DataFrame,
    feature_cols: list,
) -> dict:
    """Backtest *df_test* with the general V25 XGBoost models.

    Args:
        df_test: Rows to evaluate (features plus label columns).
        feature_cols: Feature column names passed to the model.

    Returns:
        ``{signal_key: {"accuracy", "n", "source": "general_v25"}}`` for the
        markets MS, OU15, OU25, OU35 and BTTS that have at least 30 labelled
        rows and an ``"xgb"`` model. An empty dict is returned when the V25
        predictor cannot be loaded. Failures are reported on stderr and the
        affected market is skipped.
    """
    try:
        from models.v25_ensemble import get_v25_predictor
        v25 = get_v25_predictor()
        if not v25._loaded:
            v25.load_models()
    except Exception as exc:
        # Best effort: the caller treats "no results" as "no fallback".
        print(f"[general_v25] predictor unavailable: {exc}", file=sys.stderr)
        return {}

    results = {}

    # Signal key -> key of the general V25 model.
    mkey_map = {
        "MS": "ms",
        "OU15": "ou15",
        "OU25": "ou25",
        "OU35": "ou35",
        "BTTS": "btts",
    }

    for sig_key, mkey in mkey_map.items():
        label_col = LABEL_COLS.get(sig_key)
        if not label_col or label_col not in df_test.columns:
            continue

        # Predict only rows with a known outcome so that labels and
        # predictions always have the same length and order.
        has_label = df_test[label_col].notna()
        y_true = df_test.loc[has_label, label_col].values
        if len(y_true) < 30 or not v25.has_market(mkey):
            continue

        try:
            models_v25 = v25.models.get(mkey, {})
            if "xgb" not in models_v25:
                continue
            X = df_test.loc[has_label, feature_cols].fillna(0)
            dmat = xgb.DMatrix(X.values, feature_names=feature_cols)
            raw = models_v25["xgb"].predict(dmat)
            # NOTE(review): MARKET_META is indexed with league-model file
            # keys elsewhere in this file; confirm it also contains these
            # general-model keys, otherwise every market is treated as binary.
            num_class = list(MARKET_META.get(mkey, (2,)))[0]

            if num_class > 2:
                preds = np.argmax(raw.reshape(-1, num_class), axis=1)
            else:
                preds = (raw >= 0.5).astype(int)

            acc = accuracy_score(y_true, preds)
            results[sig_key] = {
                "accuracy": round(float(acc), 4),
                "n": len(preds),
                "source": "general_v25",
            }
        except Exception as exc:
            print(f"[general_v25] {sig_key} skipped: {exc}", file=sys.stderr)
            continue

    return results
|
||||
|
||||
|
||||
def main():
    """Backtest every qualified league on its most recent matches.

    Command line:
        ``--min-matches`` (default 100) and ``--test-size`` (default 150);
        the larger of the two is the number of most recent matches tested
        per league, and also the minimum number of rows a league needs.

    For each league the league-specific XGBoost models are evaluated
    directly on the CSV features; markets they do not cover are filled from
    the general V25 models. A table, per-market averages and the best/worst
    MS leagues are printed, and the full result is written to
    ``REPORTS_DIR/backtest_league_results.json``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--min-matches", type=int, default=100)
    parser.add_argument("--test-size", type=int, default=150,
                        help="Son kaç maçı test için kullan (min 100)")
    args = parser.parse_args()
    n_test = max(args.min_matches, args.test_size)

    print("Loading training data ...")
    df = pd.read_csv(DATA_PATH, low_memory=False)
    feature_cols = [c for c in df.columns if c not in SKIP_COLS]
    print(f" {len(df):,} maç | {len(feature_cols)} feature")

    qualified = []
    if os.path.exists(QL_PATH):
        with open(QL_PATH) as fh:
            qualified = json.load(fh)
    loader = get_league_model_loader()

    # League names are cosmetic: if the database is unreachable the ids are
    # printed instead, so any failure here is deliberately non-fatal.
    league_names = {}
    try:
        import psycopg2
        from data.db import get_clean_dsn
        conn = psycopg2.connect(get_clean_dsn())
        try:
            cur = conn.cursor()
            cur.execute("SELECT id, name FROM leagues WHERE id = ANY(%s)", (qualified,))
            league_names = {r[0]: r[1] for r in cur.fetchall()}
        finally:
            conn.close()
    except Exception:
        league_names = {}

    counts = df[df["league_id"].isin(qualified)].groupby("league_id").size()
    leagues_to_test = counts[counts >= n_test].index.tolist()
    print(f"\nBacktest: {len(leagues_to_test)} lig (>={n_test} maç) | son {n_test} maç kullanılacak\n")

    all_results = []
    markets_order = ["MS", "OU15", "OU25", "OU35", "BTTS", "HT", "HT_OU05", "HT_OU15", "HTFT", "OE", "CARDS", "HCAP"]

    header = f"{'Liga':<35} {'Maç':>5} | " + " | ".join(f"{m:>7}" for m in markets_order)
    print(header)
    print("-" * len(header))

    for league_id in leagues_to_test:
        df_league = df[df["league_id"] == league_id]
        # str() keeps the fallback working for non-string ids and null names.
        name = str(league_names.get(league_id) or str(league_id)[:20])
        df_test = df_league.sort_values("mst_utc").tail(n_test)

        league_model = loader.get(league_id)

        if league_model and league_model.models:
            # Batch predict from CSV features (fast).
            mkt_results = {}

            for mfile_key in list(league_model.models.keys()):
                sig_key = FILE_TO_SIGNAL.get(mfile_key)
                if not sig_key:
                    continue
                label_col = LABEL_COLS.get(sig_key)
                if not label_col or label_col not in df_test.columns:
                    continue
                # Score only rows with a known outcome; predicting exactly
                # those rows keeps labels and predictions aligned.
                has_label = df_test[label_col].notna()
                y_true = df_test.loc[has_label, label_col].values
                if len(y_true) < 30:
                    continue

                try:
                    X = df_test.loc[has_label, feature_cols].fillna(0)
                    dmat = xgb.DMatrix(X.values, feature_names=feature_cols)
                    raw = league_model.models[mfile_key].predict(dmat)
                    nc = MARKET_META[mfile_key][0]
                    if nc > 2:
                        preds = np.argmax(raw.reshape(-1, nc), axis=1)
                    else:
                        preds = (raw >= 0.5).astype(int)

                    acc = accuracy_score(y_true, preds)
                    mkt_results[sig_key] = {"accuracy": round(float(acc), 4), "n": len(preds), "source": "league_xgb"}
                except Exception as e:
                    mkt_results[sig_key] = {"error": str(e)}

            # Fill markets the league model does not cover with general V25.
            gen_results = backtest_with_general_v25(df_test, feature_cols)
            for k, v in gen_results.items():
                if k not in mkt_results:
                    mkt_results[k] = {**v, "source": "general_v25_fallback"}
        else:
            # No league model — use general V25.
            mkt_results = backtest_with_general_v25(df_test, feature_cols)
            for k in mkt_results:
                mkt_results[k]["source"] = "general_v25"

        n_used = len(df_test)

        # Print one table row for the league.
        accs = []
        for m in markets_order:
            r = mkt_results.get(m, {})
            if "accuracy" in r:
                accs.append(f"{r['accuracy']*100:>6.1f}%")
            else:
                accs.append(f"{'—':>7}")
        print(f"{name:<35} {n_used:>5} | " + " | ".join(accs))

        all_results.append({
            "league_id": league_id,
            "league_name": name,
            "n_tested": n_used,
            "markets": mkt_results,
        })

    # ── Summary ───────────────────────────────────────────────────
    print("\n" + "=" * len(header))
    print("ORTALAMA DOĞRULUK (tüm ligler):")
    for m in markets_order:
        accs = [r["markets"][m]["accuracy"] for r in all_results if m in r["markets"] and "accuracy" in r["markets"][m]]
        if accs:
            print(f" {m:<10}: {np.mean(accs)*100:.1f}% (min={min(accs)*100:.1f}% max={max(accs)*100:.1f}% n_leagues={len(accs)})")

    # Best / worst leagues for the MS market.
    ms_sorted = sorted(
        [(r["league_name"], r["markets"].get("MS", {}).get("accuracy", 0), r["n_tested"])
         for r in all_results if "MS" in r["markets"] and "accuracy" in r["markets"]["MS"]],
        key=lambda x: x[1], reverse=True
    )
    print("\nEN İYİ MS (Top 10):")
    for name, acc, n in ms_sorted[:10]:
        print(f" {name:<35} {acc*100:.1f}% ({n} maç)")
    print("\nEN KÖTÜ MS (Bottom 10):")
    for name, acc, n in ms_sorted[-10:]:
        print(f" {name:<35} {acc*100:.1f}% ({n} maç)")

    # Save; the reports directory may not exist on a fresh checkout.
    report = {"generated_at": pd.Timestamp.now().isoformat(), "n_test_per_league": n_test, "results": all_results}
    os.makedirs(REPORTS_DIR, exist_ok=True)
    out_path = os.path.join(REPORTS_DIR, "backtest_league_results.json")
    with open(out_path, "w") as f:
        json.dump(report, f, indent=2)
    print(f"\nRapor: {out_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
+110
-197
@@ -1,223 +1,136 @@
|
||||
"""
|
||||
Real AI Engine Backtest Script
|
||||
==============================
|
||||
Uses the ACTUAL models (V20/V25 Ensemble) to predict historical matches.
|
||||
|
||||
Usage:
|
||||
python ai-engine/scripts/backtest_real.py
|
||||
Gerçek Odds Bazlı Backtest
|
||||
============================
|
||||
Model olasılığı vs gerçek bookmaker odds karşılaştırır.
|
||||
Edge varsa bahis açıldığı varsayılır, gerçek ROI hesaplanır.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
from datetime import datetime
|
||||
import os, sys, json
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import xgboost as xgb
|
||||
|
||||
# Add paths
|
||||
AI_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
ROOT_DIR = os.path.dirname(AI_DIR)
|
||||
sys.path.insert(0, ROOT_DIR)
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# Fix for Windows path issues in scripts
|
||||
if "scripts" in os.path.basename(AI_DIR):
|
||||
ROOT_DIR = os.path.dirname(ROOT_DIR) # One level up if inside scripts folder
|
||||
DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data', 'training_data.csv')
|
||||
MODELS_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'models', 'v25')
|
||||
REPORT_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'reports')
|
||||
|
||||
from services.single_match_orchestrator import get_single_match_orchestrator, MatchData
|
||||
SKIP_COLS = {
|
||||
'match_id','home_team_id','away_team_id','league_id','mst_utc',
|
||||
'score_home','score_away','total_goals','ht_score_home','ht_score_away','ht_total_goals',
|
||||
'label_ms','label_ou05','label_ou15','label_ou25','label_ou35','label_btts',
|
||||
'label_ht_result','label_ht_ou05','label_ht_ou15','label_ht_ft',
|
||||
'label_odd_even','label_yellow_cards','label_cards_ou45','label_handicap_ms',
|
||||
}
|
||||
|
||||
def get_clean_dsn() -> str:
    """Return the PostgreSQL DSN used by this script.

    The ``DATABASE_URL`` environment variable takes precedence so that
    credentials do not have to live in source control. A ``schema`` query
    parameter (a Prisma extension that libpq rejects as an unknown URI
    parameter) is removed from it. Without the variable the legacy
    hard-coded local DSN is returned, so existing setups keep working.
    """
    from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

    env_dsn = os.environ.get("DATABASE_URL")
    if env_dsn:
        parts = urlsplit(env_dsn)
        kept = [
            (key, value)
            for key, value in parse_qsl(parts.query, keep_blank_values=True)
            if key != "schema"
        ]
        return urlunsplit(parts._replace(query=urlencode(kept)))

    # SECURITY(review): credentials committed to the repository; rotate them
    # and remove this fallback once every environment sets DATABASE_URL.
    return "postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db"
|
||||
# (model_key, n_class, pred_class, label_col, odds_col, isim)
|
||||
MARKETS = [
|
||||
('ms', 3, 0, 'label_ms', 'odds_ms_h', 'MS-Ev'),
|
||||
('ms', 3, 1, 'label_ms', 'odds_ms_d', 'MS-Ber'),
|
||||
('ms', 3, 2, 'label_ms', 'odds_ms_a', 'MS-Dep'),
|
||||
('ou15', 2, 1, 'label_ou15', 'odds_ou15_o', 'OU15-Ust'),
|
||||
('ou15', 2, 0, 'label_ou15', 'odds_ou15_u', 'OU15-Alt'),
|
||||
('ou25', 2, 1, 'label_ou25', 'odds_ou25_o', 'OU25-Ust'),
|
||||
('ou25', 2, 0, 'label_ou25', 'odds_ou25_u', 'OU25-Alt'),
|
||||
('ou35', 2, 1, 'label_ou35', 'odds_ou35_o', 'OU35-Ust'),
|
||||
('ou35', 2, 0, 'label_ou35', 'odds_ou35_u', 'OU35-Alt'),
|
||||
('btts', 2, 1, 'label_btts', 'odds_btts_y', 'BTTS-Var'),
|
||||
('btts', 2, 0, 'label_btts', 'odds_btts_n', 'BTTS-Yok'),
|
||||
]
|
||||
|
||||
def run_backtest():
|
||||
print("🚀 REAL AI BACKTEST: Sept 13, 2024 - Top Leagues")
|
||||
print("🧠 Engine: V30 Ensemble (V20+V25)")
|
||||
print("="*60)
|
||||
MIN_ODDS = 1.10
|
||||
MAX_ODDS = 10.0
|
||||
|
||||
# Load Top Leagues
|
||||
leagues_path = os.path.join(ROOT_DIR, "top_leagues.json")
|
||||
try:
|
||||
with open(leagues_path, 'r') as f:
|
||||
top_leagues = json.load(f)
|
||||
league_ids = tuple(str(lid) for lid in top_leagues)
|
||||
print(f"📋 Loaded {len(top_leagues)} top leagues.")
|
||||
except Exception as e:
|
||||
print(f"❌ Error loading top_leagues.json: {e}")
|
||||
return
|
||||
|
||||
# Date Range (Sept 13, 2024)
|
||||
start_dt = datetime(2024, 9, 13, 0, 0, 0)
|
||||
end_dt = datetime(2024, 9, 13, 23, 59, 59)
|
||||
start_ts = int(start_dt.timestamp() * 1000)
|
||||
end_ts = int(end_dt.timestamp() * 1000)
|
||||
def load_model(market):
    """Return the V25 XGBoost booster for *market*, or ``None`` if its file is absent.

    The booster is read from ``MODELS_DIR/xgb_v25_<market>.json``.
    """
    model_path = os.path.join(MODELS_DIR, f'xgb_v25_{market}.json')
    found = os.path.exists(model_path)
    if not found:
        return None
    model = xgb.Booster()
    model.load_model(model_path)
    return model
|
||||
|
||||
dsn = get_clean_dsn()
|
||||
conn = psycopg2.connect(dsn)
|
||||
cur = conn.cursor(cursor_factory=RealDictCursor)
|
||||
|
||||
# Fetch Matches
|
||||
cur.execute("""
|
||||
SELECT m.id, m.match_name, m.home_team_id, m.away_team_id,
|
||||
m.mst_utc, m.league_id, m.status, m.score_home, m.score_away,
|
||||
t1.name as home_team, t2.name as away_team,
|
||||
l.name as league_name
|
||||
FROM matches m
|
||||
LEFT JOIN teams t1 ON m.home_team_id = t1.id
|
||||
LEFT JOIN teams t2 ON m.away_team_id = t2.id
|
||||
LEFT JOIN leagues l ON m.league_id = l.id
|
||||
WHERE m.mst_utc BETWEEN %s AND %s
|
||||
AND m.league_id IN %s
|
||||
AND m.status = 'FT'
|
||||
ORDER BY m.mst_utc ASC
|
||||
LIMIT 20 -- Limit to 20 matches to avoid running for hours on a single backtest
|
||||
""", (start_ts, end_ts, league_ids))
|
||||
def main():
|
||||
print('Veri yukleniyor...')
|
||||
df = pd.read_csv(DATA_PATH, low_memory=False)
|
||||
df = df.sort_values('mst_utc')
|
||||
n_test = int(len(df) * 0.20)
|
||||
df_test = df.tail(n_test).copy().reset_index(drop=True)
|
||||
print(f'Test seti: {len(df_test):,} mac')
|
||||
|
||||
rows = cur.fetchall()
|
||||
print(f"📊 Found {len(rows)} finished matches. Starting AI Analysis...")
|
||||
feature_cols = [c for c in df.columns if c not in SKIP_COLS]
|
||||
X = df_test[feature_cols].fillna(0).values
|
||||
|
||||
if not rows:
|
||||
print("⚠️ No matches found for this date.")
|
||||
cur.close()
|
||||
conn.close()
|
||||
return
|
||||
# Modelleri yukle
|
||||
loaded = {}
|
||||
for mkey, n_class, *_ in MARKETS:
|
||||
if mkey not in loaded:
|
||||
m = load_model(mkey)
|
||||
if m:
|
||||
loaded[mkey] = (m, n_class)
|
||||
print(f'Modeller: {list(loaded.keys())}')
|
||||
|
||||
# Initialize AI Engine
|
||||
try:
|
||||
orchestrator = get_single_match_orchestrator()
|
||||
print("✅ AI Engine (SingleMatchOrchestrator) Loaded.")
|
||||
except Exception as e:
|
||||
print(f"❌ Failed to load AI Engine: {e}")
|
||||
print("💡 Make sure models are trained/present in ai-engine/models/")
|
||||
cur.close()
|
||||
conn.close()
|
||||
return
|
||||
# Toplu tahmin
|
||||
raw_preds = {}
|
||||
for mkey, (model, n_class) in loaded.items():
|
||||
dmat = xgb.DMatrix(pd.DataFrame(X, columns=feature_cols))
|
||||
raw = model.predict(dmat)
|
||||
raw_preds[mkey] = raw.reshape(-1, n_class) if n_class > 2 else np.column_stack([1-raw, raw])
|
||||
|
||||
# ─── Backtest Loop ───
|
||||
total_matches_analyzed = 0
|
||||
bets_skipped = 0
|
||||
bets_played = 0
|
||||
bets_won = 0
|
||||
total_profit = 0.0
|
||||
# Backtest
|
||||
all_results = []
|
||||
print(f'\n{"Market":<12} {"Edge>=":>7} {"Bahis":>7} {"Hit%":>7} {"AvgOdds":>9} {"ROI/b":>8} {"Toplam":>10}')
|
||||
print('-' * 65)
|
||||
|
||||
# Thresholds matching the NEW Skip Logic
|
||||
MIN_CONF = 45.0
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
for i, row in enumerate(rows):
|
||||
match_id = str(row['id'])
|
||||
home_team = row['home_team']
|
||||
away_team = row['away_team']
|
||||
home_score = row['score_home']
|
||||
away_score = row['score_away']
|
||||
|
||||
print(f"\n[{i+1}/{len(rows)}] Analyzing: {home_team} vs {away_team} ...")
|
||||
|
||||
try:
|
||||
# 1. AI PREDICTION (Actual Model Call)
|
||||
prediction = orchestrator.analyze_match(match_id)
|
||||
|
||||
if not prediction:
|
||||
print(f" ⚠️ AI returned no prediction.")
|
||||
for mkey, n_class, pred_cls, label_col, odds_col, isim in MARKETS:
|
||||
if mkey not in raw_preds or label_col not in df_test.columns or odds_col not in df_test.columns:
|
||||
continue
|
||||
|
||||
total_matches_analyzed += 1
|
||||
mp = raw_preds[mkey][:, pred_cls]
|
||||
act = pd.to_numeric(df_test[label_col], errors='coerce').values
|
||||
bko = pd.to_numeric(df_test[odds_col], errors='coerce').values
|
||||
|
||||
# 2. Extract Main Pick
|
||||
main_pick = prediction.get("main_pick") or {}
|
||||
pick_name = main_pick.get("pick")
|
||||
confidence = main_pick.get("confidence", 0)
|
||||
odds = main_pick.get("odds", 0)
|
||||
valid = (~np.isnan(act) & ~np.isnan(bko) &
|
||||
(bko >= MIN_ODDS) & (bko <= MAX_ODDS))
|
||||
mp, act, bko = mp[valid], act[valid].astype(int), bko[valid]
|
||||
implied = 1.0 / bko
|
||||
edge = mp - implied
|
||||
|
||||
if not pick_name or not confidence:
|
||||
print(f" ⚠️ No main pick found in prediction.")
|
||||
print(f'\n{isim}:')
|
||||
for min_e in [0.02, 0.03, 0.05, 0.07, 0.10]:
|
||||
mask = edge >= min_e
|
||||
n = mask.sum()
|
||||
if n < 20:
|
||||
continue
|
||||
won = (act[mask] == pred_cls).astype(int)
|
||||
roi = (bko[mask] - 1) * won - (1 - won)
|
||||
hit = won.mean()
|
||||
avg_roi = roi.mean()
|
||||
total = roi.sum()
|
||||
avg_odds = bko[mask].mean()
|
||||
sign = '+' if total > 0 else ''
|
||||
print(f' edge>={min_e:+.0%} n={n:>5,} hit={hit:.1%} odds={avg_odds:.2f} roi/b={avg_roi:+.3f} toplam={sign}{total:.1f}')
|
||||
all_results.append({'market': isim, 'min_edge': min_e, 'n': n,
|
||||
'hit': round(hit, 4), 'avg_odds': round(avg_odds, 3),
|
||||
'avg_roi': round(avg_roi, 4), 'total_roi': round(total, 2)})
|
||||
|
||||
print(f" 🤖 Pick: {pick_name} | Conf: {confidence}% | Odds: {odds}")
|
||||
# En iyi
|
||||
winners = sorted([r for r in all_results if r['total_roi'] > 0],
|
||||
key=lambda x: x['avg_roi'], reverse=True)
|
||||
print(f'\n{"="*65}')
|
||||
print('KAZANCLI KOMBINASYONLAR (total_roi > 0):')
|
||||
print(f'{"="*65}')
|
||||
for r in winners[:20]:
|
||||
print(f' {r["market"]:<12} edge>={r["min_edge"]:+.0%} | n={r["n"]:>5,} | '
|
||||
f'hit={r["hit"]:.0%} | roi/b={r["avg_roi"]:+.3f} | toplam={r["total_roi"]:+.1f}')
|
||||
|
||||
# 3. Apply Skip Logic (New Backtest Logic)
|
||||
if confidence < MIN_CONF:
|
||||
print(f" 🚫 SKIPPED (Confidence {confidence}% < {MIN_CONF}%)")
|
||||
bets_skipped += 1
|
||||
continue
|
||||
os.makedirs(REPORT_DIR, exist_ok=True)
|
||||
with open(os.path.join(REPORT_DIR, 'backtest_real_odds.json'), 'w') as f:
|
||||
json.dump(all_results, f, indent=2)
|
||||
print(f'\nRapor kaydedildi.')
|
||||
|
||||
if odds > 0:
|
||||
implied_prob = 1.0 / odds
|
||||
my_prob = confidence / 100.0
|
||||
if my_prob - implied_prob < -0.03: # Negative edge
|
||||
print(f" 🚫 SKIPPED (Negative Edge)")
|
||||
bets_skipped += 1
|
||||
continue
|
||||
|
||||
# 4. Bet Played
|
||||
bets_played += 1
|
||||
print(f" 🎲 BET PLAYED: {pick_name} @ {odds}")
|
||||
|
||||
# 5. Resolve Bet
|
||||
won = False
|
||||
# Basic resolution logic (Need to parse pick_name like "1", "X", "2", "2.5 Üst", etc.)
|
||||
pick_clean = str(pick_name).upper()
|
||||
|
||||
# MS
|
||||
if pick_clean in ["1", "MS 1"] and home_score > away_score: won = True
|
||||
elif pick_clean in ["X", "MS X"] and home_score == away_score: won = True
|
||||
elif pick_clean in ["2", "MS 2"] and away_score > home_score: won = True
|
||||
|
||||
# OU25
|
||||
elif "ÜST" in pick_clean or "OVER" in pick_clean:
|
||||
if (home_score + away_score) > 2.5: won = True
|
||||
elif "ALT" in pick_clean or "UNDER" in pick_clean:
|
||||
if (home_score + away_score) < 2.5: won = True
|
||||
|
||||
# BTTS
|
||||
elif "VAR" in pick_clean and home_score > 0 and away_score > 0: won = True
|
||||
elif "YOK" in pick_clean and (home_score == 0 or away_score == 0): won = True
|
||||
|
||||
if won:
|
||||
bets_won += 1
|
||||
profit = odds - 1.0
|
||||
print(f" ✅ WON! (+{profit:.2f} units)")
|
||||
else:
|
||||
profit = -1.0
|
||||
print(f" ❌ LOST! (-1.00 units)")
|
||||
|
||||
total_profit += profit
|
||||
|
||||
except Exception as e:
|
||||
print(f" 💥 Error during analysis: {e}")
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
|
||||
# ─── FINAL REPORT ───
|
||||
print("\n" + "="*60)
|
||||
print("📈 REAL AI BACKTEST RESULTS")
|
||||
print(f"🕒 Time taken: {elapsed:.1f} seconds")
|
||||
print("="*60)
|
||||
print(f"📊 Matches Analyzed: {total_matches_analyzed}")
|
||||
print(f"🚫 Bets SKIPPED: {bets_skipped}")
|
||||
print(f"✅ Bets PLAYED: {bets_played}")
|
||||
|
||||
if bets_played > 0:
|
||||
win_rate = (bets_won / bets_played) * 100
|
||||
roi = (total_profit / bets_played) * 100
|
||||
yield_val = total_profit # Net Units
|
||||
|
||||
print(f"🏆 Bets Won: {bets_won}")
|
||||
print(f"💀 Bets Lost: {bets_played - bets_won}")
|
||||
print("-" * 40)
|
||||
print(f" Win Rate: {win_rate:.2f}%")
|
||||
print(f"💰 Total Profit (Units): {total_profit:.2f}")
|
||||
print(f"📊 ROI: {roi:.2f}%")
|
||||
|
||||
if roi > 0:
|
||||
print("🟢 STRATEGY IS PROFITABLE!")
|
||||
else:
|
||||
print("🔴 STRATEGY IS LOSING")
|
||||
else:
|
||||
print("⚠️ No bets were played. All were skipped or failed.")
|
||||
|
||||
cur.close()
|
||||
conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
run_backtest()
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
@@ -129,6 +129,39 @@ FEATURE_COLS = [
|
||||
"home_avg_player_exp", "away_avg_player_exp",
|
||||
"home_goals_diversity", "away_goals_diversity",
|
||||
|
||||
# V27 H2H Expanded (4)
|
||||
"h2h_home_goals_avg", "h2h_away_goals_avg",
|
||||
"h2h_recent_trend", "h2h_venue_advantage",
|
||||
|
||||
# V27 Rolling Stats (13)
|
||||
"home_rolling5_goals", "home_rolling5_conceded",
|
||||
"home_rolling10_goals", "home_rolling10_conceded",
|
||||
"home_rolling20_goals", "home_rolling20_conceded",
|
||||
"away_rolling5_goals", "away_rolling5_conceded",
|
||||
"away_rolling10_goals", "away_rolling10_conceded",
|
||||
"home_rolling5_cs", "away_rolling5_cs",
|
||||
|
||||
# V27 Venue Stats (4)
|
||||
"home_venue_goals", "home_venue_conceded",
|
||||
"away_venue_goals", "away_venue_conceded",
|
||||
|
||||
# V27 Goal Trend (2)
|
||||
"home_goal_trend", "away_goal_trend",
|
||||
|
||||
# V27 Calendar (5)
|
||||
"home_days_rest", "away_days_rest",
|
||||
"match_month", "is_season_start", "is_season_end",
|
||||
|
||||
# V27 Interaction (6)
|
||||
"attack_vs_defense_home", "attack_vs_defense_away",
|
||||
"xg_diff", "form_momentum_interaction",
|
||||
"elo_form_consistency", "upset_x_elo_gap",
|
||||
|
||||
# V27 League Expanded (5)
|
||||
"league_home_win_rate", "league_draw_rate",
|
||||
"league_btts_rate", "league_ou25_rate",
|
||||
"league_reliability_score",
|
||||
|
||||
# Labels
|
||||
"score_home", "score_away", "total_goals",
|
||||
"ht_score_home", "ht_score_away", "ht_total_goals",
|
||||
@@ -296,6 +329,10 @@ class BatchDataLoader:
|
||||
SELECT league_id,
|
||||
AVG(score_home + score_away) as avg_goals,
|
||||
AVG(CASE WHEN score_home = 0 AND score_away = 0 THEN 1.0 ELSE 0.0 END) as zero_rate,
|
||||
AVG(CASE WHEN score_home > score_away THEN 1.0 ELSE 0.0 END) as home_win_rate,
|
||||
AVG(CASE WHEN score_home = score_away THEN 1.0 ELSE 0.0 END) as draw_rate,
|
||||
AVG(CASE WHEN score_home > 0 AND score_away > 0 THEN 1.0 ELSE 0.0 END) as btts_rate,
|
||||
AVG(CASE WHEN score_home + score_away > 2.5 THEN 1.0 ELSE 0.0 END) as ou25_rate,
|
||||
COUNT(*) as match_count
|
||||
FROM matches
|
||||
WHERE status = 'FT'
|
||||
@@ -305,11 +342,16 @@ class BatchDataLoader:
|
||||
GROUP BY league_id
|
||||
""", self.top_league_ids)
|
||||
|
||||
for league_id, avg_goals, zero_rate, cnt in self.cur.fetchall():
|
||||
for row in self.cur.fetchall():
|
||||
league_id, avg_goals, zero_rate, home_win_rate, draw_rate, btts_rate, ou25_rate, cnt = row
|
||||
self.league_stats_cache[league_id] = {
|
||||
"avg_goals": float(avg_goals) if avg_goals else 2.5,
|
||||
"zero_rate": float(zero_rate) if zero_rate else 0.07,
|
||||
"match_count": cnt
|
||||
"home_win_rate": float(home_win_rate) if home_win_rate else 0.45,
|
||||
"draw_rate": float(draw_rate) if draw_rate else 0.25,
|
||||
"btts_rate": float(btts_rate) if btts_rate else 0.50,
|
||||
"ou25_rate": float(ou25_rate) if ou25_rate else 0.50,
|
||||
"match_count": cnt,
|
||||
}
|
||||
|
||||
def _load_team_history(self):
|
||||
@@ -666,6 +708,9 @@ class FeatureExtractor:
|
||||
|
||||
print(f"\n🔄 Extracting features for {total} matches...", flush=True)
|
||||
|
||||
_last_print = t_start
|
||||
_PRINT_INTERVAL = 60 # her dakika bir ilerleme
|
||||
|
||||
# Process chronologically — ELO grows as we go
|
||||
for i, m in enumerate(matches):
|
||||
(
|
||||
@@ -683,17 +728,25 @@ class FeatureExtractor:
|
||||
league_name,
|
||||
) = m
|
||||
|
||||
if i % 100 == 0 and i > 0:
|
||||
elapsed = time.time() - t_start
|
||||
rate = i / elapsed # matches per second
|
||||
now = time.time()
|
||||
if now - _last_print >= _PRINT_INTERVAL and i > 0:
|
||||
elapsed = now - t_start
|
||||
rate = i / elapsed
|
||||
remaining = (total - i) / rate if rate > 0 else 0
|
||||
pct = i / total * 100
|
||||
eta_h = int(remaining // 3600)
|
||||
eta_m = int((remaining % 3600) // 60)
|
||||
eta_s = int(remaining % 60)
|
||||
eta_str = (f"{eta_h}s {eta_m}dk" if eta_h else f"{eta_m}dk {eta_s}s")
|
||||
print(
|
||||
f" [{i}/{total}] ({pct:.0f}%) | {rate:.1f} maç/s | "
|
||||
f"ETA: {remaining/60:.1f} dk | skipped: {skipped} | "
|
||||
f"dq_rejected: {dq_rejected}",
|
||||
f" ⏱ [{i:>6}/{total}] %{pct:>4.1f} | "
|
||||
f"{rate:.1f} maç/s | "
|
||||
f"bitti: {len(rows):,} | "
|
||||
f"atlanan: {skipped+dq_rejected} | "
|
||||
f"ETA: {eta_str}",
|
||||
flush=True,
|
||||
)
|
||||
_last_print = now
|
||||
|
||||
row = self._extract_one(
|
||||
mid, hid, aid, sh, sa, hth, hta, mst, lid,
|
||||
@@ -882,7 +935,10 @@ class FeatureExtractor:
|
||||
}
|
||||
|
||||
# === LEAGUE FEATURES ===
|
||||
league = self.loader.league_stats_cache.get(lid, {"avg_goals": 2.5, "zero_rate": 0.07})
|
||||
league = self.loader.league_stats_cache.get(lid, {
|
||||
"avg_goals": 2.5, "zero_rate": 0.07, "home_win_rate": 0.45,
|
||||
"draw_rate": 0.25, "btts_rate": 0.50, "ou25_rate": 0.50, "match_count": 0,
|
||||
})
|
||||
league_features = {
|
||||
"league_avg_goals": league["avg_goals"],
|
||||
"league_zero_goal_rate": league["zero_rate"],
|
||||
@@ -953,6 +1009,11 @@ class FeatureExtractor:
|
||||
home_goals_form = home_sq.get('goals_form', 0)
|
||||
away_goals_form = away_sq.get('goals_form', 0)
|
||||
|
||||
# === V27 ROLLING / VENUE / CALENDAR FEATURES ===
|
||||
v27 = self._compute_v27_features(hid, aid, mst, elo_features, form_features,
|
||||
home_momentum_score, away_momentum_score,
|
||||
upset_feats, h2h_features, league)
|
||||
|
||||
# === ASSEMBLE ROW ===
|
||||
row = {
|
||||
"match_id": mid,
|
||||
@@ -1008,6 +1069,9 @@ class FeatureExtractor:
|
||||
"home_goals_diversity": home_sq.get('goals_diversity', 0.0),
|
||||
"away_goals_diversity": away_sq.get('goals_diversity', 0.0),
|
||||
|
||||
# V27 Features
|
||||
**v27,
|
||||
|
||||
# Labels
|
||||
"score_home": sh,
|
||||
"score_away": sa,
|
||||
@@ -1033,6 +1097,103 @@ class FeatureExtractor:
|
||||
|
||||
return row
|
||||
|
||||
def _compute_v27_features(self, hid, aid, mst, elo_features, form_features,
|
||||
home_momentum, away_momentum, upset_feats, h2h_features, league):
|
||||
"""Compute V27 rolling, venue, calendar, interaction features from pre-loaded data."""
|
||||
home_history = self.loader.team_matches.get(hid, [])
|
||||
away_history = self.loader.team_matches.get(aid, [])
|
||||
|
||||
def _rolling(history, n):
|
||||
recent = [m for m in history if m[0] < mst][-n:]
|
||||
if not recent:
|
||||
return 1.3, 1.1, 0.0
|
||||
goals = sum(m[2] for m in recent) / len(recent)
|
||||
conceded = sum(m[3] for m in recent) / len(recent)
|
||||
cs = sum(1 for m in recent if m[3] == 0) / len(recent)
|
||||
return round(goals, 3), round(conceded, 3), round(cs, 3)
|
||||
|
||||
def _venue(history, is_home):
|
||||
recent = [m for m in history if m[0] < mst and m[1] == is_home][-10:]
|
||||
if not recent:
|
||||
return 1.3, 1.1
|
||||
goals = sum(m[2] for m in recent) / len(recent)
|
||||
conceded = sum(m[3] for m in recent) / len(recent)
|
||||
return round(goals, 3), round(conceded, 3)
|
||||
|
||||
def _days_rest(history):
|
||||
prior = [m[0] for m in history if m[0] < mst]
|
||||
if not prior:
|
||||
return 7.0
|
||||
last = prior[-1]
|
||||
return round(min((mst - last) / 86400000.0, 30.0), 1)
|
||||
|
||||
h5g, h5c, h5cs = _rolling(home_history, 5)
|
||||
h10g, h10c, _ = _rolling(home_history, 10)
|
||||
h20g, h20c, _ = _rolling(home_history, 20)
|
||||
a5g, a5c, a5cs = _rolling(away_history, 5)
|
||||
a10g, a10c, _ = _rolling(away_history, 10)
|
||||
|
||||
hvg, hvc = _venue(home_history, True)
|
||||
avg, avc = _venue(away_history, False)
|
||||
|
||||
home_rest = _days_rest(home_history)
|
||||
away_rest = _days_rest(away_history)
|
||||
|
||||
import datetime
|
||||
match_dt = datetime.datetime.utcfromtimestamp(mst / 1000)
|
||||
match_month = match_dt.month
|
||||
|
||||
elo_diff = elo_features["elo_diff"]
|
||||
form_elo_diff = elo_features["form_elo_diff"]
|
||||
mom_diff = home_momentum - away_momentum
|
||||
home_conceded = form_features["home_conceded_avg"]
|
||||
away_conceded = form_features["away_conceded_avg"]
|
||||
home_goals = form_features["home_goals_avg"]
|
||||
away_goals = form_features["away_goals_avg"]
|
||||
upset_potential = upset_feats.get("upset_potential", 0.0)
|
||||
|
||||
h2h_prior = [m for m in home_history if m[0] < mst and m[4] == aid]
|
||||
h2h_home_goals_avg = sum(m[2] for m in h2h_prior) / len(h2h_prior) if h2h_prior else 1.3
|
||||
h2h_away_goals_avg = sum(m[3] for m in h2h_prior) / len(h2h_prior) if h2h_prior else 1.1
|
||||
recent_h2h = h2h_prior[-3:]
|
||||
h2h_recent_trend = sum(1 if m[2] > m[3] else -1 if m[2] < m[3] else 0 for m in recent_h2h) / max(len(recent_h2h), 1)
|
||||
venue_h2h = [m for m in h2h_prior if m[1]]
|
||||
h2h_venue_advantage = sum(1 if m[2] > m[3] else 0 for m in venue_h2h) / max(len(venue_h2h), 1) if venue_h2h else 0.5
|
||||
|
||||
league_count = league.get("match_count", 0)
|
||||
|
||||
return {
|
||||
"h2h_home_goals_avg": round(h2h_home_goals_avg, 3),
|
||||
"h2h_away_goals_avg": round(h2h_away_goals_avg, 3),
|
||||
"h2h_recent_trend": round(h2h_recent_trend, 3),
|
||||
"h2h_venue_advantage": round(h2h_venue_advantage, 3),
|
||||
"home_rolling5_goals": h5g, "home_rolling5_conceded": h5c,
|
||||
"home_rolling10_goals": h10g, "home_rolling10_conceded": h10c,
|
||||
"home_rolling20_goals": h20g, "home_rolling20_conceded": h20c,
|
||||
"away_rolling5_goals": a5g, "away_rolling5_conceded": a5c,
|
||||
"away_rolling10_goals": a10g, "away_rolling10_conceded": a10c,
|
||||
"home_rolling5_cs": h5cs, "away_rolling5_cs": a5cs,
|
||||
"home_venue_goals": hvg, "home_venue_conceded": hvc,
|
||||
"away_venue_goals": avg, "away_venue_conceded": avc,
|
||||
"home_goal_trend": round(h5g - h10g, 3),
|
||||
"away_goal_trend": round(a5g - a10g, 3),
|
||||
"home_days_rest": home_rest, "away_days_rest": away_rest,
|
||||
"match_month": float(match_month),
|
||||
"is_season_start": 1.0 if match_month in (7, 8, 9) else 0.0,
|
||||
"is_season_end": 1.0 if match_month in (5, 6) else 0.0,
|
||||
"attack_vs_defense_home": round(home_goals - away_conceded, 3),
|
||||
"attack_vs_defense_away": round(away_goals - home_conceded, 3),
|
||||
"xg_diff": round(home_conceded - away_conceded, 3),
|
||||
"form_momentum_interaction": round(mom_diff * form_elo_diff / 1000.0, 4),
|
||||
"elo_form_consistency": round(1.0 - abs(elo_diff - form_elo_diff) / max(abs(elo_diff), 100.0), 4),
|
||||
"upset_x_elo_gap": round(upset_potential * abs(elo_diff) / 500.0, 4),
|
||||
"league_home_win_rate": league.get("home_win_rate", 0.45),
|
||||
"league_draw_rate": league.get("draw_rate", 0.25),
|
||||
"league_btts_rate": league.get("btts_rate", 0.50),
|
||||
"league_ou25_rate": league.get("ou25_rate", 0.50),
|
||||
"league_reliability_score": min(1.0, league_count / 500.0) if league_count else 0.3,
|
||||
}
|
||||
|
||||
def _validate_row_quality(
|
||||
self,
|
||||
row: dict,
|
||||
|
||||
@@ -0,0 +1,166 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": ["# Training Data Extraction — Google Colab\n", "SSH tunnel ile sunucuya bağlanır, DB'den 270K+ maç çeker, Drive'a kaydeder.\n"]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 1. Gerekli paketler\n",
|
||||
"!pip install sshtunnel psycopg2-binary pandas numpy -q\n",
|
||||
"print('Paketler hazır')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 2. Drive bağla\n",
|
||||
"from google.colab import drive\n",
|
||||
"drive.mount('/content/drive')\n",
|
||||
"import os\n",
|
||||
"DRIVE_DIR = '/content/drive/MyDrive/iddaai'\n",
|
||||
"os.makedirs(DRIVE_DIR, exist_ok=True)\n",
|
||||
"print('Drive hazır:', DRIVE_DIR)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 3. SSH private key upload\n",
|
||||
"# Mac'te terminalde şunu çalıştır, çıktıyı kopyala:\n",
|
||||
"# cat ~/.ssh/id_ed25519\n",
|
||||
"# Aşağıya yapıştır (BEGIN ve END satırları dahil)\n",
|
||||
"\n",
|
||||
"SSH_PRIVATE_KEY = \"\"\"-----BEGIN OPENSSH PRIVATE KEY-----\n",
|
||||
"BURAYA_KEY_ICERIGINI_YAPISTIR\n",
|
||||
"-----END OPENSSH PRIVATE KEY-----\"\"\"\n",
|
||||
"\n",
|
||||
"# Key dosyasına yaz\n",
|
||||
"key_path = '/root/.ssh/id_ed25519'\n",
|
||||
"os.makedirs('/root/.ssh', exist_ok=True)\n",
|
||||
"with open(key_path, 'w') as f:\n",
|
||||
" f.write(SSH_PRIVATE_KEY.strip() + '\\n')\n",
|
||||
"os.chmod(key_path, 0o600)\n",
|
||||
"print('SSH key hazır')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 4. SSH Tunnel aç + DB bağlantısını test et\n",
|
||||
"from sshtunnel import SSHTunnelForwarder\n",
|
||||
"import psycopg2\n",
|
||||
"\n",
|
||||
"tunnel = SSHTunnelForwarder(\n",
|
||||
" ('95.70.252.214', 2222),\n",
|
||||
" ssh_username='haruncan',\n",
|
||||
" ssh_pkey=key_path,\n",
|
||||
" remote_bind_address=('localhost', 5432),\n",
|
||||
" local_bind_address=('localhost', 15432),\n",
|
||||
")\n",
|
||||
"tunnel.start()\n",
|
||||
"print(f'Tunnel açık: localhost:{tunnel.local_bind_port}')\n",
|
||||
"\n",
|
||||
"conn = psycopg2.connect(\n",
|
||||
" host='localhost',\n",
|
||||
" port=15432,\n",
|
||||
" dbname='iddaai_db',\n",
|
||||
" user='iddaai_user',\n",
|
||||
" password='IddaA1_S4crET!',\n",
|
||||
")\n",
|
||||
"cur = conn.cursor()\n",
|
||||
"cur.execute(\"SELECT COUNT(*) FROM matches WHERE status='FT' AND score_home IS NOT NULL\")\n",
|
||||
"print(f'DB bağlantısı OK — FT maç sayısı: {cur.fetchone()[0]:,}')\n",
|
||||
"conn.close()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 5. extract_training_data.py kodunu Drive'dan veya doğrudan çalıştır\n",
|
||||
"# Önce repo'yu Drive'a kopyala (yoksa)\n",
|
||||
"import subprocess\n",
|
||||
"\n",
|
||||
"REPO_DIR = f'{DRIVE_DIR}/ai-engine'\n",
|
||||
"SCRIPT = f'{REPO_DIR}/scripts/extract_training_data.py'\n",
|
||||
"\n",
|
||||
"if not os.path.exists(SCRIPT):\n",
|
||||
" print('Script bulunamadı — ai-engine klasörünü Drive a yükle:')\n",
|
||||
" print(' Yerel makinede: cp -r /Users/piton/Documents/GitHub/iddaai/iddaai-be/ai-engine ~/Google\\ Drive/MyDrive/iddaai/')\n",
|
||||
"else:\n",
|
||||
" print('Script hazır:', SCRIPT)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 6. Extraction'ı çalıştır\n",
|
||||
"import sys, os\n",
|
||||
"sys.path.insert(0, REPO_DIR)\n",
|
||||
"\n",
|
||||
"# DB URL'i tunnel üzerinden ayarla\n",
|
||||
"os.environ['DATABASE_URL'] = 'postgresql://iddaai_user:IddaA1_S4crET!@localhost:15432/iddaai_db'\n",
|
||||
"\n",
|
||||
"# Output CSV'yi Drive'a kaydet\n",
|
||||
"OUTPUT_CSV = f'{DRIVE_DIR}/training_data_full.csv'\n",
|
||||
"\n",
|
||||
"# Script'i import et ve main'i çalıştır\n",
|
||||
"import importlib.util\n",
|
||||
"spec = importlib.util.spec_from_file_location('extract', SCRIPT)\n",
|
||||
"mod = importlib.util.load_from_spec(spec)\n",
|
||||
"spec.loader.exec_module(mod)\n",
|
||||
"\n",
|
||||
"# OUTPUT_CSV'yi override et\n",
|
||||
"mod.OUTPUT_CSV = OUTPUT_CSV\n",
|
||||
"mod.TOP_LEAGUES_PATH = f'{DRIVE_DIR}/qualified_leagues.json'\n",
|
||||
"\n",
|
||||
"mod.main()\n",
|
||||
"print(f'\\nKaydedildi: {OUTPUT_CSV}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 7. Tunnel kapat\n",
|
||||
"tunnel.stop()\n",
|
||||
"print('Tunnel kapatıldı')\n",
|
||||
"\n",
|
||||
"# Dosya boyutunu kontrol et\n",
|
||||
"size_mb = os.path.getsize(OUTPUT_CSV) / 1024 / 1024\n",
|
||||
"import pandas as pd\n",
|
||||
"df = pd.read_csv(OUTPUT_CSV, nrows=5)\n",
|
||||
"print(f'CSV: {size_mb:.1f} MB')\n",
|
||||
"print(f'Kolonlar: {len(df.columns)}')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
|
||||
"language_info": {"name": "python", "version": "3.10.0"}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,806 @@
|
||||
"""
|
||||
V25 Backtest + Calibration Training Script
|
||||
==========================================
|
||||
Runs a full backtest on historical football matches, measures model accuracy
|
||||
by market / confidence band / league, and trains isotonic calibration models
|
||||
for MS, OU15, OU25, and BTTS markets.
|
||||
|
||||
Usage:
|
||||
venv/bin/python scripts/run_backtest_and_calibrate.py
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import pickle
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Path setup — works whether executed from ai-engine/ or project root
|
||||
# ---------------------------------------------------------------------------
|
||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
AI_ENGINE_DIR = os.path.dirname(SCRIPT_DIR)
|
||||
sys.path.insert(0, AI_ENGINE_DIR)
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from models.calibration import Calibrator
|
||||
|
||||
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Input: league whitelist, one directory above ai-engine/.
QUALIFIED_LEAGUES_PATH = os.path.join(AI_ENGINE_DIR, "..", "qualified_leagues.json")
# Outputs: calibration models and backtest reports.
CALIBRATION_DIR = os.path.join(AI_ENGINE_DIR, "models", "calibration")
REPORTS_DIR = os.path.join(AI_ENGINE_DIR, "reports")
# Upper bound on the number of matches pulled for the backtest.
MAX_MATCHES = 3000
# Progress is printed every PROGRESS_INTERVAL matches.
PROGRESS_INTERVAL = 100

# Output directories are created at import time, before anything writes to them.
for _out_dir in (CALIBRATION_DIR, REPORTS_DIR):
    os.makedirs(_out_dir, exist_ok=True)
del _out_dir
|
||||
|
||||
# Odds lookup: Turkish bookmaker category name -> {selection name -> internal feature key}.
# Insertion order of the categories is kept as in the original table.
ODDS_CATEGORY_MAP = {
    # Full-time markets
    "Maç Sonucu": {"1": "odds_ms_h", "X": "odds_ms_d", "2": "odds_ms_a"},
    "1,5 Alt/Üst": {"Üst": "odds_ou15_o", "Alt": "odds_ou15_u"},
    "2,5 Alt/Üst": {"Üst": "odds_ou25_o", "Alt": "odds_ou25_u"},
    "3,5 Alt/Üst": {"Üst": "odds_ou35_o", "Alt": "odds_ou35_u"},
    "0,5 Alt/Üst": {"Üst": "odds_ou05_o", "Alt": "odds_ou05_u"},
    "Karşılıklı Gol": {"Var": "odds_btts_y", "Yok": "odds_btts_n"},
    # First-half markets
    "1. Yarı Sonucu": {"1": "odds_ht_ms_h", "X": "odds_ht_ms_d", "2": "odds_ht_ms_a"},
    "1. Yarı 0,5 Alt/Üst": {"Üst": "odds_ht_ou05_o", "Alt": "odds_ht_ou05_u"},
    "1. Yarı 1,5 Alt/Üst": {"Üst": "odds_ht_ou15_o", "Alt": "odds_ht_ou15_u"},
}
|
||||
|
||||
# League names that get their own breakdown in the report
# (per the original note, these are later matched by league_id).
TOP5_LEAGUE_NAMES = {"Premier League", "La Liga", "Bundesliga", "Serie A", "Ligue 1"}
|
||||
|
||||
# ============================================================================
|
||||
# STEP 1 — Load qualified league IDs
|
||||
# ============================================================================
|
||||
|
||||
def load_qualified_leagues() -> List[str]:
    """Load the qualified league IDs from ``QUALIFIED_LEAGUES_PATH``.

    Returns:
        Every entry of the JSON document, converted to ``str``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    path = os.path.abspath(QUALIFIED_LEAGUES_PATH)
    # JSON is UTF-8 by specification; do not depend on the locale's default encoding.
    with open(path, "r", encoding="utf-8") as f:
        leagues = json.load(f)
    print(f"[Step 1] Loaded {len(leagues)} qualified league IDs.")
    return [str(lid) for lid in leagues]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# STEP 1b — Fetch matches + pre-computed features in batch
|
||||
# ============================================================================
|
||||
|
||||
def fetch_matches(conn, league_ids: List[str]) -> pd.DataFrame:
    """Fetch finished matches with pre-computed features in a single query.

    Joins ``matches`` with ``football_ai_features`` and the league name, and
    keeps only matches that have at least one football row in
    ``odd_categories``. The newest ``MAX_MATCHES`` matches (by ``mst_utc``)
    are returned.

    Args:
        conn: Open database connection (psycopg2-style; ``cursor()`` must
            accept ``cursor_factory``).
        league_ids: League IDs to restrict the query to.

    Returns:
        DataFrame with one row per match; feature columns are aliased to the
        internal feature names in the SELECT list.
    """
    print("[Step 1b] Fetching matches with pre-computed features and odds ...")

    query = """
        SELECT
            m.id AS match_id,
            m.league_id,
            l.name AS league_name,
            m.score_home,
            m.score_away,
            m.mst_utc,
            -- From football_ai_features
            f.home_elo AS home_overall_elo,
            f.away_elo AS away_overall_elo,
            f.elo_diff,
            f.home_home_elo,
            f.away_away_elo,
            f.home_form_elo,
            f.away_form_elo,
            f.home_goals_avg_5 AS home_goals_avg,
            f.away_goals_avg_5 AS away_goals_avg,
            f.home_conceded_avg_5 AS home_conceded_avg,
            f.away_conceded_avg_5 AS away_conceded_avg,
            f.home_clean_sheet_rate,
            f.away_clean_sheet_rate,
            f.home_scoring_rate,
            f.away_scoring_rate,
            f.home_win_streak AS home_winning_streak,
            f.away_win_streak AS away_winning_streak,
            f.home_avg_possession,
            f.away_avg_possession,
            f.home_avg_shots_on_target,
            f.away_avg_shots_on_target,
            f.home_shot_conversion,
            f.away_shot_conversion,
            f.home_avg_corners,
            f.away_avg_corners,
            f.h2h_total AS h2h_total_matches,
            f.h2h_home_win_rate,
            f.h2h_avg_goals,
            f.h2h_over25_rate,
            f.h2h_btts_rate,
            f.league_avg_goals,
            f.league_home_win_pct AS league_home_win_rate,
            f.league_over25_pct AS league_ou25_rate,
            f.referee_avg_cards AS referee_cards_total,
            f.referee_home_bias,
            f.referee_avg_goals,
            f.missing_players_impact AS home_missing_impact,
            f.implied_home,
            f.implied_draw,
            f.implied_away
        FROM matches m
        JOIN football_ai_features f ON f.match_id = m.id
        -- Only matches that have odds data
        JOIN (SELECT DISTINCT match_id FROM odd_categories WHERE sport = 'football') oc
            ON oc.match_id = m.id
        LEFT JOIN leagues l ON l.id = m.league_id
        WHERE m.status = 'FT'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
          AND m.league_id = ANY(%s)
        ORDER BY m.mst_utc DESC
        LIMIT %s
    """

    # The context manager closes the cursor even when execute()/fetchall() raises.
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(query, (league_ids, MAX_MATCHES))
        rows = cur.fetchall()

    df = pd.DataFrame([dict(r) for r in rows])
    print(f"[Step 1b] Fetched {len(df)} matches with features + odds coverage.")
    return df
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# STEP 1c — Fetch all odds for the matched match IDs in one query
|
||||
# ============================================================================
|
||||
|
||||
def fetch_odds_bulk(conn, match_ids: List[str]) -> Dict[str, Dict[str, float]]:
    """Fetch the odds of all known categories for the given matches in one query.

    Only categories and selections listed in ``ODDS_CATEGORY_MAP`` are kept.
    Values must look like a plain decimal number and be greater than 1.0.

    Args:
        conn: Open database connection (psycopg2-style; ``cursor()`` must
            accept ``cursor_factory``).
        match_ids: IDs of the matches to load odds for.

    Returns:
        ``{match_id: {feature_key: odd_value, ...}}``. Matches without any
        usable odd are absent from the result.
    """
    print(f"[Step 1c] Fetching odds for {len(match_ids)} matches ...")

    # Raw string: the regex contains `\.`, which is an invalid escape sequence
    # in a normal string literal (warning today, error in future Python versions).
    query = r"""
        SELECT oc.match_id, oc.name AS cat_name, os.name AS sel_name, os.odd_value
        FROM odd_categories oc
        JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
        WHERE oc.match_id = ANY(%s)
          AND oc.name = ANY(%s)
          AND oc.sport = 'football'
          AND os.odd_value IS NOT NULL
          AND os.odd_value ~ '^[0-9]+(\.[0-9]+)?$'
    """

    # The context manager closes the cursor even when execute()/fetchall() raises.
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(query, (match_ids, list(ODDS_CATEGORY_MAP)))
        rows = cur.fetchall()

    # Build nested dict: match_id -> {feature_key -> value}
    odds_map: Dict[str, Dict[str, float]] = defaultdict(dict)
    for r in rows:
        feat_key = ODDS_CATEGORY_MAP.get(r["cat_name"], {}).get(r["sel_name"])
        if feat_key is None:
            continue
        try:
            val = float(r["odd_value"])
        except (TypeError, ValueError):
            # Deliberate best-effort: an unparseable odd is simply ignored.
            continue
        if val > 1.0:
            # Keep the first value encountered for each feature.
            # NOTE(review): the query has no ORDER BY, so "first" is whatever
            # order the database returns — confirm this is acceptable.
            odds_map[r["match_id"]].setdefault(feat_key, val)

    print(f"[Step 1c] Odds loaded for {len(odds_map)} matches.")
    return dict(odds_map)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# STEP 2 — Build 114-feature vector per match
|
||||
# ============================================================================
|
||||
|
||||
def load_feature_cols() -> List[str]:
    """Load the V25 feature column list from ``models/v25/feature_cols.json``.

    Returns:
        The parsed JSON document (annotated as a list of column names).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    path = os.path.join(AI_ENGINE_DIR, "models", "v25", "feature_cols.json")
    # JSON is UTF-8 by specification; do not depend on the locale's default encoding.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
|
||||
|
||||
|
||||
def build_feature_vector(
    match_row: pd.Series,
    odds: Dict[str, float],
    feature_cols: List[str],
) -> Dict[str, float]:
    """Construct the full feature dict for one match.

    Every name in ``feature_cols`` starts at 0.0 and is overwritten when a
    value is available from the match row, the odds, or a derived formula.

    Args:
        match_row: One row of the match DataFrame; columns are read by name.
        odds: ``{feature_key: odd_value}`` for this match.
        feature_cols: Feature names the model expects.

    Returns:
        Feature dict. ``form_elo_diff`` and the ``implied_*`` keys are written
        whenever their inputs are present, even if they are not listed in
        ``feature_cols``.
    """
    from datetime import timezone  # local import: the module imports only `datetime`

    feat: Dict[str, float] = {col: 0.0 for col in feature_cols}

    # ---- Direct columns from match row (row column name == feature name) ----
    direct_cols = (
        "home_overall_elo", "away_overall_elo", "elo_diff",
        "home_home_elo", "away_away_elo",
        "home_form_elo", "away_form_elo",
        "home_goals_avg", "away_goals_avg",
        "home_conceded_avg", "away_conceded_avg",
        "home_clean_sheet_rate", "away_clean_sheet_rate",
        "home_scoring_rate", "away_scoring_rate",
        "home_winning_streak", "away_winning_streak",
        "home_avg_possession", "away_avg_possession",
        "home_avg_shots_on_target", "away_avg_shots_on_target",
        "home_shot_conversion", "away_shot_conversion",
        "home_avg_corners", "away_avg_corners",
        "h2h_total_matches", "h2h_home_win_rate", "h2h_avg_goals",
        "h2h_over25_rate", "h2h_btts_rate",
        "league_avg_goals", "league_home_win_rate", "league_ou25_rate",
        "referee_cards_total", "referee_home_bias", "referee_avg_goals",
        "home_missing_impact",
        "implied_home", "implied_draw", "implied_away",
    )
    for col in direct_cols:
        if col in feat and col in match_row.index:
            val = match_row.get(col)
            # Missing values (None / NaN) keep the 0.0 default.
            if val is not None and not (isinstance(val, float) and np.isnan(val)):
                feat[col] = float(val)

    # ---- Derived elo features ----
    if feat.get("home_form_elo", 0) and feat.get("away_form_elo", 0):
        feat["form_elo_diff"] = feat["home_form_elo"] - feat["away_form_elo"]

    # ---- Odds features from relational tables ----
    odds_features = (
        "odds_ms_h", "odds_ms_d", "odds_ms_a",
        "odds_ht_ms_h", "odds_ht_ms_d", "odds_ht_ms_a",
        "odds_ou05_o", "odds_ou05_u",
        "odds_ou15_o", "odds_ou15_u",
        "odds_ou25_o", "odds_ou25_u",
        "odds_ou35_o", "odds_ou35_u",
        "odds_ht_ou05_o", "odds_ht_ou05_u",
        "odds_ht_ou15_o", "odds_ht_ou15_u",
        "odds_btts_y", "odds_btts_n",
    )
    for ok in odds_features:
        if ok in odds:
            feat[ok] = odds[ok]
            presence_key = f"{ok}_present"
            if presence_key in feat:
                feat[presence_key] = 1.0

    # Implied probabilities: when all three match-result odds are available,
    # the margin-free values computed here overwrite whatever the row provided.
    if feat.get("odds_ms_h", 0) > 1 and feat.get("odds_ms_d", 0) > 1 and feat.get("odds_ms_a", 0) > 1:
        raw_h = 1.0 / feat["odds_ms_h"]
        raw_d = 1.0 / feat["odds_ms_d"]
        raw_a = 1.0 / feat["odds_ms_a"]
        total = raw_h + raw_d + raw_a
        if total > 0:
            feat["implied_home"] = raw_h / total
            feat["implied_draw"] = raw_d / total
            feat["implied_away"] = raw_a / total

    # ---- Derived match metadata ----
    mst = match_row.get("mst_utc")
    if mst is not None:
        try:
            # Stored as epoch ms. Timezone-aware replacement for the deprecated
            # utcfromtimestamp(); the UTC month is the same.
            dt = datetime.fromtimestamp(int(mst) / 1000, tz=timezone.utc)
        except (TypeError, ValueError, OverflowError, OSError):
            # Unusable timestamp (e.g. NaN): calendar features keep their 0.0 default.
            dt = None
        if dt is not None:
            if "match_month" in feat:
                feat["match_month"] = float(dt.month)
            # Season markers: Aug-Oct = start, Apr-May = end.
            # NOTE(review): the V27 training extraction in this changeset uses
            # months (7, 8, 9) / (5, 6) — confirm which definition the model
            # was trained with.
            if "is_season_start" in feat:
                feat["is_season_start"] = 1.0 if dt.month in (8, 9, 10) else 0.0
            if "is_season_end" in feat:
                feat["is_season_end"] = 1.0 if dt.month in (4, 5) else 0.0

    # ---- Interaction features ----
    # NOTE(review): form_momentum_interaction and elo_form_consistency use
    # different formulas here than in the V27 training extraction in this
    # changeset, and the momentum scores are never filled in by this function
    # (they stay 0.0) — confirm against the trained model.
    if "attack_vs_defense_home" in feat:
        feat["attack_vs_defense_home"] = feat.get("home_goals_avg", 0) - feat.get("away_conceded_avg", 0)
    if "attack_vs_defense_away" in feat:
        feat["attack_vs_defense_away"] = feat.get("away_goals_avg", 0) - feat.get("home_conceded_avg", 0)
    if "form_momentum_interaction" in feat:
        feat["form_momentum_interaction"] = (
            feat.get("home_momentum_score", 0) * feat.get("home_goals_avg", 0)
            - feat.get("away_momentum_score", 0) * feat.get("away_goals_avg", 0)
        )
    if "elo_form_consistency" in feat:
        feat["elo_form_consistency"] = feat.get("elo_diff", 0) * feat.get("home_goals_avg", 0)

    return feat
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# STEP 3 — Run V25 predictions
|
||||
# ============================================================================
|
||||
|
||||
def load_predictor():
    """Return the V25 predictor obtained from ``get_v25_predictor()``.

    The import happens inside the function, so ``models.v25_ensemble`` is only
    loaded when a predictor is actually requested. Progress is printed before
    and after the call.
    """
    from models.v25_ensemble import get_v25_predictor

    print("[Step 3] Loading V25 predictor ...")
    predictor = get_v25_predictor()
    print("[Step 3] V25 predictor ready.")
    return predictor
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# STEP 4 — Compute actual outcomes from scores
|
||||
# ============================================================================
|
||||
|
||||
def compute_actuals(score_home: int, score_away: int) -> Dict[str, Any]:
    """Derive the actual market outcomes from a final score.

    Args:
        score_home: Goals scored by the home team.
        score_away: Goals scored by the away team.

    Returns:
        A dict with four keys:
          * ``ms_actual``   - "1" (home win), "X" (draw) or "2" (away win)
          * ``ou15_actual`` - "Over" when at least 2 goals were scored, else "Under"
          * ``ou25_actual`` - "Over" when at least 3 goals were scored, else "Under"
          * ``btts_actual`` - "Yes" when both teams scored, else "No"
    """
    if score_home > score_away:
        match_result = "1"
    elif score_home == score_away:
        match_result = "X"
    else:
        match_result = "2"

    goals = score_home + score_away
    both_scored = score_home > 0 and score_away > 0

    outcome: Dict[str, Any] = {}
    outcome["ms_actual"] = match_result
    outcome["ou15_actual"] = "Over" if goals >= 2 else "Under"
    outcome["ou25_actual"] = "Over" if goals >= 3 else "Under"
    outcome["btts_actual"] = "Yes" if both_scored else "No"
    return outcome
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# STEP 5 — Accuracy helpers
|
||||
# ============================================================================
|
||||
|
||||
def confidence_band(prob: float) -> str:
    """Map a probability to the label of its confidence band.

    Bands are half-open on the right: ``<50%`` covers values below 0.50,
    ``50-65%`` covers [0.50, 0.65), ``65-75%`` covers [0.65, 0.75) and
    everything else falls into ``75%+``.
    """
    # Exclusive upper bounds, checked in ascending order.
    upper_bounds = (
        (0.50, "<50%"),
        (0.65, "50-65%"),
        (0.75, "65-75%"),
    )
    for upper, label in upper_bounds:
        if prob < upper:
            return label
    return "75%+"
|
||||
|
||||
|
||||
def pick_from_ms(home_prob: float, draw_prob: float, away_prob: float) -> Tuple[str, float]:
    """Return the most probable match-result pick and its probability.

    The pick is "1" (home), "X" (draw) or "2" (away). On ties the earlier
    option in that order wins, because a later option only replaces the
    current best when it is strictly greater.
    """
    best_label, best_prob = "1", home_prob
    for label, prob in (("X", draw_prob), ("2", away_prob)):
        if prob > best_prob:
            best_label, best_prob = label, prob
    return best_label, best_prob
|
||||
|
||||
|
||||
def pick_from_binary(yes_prob: float, no_prob: float, yes_label: str, no_label: str) -> Tuple[str, float]:
    """Pick the more probable side of a two-way market.

    Returns ``(label, probability)`` of the chosen side; the "yes" side is
    chosen when the two probabilities are equal.
    """
    yes_wins = yes_prob >= no_prob
    return (yes_label, yes_prob) if yes_wins else (no_label, no_prob)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# MAIN
|
||||
# ============================================================================
|
||||
|
||||
def main():
    """Run the V25 backtest end to end and train the calibration models.

    Steps, as implemented below:
      1. Load the qualified league ids, then fetch their matches and odds
         from the database (the connection is always closed afterwards).
      2. Build a feature vector per match and run the V25 predictor for the
         MS, OU15, OU25 and BTTS markets.
      3. Compare each pick with the actual outcome derived from the final
         score and print accuracy by confidence band, league group and pick
         direction.
      4. Train one calibration model per probability series through
         ``Calibrator.train_calibration`` (series with fewer than 100
         samples are skipped).
      5. Write the combined report to ``backtest_results.json`` under
         ``REPORTS_DIR`` and print a summary table.

    Matches whose scores cannot be parsed, or whose prediction raises, are
    skipped. The first few prediction failures are printed so a systematic
    problem is not hidden behind a bare skip counter.
    """
    t_start = time.time()
    print("=" * 70)
    print(" V25 Backtest + Calibration Training")
    print(f" Run at: {datetime.utcnow().isoformat()} UTC")
    print("=" * 70)

    # ------------------------------------------------------------------
    # Step 1 — Load qualified leagues
    # ------------------------------------------------------------------
    league_ids = load_qualified_leagues()

    # ------------------------------------------------------------------
    # Step 1b/1c — Fetch matches with features, then their odds in bulk
    # ------------------------------------------------------------------
    conn = psycopg2.connect(get_clean_dsn())
    try:
        matches_df = fetch_matches(conn, league_ids)

        if matches_df.empty:
            print("[ERROR] No matches found. Check DB connection and league IDs.")
            return

        match_ids = matches_df["match_id"].tolist()
        odds_map = fetch_odds_bulk(conn, match_ids)
    finally:
        conn.close()

    # ------------------------------------------------------------------
    # Step 2 — Build feature vectors
    # ------------------------------------------------------------------
    print(f"\n[Step 2] Building feature vectors for {len(matches_df)} matches ...")
    feature_cols = load_feature_cols()

    # ------------------------------------------------------------------
    # Step 3 — Load V25 predictor
    # ------------------------------------------------------------------
    predictor = load_predictor()

    # ------------------------------------------------------------------
    # Main loop — predict each match, collect results
    # ------------------------------------------------------------------
    print("\n[Loop] Running predictions ...")

    # Storage for calibration training: per series, (probability, 0/1 outcome)
    calib_data: Dict[str, List[Tuple[float, int]]] = {
        "ms_home": [],  # (prob, 1 if home win)
        "ms_draw": [],
        "ms_away": [],
        "ou15": [],
        "ou25": [],
        "btts": [],
    }

    # Storage for accuracy reporting
    records = []

    skipped = 0
    processed = 0
    prediction_errors = 0
    max_reported_errors = 5  # keep the log readable when every match fails

    for _, row in matches_df.iterrows():
        match_id = row["match_id"]
        score_home = row.get("score_home")
        score_away = row.get("score_away")

        # Validate scores
        try:
            score_home = int(score_home)
            score_away = int(score_away)
        except (TypeError, ValueError):
            skipped += 1
            continue

        # Build features
        match_odds = odds_map.get(match_id, {})
        feat = build_feature_vector(row, match_odds, feature_cols)

        # Run predictions
        try:
            home_prob, draw_prob, away_prob = predictor.predict_ms(feat)
            over25_prob, under25_prob = predictor.predict_ou25(feat)
            btts_yes_prob, btts_no_prob = predictor.predict_btts(feat)

            # ou15 is loaded via predict_market (returns np.ndarray for binary)
            ou15_arr = predictor.predict_market("ou15", feat)
            if ou15_arr is not None and len(ou15_arr) > 0:
                over15_prob = float(ou15_arr[0])
                under15_prob = 1.0 - over15_prob
            else:
                # No OU15 output: fall back to an uninformative 50/50 split.
                over15_prob = 0.5
                under15_prob = 0.5

        except Exception as e:
            # Best effort: one bad match must not abort the whole backtest,
            # but the failure is surfaced instead of being swallowed.
            skipped += 1
            prediction_errors += 1
            if prediction_errors <= max_reported_errors:
                print(f"  [WARN] Prediction failed for match {match_id}: "
                      f"{type(e).__name__}: {e}")
            continue

        # Compute actuals
        actuals = compute_actuals(score_home, score_away)

        # MS picks
        ms_pick, ms_conf = pick_from_ms(home_prob, draw_prob, away_prob)
        ms_correct = int(ms_pick == actuals["ms_actual"])

        # OU15
        ou15_pick, ou15_conf = pick_from_binary(over15_prob, under15_prob, "Over", "Under")
        ou15_correct = int(ou15_pick == actuals["ou15_actual"])

        # OU25
        ou25_pick, ou25_conf = pick_from_binary(over25_prob, under25_prob, "Over", "Under")
        ou25_correct = int(ou25_pick == actuals["ou25_actual"])

        # BTTS
        btts_pick, btts_conf = pick_from_binary(btts_yes_prob, btts_no_prob, "Yes", "No")
        btts_correct = int(btts_pick == actuals["btts_actual"])

        # Collect calibration data
        calib_data["ms_home"].append((home_prob, int(actuals["ms_actual"] == "1")))
        calib_data["ms_draw"].append((draw_prob, int(actuals["ms_actual"] == "X")))
        calib_data["ms_away"].append((away_prob, int(actuals["ms_actual"] == "2")))
        calib_data["ou15"].append((over15_prob, int(actuals["ou15_actual"] == "Over")))
        calib_data["ou25"].append((over25_prob, int(actuals["ou25_actual"] == "Over")))
        calib_data["btts"].append((btts_yes_prob, int(actuals["btts_actual"] == "Yes")))

        # Determine league group: leagues outside TOP5_LEAGUE_NAMES are pooled
        league_name = str(row.get("league_name", "Other") or "Other")
        league_group = league_name if league_name in TOP5_LEAGUE_NAMES else "Other"

        records.append({
            "match_id": match_id,
            "league_name": league_name,
            "league_group": league_group,
            "score_home": score_home,
            "score_away": score_away,
            # MS
            "ms_pick": ms_pick,
            "ms_actual": actuals["ms_actual"],
            "ms_conf": ms_conf,
            "ms_conf_band": confidence_band(ms_conf),
            "ms_correct": ms_correct,
            "ms_home_prob": home_prob,
            "ms_draw_prob": draw_prob,
            "ms_away_prob": away_prob,
            # OU15
            "ou15_pick": ou15_pick,
            "ou15_actual": actuals["ou15_actual"],
            "ou15_conf": ou15_conf,
            "ou15_conf_band": confidence_band(ou15_conf),
            "ou15_correct": ou15_correct,
            "ou15_over_prob": over15_prob,
            # OU25
            "ou25_pick": ou25_pick,
            "ou25_actual": actuals["ou25_actual"],
            "ou25_conf": ou25_conf,
            "ou25_conf_band": confidence_band(ou25_conf),
            "ou25_correct": ou25_correct,
            "ou25_over_prob": over25_prob,
            # BTTS
            "btts_pick": btts_pick,
            "btts_actual": actuals["btts_actual"],
            "btts_conf": btts_conf,
            "btts_conf_band": confidence_band(btts_conf),
            "btts_correct": btts_correct,
            "btts_yes_prob": btts_yes_prob,
        })

        processed += 1
        if processed % PROGRESS_INTERVAL == 0:
            elapsed = time.time() - t_start
            print(f" [Progress] {processed}/{len(matches_df)} matches | "
                  f"skipped={skipped} | elapsed={elapsed:.1f}s")

    print(f"\n[Loop] Done. Processed={processed}, Skipped={skipped}")
    if prediction_errors:
        print(f"[Loop] {prediction_errors} of the skipped matches failed during prediction "
              f"(first {min(prediction_errors, max_reported_errors)} shown above).")

    if not records:
        print("[ERROR] No records to analyze. Exiting.")
        return

    results_df = pd.DataFrame(records)

    # ------------------------------------------------------------------
    # Step 5 — Accuracy report
    # ------------------------------------------------------------------
    print("\n" + "=" * 70)
    print(" ACCURACY REPORT")
    print("=" * 70)

    # (label, correct column, confidence column, band column, pick column)
    markets = [
        ("MS", "ms_correct", "ms_conf", "ms_conf_band", "ms_pick"),
        ("OU15", "ou15_correct", "ou15_conf", "ou15_conf_band", "ou15_pick"),
        ("OU25", "ou25_correct", "ou25_conf", "ou25_conf_band", "ou25_pick"),
        ("BTTS", "btts_correct", "btts_conf", "btts_conf_band", "btts_pick"),
    ]

    summary: Dict[str, Any] = {
        "generated_at": datetime.utcnow().isoformat(),
        "matches_processed": processed,
        "matches_skipped": skipped,
        "markets": {},
    }

    for market_label, correct_col, conf_col, band_col, pick_col in markets:
        print(f"\n--- {market_label} ---")
        sub = results_df[[correct_col, conf_col, band_col, pick_col, "league_group"]].copy()
        total = len(sub)
        overall_acc = sub[correct_col].mean() * 100
        print(f" Overall accuracy: {overall_acc:.1f}% ({sub[correct_col].sum()}/{total})")

        market_summary = {
            "overall_accuracy": round(overall_acc, 2),
            "total_matches": total,
            "by_confidence_band": {},
            "by_league": {},
            "by_pick_direction": {},
        }

        # By confidence band
        print(f" By confidence band:")
        bands = ["<50%", "50-65%", "65-75%", "75%+"]
        for band in bands:
            mask = sub[band_col] == band
            n = mask.sum()
            if n > 0:
                acc = sub.loc[mask, correct_col].mean() * 100
                mean_conf = sub.loc[mask, conf_col].mean() * 100
                print(f" {band:8s}: {acc:5.1f}% acc | {n:4d} matches | "
                      f"mean_conf={mean_conf:.1f}%")
                market_summary["by_confidence_band"][band] = {
                    "accuracy": round(acc, 2),
                    "count": int(n),
                    "mean_confidence": round(mean_conf, 2),
                }

        # By league group
        print(f" By league:")
        league_groups = list(results_df["league_group"].unique())
        # Sort: named leagues first, then Other
        named = sorted([g for g in league_groups if g != "Other"])
        ordered = named + (["Other"] if "Other" in league_groups else [])
        for lg in ordered:
            mask = sub["league_group"] == lg
            n = mask.sum()
            if n > 0:
                acc = sub.loc[mask, correct_col].mean() * 100
                print(f" {lg[:20]:20s}: {acc:5.1f}% ({n} matches)")
                market_summary["by_league"][lg] = {
                    "accuracy": round(acc, 2),
                    "count": int(n),
                }

        # By pick direction
        print(f" By pick direction:")
        for pick_val in sorted(sub[pick_col].unique()):
            mask = sub[pick_col] == pick_val
            n = mask.sum()
            if n > 0:
                acc = sub.loc[mask, correct_col].mean() * 100
                mean_conf = sub.loc[mask, conf_col].mean() * 100
                print(f" {pick_val:8s}: {acc:5.1f}% acc | {n:4d} matches | "
                      f"mean_conf={mean_conf:.1f}%")
                market_summary["by_pick_direction"][pick_val] = {
                    "accuracy": round(acc, 2),
                    "count": int(n),
                    "mean_confidence": round(mean_conf, 2),
                }

        summary["markets"][market_label] = market_summary

    # ------------------------------------------------------------------
    # Step 6 — Train calibration models
    # ------------------------------------------------------------------
    print("\n" + "=" * 70)
    print(" CALIBRATION TRAINING")
    print("=" * 70)

    calibrator = Calibrator()
    calibration_results: Dict[str, Dict] = {}

    # One calibration model per probability series collected in the loop.
    for market_key, pairs in calib_data.items():
        if len(pairs) < 100:
            print(f"[Calib] {market_key}: only {len(pairs)} samples — skipping.")
            continue

        probs = np.array([p for p, _ in pairs])
        actuals_bin = np.array([a for _, a in pairs])

        # Build a tiny DataFrame to use Calibrator.train_calibration
        calib_df = pd.DataFrame({
            "prob": probs,
            "actual": actuals_bin,
        })

        metrics = calibrator.train_calibration(
            df=calib_df,
            market=market_key,
            prob_col="prob",
            actual_col="actual",
            min_samples=100,
            save=True,
        )
        calibration_results[market_key] = metrics.to_dict()
        print(f" [Calib] {market_key}: Brier={metrics.brier_score:.4f} | "
              f"ECE={metrics.calibration_error:.4f} | n={metrics.sample_count}")

    # ------------------------------------------------------------------
    # Step 7 — Save results
    # ------------------------------------------------------------------
    output_path = os.path.join(REPORTS_DIR, "backtest_results.json")
    full_report = {
        **summary,
        "calibration": calibration_results,
        "runtime_seconds": round(time.time() - t_start, 1),
    }

    with open(output_path, "w") as f:
        json.dump(full_report, f, indent=2)
    print(f"\n[Step 7] Report saved to {output_path}")

    # ------------------------------------------------------------------
    # Final summary table
    # ------------------------------------------------------------------
    print("\n" + "=" * 70)
    print(" FINAL SUMMARY TABLE")
    print("=" * 70)
    print(f"{'Market':<8} {'Overall Acc':>12} {'Matches':>8} "
          f"{'Best Band (acc)':>18}")
    print("-" * 70)
    for market_label, _, _, _, _ in markets:
        ms = summary["markets"].get(market_label, {})
        overall = ms.get("overall_accuracy", 0)
        total_m = ms.get("total_matches", 0)
        bands_d = ms.get("by_confidence_band", {})
        # Find best accuracy band with >= 50 matches
        best_band = "-"
        best_acc = 0.0
        for band, bdata in bands_d.items():
            if bdata["count"] >= 50 and bdata["accuracy"] > best_acc:
                best_acc = bdata["accuracy"]
                best_band = f"{band} ({best_acc:.1f}%)"
        print(f"{market_label:<8} {overall:>11.1f}% {total_m:>8d} {best_band:>18s}")

    elapsed_total = time.time() - t_start
    print(f"\nTotal runtime: {elapsed_total:.1f}s")
    print("=" * 70)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,459 @@
|
||||
"""
|
||||
League-Specific Model Trainer
|
||||
==============================
|
||||
Trains dedicated XGBoost models + isotonic calibration for each qualified league.
|
||||
|
||||
Tiers:
|
||||
- >=500 FT matches → full XGBoost (12 markets) + calibration
|
||||
- 100-499 matches → isotonic calibration only (over general V25 predictions)
|
||||
- <100 matches → skipped
|
||||
|
||||
Usage:
|
||||
python scripts/train_league_models.py
|
||||
python scripts/train_league_models.py --min-samples 300 # stricter threshold
|
||||
python scripts/train_league_models.py --colab # Colab-friendly output
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import pickle
|
||||
import argparse
|
||||
import time
|
||||
import warnings
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import xgboost as xgb
|
||||
from sklearn.isotonic import IsotonicRegression
|
||||
from sklearn.metrics import accuracy_score, log_loss
|
||||
|
||||
warnings.filterwarnings("ignore")
|
||||
optuna_available = False
|
||||
try:
|
||||
import optuna
|
||||
optuna.logging.set_verbosity(optuna.logging.WARNING)
|
||||
optuna_available = True
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
DATA_PATH = os.path.join(AI_ENGINE_DIR, "data", "training_data.csv")
|
||||
MODELS_DIR = os.path.join(AI_ENGINE_DIR, "models", "league_specific")
|
||||
REPORTS_DIR = os.path.join(AI_ENGINE_DIR, "reports", "league_models")
|
||||
QUALIFIED_LEAGUES_PATH = os.path.join(os.path.dirname(AI_ENGINE_DIR), "qualified_leagues.json")
|
||||
|
||||
os.makedirs(MODELS_DIR, exist_ok=True)
|
||||
os.makedirs(REPORTS_DIR, exist_ok=True)
|
||||
|
||||
# ─── Markets ────────────────────────────────────────────────────────
|
||||
# Per-market training configuration, keyed by market code.
#   label       - name of the target column in the training data
#   num_class   - number of label classes; values above 2 select the
#                 multi-class XGBoost parameters at training time
#   min_samples - minimum number of usable training rows for the market
MARKETS = {
    "MS": {"label": "label_ms", "num_class": 3, "min_samples": 200},
    "OU15": {"label": "label_ou15", "num_class": 2, "min_samples": 150},
    "OU25": {"label": "label_ou25", "num_class": 2, "min_samples": 150},
    "OU35": {"label": "label_ou35", "num_class": 2, "min_samples": 150},
    "BTTS": {"label": "label_btts", "num_class": 2, "min_samples": 150},
    "HT": {"label": "label_ht_result", "num_class": 3, "min_samples": 150},
    "HT_OU05": {"label": "label_ht_ou05", "num_class": 2, "min_samples": 150},
    "HT_OU15": {"label": "label_ht_ou15", "num_class": 2, "min_samples": 150},
    "HTFT": {"label": "label_ht_ft", "num_class": 9, "min_samples": 300},
    "OE": {"label": "label_odd_even", "num_class": 2, "min_samples": 150},
    "CARDS": {"label": "label_cards_ou45", "num_class": 2, "min_samples": 150},
    "HANDICAP": {"label": "label_handicap_ms", "num_class": 3, "min_samples": 200},
}

# Columns of training_data.csv that are NOT features (metadata, raw scores
# and label columns); every other column is treated as a feature.
SKIP_COLS = {
    "match_id", "home_team_id", "away_team_id", "league_id", "mst_utc",
    "score_home", "score_away", "total_goals", "ht_score_home", "ht_score_away",
    "ht_total_goals",
    "label_ms", "label_ou05", "label_ou15", "label_ou25", "label_ou35",
    "label_btts", "label_ht_result", "label_ht_ou05", "label_ht_ou15",
    "label_ht_ft", "label_odd_even", "label_yellow_cards", "label_cards_ou45",
    "label_handicap_ms",
}

# XGBoost defaults — fast, no Optuna
# Used for markets with two classes.
XGB_PARAMS_BINARY = {
    "objective": "binary:logistic",
    "eval_metric": "logloss",
    "max_depth": 4,
    "eta": 0.05,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "min_child_weight": 5,
    "gamma": 0.1,
    "reg_lambda": 1.0,
    "verbosity": 0,
    "seed": 42,
    "nthread": -1,
}

# Same settings as the binary case with the objective and metric switched to
# their multi-class variants; "num_class" is added per market when training.
XGB_PARAMS_MULTI = {
    **XGB_PARAMS_BINARY,
    "objective": "multi:softprob",
    "eval_metric": "mlogloss",
}
|
||||
|
||||
|
||||
def load_data() -> pd.DataFrame:
    """Read the training CSV at ``DATA_PATH`` and print its dimensions."""
    print(f"Loading training data from {DATA_PATH} ...")
    frame = pd.read_csv(DATA_PATH, low_memory=False)
    n_rows, n_cols = frame.shape
    print(f" {n_rows:,} rows, {n_cols} columns")
    return frame
|
||||
|
||||
|
||||
def get_feature_cols(df: pd.DataFrame) -> list:
    """Return the columns of ``df`` that are not listed in ``SKIP_COLS``.

    Column order follows ``df.columns``.
    """
    feature_names = []
    for column in df.columns:
        if column in SKIP_COLS:
            continue
        feature_names.append(column)
    return feature_names
|
||||
|
||||
|
||||
def load_qualified_leagues() -> list:
    """Load the qualified league list from ``QUALIFIED_LEAGUES_PATH``.

    Returns the parsed JSON content when the file exists, otherwise an
    empty list.
    """
    if not os.path.exists(QUALIFIED_LEAGUES_PATH):
        # fallback: all leagues in CSV
        return []
    with open(QUALIFIED_LEAGUES_PATH) as handle:
        return json.load(handle)
|
||||
|
||||
|
||||
def train_xgb_market(
    X_train: np.ndarray,
    y_train: np.ndarray,
    X_test: np.ndarray,
    y_test: np.ndarray,
    num_class: int,
    feature_cols: list,
) -> tuple:
    """Train XGBoost for one market. Returns (model, accuracy, logloss).

    Args:
        X_train, y_train: Training features and integer class labels.
        X_test, y_test: Hold-out features and labels. They are used both as
            the early-stopping validation set and for the reported metrics.
        num_class: Number of classes; values above 2 select the multi-class
            parameters and add ``num_class`` to them.
        feature_cols: Feature names attached to both DMatrix objects.

    Returns:
        ``(model, accuracy, logloss)`` where the metrics are computed on the
        test split.
    """
    is_multiclass = num_class > 2
    params = dict(XGB_PARAMS_MULTI if is_multiclass else XGB_PARAMS_BINARY)
    if is_multiclass:
        params["num_class"] = num_class

    dtrain = xgb.DMatrix(X_train, label=y_train, feature_names=feature_cols)
    dtest = xgb.DMatrix(X_test, label=y_test, feature_names=feature_cols)

    model = xgb.train(
        params,
        dtrain,
        num_boost_round=300,
        evals=[(dtest, "val")],
        early_stopping_rounds=30,
        verbose_eval=False,
    )

    raw = model.predict(dtest)
    # Pass the full label set explicitly: without it log_loss raises a
    # ValueError whenever the test split happens to miss one of the classes
    # (likely for many-class markets with a small test set). Labels are
    # expected to be encoded as 0..num_class-1, as the XGBoost objectives
    # used here require.
    if is_multiclass:
        probs = raw.reshape(-1, num_class)
        preds = np.argmax(probs, axis=1)
        ll = log_loss(y_test, probs, labels=np.arange(num_class))
    else:
        preds = (raw >= 0.5).astype(int)
        ll = log_loss(y_test, raw, labels=[0, 1])

    acc = accuracy_score(y_test, preds)
    return model, acc, ll
|
||||
|
||||
|
||||
def train_isotonic(raw_probs: np.ndarray, y_true: np.ndarray) -> IsotonicRegression:
    """Fit and return an isotonic calibrator mapping raw probabilities to outcomes.

    Inputs outside the fitted range are clipped (``out_of_bounds="clip"``).
    """
    calibrator = IsotonicRegression(out_of_bounds="clip")
    # fit() returns the fitted estimator itself.
    return calibrator.fit(raw_probs, y_true)
|
||||
|
||||
|
||||
def get_general_v25_probs(df_league: pd.DataFrame, feature_cols: list, market: str, num_class: int):
    """Use general V25 model to get predictions on this league's matches (for cal-only leagues).

    Args:
        df_league: Rows of one league; must contain ``feature_cols`` and the
            market's label column.
        feature_cols: Feature column names, in the order fed to the model.
        market: Market code, a key of ``MARKETS``.
        num_class: Number of classes; outputs are reshaped to
            ``(-1, num_class)`` when it is above 2.

    Returns:
        ``(probs, y)`` with the XGBoost probabilities of the general V25
        model and the matching labels, or ``(None, None)`` when fewer than
        50 complete rows exist, the market or its "xgb" model is not
        available, or any error occurs (the error is printed).
    """
    try:
        from models.v25_ensemble import get_v25_predictor
        v25 = get_v25_predictor()
        if not v25._loaded:
            v25.load_models()

        label_col = MARKETS[market]["label"]
        valid = df_league[feature_cols + [label_col]].dropna()
        if len(valid) < 50:
            return None, None

        market_key_map = {
            "MS": "ms", "OU15": "ou15", "OU25": "ou25", "OU35": "ou35",
            "BTTS": "btts", "HT": "ht_result", "HT_OU05": "ht_ou05",
            "HT_OU15": "ht_ou15", "HTFT": "htft", "OE": "odd_even",
            "CARDS": "cards_ou45", "HANDICAP": "handicap_ms",
        }
        mkey = market_key_map.get(market)
        if not mkey or not v25.has_market(mkey):
            return None, None

        # Only the XGBoost member of the V25 ensemble is used here.
        models = v25.models.get(mkey, {})
        if "xgb" not in models:
            return None, None
        booster = models["xgb"]

        # Rows with missing values were already dropped above, so no
        # further NaN filling is needed.
        X = valid[feature_cols].values
        y = valid[label_col].values

        # Predict in batches of 500 rows.
        all_probs = []
        for start in range(0, len(X), 500):
            df_batch = pd.DataFrame(X[start:start + 500], columns=feature_cols)
            p = booster.predict(xgb.DMatrix(df_batch))
            if num_class > 2:
                p = p.reshape(-1, num_class)
            all_probs.append(p)

        if not all_probs:
            return None, None

        probs = np.vstack(all_probs) if num_class > 2 else np.concatenate(all_probs)
        return probs, y
    except Exception as e:
        # Best effort: callers treat (None, None) as "no general-model
        # probabilities available"; report the reason instead of hiding it.
        print(f"[WARN] V25 probabilities unavailable for market {market}: "
              f"{type(e).__name__}: {e}")
        return None, None
|
||||
|
||||
|
||||
def _save_isotonic_calibrators(out_dir, market, raw, y_true, num_class):
    """Fit isotonic calibrators on ``raw`` predictions and pickle them into ``out_dir``.

    Multi-class markets get one one-vs-rest calibrator per class
    (``cal_<market>_<class>.pkl``); binary markets get a single
    ``cal_<market>.pkl``.
    """
    if num_class > 2:
        raw = raw.reshape(-1, num_class)
        for cls_idx in range(num_class):
            iso = train_isotonic(raw[:, cls_idx], (y_true == cls_idx).astype(int))
            with open(os.path.join(out_dir, f"cal_{market.lower()}_{cls_idx}.pkl"), "wb") as f:
                pickle.dump(iso, f)
    else:
        iso = train_isotonic(raw, y_true)
        with open(os.path.join(out_dir, f"cal_{market.lower()}.pkl"), "wb") as f:
            pickle.dump(iso, f)


def process_league(
    league_id: str,
    df_league: pd.DataFrame,
    feature_cols: list,
    full_model: bool,
    league_name: str,
) -> dict:
    """Train models for one league. Returns metrics dict.

    Artifacts are written to ``MODELS_DIR/<league_id>``. With
    ``full_model=True`` each eligible market gets its own XGBoost model plus
    isotonic calibrators fitted on that model's test-split predictions.
    Otherwise only calibrators are fitted, on the predictions of the general
    V25 XGBoost model over all complete rows of the league. A
    ``metrics.json`` file is written in both cases.

    Markets are skipped when the label column is missing or when the
    chronological 80/20 split leaves fewer than ``min_samples`` training rows
    or fewer than 30 test rows. Errors inside a market are recorded under its
    ``"error"`` key instead of being raised.
    """
    n_matches = len(df_league)
    out_dir = os.path.join(MODELS_DIR, league_id)
    os.makedirs(out_dir, exist_ok=True)

    metrics = {
        "league_id": league_id,
        "league_name": league_name,
        "n_matches": n_matches,
        "markets": {},
    }

    # Time-based split: last 20% as test
    cutoff = int(n_matches * 0.80)
    chronological = df_league.sort_values("mst_utc")
    df_train = chronological.iloc[:cutoff]
    df_test = chronological.iloc[cutoff:]

    # Market code -> key under which the general V25 predictor stores it.
    v25_market_keys = {
        "MS": "ms", "OU15": "ou15", "OU25": "ou25", "OU35": "ou35",
        "BTTS": "btts", "HT": "ht_result", "HT_OU05": "ht_ou05",
        "HT_OU15": "ht_ou15", "HTFT": "htft", "OE": "odd_even",
        "CARDS": "cards_ou45", "HANDICAP": "handicap_ms",
    }

    feature_cols_written = False

    for market, cfg in MARKETS.items():
        label_col = cfg["label"]
        num_class = cfg["num_class"]
        min_samp = cfg["min_samples"]

        if label_col not in df_league.columns:
            continue

        wanted_cols = feature_cols + [label_col]
        valid_train = df_train[wanted_cols].dropna()
        valid_test = df_test[wanted_cols].dropna()

        if len(valid_train) < min_samp or len(valid_test) < 30:
            continue

        X_train = valid_train[feature_cols].fillna(0).values
        y_train = valid_train[label_col].values.astype(int)
        X_test = valid_test[feature_cols].fillna(0).values
        y_test = valid_test[label_col].values.astype(int)

        mkt_metrics = {"n_train": len(X_train), "n_test": len(X_test)}

        if full_model:
            try:
                model, acc, ll = train_xgb_market(X_train, y_train, X_test, y_test, num_class, feature_cols)
                model.save_model(os.path.join(out_dir, f"xgb_{market.lower()}.json"))
                mkt_metrics.update({"accuracy": round(acc, 4), "logloss": round(ll, 4), "model": "xgb"})

                # The feature list is identical for every market: write it once.
                if not feature_cols_written:
                    with open(os.path.join(out_dir, "feature_cols.json"), "w") as f:
                        json.dump(feature_cols, f)
                    feature_cols_written = True

                # Isotonic calibration from own model predictions
                own_preds = model.predict(xgb.DMatrix(X_test, feature_names=feature_cols))
                _save_isotonic_calibrators(out_dir, market, own_preds, y_test, num_class)

            except Exception as e:
                mkt_metrics["error"] = str(e)
        else:
            # Calibration only: use general V25 model
            try:
                all_valid = df_league[wanted_cols].dropna()
                if len(all_valid) < min_samp:
                    continue

                X_all = all_valid[feature_cols].fillna(0).values
                y_all = all_valid[label_col].values.astype(int)

                # Use V25 general model
                from models.v25_ensemble import get_v25_predictor
                v25 = get_v25_predictor()
                if not v25._loaded:
                    v25.load_models()

                mkey = v25_market_keys.get(market)
                if not mkey or not v25.has_market(mkey):
                    continue

                dmat = xgb.DMatrix(pd.DataFrame(X_all, columns=feature_cols))
                models_v25 = v25.models.get(mkey, {})
                if "xgb" not in models_v25:
                    continue
                general_preds = models_v25["xgb"].predict(dmat)

                _save_isotonic_calibrators(out_dir, market, general_preds, y_all, num_class)

                mkt_metrics.update({"n_train": len(X_all), "model": "cal_only"})
            except Exception as e:
                mkt_metrics["error"] = str(e)

        metrics["markets"][market] = mkt_metrics

    # Save metrics
    with open(os.path.join(out_dir, "metrics.json"), "w") as f:
        json.dump(metrics, f, indent=2)

    return metrics
|
||||
|
||||
|
||||
def main():
    """Train league-specific models for every qualified league and report results.

    Command-line options:
        --min-samples  minimum matches for a full XGBoost model (default 500)
        --cal-min      minimum matches for calibration only (default 100)
        --colab        accepted for Colab runs; not read by this function

    Leagues with at least ``--min-samples`` rows get full models, leagues
    between ``--cal-min`` and ``--min-samples`` get calibration only, and the
    rest are skipped. League names are looked up in the database on a
    best-effort basis; when that fails the league id is used as the name.
    A master report is written to ``league_models_report.json`` under
    ``REPORTS_DIR``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--min-samples", type=int, default=500, help="Min matches for full model")
    parser.add_argument("--cal-min", type=int, default=100, help="Min matches for calibration")
    parser.add_argument("--colab", action="store_true", help="Colab-friendly verbose output")
    args = parser.parse_args()

    start_total = time.time()

    df = load_data()
    feature_cols = get_feature_cols(df)
    print(f"Feature columns: {len(feature_cols)}")

    qualified = load_qualified_leagues()
    if not qualified:
        # No qualified-league file: fall back to every league in the CSV.
        qualified = df["league_id"].unique().tolist()
    print(f"Qualified leagues: {len(qualified)}")

    # Get league names (best effort: training works without them)
    league_names = {}
    try:
        import psycopg2
        from data.db import get_clean_dsn

        conn = psycopg2.connect(get_clean_dsn())
        try:
            cur = conn.cursor()
            cur.execute("SELECT id, name FROM leagues WHERE id = ANY(%s)", (qualified,))
            league_names = {r[0]: r[1] for r in cur.fetchall()}
        finally:
            # Close the connection even when the query fails.
            conn.close()
    except Exception as e:
        print(f"[WARN] Could not load league names ({type(e).__name__}: {e}); "
              f"falling back to league ids.")

    # Filter to qualified leagues with enough data
    counts = df[df["league_id"].isin(qualified)].groupby("league_id").size()
    full_model_ids = counts[counts >= args.min_samples].index.tolist()
    cal_only_ids = counts[(counts >= args.cal_min) & (counts < args.min_samples)].index.tolist()

    selected_ids = set(full_model_ids) | set(cal_only_ids)
    skipped_count = sum(1 for lid in qualified if lid not in selected_ids)

    print(f"\nTam model ({args.min_samples}+ maç): {len(full_model_ids)} lig")
    print(f"Kalibrasyon ({args.cal_min}-{args.min_samples-1} maç): {len(cal_only_ids)} lig")
    print(f"Atlandı (<{args.cal_min} maç): {skipped_count} lig")
    print()

    all_results = []
    total = len(full_model_ids) + len(cal_only_ids)
    done = 0

    for league_id, full_model in (
        [(lid, True) for lid in full_model_ids] +
        [(lid, False) for lid in cal_only_ids]
    ):
        t0 = time.time()
        df_league = df[df["league_id"] == league_id].copy()
        n = len(df_league)
        # Fall back to the (stringified) id when the name is unknown or empty,
        # so formatting below never receives None or a non-string id.
        name = league_names.get(league_id) or str(league_id)[:12]
        tier = "FULL" if full_model else "CAL"

        try:
            # process_league builds a directory path from the id: pass a string.
            result = process_league(str(league_id), df_league, feature_cols, full_model, name)
            done += 1
            elapsed = time.time() - t0

            # Build accuracy string for key markets
            acc_parts = []
            for mkt in ["MS", "OU15", "OU25", "BTTS"]:
                m = result["markets"].get(mkt, {})
                if "accuracy" in m:
                    acc_parts.append(f"{mkt}={m['accuracy']*100:.1f}%")
            acc_str = " | ".join(acc_parts) if acc_parts else "(cal only)"

            print(f"[{done:>3}/{total}] [{tier}] {name:<35} {n:>6,} maç | {acc_str} | {elapsed:.1f}s")
            all_results.append(result)

        except Exception as e:
            # One failing league must not stop the remaining ones.
            done += 1
            print(f"[{done:>3}/{total}] [{tier}] {name:<35} ERROR: {e}")

        if done % 10 == 0:
            elapsed_total = time.time() - start_total
            remaining = (elapsed_total / done) * (total - done)
            print(f" ── {done}/{total} tamamlandı | geçen: {elapsed_total/60:.1f}dk | kalan tahmini: {remaining/60:.1f}dk ──")

    # Final report
    total_elapsed = time.time() - start_total
    print(f"\n{'='*70}")
    print(f"TAMAMLANDI: {len(all_results)}/{total} lig | Süre: {total_elapsed/60:.1f} dakika")
    print(f"{'='*70}")

    # Top 30 leagues by MS accuracy (only leagues that have an MS accuracy)
    printable = [(r["league_name"], r["n_matches"], r["markets"]) for r in all_results
                 if "MS" in r["markets"] and "accuracy" in r["markets"]["MS"]]
    printable.sort(key=lambda x: x[2]["MS"].get("accuracy", 0), reverse=True)

    print(f"\n{'Liga':<35} {'Maç':>6} {'MS':>7} {'OU15':>7} {'OU25':>7} {'BTTS':>7}")
    print("-" * 70)
    for name, n, mkts in printable[:30]:
        ms = mkts.get("MS", {}).get("accuracy", 0) * 100
        ou15 = mkts.get("OU15", {}).get("accuracy", 0) * 100
        ou25 = mkts.get("OU25", {}).get("accuracy", 0) * 100
        btts = mkts.get("BTTS", {}).get("accuracy", 0) * 100
        print(f"{name:<35} {n:>6,} {ms:>6.1f}% {ou15:>6.1f}% {ou25:>6.1f}% {btts:>6.1f}%")

    # Save master report
    report = {
        "generated_at": datetime.now().isoformat(),
        "total_leagues": len(all_results),
        "elapsed_minutes": round(total_elapsed / 60, 1),
        "results": all_results,
    }
    report_path = os.path.join(REPORTS_DIR, "league_models_report.json")
    with open(report_path, "w") as f:
        json.dump(report, f, indent=2)
    print(f"\nRapor kaydedildi: {report_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,259 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# League-Specific Model Trainer \u2014 Google Colab\n",
|
||||
"164 lig i\u00e7in XGBoost + isotonic kalibrasyon. 12 market.\n",
|
||||
"Modeller Drive'a kaydedilir, `models/league_specific/` klas\u00f6r\u00fcne kopyalan\u0131r.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Mount Drive\n",
|
||||
"from google.colab import drive\n",
|
||||
"drive.mount('/content/drive')\n",
|
||||
"\n",
|
||||
"DRIVE_DIR = '/content/drive/MyDrive/iddaai'\n",
|
||||
"import os\n",
|
||||
"os.makedirs(DRIVE_DIR, exist_ok=True)\n",
|
||||
"print('Drive mounted:', DRIVE_DIR)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# training_data.csv zaten Drive da: /content/drive/MyDrive/iddaai/training_data.csv\n",
|
||||
"# Sadece qualified_leagues.json upload et (iddaai-be/ klas\u00f6r\u00fcnden)\n",
|
||||
"from google.colab import files\n",
|
||||
"import shutil\n",
|
||||
"print(\"qualified_leagues.json dosyasini upload edin\")\n",
|
||||
"uploaded = files.upload()\n",
|
||||
"for fname in uploaded:\n",
|
||||
" shutil.copy(fname, f\"{DRIVE_DIR}/{fname}\")\n",
|
||||
" print(f\"Kaydedildi: {DRIVE_DIR}/{fname}\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Upload training_data.csv and qualified_leagues.json from local machine\n",
|
||||
"from google.colab import files\n",
|
||||
"print('training_data.csv upload edin (ai-engine/data/training_data.csv)')\n",
|
||||
"uploaded = files.upload()\n",
|
||||
"import shutil\n",
|
||||
"for fname in uploaded:\n",
|
||||
" shutil.copy(fname, f'{DRIVE_DIR}/{fname}')\n",
|
||||
" print(f'Saved: {DRIVE_DIR}/{fname}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os, json, pickle, time, warnings\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import xgboost as xgb\n",
|
||||
"from sklearn.isotonic import IsotonicRegression\n",
|
||||
"from sklearn.metrics import accuracy_score, log_loss\n",
|
||||
"warnings.filterwarnings('ignore')\n",
|
||||
"\n",
|
||||
"DRIVE_DIR = '/content/drive/MyDrive/iddaai'\n",
|
||||
"DATA_PATH = f'{DRIVE_DIR}/training_data.csv'\n",
|
||||
"QL_PATH = f'{DRIVE_DIR}/qualified_leagues.json'\n",
|
||||
"MODELS_DIR = f'{DRIVE_DIR}/league_specific'\n",
|
||||
"os.makedirs(MODELS_DIR, exist_ok=True)\n",
|
||||
"\n",
|
||||
"MARKETS = {\n",
|
||||
" 'MS': {'label': 'label_ms', 'num_class': 3, 'min_samples': 200},\n",
|
||||
" 'OU15': {'label': 'label_ou15', 'num_class': 2, 'min_samples': 150},\n",
|
||||
" 'OU25': {'label': 'label_ou25', 'num_class': 2, 'min_samples': 150},\n",
|
||||
" 'OU35': {'label': 'label_ou35', 'num_class': 2, 'min_samples': 150},\n",
|
||||
" 'BTTS': {'label': 'label_btts', 'num_class': 2, 'min_samples': 150},\n",
|
||||
" 'HT': {'label': 'label_ht_result', 'num_class': 3, 'min_samples': 150},\n",
|
||||
" 'HT_OU05': {'label': 'label_ht_ou05', 'num_class': 2, 'min_samples': 150},\n",
|
||||
" 'HT_OU15': {'label': 'label_ht_ou15', 'num_class': 2, 'min_samples': 150},\n",
|
||||
" 'HTFT': {'label': 'label_ht_ft', 'num_class': 9, 'min_samples': 300},\n",
|
||||
" 'OE': {'label': 'label_odd_even', 'num_class': 2, 'min_samples': 150},\n",
|
||||
" 'CARDS': {'label': 'label_cards_ou45', 'num_class': 2, 'min_samples': 150},\n",
|
||||
" 'HANDICAP': {'label': 'label_handicap_ms', 'num_class': 3, 'min_samples': 200},\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"SKIP_COLS = {\n",
|
||||
" 'match_id','home_team_id','away_team_id','league_id','mst_utc',\n",
|
||||
" 'score_home','score_away','total_goals','ht_score_home','ht_score_away','ht_total_goals',\n",
|
||||
" 'label_ms','label_ou05','label_ou15','label_ou25','label_ou35','label_btts',\n",
|
||||
" 'label_ht_result','label_ht_ou05','label_ht_ou15','label_ht_ft',\n",
|
||||
" 'label_odd_even','label_yellow_cards','label_cards_ou45','label_handicap_ms',\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"XGB_BASE = {\n",
|
||||
" 'max_depth': 4, 'eta': 0.05, 'subsample': 0.8,\n",
|
||||
" 'colsample_bytree': 0.8, 'min_child_weight': 5,\n",
|
||||
" 'gamma': 0.1, 'reg_lambda': 1.0, 'verbosity': 0, 'seed': 42,\n",
|
||||
" 'nthread': -1,\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"df = pd.read_csv(DATA_PATH, low_memory=False)\n",
|
||||
"feature_cols = [c for c in df.columns if c not in SKIP_COLS]\n",
|
||||
"print(f'Y\u00fcklendi: {len(df):,} sat\u0131r | {len(feature_cols)} feature')\n",
|
||||
"\n",
|
||||
"qualified = json.load(open(QL_PATH)) if os.path.exists(QL_PATH) else df['league_id'].unique().tolist()\n",
|
||||
"counts = df[df['league_id'].isin(qualified)].groupby('league_id').size()\n",
|
||||
"full_ids = counts[counts >= 500].index.tolist()\n",
|
||||
"cal_ids = counts[(counts >= 100) & (counts < 500)].index.tolist()\n",
|
||||
"print(f'Tam model: {len(full_ids)} | Kalibrasyon: {len(cal_ids)} | Toplam: {len(full_ids)+len(cal_ids)}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def train_one_league(league_id, df_league, feature_cols, full_model):\n",
|
||||
" n = len(df_league)\n",
|
||||
" out_dir = f'{MODELS_DIR}/{league_id}'\n",
|
||||
" os.makedirs(out_dir, exist_ok=True)\n",
|
||||
" metrics = {}\n",
|
||||
"\n",
|
||||
" df_sorted = df_league.sort_values('mst_utc')\n",
|
||||
" split = int(n * 0.80)\n",
|
||||
" df_tr, df_te = df_sorted.iloc[:split], df_sorted.iloc[split:]\n",
|
||||
"\n",
|
||||
" saved_fc = False\n",
|
||||
"\n",
|
||||
" for market, cfg in MARKETS.items():\n",
|
||||
" lbl, nc, ms = cfg['label'], cfg['num_class'], cfg['min_samples']\n",
|
||||
" if lbl not in df_league.columns:\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" if full_model:\n",
|
||||
" vtr = df_tr[feature_cols + [lbl]].dropna()\n",
|
||||
" vte = df_te[feature_cols + [lbl]].dropna()\n",
|
||||
" if len(vtr) < ms or len(vte) < 30:\n",
|
||||
" continue\n",
|
||||
" Xtr, ytr = vtr[feature_cols].fillna(0).values, vtr[lbl].values.astype(int)\n",
|
||||
" Xte, yte = vte[feature_cols].fillna(0).values, vte[lbl].values.astype(int)\n",
|
||||
"\n",
|
||||
" params = {**XGB_BASE, 'objective': 'multi:softprob' if nc > 2 else 'binary:logistic',\n",
|
||||
" 'eval_metric': 'mlogloss' if nc > 2 else 'logloss'}\n",
|
||||
" if nc > 2: params['num_class'] = nc\n",
|
||||
"\n",
|
||||
" dtr = xgb.DMatrix(Xtr, label=ytr, feature_names=feature_cols)\n",
|
||||
" dte = xgb.DMatrix(Xte, label=yte, feature_names=feature_cols)\n",
|
||||
" model = xgb.train(params, dtr, 300, [(dte,'v')], early_stopping_rounds=30, verbose_eval=False)\n",
|
||||
" model.save_model(f'{out_dir}/xgb_{market.lower()}.json')\n",
|
||||
"\n",
|
||||
" if not saved_fc:\n",
|
||||
" json.dump(feature_cols, open(f'{out_dir}/feature_cols.json','w'))\n",
|
||||
" saved_fc = True\n",
|
||||
"\n",
|
||||
" raw = model.predict(dte)\n",
|
||||
" if nc > 2:\n",
|
||||
" raw = raw.reshape(-1, nc)\n",
|
||||
" acc = accuracy_score(yte, np.argmax(raw, axis=1))\n",
|
||||
" for ci in range(nc):\n",
|
||||
" iso = IsotonicRegression(out_of_bounds='clip').fit(raw[:,ci], (yte==ci).astype(int))\n",
|
||||
" pickle.dump(iso, open(f'{out_dir}/cal_{market.lower()}_{ci}.pkl','wb'))\n",
|
||||
" else:\n",
|
||||
" acc = accuracy_score(yte, (raw>=0.5).astype(int))\n",
|
||||
" iso = IsotonicRegression(out_of_bounds='clip').fit(raw, yte)\n",
|
||||
" pickle.dump(iso, open(f'{out_dir}/cal_{market.lower()}.pkl','wb'))\n",
|
||||
"\n",
|
||||
" metrics[market] = {'accuracy': round(float(acc),4), 'n_train': len(Xtr)}\n",
|
||||
" else:\n",
|
||||
" # Cal only \u2014 store empty placeholder so prediction knows to use general V25\n",
|
||||
" metrics[market] = {'model': 'cal_only', 'n': n}\n",
|
||||
"\n",
|
||||
" json.dump({'league_id': league_id, 'n': n, 'markets': metrics},\n",
|
||||
" open(f'{out_dir}/metrics.json','w'), indent=2)\n",
|
||||
" return metrics\n",
|
||||
"\n",
|
||||
"start = time.time()\n",
|
||||
"all_ids = [(lid, True) for lid in full_ids] + [(lid, False) for lid in cal_ids]\n",
|
||||
"results = []\n",
|
||||
"\n",
|
||||
"for i, (lid, full) in enumerate(all_ids, 1):\n",
|
||||
" dfl = df[df['league_id'] == lid].copy()\n",
|
||||
" t0 = time.time()\n",
|
||||
" try:\n",
|
||||
" mkt_res = train_one_league(lid, dfl, feature_cols, full)\n",
|
||||
" ms_acc = mkt_res.get('MS', {}).get('accuracy', '-')\n",
|
||||
" results.append((lid, len(dfl), mkt_res))\n",
|
||||
" print(f'[{i:>3}/{len(all_ids)}] {lid[:20]:<20} n={len(dfl):>5,} MS={ms_acc} {time.time()-t0:.1f}s')\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f'[{i:>3}/{len(all_ids)}] {lid[:20]:<20} ERROR: {e}')\n",
|
||||
"\n",
|
||||
" if i % 20 == 0:\n",
|
||||
" el = time.time()-start\n",
|
||||
" print(f' \u2500\u2500 {i}/{len(all_ids)} done | {el/60:.1f}min elapsed | ~{el/i*(len(all_ids)-i)/60:.1f}min left \u2500\u2500')\n",
|
||||
"\n",
|
||||
"print(f'\\nBitti! {len(results)} lig | {(time.time()-start)/60:.1f} dakika')\n",
|
||||
"print(f'Modeller: {MODELS_DIR}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sonu\u00e7lar\u0131 g\u00f6ster \u2014 MS accuracy s\u0131ralamas\u0131\n",
|
||||
"printable = [(lid, n, m) for lid, n, m in results if 'MS' in m and 'accuracy' in m['MS']]\n",
|
||||
"printable.sort(key=lambda x: x[2]['MS']['accuracy'], reverse=True)\n",
|
||||
"print(f'{\"Liga ID\":<30} {\"Ma\u00e7\":>6} {\"MS\":>7} {\"OU15\":>7} {\"OU25\":>7} {\"BTTS\":>7}')\n",
|
||||
"print('-'*70)\n",
|
||||
"for lid, n, m in printable[:30]:\n",
|
||||
" ms = m.get('MS', {}).get('accuracy', 0)*100\n",
|
||||
" ou15 = m.get('OU15',{}).get('accuracy', 0)*100\n",
|
||||
" ou25 = m.get('OU25',{}).get('accuracy', 0)*100\n",
|
||||
" btts = m.get('BTTS',{}).get('accuracy', 0)*100\n",
|
||||
" print(f'{lid:<30} {n:>6,} {ms:>6.1f}% {ou15:>6.1f}% {ou25:>6.1f}% {btts:>6.1f}%')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Zip ve indir\n",
|
||||
"import shutil\n",
|
||||
"zip_path = f'{DRIVE_DIR}/league_specific_models.zip'\n",
|
||||
"shutil.make_archive(zip_path.replace('.zip',''), 'zip', MODELS_DIR)\n",
|
||||
"print(f'Zip: {zip_path}')\n",
|
||||
"# \u0130ndirmek i\u00e7in:\n",
|
||||
"# from google.colab import files\n",
|
||||
"# files.download(zip_path)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"version": "3.10.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,108 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# HÜCRE 1 — Paketler\n",
|
||||
"!pip install xgboost lightgbm optuna scikit-learn pandas numpy -q\n",
|
||||
"print('Hazır')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# HÜCRE 2 — Drive bağla + CSV çek\n",
|
||||
"from google.colab import drive\n",
|
||||
"import os, shutil\n",
|
||||
"drive.mount('/content/drive')\n",
|
||||
"\n",
|
||||
"# training_data.csv'yi Drive'ın iddaai klasöründen kopyala\n",
|
||||
"shutil.copy('/content/drive/MyDrive/iddaai/training_data.csv', '/content/training_data.csv')\n",
|
||||
"print('CSV hazır:', os.path.getsize('/content/training_data.csv') // 1024 // 1024, 'MB')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# HÜCRE 3 — iddaai_colab3.zip upload et (ai-engine kodları)\n",
|
||||
"from google.colab import files\n",
|
||||
"import zipfile\n",
|
||||
"print('iddaai_colab3.zip dosyasını seç:')\n",
|
||||
"uploaded = files.upload()\n",
|
||||
"with zipfile.ZipFile('iddaai_colab3.zip') as z:\n",
|
||||
" z.extractall('/content')\n",
|
||||
"print('Kod hazır')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# HÜCRE 4 — training_data.csv'yi script'in beklediği yere koy\n",
|
||||
"import os, shutil\n",
|
||||
"os.makedirs('/content/ai-engine/data', exist_ok=True)\n",
|
||||
"shutil.copy('/content/training_data.csv', '/content/ai-engine/data/training_data.csv')\n",
|
||||
"print('Yerleştirildi')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# HÜCRE 5 — Eğitimi başlat (her 5 trial'da bir ilerleme gösterir)\n",
|
||||
"import subprocess, os\n",
|
||||
"\n",
|
||||
"proc = subprocess.Popen(\n",
|
||||
" ['python', 'scripts/train_v25_pro.py'],\n",
|
||||
" stdout=subprocess.PIPE,\n",
|
||||
" stderr=subprocess.STDOUT,\n",
|
||||
" text=True,\n",
|
||||
" cwd='/content/ai-engine',\n",
|
||||
" env={**os.environ, 'PYTHONPATH': '/content/ai-engine'}\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"for line in proc.stdout:\n",
|
||||
" print(line, end='', flush=True)\n",
|
||||
"\n",
|
||||
"proc.wait()\n",
|
||||
"print('\\nEĞİTİM BİTTİ!')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# HÜCRE 6 — Modelleri Drive'a kaydet\n",
|
||||
"import shutil, os\n",
|
||||
"os.makedirs('/content/drive/MyDrive/iddaai/models_v25', exist_ok=True)\n",
|
||||
"shutil.copytree(\n",
|
||||
" '/content/ai-engine/models/v25',\n",
|
||||
" '/content/drive/MyDrive/iddaai/models_v25',\n",
|
||||
" dirs_exist_ok=True\n",
|
||||
")\n",
|
||||
"print('Modeller Drive a kaydedildi: MyDrive/iddaai/models_v25/')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
|
||||
"language_info": {"name": "python", "version": "3.10.0"}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -101,6 +101,32 @@ FEATURES = [
|
||||
"home_top_scorer_form", "away_top_scorer_form",
|
||||
"home_avg_player_exp", "away_avg_player_exp",
|
||||
"home_goals_diversity", "away_goals_diversity",
|
||||
# V27 H2H Expanded (4)
|
||||
"h2h_home_goals_avg", "h2h_away_goals_avg",
|
||||
"h2h_recent_trend", "h2h_venue_advantage",
|
||||
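# V27 Rolling Stats (12) — NOTE(review): home_rolling20_* has no away_rolling20_* counterpart listed here; confirm that is intended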
|
||||
"home_rolling5_goals", "home_rolling5_conceded",
|
||||
"home_rolling10_goals", "home_rolling10_conceded",
|
||||
"home_rolling20_goals", "home_rolling20_conceded",
|
||||
"away_rolling5_goals", "away_rolling5_conceded",
|
||||
"away_rolling10_goals", "away_rolling10_conceded",
|
||||
"home_rolling5_cs", "away_rolling5_cs",
|
||||
# V27 Venue Stats (4)
|
||||
"home_venue_goals", "home_venue_conceded",
|
||||
"away_venue_goals", "away_venue_conceded",
|
||||
# V27 Goal Trend (2)
|
||||
"home_goal_trend", "away_goal_trend",
|
||||
# V27 Calendar (5)
|
||||
"home_days_rest", "away_days_rest",
|
||||
"match_month", "is_season_start", "is_season_end",
|
||||
# V27 Interaction (6)
|
||||
"attack_vs_defense_home", "attack_vs_defense_away",
|
||||
"xg_diff", "form_momentum_interaction",
|
||||
"elo_form_consistency", "upset_x_elo_gap",
|
||||
# V27 League Expanded (5)
|
||||
"league_home_win_rate", "league_draw_rate",
|
||||
"league_btts_rate", "league_ou25_rate",
|
||||
"league_reliability_score",
|
||||
]
|
||||
|
||||
MARKET_CONFIGS = [
|
||||
@@ -295,12 +321,18 @@ def train_market(df, target_col, market_name, num_class, n_trials):
|
||||
|
||||
print(f"[INFO] Split: train={len(X_train)} val={len(X_val)} cal={len(X_cal)} test={len(X_test)}")
|
||||
|
||||
def _cb(study, trial):
    """Optuna callback that prints tuning progress.

    A line is printed for every fifth trial (``trial.number`` 0, 5, 10, ...)
    and for the last one (``n_trials - 1``, taken from the enclosing scope).

    Args:
        study: the running Optuna study; only ``best_value`` is read.
        trial: the trial that just finished; ``number`` and ``value`` are read.
    """
    if trial.number % 5 != 0 and trial.number != n_trials - 1:
        return

    # A trial that did not complete normally may carry no objective value;
    # formatting None with ":.4f" would raise TypeError inside the callback.
    loss = "n/a" if trial.value is None else f"{trial.value:.4f}"

    # Optuna raises ValueError from best_value/best_trial while no trial has
    # completed, so a truthiness check on study.best_trial cannot be used as
    # the guard. "inf" matches what f"{float('inf'):.4f}" renders.
    try:
        best = f"{study.best_value:.4f}"
    except ValueError:
        best = "inf"

    print(f" [{trial.number+1:>3}/{n_trials}] loss={loss} | best={best}", flush=True)
|
||||
|
||||
# ── Phase 1: Optuna XGBoost ──────────────────────────────────
|
||||
print(f"\n[OPTUNA] XGBoost tuning ({n_trials} trials)...")
|
||||
xgb_study = optuna.create_study(direction="minimize", sampler=TPESampler(seed=42))
|
||||
xgb_study.optimize(
|
||||
lambda trial: xgb_objective(trial, X_train, y_train, X_val, y_val, num_class),
|
||||
n_trials=n_trials,
|
||||
callbacks=[_cb],
|
||||
)
|
||||
xgb_best = xgb_study.best_params
|
||||
print(f"[OK] XGB best logloss: {xgb_study.best_value:.4f}")
|
||||
@@ -311,6 +343,7 @@ def train_market(df, target_col, market_name, num_class, n_trials):
|
||||
lgb_study.optimize(
|
||||
lambda trial: lgb_objective(trial, X_train, y_train, X_val, y_val, num_class),
|
||||
n_trials=n_trials,
|
||||
callbacks=[_cb],
|
||||
)
|
||||
lgb_best = lgb_study.best_params
|
||||
print(f"[OK] LGB best logloss: {lgb_study.best_value:.4f}")
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -19,11 +19,26 @@ class BettingBrain:
|
||||
SOFT_DIVERGENCE = 0.14
|
||||
EXTREME_MODEL_PROB = 0.85
|
||||
EXTREME_GAP = 0.30
|
||||
# Vetoes that is_value_sniper bypasses (does NOT bypass odds_below_minimum)
|
||||
SNIPER_BYPASSABLE_VETOES = {"calibrated_confidence_too_low", "play_score_too_low"}
|
||||
# Trap market: market implied probability massively exceeds historical band hit rate
|
||||
SNIPER_BYPASSABLE_VETOES = {"play_score_too_low"}
|
||||
TRAP_MARKET_GAP = 0.10
|
||||
|
||||
MARKET_MIN_CONFIDENCE = {
|
||||
"MS": 45.0,
|
||||
"DC": 55.0,
|
||||
"OU25": 48.0,
|
||||
"OU15": 55.0,
|
||||
"OU35": 42.0,
|
||||
"BTTS": 48.0,
|
||||
"HT": 55.0,
|
||||
"HTFT": 65.0,
|
||||
"OE": 55.0,
|
||||
"CARDS": 50.0,
|
||||
"HT_OU05": 55.0,
|
||||
"HT_OU15": 50.0,
|
||||
}
|
||||
|
||||
SNIPER_BLOCKED_MARKETS = {"HT", "HTFT", "OE", "CARDS", "HT_OU05", "HT_OU15"}
|
||||
|
||||
MARKET_PRIORS = {
|
||||
"DC": 4.0,
|
||||
"OU15": 3.0,
|
||||
@@ -31,10 +46,10 @@ class BettingBrain:
|
||||
"BTTS": 0.0,
|
||||
"MS": -2.0,
|
||||
"OU35": -2.0,
|
||||
"HT": -6.0,
|
||||
"HTFT": -12.0,
|
||||
"CARDS": -5.0,
|
||||
"OE": -8.0,
|
||||
"HT": -10.0,
|
||||
"HTFT": -18.0,
|
||||
"CARDS": -8.0,
|
||||
"OE": -12.0,
|
||||
}
|
||||
|
||||
def judge(self, package: Dict[str, Any]) -> Dict[str, Any]:
|
||||
@@ -182,8 +197,10 @@ class BettingBrain:
|
||||
issues.append("base_model_not_playable")
|
||||
|
||||
is_value_sniper = bool(row.get("is_value_sniper"))
|
||||
if market in self.SNIPER_BLOCKED_MARKETS:
|
||||
is_value_sniper = False
|
||||
if is_value_sniper:
|
||||
score += 35.0
|
||||
score += 20.0
|
||||
positives.append("value_sniper_override")
|
||||
|
||||
score += max(0.0, min(20.0, calibrated_conf * 0.22))
|
||||
@@ -197,9 +214,31 @@ class BettingBrain:
|
||||
risk = str((package.get("risk") or {}).get("level") or "MEDIUM").upper()
|
||||
score += {"LOW": 5.0, "MEDIUM": 0.0, "HIGH": -12.0, "EXTREME": -22.0}.get(risk, -4.0)
|
||||
|
||||
# League reliability penalty: weak leagues produce unreliable raw probabilities.
|
||||
# odds_reliability is pre-computed per-league from historical Brier score analysis.
|
||||
odds_rel = self._safe_float(row.get("odds_reliability"), 0.35) or 0.35
|
||||
if odds_rel < 0.30:
|
||||
score -= 22.0
|
||||
issues.append("very_low_reliability_league")
|
||||
if market in {"MS", "DC", "OU25", "BTTS"} and not is_value_sniper:
|
||||
vetoes.append("low_reliability_league_hard_block")
|
||||
elif odds_rel < 0.45:
|
||||
score -= 12.0
|
||||
issues.append("low_reliability_league")
|
||||
elif odds_rel < 0.55:
|
||||
score -= 5.0
|
||||
|
||||
# Inferred features penalty: when ELO/form/H2H come from live enrichment
|
||||
# (not pre-computed table), statistical quality is unknown — penalise hard.
|
||||
dq_flags = list(data_quality.get("flags") or [])
|
||||
if "ai_features_inferred_from_history" in dq_flags:
|
||||
score -= 18.0
|
||||
issues.append("inferred_statistical_features")
|
||||
|
||||
if odds < self.MIN_ODDS:
|
||||
vetoes.append("odds_below_minimum")
|
||||
if calibrated_conf < 38.0 and not is_value_sniper:
|
||||
min_conf = self.MARKET_MIN_CONFIDENCE.get(market, 45.0)
|
||||
if calibrated_conf < min_conf:
|
||||
vetoes.append("calibrated_confidence_too_low")
|
||||
if play_score < 50.0 and not is_value_sniper:
|
||||
vetoes.append("play_score_too_low")
|
||||
@@ -270,7 +309,7 @@ class BettingBrain:
|
||||
score -= 24.0
|
||||
vetoes.append("extreme_probability_without_evidence")
|
||||
|
||||
if market in {"HT", "HTFT", "OE"} and score < 86.0 and not is_value_sniper:
|
||||
if market in {"HT", "HTFT", "OE"} and score < 86.0:
|
||||
vetoes.append("volatile_market_requires_exceptional_evidence")
|
||||
|
||||
# Sniper override: bypass eligible vetoes when value sniper triggered
|
||||
|
||||
@@ -62,7 +62,7 @@ def generate_match_commentary(package: Dict[str, Any]) -> Dict[str, Any]:
|
||||
)
|
||||
|
||||
# ── Quick notes ───────────────────────────────────────────────
|
||||
notes = _build_notes(market_board, v27_engine, score_pred, risk, home, away)
|
||||
notes = _build_notes(market_board, v27_engine, score_pred, risk, home, away, league_name=match_info.get("league", ""))
|
||||
|
||||
# ── Contradiction detection ───────────────────────────────────
|
||||
contradictions = _detect_contradictions(market_board, v27_engine, package)
|
||||
@@ -206,11 +206,17 @@ def _build_notes(
|
||||
risk: Dict[str, Any],
|
||||
home: str,
|
||||
away: str,
|
||||
league_name: str = "",
|
||||
) -> List[str]:
|
||||
notes: List[str] = []
|
||||
triple_value = v27_engine.get("triple_value") or {}
|
||||
odds_band = v27_engine.get("odds_band") or {}
|
||||
|
||||
# Cup game note — model uses league statistics; cup dynamics differ
|
||||
_cup_kws = ("kupa", "cup", "coupe", "copa", "pokal", "ziraat", "trophy", "shield", "super cup", "süper kupa")
|
||||
if any(kw in (league_name or "").lower() for kw in _cup_kws):
|
||||
notes.append("⚠️ Kupa maçı: ev avantajı zayıf, rotasyon ve düşük motivasyon riski var")
|
||||
|
||||
# MS note
|
||||
ms = market_board.get("MS") or {}
|
||||
ms_conf = float(ms.get("confidence", 0) or 0)
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
"""Orchestrator package — mixin modules split from the original 5786-line
|
||||
monolithic SingleMatchOrchestrator. Behaviour is identical to the pre-refactor
|
||||
version; only file layout has changed.
|
||||
"""
|
||||
|
||||
from services.orchestrator.data_loader import DataLoaderMixin
|
||||
from services.orchestrator.feature_builder import FeatureBuilderMixin
|
||||
from services.orchestrator.prediction import PredictionMixin
|
||||
from services.orchestrator.basketball import BasketballMixin
|
||||
from services.orchestrator.upper_brain import UpperBrainMixin
|
||||
from services.orchestrator.htms import HtmsMixin
|
||||
from services.orchestrator.coupon import CouponMixin
|
||||
from services.orchestrator.reversal import ReversalMixin
|
||||
from services.orchestrator.market_board import MarketBoardMixin
|
||||
from services.orchestrator.utils import UtilsMixin
|
||||
|
||||
__all__ = [
|
||||
"DataLoaderMixin",
|
||||
"FeatureBuilderMixin",
|
||||
"PredictionMixin",
|
||||
"BasketballMixin",
|
||||
"UpperBrainMixin",
|
||||
"HtmsMixin",
|
||||
"CouponMixin",
|
||||
"ReversalMixin",
|
||||
"MarketBoardMixin",
|
||||
"UtilsMixin",
|
||||
]
|
||||
@@ -0,0 +1,538 @@
|
||||
"""Basketball Mixin — basketball-specific market construction.
|
||||
|
||||
Auto-extracted mixin module — split from services/single_match_orchestrator.py.
|
||||
All methods here are composed into SingleMatchOrchestrator via inheritance.
|
||||
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
|
||||
initialised in the main __init__.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, overload
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from schemas.prediction import FullMatchPrediction
|
||||
from schemas.match_data import MatchData
|
||||
from models.v25_ensemble import V25Predictor, get_v25_predictor
|
||||
try:
|
||||
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
|
||||
except ImportError:
|
||||
class V27Predictor: # type: ignore[no-redef]
|
||||
def __init__(self): self.models = {}
|
||||
def load_models(self): return False
|
||||
def predict_all(self, features): return {}
|
||||
def compute_divergence(*args, **kwargs):
|
||||
return {}
|
||||
def compute_value_edge(*args, **kwargs):
|
||||
return {}
|
||||
from features.odds_band_analyzer import OddsBandAnalyzer
|
||||
try:
|
||||
from models.basketball_v25 import (
|
||||
BasketballMatchPrediction,
|
||||
get_basketball_v25_predictor,
|
||||
)
|
||||
except ImportError:
|
||||
BasketballMatchPrediction = Any # type: ignore[misc]
|
||||
def get_basketball_v25_predictor() -> Any:
|
||||
raise ImportError("Basketball predictor is not available")
|
||||
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
|
||||
from services.feature_enrichment import FeatureEnrichmentService
|
||||
from services.betting_brain import BettingBrain
|
||||
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
|
||||
from services.match_commentary import generate_match_commentary
|
||||
from utils.top_leagues import load_top_league_ids
|
||||
from utils.league_reliability import load_league_reliability
|
||||
from config.config_loader import build_threshold_dict, get_threshold_default
|
||||
from models.calibration import get_calibrator
|
||||
|
||||
|
||||
class BasketballMixin:
|
||||
def _build_basketball_prediction_package(
    self,
    data: MatchData,
    prediction: Dict[str, Any],
) -> Dict[str, Any]:
    """Assemble the response package for one basketball match.

    Steps, in order:

    1. Build the raw market rows, decorate each one, and rank them by
       ``(playable, play_score)`` descending.
    2. Choose the main pick: among playable rows with odds >= 1.30 the one
       with the highest ``(ev_edge, play_score)``. If there is none, fall
       back to the best ``play_score`` row with odds > 1.0 (or simply the
       first row) and mark it as a non-playable ``PASS``.
    3. Collect up to five supporting rows (everything except the main pick).
    4. When the risk level is LOW or MEDIUM and the board has a two-sided
       ``Spread`` entry, derive an "aggressive" spread pick.
    5. Reshape the predictor's ``market_board`` (``ML`` / ``Totals`` /
       ``Spread``) into fractional probabilities.

    Args:
        data: match record. Attributes read here: ``match_id``,
            ``home_team_name``, ``away_team_name``, ``league_name``,
            ``match_date_ms``, ``sport`` and ``odds_data`` (a mapping with
            ``spread_h`` / ``spread_a``).
        prediction: predictor output. Keys read here: ``market_board``,
            ``bet_summary``, ``score_prediction``, ``risk_level``,
            ``risk_score``, ``main_pick``, ``engine_version`` and
            ``engine_breakdown``. ``market_board``, ``bet_summary`` and
            ``score_prediction`` may be missing or ``None``.

    Returns:
        The package dict (``model_version``, ``match_info``,
        ``data_quality``, ``risk``, ``engine_breakdown``, ``main_pick``,
        ``bet_advice``, ``bet_summary``, ``supporting_picks``,
        ``aggressive_pick``, ``scenario_top5``, ``score_prediction``,
        ``market_board``, ``reasoning_factors``).

    Raises:
        ValueError: if a board probability cannot be parsed as a number.
    """

    def _pct_to_prob(value: Any) -> float:
        """Turn a percentage such as ``"55.2%"`` or ``55.2`` into a 0-1 fraction."""
        return float(str(value).replace("%", "")) / 100.0

    def _leading_probs(market: Dict[str, Any]) -> Optional[Tuple[float, float]]:
        """Return the first two board values as fractions, or None if fewer than two."""
        values = list(market.values())
        if len(values) < 2:
            return None
        return _pct_to_prob(values[0]), _pct_to_prob(values[1])

    quality = self._compute_data_quality(data)

    market_rows = [
        self._decorate_basketball_market_row(data, prediction, quality, row)
        for row in self._build_basketball_market_rows(data, prediction)
    ]
    market_rows.sort(
        key=lambda row: (
            1 if row.get("playable") else 0,
            float(row.get("play_score", 0.0)),
        ),
        reverse=True,
    )

    # ── Main pick ────────────────────────────────────────────────
    min_odds = 1.30
    candidates = [
        row
        for row in market_rows
        if row.get("playable") and float(row.get("odds", 0.0)) >= min_odds
    ]

    if candidates:
        candidates.sort(
            key=lambda r: (
                float(r.get("ev_edge", 0.0)),
                float(r.get("play_score", 0.0)),
            ),
            reverse=True,
        )
        main_pick = candidates[0]
        main_pick["is_guaranteed"] = False
        main_pick["pick_reason"] = "positive_ev_pick"
    else:
        priced = [r for r in market_rows if float(r.get("odds", 0.0)) > 1.0]
        priced.sort(key=lambda r: float(r.get("play_score", 0.0)), reverse=True)
        if priced:
            main_pick = priced[0]
        else:
            main_pick = market_rows[0] if market_rows else None
        if main_pick:
            # The row is shared with market_rows, so bet_summary (built
            # below) also reflects this downgrade.
            main_pick["is_guaranteed"] = False
            main_pick["playable"] = False
            main_pick["stake_units"] = 0.0
            main_pick["bet_grade"] = "PASS"
            main_pick["pick_reason"] = "no_playable_value_found"

    # ── Supporting picks: everything but the main pick, top five ──
    supporting: List[Dict[str, Any]] = [
        row
        for row in market_rows
        if not (
            main_pick
            and row["market"] == main_pick["market"]
            and row["pick"] == main_pick["pick"]
        )
    ][:5]

    bet_summary = [self._to_bet_summary_item(row) for row in market_rows]
    scenarios = self._build_basketball_scenarios(prediction)
    reasons = self._build_basketball_reasoning_factors(data, prediction, quality)

    risk_level = prediction.get("risk_level", "MEDIUM")
    risk_score = float(prediction.get("risk_score", 50.0) or 50.0)

    # `or {}` / `or []`: a key that is present but None must not crash the
    # membership tests and loops below.
    board = prediction.get("market_board") or {}
    ml_probs = _leading_probs(board["ML"]) if "ML" in board else None
    totals_probs = _leading_probs(board["Totals"]) if "Totals" in board else None
    spread_probs = _leading_probs(board["Spread"]) if "Spread" in board else None

    # ── Aggressive pick, derived from the Spread entry of the board ──
    aggressive_pick: Optional[Dict[str, Any]] = None
    if risk_level in ("LOW", "MEDIUM") and spread_probs is not None:
        # Board order is (away, home), mirroring away_cover/home_cover below.
        prob_away, prob_home = spread_probs
        odds_key = "spread_h" if prob_home >= prob_away else "spread_a"

        conf = 50.0
        line_str = "Spread"
        # No break: when several Spread entries exist, the last one wins.
        for item in prediction.get("bet_summary") or []:
            if item.get("market") == "Spread":
                conf = float(item["confidence"])
                line_str = item["pick"]

        aggressive_pick = {
            "market": "SPREAD",
            "pick": line_str,
            "probability": round(max(prob_away, prob_home), 4),
            "confidence": round(conf, 1),
            "odds": round(float(data.odds_data.get(odds_key, 0.0)), 2),
        }

    scores = prediction.get("score_prediction") or {}
    home_score = scores.get("home_expected", 80.0)
    away_score = scores.get("away_expected", 80.0)
    total_score = scores.get("total_expected", 160.0)

    # ── Market board ─────────────────────────────────────────────
    # NOTE(review): every market below reuses the upstream `main_pick` as
    # its pick, a fixed confidence of 60.0, and hard-coded lines
    # (160.5 / 0.0) — confirm these placeholders are intended.
    upstream_pick = prediction.get("main_pick", "")
    mb_out: Dict[str, Any] = {
        "PLAYER_TOP": board.get("PLAYER_TOP", []),
    }

    if ml_probs is not None:
        mb_out["ML"] = {
            "pick": upstream_pick,
            "confidence": 60.0,
            "probs": {
                "1": round(ml_probs[0], 4),
                "2": round(ml_probs[1], 4),
            },
        }

    if totals_probs is not None:
        mb_out["TOTAL"] = {
            "line": 160.5,
            "pick": upstream_pick,
            "confidence": 60.0,
            "probs": {
                "under": round(totals_probs[0], 4),
                "over": round(totals_probs[1], 4),
            },
        }

    if spread_probs is not None:
        mb_out["SPREAD"] = {
            "line_home": 0.0,
            "pick": upstream_pick,
            "confidence": 60.0,
            "probs": {
                "away_cover": round(spread_probs[0], 4),
                "home_cover": round(spread_probs[1], 4),
            },
        }

    is_playable = bool(main_pick and main_pick.get("playable"))

    return {
        "model_version": str(prediction.get("engine_version") or "v28.main.basketball"),
        "match_info": {
            "match_id": data.match_id,
            "match_name": f"{data.home_team_name} vs {data.away_team_name}",
            "home_team": data.home_team_name,
            "away_team": data.away_team_name,
            "league": data.league_name,
            "match_date_ms": data.match_date_ms,
            "sport": data.sport,
        },
        "data_quality": quality,
        "risk": {
            "level": risk_level,
            "score": round(risk_score, 1),
            "is_surprise_risk": False,
            "surprise_type": "",
            "warnings": [],
        },
        "engine_breakdown": prediction.get("engine_breakdown")
        or {
            "team": 60.0,
            "player": 60.0,
            "odds": 80.0,
            "referee": 50.0,
        },
        "main_pick": main_pick,
        "bet_advice": {
            "playable": is_playable,
            "suggested_stake_units": float(main_pick.get("stake_units", 0.0))
            if is_playable
            else 0.0,
            "reason": "playable_pick_found" if is_playable else "no_bet_conditions_met",
        },
        "bet_summary": bet_summary,
        "supporting_picks": supporting,
        "aggressive_pick": aggressive_pick,
        "scenario_top5": scenarios,
        "score_prediction": {
            "ft": f"{int(round(home_score))}-{int(round(away_score))}",
            # Half-time score is approximated as 52% of the expected total.
            "ht": f"{int(round(home_score * 0.52))}-{int(round(away_score * 0.52))}",
            "xg_home": round(float(home_score), 2),
            "xg_away": round(float(away_score), 2),
            "xg_total": round(float(total_score), 2),
        },
        "market_board": mb_out,
        "reasoning_factors": reasons,
    }
|
||||
|
||||
def _build_basketball_market_rows(
|
||||
self,
|
||||
data: MatchData,
|
||||
pred: Dict[str, Any],
|
||||
) -> List[Dict[str, Any]]:
|
||||
odds = data.odds_data
|
||||
|
||||
market_board = pred.get("market_board", {})
|
||||
|
||||
# 1. Moneyline
|
||||
ml_row = None
|
||||
if "ML" in market_board:
|
||||
ml_data = market_board["ML"]
|
||||
# To get specific pick (MS 1 or MS 2), look at the probability values
|
||||
probs = list(ml_data.values())
|
||||
keys = list(ml_data.keys())
|
||||
if len(probs) >= 2:
|
||||
prob_1 = float(str(probs[0]).replace('%', '')) / 100.0
|
||||
prob_2 = float(str(probs[1]).replace('%', '')) / 100.0
|
||||
max_prob = max(prob_1, prob_2)
|
||||
|
||||
# Derive pick string
|
||||
ml_pick_val = keys[0] if prob_1 >= prob_2 else keys[1]
|
||||
ml_pick = "1" if "1" in ml_pick_val else "2"
|
||||
ml_odd_key = "ml_h" if ml_pick == "1" else "ml_a"
|
||||
|
||||
# Find confidence from bet summary
|
||||
conf = 50.0
|
||||
for b in pred.get("bet_summary", []):
|
||||
if b["market"] == "Moneyline": conf = float(b["confidence"])
|
||||
|
||||
ml_row = {
|
||||
"market": "ML",
|
||||
"pick": ml_pick,
|
||||
"probability": round(max_prob, 4),
|
||||
"confidence": round(conf, 1),
|
||||
"odds": round(float(odds.get(ml_odd_key, 0.0)), 2),
|
||||
}
|
||||
|
||||
# 2. Totals
|
||||
tot_row = None
|
||||
if "Totals" in market_board:
|
||||
tot_data = market_board["Totals"]
|
||||
probs = list(tot_data.values())
|
||||
keys = list(tot_data.keys())
|
||||
if len(probs) >= 2:
|
||||
prob_u = float(str(probs[0]).replace('%', '')) / 100.0
|
||||
prob_o = float(str(probs[1]).replace('%', '')) / 100.0
|
||||
max_prob = max(prob_u, prob_o)
|
||||
|
||||
pick_str = keys[1] if prob_o >= prob_u else keys[0]
|
||||
tot_pick = "Over" if "Over" in pick_str else "Under"
|
||||
line_val = pick_str.replace("Over", "").replace("Under", "").strip()
|
||||
|
||||
conf = 50.0
|
||||
for b in pred.get("bet_summary", []):
|
||||
if b["market"] == "Totals": conf = float(b["confidence"])
|
||||
|
||||
tot_row = {
|
||||
"market": "TOTAL",
|
||||
"pick": f"{tot_pick} {line_val}",
|
||||
"probability": round(max_prob, 4),
|
||||
"confidence": round(conf, 1),
|
||||
"odds": round(float(odds.get("tot_o" if tot_pick == "Over" else "tot_u", 0.0)), 2),
|
||||
}
|
||||
|
||||
# 3. Spread
|
||||
spr_row = None
|
||||
if "Spread" in market_board:
|
||||
spr_data = market_board["Spread"]
|
||||
probs = list(spr_data.values())
|
||||
keys = list(spr_data.keys())
|
||||
if len(probs) >= 2:
|
||||
prob_a = float(str(probs[0]).replace('%', '')) / 100.0
|
||||
prob_h = float(str(probs[1]).replace('%', '')) / 100.0
|
||||
max_prob = max(prob_a, prob_h)
|
||||
|
||||
spr_pick = "Home" if prob_h >= prob_a else "Away"
|
||||
|
||||
conf = 50.0
|
||||
line_str = ""
|
||||
for b in pred.get("bet_summary", []):
|
||||
if b["market"] == "Spread":
|
||||
conf = float(b["confidence"])
|
||||
line_str = b["pick"]
|
||||
|
||||
spr_row = {
|
||||
"market": "SPREAD",
|
||||
"pick": spr_pick + " " + line_str,
|
||||
"probability": round(max_prob, 4),
|
||||
"confidence": round(conf, 1),
|
||||
"odds": round(float(odds.get("spread_h" if spr_pick == "Home" else "spread_a", 0.0)), 2),
|
||||
}
|
||||
|
||||
# Return valid rows
|
||||
rows = []
|
||||
if ml_row: rows.append(ml_row)
|
||||
if tot_row: rows.append(tot_row)
|
||||
if spr_row: rows.append(spr_row)
|
||||
return rows
|
||||
|
||||
def _decorate_basketball_market_row(
|
||||
self,
|
||||
data: MatchData,
|
||||
prediction: Dict[str, Any],
|
||||
quality: Dict[str, Any],
|
||||
row: Dict[str, Any],
|
||||
) -> Dict[str, Any]:
|
||||
market = str(row.get("market") or "")
|
||||
raw_conf = float(row.get("confidence") or 0.0)
|
||||
prob = float(row.get("probability") or 0.0)
|
||||
odd = float(row.get("odds") or 0.0)
|
||||
|
||||
calibration = {"ML": 0.90, "TOTAL": 0.88, "SPREAD": 0.86}.get(market, 0.88)
|
||||
min_conf = {"ML": 55.0, "TOTAL": 56.0, "SPREAD": 55.0}.get(market, 55.0)
|
||||
|
||||
calibrated_conf = max(1.0, min(99.0, raw_conf * calibration))
|
||||
implied_prob = (1.0 / odd) if odd > 1.0 else 0.0
|
||||
edge = prob - implied_prob if implied_prob > 0 else 0.0
|
||||
|
||||
risk_level = str(prediction.get("risk_level", "MEDIUM")).upper()
|
||||
risk_penalty = {"LOW": 0.0, "MEDIUM": 3.0, "HIGH": 8.0, "EXTREME": 12.0}.get(
|
||||
risk_level,
|
||||
4.0,
|
||||
)
|
||||
quality_label = str(quality.get("label") or "MEDIUM").upper()
|
||||
quality_penalty = {"HIGH": 0.0, "MEDIUM": 2.0, "LOW": 6.0}.get(
|
||||
quality_label,
|
||||
4.0,
|
||||
)
|
||||
|
||||
base_score = calibrated_conf + (edge * 100.0)
|
||||
play_score = max(0.0, min(100.0, base_score - risk_penalty - quality_penalty))
|
||||
|
||||
reasons: List[str] = []
|
||||
playable = True
|
||||
|
||||
min_play_score = self.market_min_play_score.get(market, 68.0)
|
||||
min_edge = self.market_min_edge.get(market, 0.02)
|
||||
|
||||
if calibrated_conf < min_conf:
|
||||
playable = False
|
||||
reasons.append("below_calibrated_conf_threshold")
|
||||
if market in self.ODDS_REQUIRED_MARKETS and odd <= 1.01:
|
||||
playable = False
|
||||
reasons.append("market_odds_missing")
|
||||
if risk_level in ("HIGH", "EXTREME") and quality_label == "LOW":
|
||||
playable = False
|
||||
reasons.append("high_risk_low_data_quality")
|
||||
if odd > 1.0 and edge < -0.05:
|
||||
playable = False
|
||||
reasons.append("negative_model_edge")
|
||||
|
||||
if not reasons:
|
||||
reasons.append("market_passed_all_gates")
|
||||
|
||||
if not playable:
|
||||
grade = "PASS"
|
||||
stake_units = 0.0
|
||||
elif play_score >= 72:
|
||||
grade = "A"
|
||||
stake_units = 1.0
|
||||
elif play_score >= 61:
|
||||
grade = "B"
|
||||
stake_units = 0.5
|
||||
else:
|
||||
grade = "C"
|
||||
stake_units = 0.25
|
||||
|
||||
out = dict(row)
|
||||
out.update(
|
||||
{
|
||||
"raw_confidence": round(raw_conf, 1),
|
||||
"calibrated_confidence": round(calibrated_conf, 1),
|
||||
"min_required_confidence": round(min_conf, 1),
|
||||
"edge": round(edge, 4),
|
||||
"play_score": round(play_score, 1),
|
||||
"playable": playable,
|
||||
"bet_grade": grade,
|
||||
"stake_units": stake_units,
|
||||
"decision_reasons": reasons[:3],
|
||||
},
|
||||
)
|
||||
return out
|
||||
|
||||
def _build_basketball_scenarios(
|
||||
self,
|
||||
prediction: Dict[str, Any],
|
||||
) -> List[Dict[str, Any]]:
|
||||
scores = prediction.get("score_prediction", {})
|
||||
home = float(scores.get("home_expected", 80.0))
|
||||
away = float(scores.get("away_expected", 80.0))
|
||||
templates = [
|
||||
(0.00, 0.23),
|
||||
(+3.5, 0.20),
|
||||
(-3.5, 0.19),
|
||||
(+6.0, 0.16),
|
||||
(-6.0, 0.14),
|
||||
]
|
||||
out: List[Dict[str, Any]] = []
|
||||
for delta, prob in templates:
|
||||
h = int(round(home + delta))
|
||||
a = int(round(away - delta))
|
||||
out.append({"score": f"{h}-{a}", "prob": prob})
|
||||
return out
|
||||
|
||||
def _build_basketball_reasoning_factors(
|
||||
self,
|
||||
data: MatchData,
|
||||
prediction: Dict[str, Any],
|
||||
quality: Dict[str, Any],
|
||||
) -> List[str]:
|
||||
factors: List[str] = []
|
||||
|
||||
# XGBoost models are odds-aware, weight it heavily
|
||||
factors.append("market_signal_dominant")
|
||||
|
||||
if quality.get("label") in ("HIGH", "MEDIUM"):
|
||||
factors.append("player_form_signal_strong")
|
||||
else:
|
||||
factors.append("player_form_signal_limited")
|
||||
|
||||
if prediction.get("is_surprise_risk"):
|
||||
factors.append("upset_risk_detected")
|
||||
if quality.get("label") == "LOW":
|
||||
factors.append("limited_data_confidence")
|
||||
|
||||
factors.append("basketball_points_model")
|
||||
return factors
|
||||
|
||||
def _compute_basketball_data_quality(self, data: MatchData) -> Dict[str, Any]:
|
||||
flags: List[str] = []
|
||||
|
||||
has_ml = float(data.odds_data.get("ml_h", 0.0)) > 1.0 and float(data.odds_data.get("ml_a", 0.0)) > 1.0
|
||||
has_total = (
|
||||
float(data.odds_data.get("tot_line", 0.0)) > 0.0
|
||||
and float(data.odds_data.get("tot_o", 0.0)) > 1.0
|
||||
and float(data.odds_data.get("tot_u", 0.0)) > 1.0
|
||||
)
|
||||
has_spread = (
|
||||
"spread_home_line" in data.odds_data
|
||||
and float(data.odds_data.get("spread_h", 0.0)) > 1.0
|
||||
and float(data.odds_data.get("spread_a", 0.0)) > 1.0
|
||||
)
|
||||
|
||||
odds_components = [has_ml, has_total, has_spread]
|
||||
odds_score = sum(1.0 for x in odds_components if x) / 3.0
|
||||
if not has_ml:
|
||||
flags.append("missing_moneyline_odds")
|
||||
if not has_total:
|
||||
flags.append("missing_total_odds")
|
||||
if not has_spread:
|
||||
flags.append("missing_spread_odds")
|
||||
|
||||
# Basketball live lineup/referee coverage is structurally lower in this project.
|
||||
# Keep neutral baseline and rely mostly on odds depth.
|
||||
lineup_score = 0.7
|
||||
ref_score = 0.7
|
||||
|
||||
total_score = (odds_score * 0.75) + (lineup_score * 0.15) + (ref_score * 0.10)
|
||||
if total_score >= 0.75:
|
||||
label = "HIGH"
|
||||
elif total_score >= 0.52:
|
||||
label = "MEDIUM"
|
||||
else:
|
||||
label = "LOW"
|
||||
|
||||
return {
|
||||
"label": label,
|
||||
"score": round(total_score, 3),
|
||||
"home_lineup_count": len(data.home_lineup or []),
|
||||
"away_lineup_count": len(data.away_lineup or []),
|
||||
"lineup_source": data.lineup_source,
|
||||
"flags": flags,
|
||||
}
|
||||
@@ -0,0 +1,495 @@
|
||||
"""Coupon Mixin — multi-match coupon builder + daily bankers.
|
||||
|
||||
Auto-extracted mixin module — split from services/single_match_orchestrator.py.
|
||||
All methods here are composed into SingleMatchOrchestrator via inheritance.
|
||||
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
|
||||
initialised in the main __init__.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, overload
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from schemas.prediction import FullMatchPrediction
|
||||
from schemas.match_data import MatchData
|
||||
from models.v25_ensemble import V25Predictor, get_v25_predictor
|
||||
try:
|
||||
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
|
||||
except ImportError:
|
||||
class V27Predictor: # type: ignore[no-redef]
|
||||
def __init__(self): self.models = {}
|
||||
def load_models(self): return False
|
||||
def predict_all(self, features): return {}
|
||||
def compute_divergence(*args, **kwargs):
|
||||
return {}
|
||||
def compute_value_edge(*args, **kwargs):
|
||||
return {}
|
||||
from features.odds_band_analyzer import OddsBandAnalyzer
|
||||
try:
|
||||
from models.basketball_v25 import (
|
||||
BasketballMatchPrediction,
|
||||
get_basketball_v25_predictor,
|
||||
)
|
||||
except ImportError:
|
||||
BasketballMatchPrediction = Any # type: ignore[misc]
|
||||
def get_basketball_v25_predictor() -> Any:
|
||||
raise ImportError("Basketball predictor is not available")
|
||||
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
|
||||
from services.feature_enrichment import FeatureEnrichmentService
|
||||
from services.betting_brain import BettingBrain
|
||||
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
|
||||
from services.match_commentary import generate_match_commentary
|
||||
from utils.top_leagues import load_top_league_ids
|
||||
from utils.league_reliability import load_league_reliability
|
||||
from config.config_loader import build_threshold_dict, get_threshold_default
|
||||
from models.calibration import get_calibrator
|
||||
|
||||
|
||||
class CouponMixin:
|
||||
def _prefilter_match_ids(self, match_ids: List[str], limit: int = 15) -> List[str]:
|
||||
"""
|
||||
40+ maç gelirse hepsini analiz etmek çok yavaş.
|
||||
DB'den hızlıca en kaliteli limit adet maçı seç:
|
||||
- Odds verisi olan maçlar önce
|
||||
- football_ai_features'da gerçek ELO'su olan maçlar
|
||||
- Yüksek lig güvenilirliği
|
||||
"""
|
||||
if len(match_ids) <= limit:
|
||||
return match_ids
|
||||
|
||||
try:
|
||||
with psycopg2.connect(self.dsn) as conn:
|
||||
with conn.cursor(cursor_factory=RealDictCursor) as cur:
|
||||
cur.execute("""
|
||||
SELECT
|
||||
m.id,
|
||||
COUNT(oc.db_id) AS odds_count,
|
||||
COALESCE(f.home_elo, 1500) AS home_elo,
|
||||
lr.reliability_score
|
||||
FROM matches m
|
||||
LEFT JOIN odd_categories oc ON oc.match_id = m.id
|
||||
LEFT JOIN football_ai_features f ON f.match_id = m.id
|
||||
LEFT JOIN team_elo_ratings ter_h ON ter_h.team_id = m.home_team_id
|
||||
LEFT JOIN (
|
||||
SELECT league_id, AVG(home_elo) AS reliability_score
|
||||
FROM football_ai_features
|
||||
GROUP BY league_id
|
||||
) lr ON lr.league_id = m.league_id
|
||||
WHERE m.id = ANY(%s)
|
||||
GROUP BY m.id, f.home_elo, lr.reliability_score
|
||||
ORDER BY
|
||||
COUNT(oc.db_id) DESC,
|
||||
COALESCE(f.home_elo, 1500) DESC
|
||||
LIMIT %s
|
||||
""", (match_ids, limit))
|
||||
rows = cur.fetchall()
|
||||
filtered = [r["id"] for r in rows]
|
||||
# Eğer DB'den yeterli gelmediyse kalanları ekle
|
||||
remaining = [m for m in match_ids if m not in filtered]
|
||||
return filtered + remaining[:max(0, limit - len(filtered))]
|
||||
except Exception as e:
|
||||
print(f"⚠️ Prefilter failed, using original list: {e}")
|
||||
return match_ids[:limit]
|
||||
|
||||
def build_coupon(
    self,
    match_ids: List[str],
    strategy: str = "BALANCED",
    max_matches: Optional[int] = None,
    min_confidence: Optional[float] = None,
) -> Dict[str, Any]:
    """Build a multi-match coupon by picking the best bet from each match.

    Every match is analysed with ``self.analyze_match``; its main, value,
    supporting and bet-summary picks are pooled and de-duplicated by
    ``(market, pick)``. Each pick is tried against three tiers in order:
    the strategy's strict gates, its soft gates, then a shared fallback.
    The best-scoring pick per match is kept and the top matches form the
    coupon.

    Args:
        match_ids: Matches to consider. Lists longer than the strategy's
            prefilter size are first reduced via ``_prefilter_match_ids``.
        strategy: SAFE, BALANCED, AGGRESSIVE, VALUE or MIRACLE
            (case-insensitive). An unknown or empty name is treated as
            BALANCED for limits *and* scoring, and is reported as BALANCED.
        max_matches: Overrides the strategy's coupon size (at least 1 is used).
        min_confidence: Overrides the strategy's minimum calibrated confidence.

    Returns:
        Dict with ``strategy``, ``generated_at`` (UTC ISO timestamp ending in
        ``Z``), ``match_count``, ``bets``, ``total_odds``,
        ``expected_win_rate`` and ``rejected_matches``.
    """
    from datetime import datetime, timezone

    inf = float("inf")

    # Per-strategy limits and tier definitions.
    #   odds         -> inclusive (low, high) band the price must sit in
    #   weights      -> multipliers for (confidence, play_score, ev_edge)
    #   odds_term    -> ("distance", anchor, k): minus |odds - anchor| * k
    #                   ("cap", cap, k):         plus min(odds, cap) * k
    #   needs_quality-> reject picks from matches whose data quality is LOW
    profiles: Dict[str, Dict[str, Any]] = {
        "SAFE": {
            "max_matches": 4, "min_conf": 66.0, "prefilter": 12,
            "needs_quality": True,
            "strict": {
                "odds": (-inf, 2.35), "play_min": 60.0, "edge_min": 0.01,
                "blocked_risk": ("HIGH", "EXTREME"),
                "weights": (1.10, 0.90, 180.0),
                "odds_term": ("distance", 1.55, 12.0),
            },
            "soft": {
                "conf_floor": 56.0, "odds": (-inf, 2.70),
                "play_min": 50.0, "edge_min": -0.01,
                "weights": (1.0, 0.85, 140.0),
                "odds_term": ("distance", 1.65, 9.0),
            },
        },
        "BALANCED": {
            "max_matches": 5, "min_conf": 58.0, "prefilter": 15,
            "needs_quality": False,
            "strict": {
                "odds": (-inf, 3.40), "play_min": 52.0, "edge_min": 0.0,
                "blocked_risk": ("EXTREME",),
                "weights": (1.0, 1.0, 220.0),
                "odds_term": ("cap", 3.0, 3.0),
            },
            "soft": {
                "conf_floor": 48.0, "odds": (-inf, 4.20),
                "play_min": 44.0, "edge_min": -0.015,
                "weights": (0.95, 0.90, 180.0),
                "odds_term": ("cap", 3.5, 3.5),
            },
        },
        "AGGRESSIVE": {
            "max_matches": 8, "min_conf": 52.0, "prefilter": 20,
            "needs_quality": False,
            "strict": {
                # No edge gate in the strict tier for this strategy.
                "odds": (1.35, 7.50), "play_min": 46.0, "edge_min": -inf,
                "blocked_risk": ("EXTREME",),
                "weights": (0.85, 0.75, 260.0),
                "odds_term": ("cap", 6.0, 7.0),
            },
            "soft": {
                "conf_floor": 42.0, "odds": (1.25, 8.50),
                "play_min": 40.0, "edge_min": -0.02,
                "weights": (0.80, 0.70, 210.0),
                "odds_term": ("cap", 7.0, 7.5),
            },
        },
        "VALUE": {
            "max_matches": 8, "min_conf": 48.0, "prefilter": 20,
            "needs_quality": True,
            "strict": {
                "odds": (1.55, inf), "play_min": 48.0, "edge_min": 0.03,
                "blocked_risk": ("EXTREME",),
                "weights": (0.75, 0.85, 320.0),
                "odds_term": ("cap", 6.5, 8.0),
            },
            "soft": {
                "conf_floor": 40.0, "odds": (1.35, inf),
                "play_min": 40.0, "edge_min": 0.0,
                "weights": (0.70, 0.80, 260.0),
                "odds_term": ("cap", 7.0, 7.0),
            },
        },
        "MIRACLE": {
            "max_matches": 10, "min_conf": 44.0, "prefilter": 25,
            "needs_quality": False,
            "strict": {
                "odds": (2.10, inf), "play_min": 40.0, "edge_min": 0.01,
                "blocked_risk": ("EXTREME",),
                "weights": (0.55, 0.60, 260.0),
                "odds_term": ("cap", 10.0, 10.0),
            },
            "soft": {
                "conf_floor": 36.0, "odds": (1.60, inf),
                "play_min": 34.0, "edge_min": -0.02,
                "weights": (0.50, 0.55, 200.0),
                "odds_term": ("cap", 10.0, 9.0),
            },
        },
    }
    # Last-resort tier shared by every strategy.
    fallback_spec = {
        "weights": (0.60, 0.65, 120.0),
        "odds_term": ("cap", 6.0, 4.0),
    }

    def _tier_score(spec: Dict[str, Any], conf: float, play: float,
                    edge: float, price: float) -> float:
        # Weighted sum of the pick metrics plus the tier's odds preference.
        w_conf, w_play, w_edge = spec["weights"]
        kind, anchor, factor = spec["odds_term"]
        if kind == "distance":
            odds_part = -(abs(price - anchor) * factor)
        else:
            odds_part = min(price, anchor) * factor
        return conf * w_conf + play * w_play + (edge * w_edge) + odds_part

    def _rank(item: Dict[str, Any]) -> Tuple[float, float, float]:
        # Ordering key: strategy score, then confidence, then edge.
        return (
            float(item.get("strategy_score", 0.0)),
            float(item.get("confidence", 0.0)),
            float(item.get("ev_edge", 0.0)),
        )

    strategy_name = (strategy or "BALANCED").upper()
    if strategy_name not in profiles:
        # Previously an unknown name used BALANCED limits but MIRACLE scoring.
        strategy_name = "BALANCED"
    profile = profiles[strategy_name]
    strict_spec = profile["strict"]
    soft_spec = profile["soft"]
    needs_quality = profile["needs_quality"]

    max_allowed = max_matches if max_matches is not None else profile["max_matches"]
    min_conf = min_confidence if min_confidence is not None else profile["min_conf"]
    prefilter_limit = profile["prefilter"]

    # With too many matches, apply the fast DB prefilter first.
    if len(match_ids) > prefilter_limit:
        print(f"🔍 Prefiltering {len(match_ids)} → {prefilter_limit} matches for {strategy_name} coupon")
        match_ids = self._prefilter_match_ids(match_ids, prefilter_limit)

    candidates: List[Dict[str, Any]] = []
    rejected: List[Dict[str, Any]] = []

    for match_id in match_ids:
        package = self.analyze_match(match_id)
        if not package:
            rejected.append({"match_id": match_id, "reason": "match_not_found"})
            continue

        risk_level = str((package.get("risk") or {}).get("level", "MEDIUM")).upper()
        data_quality = str((package.get("data_quality") or {}).get("label", "MEDIUM")).upper()
        match_info = package.get("match_info") or {}
        quality_ok = not (needs_quality and data_quality == "LOW")

        raw_picks = [
            entry
            for entry in (
                package.get("main_pick"),
                package.get("value_pick"),
                *(package.get("supporting_picks") or []),
                *(package.get("bet_summary") or []),
            )
            if isinstance(entry, dict)
        ]

        match_candidates: List[Dict[str, Any]] = []
        seen_keys: Set[Tuple[str, str]] = set()

        for candidate in raw_picks:
            market = str(candidate.get("market") or "")
            pick = str(candidate.get("pick") or "")
            if not market or not pick:
                continue

            dedupe_key = (market, pick)
            if dedupe_key in seen_keys:
                continue
            seen_keys.add(dedupe_key)

            calibrated_conf = float(
                candidate.get("calibrated_confidence", candidate.get("confidence", 0.0))
                or 0.0
            )
            odds = float(candidate.get("odds", 0.0) or 0.0)
            probability = float(candidate.get("probability", 0.0) or 0.0)
            play_score = float(candidate.get("play_score", 0.0) or 0.0)
            ev_edge = float(
                candidate.get("ev_edge", candidate.get("edge", 0.0)) or 0.0
            )
            playable = bool(candidate.get("playable"))
            bet_grade = str(candidate.get("bet_grade", "PASS")).upper()

            # A pick without a real price can never be part of a coupon.
            if odds <= 1.01:
                continue

            strict_low, strict_high = strict_spec["odds"]
            strict_pass = (
                playable
                and calibrated_conf >= min_conf
                and bet_grade != "PASS"
                and not (odds < strict_low or odds > strict_high)
                and not play_score < strict_spec["play_min"]
                and not ev_edge < strict_spec["edge_min"]
                and risk_level not in strict_spec["blocked_risk"]
                and quality_ok
            )

            soft_low, soft_high = soft_spec["odds"]
            soft_pass = (
                calibrated_conf >= max(min_conf - 10.0, soft_spec["conf_floor"])
                and soft_low <= odds <= soft_high
                and play_score >= soft_spec["play_min"]
                and risk_level != "EXTREME"
                and quality_ok
                and ev_edge >= soft_spec["edge_min"]
            )

            fallback_pass = (
                calibrated_conf >= max(min_conf - 14.0, 34.0)
                and odds >= 1.20
                and play_score >= 32.0
                and risk_level != "EXTREME"
            )

            if strict_pass:
                selection_mode, tier_spec = "strict", strict_spec
            elif soft_pass:
                selection_mode, tier_spec = "soft", soft_spec
            elif fallback_pass:
                selection_mode, tier_spec = "fallback", fallback_spec
            else:
                continue

            strategy_score = _tier_score(
                tier_spec, calibrated_conf, play_score, ev_edge, odds
            )

            match_candidates.append(
                {
                    "match_id": match_info.get("match_id", match_id),
                    "match_name": match_info.get("match_name", ""),
                    "market": market,
                    "pick": pick,
                    "probability": probability,
                    "confidence": calibrated_conf,
                    "odds": odds,
                    "risk_level": risk_level,
                    "data_quality": data_quality,
                    "bet_grade": bet_grade,
                    "playable": playable,
                    "play_score": round(play_score, 1),
                    "ev_edge": round(ev_edge, 4),
                    "selection_mode": selection_mode,
                    "strategy_score": round(strategy_score, 3),
                }
            )

        if not match_candidates:
            rejected.append(
                {
                    "match_id": match_id,
                    "reason": "no_strategy_fit",
                    "threshold": min_conf,
                }
            )
            continue

        # Only the single best pick of each match competes for the coupon.
        match_candidates.sort(key=_rank, reverse=True)
        candidates.append(match_candidates[0])

    candidates.sort(key=_rank, reverse=True)
    selected = candidates[: max(1, max_allowed)]

    total_odds = 1.0
    win_probability = 1.0
    for chosen in selected:
        odd = float(chosen.get("odds") or 1.0)
        prob = float(chosen.get("probability") or 0.0)
        total_odds *= odd if odd > 1.0 else 1.0
        win_probability *= prob

    # Same "<naive UTC ISO>Z" text as before, without the deprecated utcnow().
    generated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    return {
        "strategy": strategy_name,
        "generated_at": generated_at,
        "match_count": len(selected),
        "bets": selected,
        "total_odds": round(total_odds, 2),
        "expected_win_rate": round(win_probability, 4),
        "rejected_matches": rejected,
    }
|
||||
|
||||
def get_daily_bankers_live(self, count: int = 3) -> List[Dict[str, Any]]:
    """Return up to ``count`` banker bets from ``live_matches`` starting soon.

    Selects at most 60 matches from ``live_matches`` whose ``mst_utc`` lies
    within the next 24 hours (earliest first) and builds a SAFE coupon from
    them with a 78.0 minimum confidence.

    Args:
        count: Number of bankers wanted; values below 1 are treated as 1.

    Returns:
        The coupon's ``bets`` list, truncated to the requested size, or an
        empty list when no upcoming live match exists.
    """
    from contextlib import closing

    # psycopg2's connection context manager only ends the transaction, so
    # closing() is needed to actually release the connection.
    with closing(psycopg2.connect(self.dsn)) as conn:
        with conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(
                """
                SELECT id
                FROM live_matches
                WHERE mst_utc > EXTRACT(EPOCH FROM NOW()) * 1000
                AND mst_utc < EXTRACT(EPOCH FROM NOW() + INTERVAL '24 hours') * 1000
                ORDER BY mst_utc ASC
                LIMIT 60
                """,
            )
            ids = [row["id"] for row in cur.fetchall()]

    if not ids:
        return []

    wanted = max(1, count)
    coupon = self.build_coupon(
        match_ids=ids,
        strategy="SAFE",
        max_matches=wanted,
        min_confidence=78.0,
    )
    return coupon.get("bets", [])[:wanted]
|
||||
|
||||
def get_daily_bankers(self, count: int = 3) -> List[Dict[str, Any]]:
    """Identify the safest, highest-value bets for the next 24 hours.

    Loads up to 50 not-started (``status = 'NS'``) matches that have odds and
    kick off within 24 hours, analyses each one and keeps main picks with
    confidence >= 75 and odds >= 1.30, ranked by
    ``confidence * (odds - 1)``.

    Args:
        count: Maximum number of bankers to return.

    Returns:
        Banker dicts (``match_id``, ``match_name``, ``league``, ``pick``,
        ``confidence``, ``odds``, ``value_score``), best first. A match whose
        analysis fails is reported on stdout and skipped.
    """
    from contextlib import closing

    now_ms = int(time.time() * 1000)
    horizon_ms = now_ms + (24 * 60 * 60 * 1000)

    # psycopg2's connection context manager only ends the transaction, so
    # closing() is needed to actually release the connection.
    with closing(psycopg2.connect(self.dsn)) as conn:
        with conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute("""
                SELECT m.id, m.match_name, m.mst_utc
                FROM matches m
                WHERE m.mst_utc >= %s AND m.mst_utc <= %s
                AND m.status = 'NS'
                AND EXISTS (SELECT 1 FROM odd_categories oc WHERE oc.match_id = m.id)
                ORDER BY m.mst_utc ASC
                LIMIT 50
                """, (now_ms, horizon_ms))
            matches = cur.fetchall()

    potential_bankers: List[Dict[str, Any]] = []
    print(f"🔍 Scanning {len(matches)} upcoming matches for Bankers...")

    for match in matches:
        match_id = match['id']
        try:
            data = self._load_match_data(match_id)
            if data is None:
                continue

            result = self.analyze_match(match_id)
            pick = result.get('main_pick') if isinstance(result, dict) else None
            if not isinstance(pick, dict):
                # No main pick for this match: nothing to evaluate.
                continue

            conf = float(
                pick.get('calibrated_confidence', pick.get('confidence', 0)) or 0.0
            )
            odds = float(pick.get('odds', 0) or 0.0)
            market = pick.get('market', '')
            pick_name = pick.get('pick', '')

            # Banker criteria: high confidence (>= 75) AND decent odds (>= 1.30).
            if conf >= 75.0 and odds >= 1.30:
                potential_bankers.append({
                    "match_id": match_id,
                    "match_name": match['match_name'] or f"{data.home_team_name} vs {data.away_team_name}",
                    "league": data.league_name,
                    "pick": f"{market} - {pick_name}",
                    "confidence": conf,
                    "odds": odds,
                    "value_score": conf * (odds - 1.0),
                })
        except Exception as e:
            # Best-effort scan: one broken match must not abort the list,
            # but the failure is surfaced instead of silently swallowed.
            print(f"⚠️ Banker scan skipped match {match_id}: {e}")

    potential_bankers.sort(key=lambda banker: banker['value_score'], reverse=True)
    return potential_bankers[:count]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,498 @@
|
||||
"""Feature Builder Mixin — V25/V28 feature vector assembly.
|
||||
|
||||
Auto-extracted mixin module — split from services/single_match_orchestrator.py.
|
||||
All methods here are composed into SingleMatchOrchestrator via inheritance.
|
||||
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
|
||||
initialised in the main __init__.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, overload
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from schemas.prediction import FullMatchPrediction
|
||||
from schemas.match_data import MatchData
|
||||
from models.v25_ensemble import V25Predictor, get_v25_predictor
|
||||
try:
|
||||
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
|
||||
except ImportError:
|
||||
class V27Predictor: # type: ignore[no-redef]
|
||||
def __init__(self): self.models = {}
|
||||
def load_models(self): return False
|
||||
def predict_all(self, features): return {}
|
||||
def compute_divergence(*args, **kwargs):
|
||||
return {}
|
||||
def compute_value_edge(*args, **kwargs):
|
||||
return {}
|
||||
from features.odds_band_analyzer import OddsBandAnalyzer
|
||||
try:
|
||||
from models.basketball_v25 import (
|
||||
BasketballMatchPrediction,
|
||||
get_basketball_v25_predictor,
|
||||
)
|
||||
except ImportError:
|
||||
BasketballMatchPrediction = Any # type: ignore[misc]
|
||||
def get_basketball_v25_predictor() -> Any:
|
||||
raise ImportError("Basketball predictor is not available")
|
||||
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
|
||||
from features.upset_engine import get_upset_engine
|
||||
from services.feature_enrichment import FeatureEnrichmentService
|
||||
from services.betting_brain import BettingBrain
|
||||
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
|
||||
from services.match_commentary import generate_match_commentary
|
||||
from utils.top_leagues import load_top_league_ids
|
||||
from utils.league_reliability import load_league_reliability
|
||||
from config.config_loader import build_threshold_dict, get_threshold_default
|
||||
from models.calibration import get_calibrator
|
||||
|
||||
|
||||
class FeatureBuilderMixin:
    """Mixin that assembles the V25 pre-match feature vector for one match.

    The methods read attributes supplied by the composing class:
    ``self.dsn``, ``self.enrichment``, ``self.odds_band_analyzer``,
    ``self.V25_ODDS_FEATURE_KEYS`` and ``self._real_market_odds``.
    """

    def _build_v25_features(self, data: MatchData) -> Dict[str, float]:
        """
        Build the single authoritative V25 pre-match feature vector.

        Every enrichment step is best-effort: a failing step keeps its default
        values, is recorded, and is reported in one warning line at the end.

        Side effects on ``data``: sets ``feature_source`` (one of
        ``"football_ai_features"``, ``"live_prematch_enrichment"`` or
        ``"fallback_defaults"``) and ``odds_band_features``.

        Args:
            data: Match payload (ids, odds, averages, timestamps).

        Returns:
            Mapping of feature name to value. Key insertion order is kept
            stable; squad features and odds-band features are merged last.
        """
        import datetime  # function-scoped, as in the original implementation

        odds = self._sanitize_v25_odds(data.odds_data or {})

        def odd(key: str) -> float:
            """Return the sanitized odd stored under *key*, 0.0 when absent."""
            return float(odds.get(key) or 0)

        # (feature suffix, sanitized-odds key). Half-time 1X2 is the only
        # market whose feature name differs from its raw key.
        main_odds = (('ms_h', 'ms_h'), ('ms_d', 'ms_d'), ('ms_a', 'ms_a'))
        secondary_odds = (
            ('ht_ms_h', 'ht_h'), ('ht_ms_d', 'ht_d'), ('ht_ms_a', 'ht_a'),
            ('ou05_o', 'ou05_o'), ('ou05_u', 'ou05_u'),
            ('ou15_o', 'ou15_o'), ('ou15_u', 'ou15_u'),
            ('ou25_o', 'ou25_o'), ('ou25_u', 'ou25_u'),
            ('ou35_o', 'ou35_o'), ('ou35_u', 'ou35_u'),
            ('ht_ou05_o', 'ht_ou05_o'), ('ht_ou05_u', 'ht_ou05_u'),
            ('ht_ou15_o', 'ht_ou15_o'), ('ht_ou15_u', 'ht_ou15_u'),
            ('btts_y', 'btts_y'), ('btts_n', 'btts_n'),
        )

        ms_h, ms_d, ms_a = odd('ms_h'), odd('ms_d'), odd('ms_a')

        # Implied probabilities (vig-normalised)
        implied_home, implied_draw, implied_away = 0.33, 0.33, 0.33
        if ms_h > 0 and ms_d > 0 and ms_a > 0:
            raw_sum = 1 / ms_h + 1 / ms_d + 1 / ms_a
            implied_home = (1 / ms_h) / raw_sum
            implied_draw = (1 / ms_d) / raw_sum
            implied_away = (1 / ms_a) / raw_sum
        # Clamped to [0, 1]: grows as home/away implied probabilities converge.
        upset_potential = max(
            0.0,
            min(
                1.0,
                1.0 - abs(implied_home - implied_away) + (implied_draw * 0.35),
            ),
        )

        # All enrichment queries share a single DB connection.
        home_elo, away_elo = 1500.0, 1500.0
        home_venue_elo, away_venue_elo = 1500.0, 1500.0
        home_form_elo_val, away_form_elo_val = 1500.0, 1500.0
        enr = self.enrichment
        # Defaults — overridden by successful queries
        home_stats = dict(enr._DEFAULT_TEAM_STATS)
        away_stats = dict(enr._DEFAULT_TEAM_STATS)
        h2h = dict(enr._DEFAULT_H2H)
        home_form = dict(enr._DEFAULT_FORM)
        away_form = dict(enr._DEFAULT_FORM)
        ref = dict(enr._DEFAULT_REFEREE)
        league = dict(enr._DEFAULT_LEAGUE)
        home_momentum, away_momentum = 0.0, 0.0
        home_rolling = dict(enr._DEFAULT_ROLLING)
        away_rolling = dict(enr._DEFAULT_ROLLING)
        home_venue = dict(enr._DEFAULT_VENUE)
        away_venue = dict(enr._DEFAULT_VENUE)
        home_rest, away_rest = 7.0, 7.0
        odds_band_features = {}
        enrichment_failures = []

        def note_failure(tag: str, exc: Exception, conn: Any = None) -> None:
            """Record a failed step and reset the transaction for the next one."""
            enrichment_failures.append(f"{tag}:{exc}")
            if conn is None:
                return
            try:
                # After a failed statement PostgreSQL rejects every further
                # command until the transaction ends; roll back so the
                # remaining steps are not doomed by this one.
                conn.rollback()
            except Exception:
                # Connection is unusable; later steps will report their own
                # failures, so there is nothing more to do here.
                pass

        try:
            conn = psycopg2.connect(self.dsn)
            try:
                with conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
                    # ELO
                    try:
                        cur.execute(
                            "SELECT home_elo, away_elo, "
                            "       home_home_elo, away_away_elo, "
                            "       home_form_elo, away_form_elo "
                            "FROM football_ai_features "
                            "WHERE match_id = %s LIMIT 1",
                            (data.match_id,),
                        )
                        elo_row = cur.fetchone()
                        if elo_row:
                            home_elo = float(elo_row.get('home_elo') or 1500.0)
                            away_elo = float(elo_row.get('away_elo') or 1500.0)
                            home_venue_elo = float(elo_row.get('home_home_elo') or home_elo)
                            away_venue_elo = float(elo_row.get('away_away_elo') or away_elo)
                            home_form_elo_val = float(elo_row.get('home_form_elo') or home_elo)
                            away_form_elo_val = float(elo_row.get('away_form_elo') or away_elo)
                        else:
                            # No precomputed row: fall back to per-team ratings.
                            cur.execute(
                                "SELECT team_id, overall_elo, home_elo, away_elo, form_elo "
                                "FROM team_elo_ratings WHERE team_id IN (%s, %s)",
                                (data.home_team_id, data.away_team_id),
                            )
                            by_team = {str(r.get("team_id")): r for r in cur.fetchall()}
                            home_row = by_team.get(str(data.home_team_id))
                            away_row = by_team.get(str(data.away_team_id))
                            if home_row:
                                home_elo = float(home_row.get("overall_elo") or 1500.0)
                                home_venue_elo = float(home_row.get("home_elo") or home_elo)
                                home_form_elo_val = float(home_row.get("form_elo") or home_elo)
                            if away_row:
                                away_elo = float(away_row.get("overall_elo") or 1500.0)
                                away_venue_elo = float(away_row.get("away_elo") or away_elo)
                                away_form_elo_val = float(away_row.get("form_elo") or away_elo)
                        setattr(data, "feature_source", "football_ai_features" if elo_row else "live_prematch_enrichment")
                        # Staleness check: both teams at exact 1500 → ELO was never computed
                        if home_elo == 1500.0 and away_elo == 1500.0:
                            enrichment_failures.append("elo_stale:both_teams_at_default_1500")
                    except Exception as e:
                        note_failure("elo", e, conn)
                        setattr(data, "feature_source", "fallback_defaults")

                    # Team stats
                    try:
                        home_stats = enr.compute_team_stats(cur, data.home_team_id, data.match_date_ms)
                        away_stats = enr.compute_team_stats(cur, data.away_team_id, data.match_date_ms)
                    except Exception as e:
                        note_failure("team_stats", e, conn)

                    # H2H
                    try:
                        h2h = enr.compute_h2h(cur, data.home_team_id, data.away_team_id, data.match_date_ms)
                    except Exception as e:
                        note_failure("h2h", e, conn)

                    # Form
                    try:
                        home_form = enr.compute_form_streaks(cur, data.home_team_id, data.match_date_ms)
                        away_form = enr.compute_form_streaks(cur, data.away_team_id, data.match_date_ms)
                    except Exception as e:
                        note_failure("form", e, conn)

                    # Referee
                    try:
                        ref = enr.compute_referee_stats(cur, data.referee_name, data.match_date_ms)
                    except Exception as e:
                        note_failure("referee", e, conn)

                    # League
                    try:
                        league = enr.compute_league_averages(cur, data.league_id, data.match_date_ms)
                    except Exception as e:
                        note_failure("league", e, conn)

                    # Momentum
                    try:
                        home_momentum = enr.compute_momentum(cur, data.home_team_id, data.match_date_ms)
                        away_momentum = enr.compute_momentum(cur, data.away_team_id, data.match_date_ms)
                    except Exception as e:
                        note_failure("momentum", e, conn)

                    # V27 Rolling + Venue + Rest
                    try:
                        home_rolling = enr.compute_rolling_stats(cur, data.home_team_id, data.match_date_ms)
                        away_rolling = enr.compute_rolling_stats(cur, data.away_team_id, data.match_date_ms)
                        home_venue = enr.compute_venue_stats(cur, data.home_team_id, data.match_date_ms, is_home=True)
                        away_venue = enr.compute_venue_stats(cur, data.away_team_id, data.match_date_ms, is_home=False)
                        home_rest = enr.compute_days_rest(cur, data.home_team_id, data.match_date_ms)
                        away_rest = enr.compute_days_rest(cur, data.away_team_id, data.match_date_ms)
                    except Exception as e:
                        note_failure("rolling/venue", e, conn)

                    # V28 Odds-Band
                    try:
                        odds_band_features = self.odds_band_analyzer.compute_all(
                            cur=cur,
                            home_team_id=data.home_team_id,
                            away_team_id=data.away_team_id,
                            league_id=data.league_id,
                            odds=odds,
                            before_ts=data.match_date_ms,
                            referee_name=data.referee_name,
                        )
                    except Exception as e:
                        note_failure("odds_band", e, conn)
            finally:
                # psycopg2's ``with connection`` only ends the transaction;
                # the connection itself must be closed explicitly.
                conn.close()
        except Exception as e:
            enrichment_failures.append(f"db_connection:{e}")
            setattr(data, "feature_source", "fallback_defaults")

        setattr(data, "odds_band_features", odds_band_features)
        if enrichment_failures:
            print(f"⚠️ Enrichment partial failures for {data.match_id}: {', '.join(enrichment_failures)}")

        # Upset engine features
        upset_atmosphere, upset_motivation, upset_fatigue = 0.0, 0.0, 0.0
        try:
            upset_engine = get_upset_engine()
            upset_feats = upset_engine.get_features(
                home_team_name=getattr(data, 'home_team_name', '') or '',
                home_team_id=data.home_team_id,
                away_team_name=getattr(data, 'away_team_name', '') or '',
                league_name=getattr(data, 'league_name', '') or '',
                # Table positions are not available here; a fixed value of 10
                # is passed for both sides.
                home_position=10,
                away_position=10,
                match_date_ms=data.match_date_ms,
                home_days_rest=int(home_rest),
                away_days_rest=int(away_rest),
            )
            upset_atmosphere = upset_feats.get('upset_atmosphere', 0.0)
            upset_motivation = upset_feats.get('upset_motivation', 0.0)
            upset_fatigue = upset_feats.get('upset_fatigue', 0.0)
        except Exception as e:
            print(f"⚠️ Upset engine failed: {e}")

        # A market counts as present only when its odd is above 1.01.
        odds_presence = {
            f'odds_{name}_present': 1.0 if odd(raw) > 1.01 else 0.0
            for name, raw in main_odds + secondary_odds
        }

        # ── Calendar features (V27) ──
        # Aware UTC conversion; datetime.utcfromtimestamp() is deprecated
        # since Python 3.12.
        match_dt = datetime.datetime.fromtimestamp(
            data.match_date_ms / 1000, tz=datetime.timezone.utc
        )
        match_month = match_dt.month
        is_season_start = 1.0 if match_month in (7, 8, 9) else 0.0
        is_season_end = 1.0 if match_month in (5, 6) else 0.0

        # ── Cup game detection: dampen home advantage in feature space ──
        _league_name = (getattr(data, 'league_name', '') or '').lower()
        _cup_keywords = ("kupa", "cup", "coupe", "copa", "coppa", "pokal",
                         "trophy", "shield", "ziraat", "süper kupa", "super cup")
        _is_cup = any(kw in _league_name for kw in _cup_keywords)

        # ── Derived / Interaction features (V27) ──
        # Cup games: home ELO advantage is ~30% weaker (rotation, lower motivation)
        elo_diff = (home_elo - away_elo) * (0.70 if _is_cup else 1.0)
        form_elo_diff = home_form_elo_val - away_form_elo_val
        attack_vs_defense_home = data.home_goals_avg - data.away_conceded_avg
        attack_vs_defense_away = data.away_goals_avg - data.home_conceded_avg
        # The xGA features are fed from the conceded-goal averages.
        xga_home = data.home_conceded_avg
        xga_away = data.away_conceded_avg
        xg_diff = xga_home - xga_away
        mom_diff = home_momentum - away_momentum
        form_momentum_interaction = mom_diff * form_elo_diff / 1000.0
        elo_form_consistency = 1.0 - abs(elo_diff - form_elo_diff) / max(abs(elo_diff), 100.0)
        upset_x_elo_gap = upset_potential * abs(elo_diff) / 500.0

        return {
            # META (1)
            'mst_utc': float(data.match_date_ms),
            # ELO (8)
            'home_overall_elo': home_elo,
            'away_overall_elo': away_elo,
            'elo_diff': elo_diff,
            'home_home_elo': home_venue_elo,
            'away_away_elo': away_venue_elo,
            'home_form_elo': home_form_elo_val,
            'away_form_elo': away_form_elo_val,
            'form_elo_diff': form_elo_diff,
            # Form (12)
            'home_goals_avg': data.home_goals_avg,
            'home_conceded_avg': data.home_conceded_avg,
            'away_goals_avg': data.away_goals_avg,
            'away_conceded_avg': data.away_conceded_avg,
            'home_clean_sheet_rate': home_form['clean_sheet_rate'],
            'away_clean_sheet_rate': away_form['clean_sheet_rate'],
            'home_scoring_rate': home_form['scoring_rate'],
            'away_scoring_rate': away_form['scoring_rate'],
            'home_winning_streak': home_form['winning_streak'],
            'away_winning_streak': away_form['winning_streak'],
            'home_unbeaten_streak': home_form['unbeaten_streak'],
            'away_unbeaten_streak': away_form['unbeaten_streak'],
            # H2H (10 — original 6 + V27 expanded 4)
            'h2h_total_matches': h2h['total_matches'],
            'h2h_home_win_rate': h2h['home_win_rate'],
            'h2h_draw_rate': h2h['draw_rate'],
            'h2h_avg_goals': h2h['avg_goals'],
            'h2h_btts_rate': h2h['btts_rate'],
            'h2h_over25_rate': h2h['over25_rate'],
            'h2h_home_goals_avg': h2h['home_goals_avg'],
            'h2h_away_goals_avg': h2h['away_goals_avg'],
            'h2h_recent_trend': h2h['recent_trend'],
            'h2h_venue_advantage': h2h['venue_advantage'],
            # Stats (8)
            'home_avg_possession': home_stats['avg_possession'],
            'away_avg_possession': away_stats['avg_possession'],
            'home_avg_shots_on_target': home_stats['avg_shots_on_target'],
            'away_avg_shots_on_target': away_stats['avg_shots_on_target'],
            'home_shot_conversion': home_stats['shot_conversion'],
            'away_shot_conversion': away_stats['shot_conversion'],
            'home_avg_corners': home_stats['avg_corners'],
            'away_avg_corners': away_stats['avg_corners'],
            # Odds (23 values + 20 presence flags)
            'odds_ms_h': ms_h,
            'odds_ms_d': ms_d,
            'odds_ms_a': ms_a,
            'implied_home': implied_home,
            'implied_draw': implied_draw,
            'implied_away': implied_away,
            **{f'odds_{name}': odd(raw) for name, raw in secondary_odds},
            **odds_presence,
            # League (9 — xga 2 + original 2 + V27 expanded 5)
            'home_xga': xga_home,
            'away_xga': xga_away,
            'league_avg_goals': league['avg_goals'],
            'league_zero_goal_rate': league['zero_goal_rate'],
            'league_home_win_rate': league['home_win_rate'],
            'league_draw_rate': league['draw_rate'],
            'league_btts_rate': league['btts_rate'],
            'league_ou25_rate': league['ou25_rate'],
            'league_reliability_score': league['reliability_score'],
            # Upset (4)
            'upset_atmosphere': upset_atmosphere,
            'upset_motivation': upset_motivation,
            'upset_fatigue': upset_fatigue,
            'upset_potential': upset_potential,
            # Referee (5)
            'referee_home_bias': ref['home_bias'],
            'referee_avg_goals': ref['avg_goals'],
            'referee_cards_total': ref['cards_total'],
            'referee_avg_yellow': ref['avg_yellow'],
            'referee_experience': ref['experience'],
            # Momentum (3)
            'home_momentum_score': home_momentum,
            'away_momentum_score': away_momentum,
            'momentum_diff': mom_diff,
            # ── V27 Rolling Stats (12) ──
            'home_rolling5_goals': home_rolling['rolling5_goals'],
            'home_rolling5_conceded': home_rolling['rolling5_conceded'],
            'home_rolling10_goals': home_rolling['rolling10_goals'],
            'home_rolling10_conceded': home_rolling['rolling10_conceded'],
            'home_rolling20_goals': home_rolling['rolling20_goals'],
            'home_rolling20_conceded': home_rolling['rolling20_conceded'],
            'away_rolling5_goals': away_rolling['rolling5_goals'],
            'away_rolling5_conceded': away_rolling['rolling5_conceded'],
            'away_rolling10_goals': away_rolling['rolling10_goals'],
            'away_rolling10_conceded': away_rolling['rolling10_conceded'],
            'home_rolling5_cs': home_rolling['rolling5_cs'],
            'away_rolling5_cs': away_rolling['rolling5_cs'],
            # ── V27 Venue Stats (4) ──
            'home_venue_goals': home_venue['venue_goals'],
            'home_venue_conceded': home_venue['venue_conceded'],
            'away_venue_goals': away_venue['venue_goals'],
            'away_venue_conceded': away_venue['venue_conceded'],
            # ── V27 Goal Trend (2) ──
            'home_goal_trend': home_rolling['rolling5_goals'] - home_rolling['rolling10_goals'],
            'away_goal_trend': away_rolling['rolling5_goals'] - away_rolling['rolling10_goals'],
            # ── V27 Calendar (5) ──
            'home_days_rest': home_rest,
            'away_days_rest': away_rest,
            'match_month': float(match_month),
            'is_season_start': is_season_start,
            'is_season_end': is_season_end,
            # ── V27 Interaction (6) ──
            'attack_vs_defense_home': attack_vs_defense_home,
            'attack_vs_defense_away': attack_vs_defense_away,
            'xg_diff': xg_diff,
            'form_momentum_interaction': form_momentum_interaction,
            'elo_form_consistency': elo_form_consistency,
            'upset_x_elo_gap': upset_x_elo_gap,
            # Squad Features (21) — PlayerPredictorEngine
            **self._get_squad_features(data),
            # V28 Odds-Band Historical Performance Features
            **odds_band_features,
        }

    def _get_squad_features(self, data: MatchData) -> Dict[str, float]:
        """Non-fatal squad analysis returning 21 player-level features.

        Any exception raised while obtaining or reading the player prediction
        is reported and the neutral ``defaults`` below are returned instead.
        """
        defaults = {
            'home_squad_quality': 12.0, 'away_squad_quality': 12.0, 'squad_diff': 0.0,
            'home_key_players': 3.0, 'away_key_players': 3.0,
            'home_missing_impact': 0.0, 'away_missing_impact': 0.0,
            'home_goals_form': 1.3, 'away_goals_form': 1.3,
            'home_lineup_goals_per90': 0.0, 'away_lineup_goals_per90': 0.0,
            'home_lineup_assists_per90': 0.0, 'away_lineup_assists_per90': 0.0,
            'home_squad_continuity': 0.5, 'away_squad_continuity': 0.5,
            'home_top_scorer_form': 0.0, 'away_top_scorer_form': 0.0,
            'home_avg_player_exp': 0.0, 'away_avg_player_exp': 0.0,
            'home_goals_diversity': 0.0, 'away_goals_diversity': 0.0,
        }
        try:
            engine = get_player_predictor()
            pred = engine.predict(
                match_id=data.match_id,
                home_team_id=data.home_team_id,
                away_team_id=data.away_team_id,
                home_lineup=data.home_lineup,
                away_lineup=data.away_lineup,
                sidelined_data=data.sidelined_data,
            )
            result = {}
            for name in defaults:
                # A falsy prediction value becomes 0.0, except squad
                # continuity, which falls back to the neutral 0.5.
                fallback = 0.5 if name.endswith('_squad_continuity') else 0.0
                result[name] = float(getattr(pred, name) or fallback)
            for side in ('home', 'away'):
                sq = result[f'{side}_squad_quality']
                if sq > 50 or sq < 0:
                    print(f"🚨 SCALE MISMATCH: {side}_squad_quality={sq:.1f} "
                          f"(expected 3-36). Check player_predictor formula!")
            return result
        except Exception as e:
            print(f"⚠️ Squad features failed: {e}")
            return defaults

    def _sanitize_v25_odds(self, odds_data: Dict[str, Any]) -> Dict[str, float]:
        """Return the odds the V25 features consume, cleaned per market.

        Every key in ``self.V25_ODDS_FEATURE_KEYS`` is always present in the
        result; the auxiliary markets (double chance, odd/even, cards,
        handicap) are included only when the input carries them. Each value
        comes from ``self._real_market_odds(odds_data, key)``.
        """
        optional_keys = (
            "dc_1x", "dc_x2", "dc_12", "oe_odd", "oe_even",
            "cards_o", "cards_u", "hcap_h", "hcap_d", "hcap_a",
        )
        sanitized: Dict[str, float] = {
            key: self._real_market_odds(odds_data, key)
            for key in self.V25_ODDS_FEATURE_KEYS
        }
        for key in optional_keys:
            if key in odds_data:
                sanitized[key] = self._real_market_odds(odds_data, key)
        return sanitized
|
||||
@@ -0,0 +1,231 @@
|
||||
"""HT/MS Mixin — analyze_match_htms endpoint and helpers.
|
||||
|
||||
Auto-extracted mixin module — split from services/single_match_orchestrator.py.
|
||||
All methods here are composed into SingleMatchOrchestrator via inheritance.
|
||||
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
|
||||
initialised in the main __init__.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, overload
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from schemas.prediction import FullMatchPrediction
|
||||
from schemas.match_data import MatchData
|
||||
from models.v25_ensemble import V25Predictor, get_v25_predictor
|
||||
try:
|
||||
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
|
||||
except ImportError:
|
||||
class V27Predictor: # type: ignore[no-redef]
|
||||
def __init__(self): self.models = {}
|
||||
def load_models(self): return False
|
||||
def predict_all(self, features): return {}
|
||||
def compute_divergence(*args, **kwargs):
|
||||
return {}
|
||||
def compute_value_edge(*args, **kwargs):
|
||||
return {}
|
||||
from features.odds_band_analyzer import OddsBandAnalyzer
|
||||
try:
|
||||
from models.basketball_v25 import (
|
||||
BasketballMatchPrediction,
|
||||
get_basketball_v25_predictor,
|
||||
)
|
||||
except ImportError:
|
||||
BasketballMatchPrediction = Any # type: ignore[misc]
|
||||
def get_basketball_v25_predictor() -> Any:
|
||||
raise ImportError("Basketball predictor is not available")
|
||||
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
|
||||
from services.feature_enrichment import FeatureEnrichmentService
|
||||
from services.betting_brain import BettingBrain
|
||||
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
|
||||
from services.match_commentary import generate_match_commentary
|
||||
from utils.top_leagues import load_top_league_ids
|
||||
from utils.league_reliability import load_league_reliability
|
||||
from config.config_loader import build_threshold_dict, get_threshold_default
|
||||
from models.calibration import get_calibrator
|
||||
|
||||
|
||||
class HtmsMixin:
    """Mixin providing the HT/MS (half-time / full-time) upset endpoint.

    Relies on attributes and methods supplied by the composing class:
    ``self.top_league_ids``, ``self._load_match_data``, ``self.analyze_match``
    and ``self._to_float``.
    """

    def analyze_match_htms(self, match_id: str) -> Optional[Dict[str, Any]]:
        """
        HT/MS focused response for upset-hunting workflows.

        This endpoint is intentionally additive and does not mutate the
        standard /v20plus/analyze package contract.

        Returns:
            ``None`` when the match cannot be loaded or the base analysis is
            empty; a ``{"status": "skip", ...}`` payload for non-football
            matches, leagues outside ``top_league_ids``, or missing MS/HT
            odds; otherwise a ``{"status": "ok", ...}`` payload built from
            the base analysis package.
        """
        data = self._load_match_data(match_id)
        if data is None:
            return None

        if str(data.sport or "").lower() != "football":
            return {
                "status": "skip",
                "match_id": match_id,
                "reason": "unsupported_sport",
                "engine_used": "htms_router",
            }

        is_top_league = self._is_top_league(data.league_id)
        engine_used = "v20plus_top_htms"

        # Hard gate: HT/MS upset model is trained on top leagues only.
        if not is_top_league:
            return {
                "status": "skip",
                "match_id": match_id,
                "reason": "out_of_training_scope",
                "engine_used": engine_used,
                "data_quality": {
                    "label": "LOW",
                    "flags": ["league_out_of_scope"],
                },
            }

        missing_requirements = self._missing_htms_requirements(data)
        if missing_requirements:
            return {
                "status": "skip",
                "match_id": match_id,
                "reason": "missing_critical_data",
                "missing": missing_requirements,
                "engine_used": engine_used,
                "data_quality": {
                    "label": "LOW",
                    "flags": [f"missing_{item}" for item in missing_requirements],
                },
            }

        base_package = self.analyze_match(match_id)
        if not base_package:
            return None
        data_quality = base_package.get("data_quality", {})
        market_board = base_package.get("market_board", {})
        ms_market = market_board.get("MS", {})
        ht_market = market_board.get("HT", {})
        htft_probs = market_board.get("HTFT", {}).get("probs", {})

        # The four HT/FT outcomes where the half-time state does not hold
        # until full time. ``or 0.0`` keeps an explicit None from crashing
        # the float conversion.
        reversal_probs = {
            outcome: float(htft_probs.get(outcome, 0.0) or 0.0)
            for outcome in ("1/2", "2/1", "X/1", "X/2")
        }
        top_reversal = max(reversal_probs.items(), key=lambda item: item[1])

        ms_conf = float(ms_market.get("confidence", 0.0) or 0.0)
        ht_conf = float(ht_market.get("confidence", 0.0) or 0.0)
        base_conf = (ms_conf + ht_conf) / 2.0

        # Each penalty can only lower the cap; the tightest one wins.
        confidence_cap = 100.0
        penalties: List[str] = []
        if data.lineup_source == "probable_xi":
            confidence_cap = min(confidence_cap, 72.0)
            penalties.append("lineup_probable_xi")
        if data.lineup_source == "none":
            confidence_cap = min(confidence_cap, 58.0)
            penalties.append("lineup_unavailable")
        if str(data_quality.get("label", "LOW")).upper() == "LOW":
            confidence_cap = min(confidence_cap, 55.0)
            penalties.append("low_data_quality")

        final_conf = min(base_conf, confidence_cap)

        upset_score = self._compute_htms_upset_score(
            reversal_probs=reversal_probs,
            odds_data=data.odds_data,
            is_top_league=is_top_league,
        )
        upset_threshold = 58.0 if is_top_league else 54.0
        upset_playable = (
            upset_score >= upset_threshold
            and top_reversal[1] >= 0.045
            and final_conf >= 45.0
            and "low_data_quality" not in penalties
        )

        return {
            "status": "ok",
            "engine_used": engine_used,
            "match_info": base_package.get("match_info", {}),
            "data_quality": data_quality,
            "htms_core": {
                "ms_pick": ms_market.get("pick"),
                "ms_confidence": round(ms_conf, 1),
                "ht_pick": ht_market.get("pick"),
                "ht_confidence": round(ht_conf, 1),
                "combined_confidence": round(final_conf, 1),
                "confidence_cap": round(confidence_cap, 1),
                "penalties": penalties,
            },
            "surprise_hunter": {
                "upset_score": round(upset_score, 1),
                "threshold": upset_threshold,
                "playable": upset_playable,
                "top_reversal_pick": top_reversal[0],
                "top_reversal_prob": round(top_reversal[1], 4),
                "reversal_probs": {
                    key: round(value, 4) for key, value in reversal_probs.items()
                },
            },
            "risk": base_package.get("risk", {}),
            "reasoning_factors": base_package.get("reasoning_factors", []),
        }

    def _is_top_league(self, league_id: Optional[str]) -> bool:
        """Return True when *league_id* (compared as a string) is in ``self.top_league_ids``."""
        if not league_id:
            return False
        return str(league_id) in self.top_league_ids

    def _missing_htms_requirements(self, data: MatchData) -> List[str]:
        """List the odds groups HT/MS analysis needs but *data* lacks.

        Returns a list containing ``"ms_odds"`` and/or ``"ht_odds"`` when any
        of the three odds in that group is missing or not above 1.0. Missing
        odds data (``None``) is treated as an empty mapping.
        """
        odds = data.odds_data or {}
        required_groups = (
            ("ms_odds", ("ms_h", "ms_d", "ms_a")),
            ("ht_odds", ("ht_h", "ht_d", "ht_a")),
        )
        return [
            label
            for label, keys in required_groups
            if not all(float(odds.get(k, 0.0) or 0.0) > 1.0 for k in keys)
        ]

    def _compute_htms_upset_score(
        self,
        reversal_probs: Dict[str, float],
        odds_data: Dict[str, float],
        is_top_league: bool,
    ) -> float:
        """Score (0-100) how strongly the match signals an HT/FT reversal.

        Weights: 60% largest reversal probability, 25% sum of reversal
        probabilities, 15% home/away odds gap (saturating at a gap of 2.0).
        Non-top leagues are scaled by 0.92.
        """
        odds = odds_data or {}
        ms_h = self._to_float(odds.get("ms_h"), 0.0)
        ms_a = self._to_float(odds.get("ms_a"), 0.0)
        # Without two valid prices the favourite gap contributes nothing.
        if ms_h <= 1.0 or ms_a <= 1.0:
            favorite_gap = 0.0
        else:
            favorite_gap = abs(ms_h - ms_a)

        reversal_max = max(reversal_probs.values()) if reversal_probs else 0.0
        reversal_sum = sum(reversal_probs.values())

        # Strong favorite + reversal probability is the core upset signal.
        gap_factor = min(1.0, favorite_gap / 2.0)
        score = (
            (reversal_max * 100.0 * 0.60)
            + (reversal_sum * 100.0 * 0.25)
            + (gap_factor * 100.0 * 0.15)
        )

        if not is_top_league:
            # Non-top leagues are noisier; keep it slightly conservative.
            score *= 0.92
        return max(0.0, min(100.0, score))
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,662 @@
|
||||
"""Prediction Mixin — V25 signal extraction and prediction building.
|
||||
|
||||
Auto-extracted mixin module — split from services/single_match_orchestrator.py.
|
||||
All methods here are composed into SingleMatchOrchestrator via inheritance.
|
||||
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
|
||||
initialised in the main __init__.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, overload
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from schemas.prediction import FullMatchPrediction
|
||||
from schemas.match_data import MatchData
|
||||
from models.v25_ensemble import V25Predictor, get_v25_predictor
|
||||
try:
|
||||
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
|
||||
except ImportError:
|
||||
class V27Predictor: # type: ignore[no-redef]
|
||||
def __init__(self): self.models = {}
|
||||
def load_models(self): return False
|
||||
def predict_all(self, features): return {}
|
||||
def compute_divergence(*args, **kwargs):
|
||||
return {}
|
||||
def compute_value_edge(*args, **kwargs):
|
||||
return {}
|
||||
from features.odds_band_analyzer import OddsBandAnalyzer
|
||||
try:
|
||||
from models.basketball_v25 import (
|
||||
BasketballMatchPrediction,
|
||||
get_basketball_v25_predictor,
|
||||
)
|
||||
except ImportError:
|
||||
BasketballMatchPrediction = Any # type: ignore[misc]
|
||||
def get_basketball_v25_predictor() -> Any:
|
||||
raise ImportError("Basketball predictor is not available")
|
||||
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
|
||||
from services.feature_enrichment import FeatureEnrichmentService
|
||||
from services.betting_brain import BettingBrain
|
||||
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
|
||||
from services.match_commentary import generate_match_commentary
|
||||
from utils.top_leagues import load_top_league_ids
|
||||
from utils.league_reliability import load_league_reliability
|
||||
from config.config_loader import build_threshold_dict, get_threshold_default, get_config
|
||||
from models.calibration import get_calibrator
|
||||
from models.league_model import get_league_model_loader, FILE_TO_SIGNAL
|
||||
|
||||
|
||||
class PredictionMixin:
|
||||
def _get_score_model(self) -> Optional[Dict]:
|
||||
"""Load XGBoost score prediction model (non-fatal)."""
|
||||
if hasattr(self, "_score_model_cache"):
|
||||
return self._score_model_cache
|
||||
score_model_path = os.path.join(
|
||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
|
||||
"models", "xgb_score.pkl",
|
||||
)
|
||||
try:
|
||||
if os.path.exists(score_model_path):
|
||||
with open(score_model_path, "rb") as f:
|
||||
model_data = pickle.load(f)
|
||||
if all(k in model_data for k in ("home_model", "away_model", "ht_home_model", "ht_away_model", "features")):
|
||||
self._score_model_cache = model_data
|
||||
print(f"[SCORE] ✅ Score model loaded ({len(model_data['features'])} features)")
|
||||
return self._score_model_cache
|
||||
except Exception as e:
|
||||
print(f"[SCORE] ⚠ Load failed (non-fatal, using heuristic): {e}")
|
||||
self._score_model_cache = None
|
||||
return None
|
||||
|
||||
def _predict_score_with_model(self, features: Dict[str, float]) -> Optional[Dict[str, float]]:
|
||||
"""Predict FT/HT scores using XGBoost score model."""
|
||||
score_model = self._get_score_model()
|
||||
if score_model is None:
|
||||
return None
|
||||
try:
|
||||
import pandas as _pd
|
||||
model_features = score_model["features"]
|
||||
row = {f: float(features.get(f, 0)) for f in model_features}
|
||||
df = _pd.DataFrame([row])
|
||||
ft_home = max(0.0, float(score_model["home_model"].predict(df)[0]))
|
||||
ft_away = max(0.0, float(score_model["away_model"].predict(df)[0]))
|
||||
ht_home = max(0.0, float(score_model["ht_home_model"].predict(df)[0]))
|
||||
ht_away = max(0.0, float(score_model["ht_away_model"].predict(df)[0]))
|
||||
return {
|
||||
"ft_home": round(ft_home, 2),
|
||||
"ft_away": round(ft_away, 2),
|
||||
"ht_home": round(ht_home, 2),
|
||||
"ht_away": round(ht_away, 2),
|
||||
}
|
||||
except Exception as e:
|
||||
print(f"[SCORE] ⚠ Prediction error (fallback to heuristic): {e}")
|
||||
return None
|
||||
|
||||
_V25_KEY_MAP = {
|
||||
"ms": "MS",
|
||||
"ou15": "OU15",
|
||||
"ou25": "OU25",
|
||||
"ou35": "OU35",
|
||||
"btts": "BTTS",
|
||||
"ht_result": "HT",
|
||||
"ht_ou05": "HT_OU05",
|
||||
"ht_ou15": "HT_OU15",
|
||||
"htft": "HTFT",
|
||||
"cards_ou45": "CARDS",
|
||||
"handicap_ms": "HCAP",
|
||||
"odd_even": "OE",
|
||||
}
|
||||
|
||||
def _get_v25_signal(
    self,
    data: MatchData,
    features: Optional[Dict[str, float]] = None,
) -> Dict[str, Any]:
    """
    Get V25 ensemble predictions for all available markets.

    Returns a dict keyed by UPPERCASE market name (MS, OU25, BTTS, etc.)
    each with a 'probs' sub-dict that _prob_map can consume.

    CRITICAL: Keys MUST be uppercase to match _build_v25_prediction lookups.

    Markets are filled in two passes: first from a league-specific model
    (when one can be loaded for ``data.league_id``), then every market still
    missing is predicted by the general V25 ensemble and passed through
    temperature scaling + per-outcome calibration.

    Args:
        data: Match being predicted; only ``league_id`` is read here directly.
        features: Pre-built feature row. When falsy, the row is built with
            ``self._build_v25_features(data)``.

    Returns:
        ``{MARKET: {"probs", "raw_probs", "pick", "probability", "confidence"}}``;
        league-model entries additionally carry ``"source": "league_specific"``.

    Raises:
        RuntimeError: if not a single market produced a prediction.
    """
    v25 = self._get_v25_predictor()
    feature_row = features or self._build_v25_features(data)

    signal: Dict[str, Any] = {}

    # ── League-specific model override ─────────────────────────────────
    league_id = getattr(data, "league_id", None)
    league_model = None
    if league_id:
        try:
            league_model = get_league_model_loader().get(league_id)
        except Exception as exc:
            # Best-effort: a broken league model must not block the general
            # V25 path, but the failure is logged instead of vanishing.
            print(f" [LEAGUE-MODEL] ⚠ {league_id}: load failed, using general V25: {exc}")
            league_model = None

    if league_model:
        # Predict all available markets with league-specific XGBoost
        for mkey, sig_key in FILE_TO_SIGNAL.items():
            probs = league_model.predict_market(mkey, feature_row)
            if probs:
                best_label = max(probs, key=probs.__getitem__)
                signal[sig_key] = {
                    "probs": probs,
                    "raw_probs": probs,
                    "pick": best_label,
                    "probability": float(probs[best_label]),
                    "confidence": round(float(probs[best_label]) * 100.0, 1),
                    "source": "league_specific",
                }
        if signal:
            print(f" [LEAGUE-MODEL] {league_id}: {len(signal)} markets predicted")
        # Fill remaining markets from general V25 (markets not in league model)
        # fall through to general prediction below for missing ones

    def _temperature_scale(probs_dict: Dict[str, float], temperature: float = 1.5) -> Dict[str, float]:
        """
        Apply temperature scaling to soften overconfident model outputs.

        LightGBM often produces extreme probabilities (e.g., 0.999 / 0.001).
        Each probability is clamped to [eps, 1 - eps], its natural log is
        divided by T, and the results are re-normalised with a softmax
        (equivalent to p ** (1 / T) followed by normalisation).
        T=1.0 → no change, T>1 → softer probabilities.

        Standard approach for post-hoc model calibration (Guo et al., 2017).

        V34: Reduced from 2.5 to 1.5 — V25 model is already calibrated via
        odds-aware training. Excessive flattening was destroying signal.
        """
        eps = 1e-7  # numerical stability
        n = len(probs_dict)

        # V34: Reduced temperature — odds-aware model is already calibrated
        # Binary markets (2-class) tend to be more overconfident in LGB
        if n <= 2:
            T = max(temperature, 1.5)  # was 2.0
        elif n == 3:
            T = max(temperature * 0.8, 1.2)  # was 1.5 — 3-way slightly less aggressive
        else:
            T = max(temperature * 0.6, 1.0)  # was 1.3 — 9-way (HTFT) already spread

        # Take log-probabilities and apply temperature
        labels = list(probs_dict.keys())
        scaled_logs = []
        for label in labels:
            p = max(eps, min(1.0 - eps, float(probs_dict[label])))
            scaled_logs.append(math.log(p) / T)

        # Softmax re-normalization (shifted by the max for numerical stability)
        max_log = max(scaled_logs)
        exp_vals = [math.exp(value - max_log) for value in scaled_logs]
        total = sum(exp_vals)

        return {label: exp_vals[i] / total for i, label in enumerate(labels)}

    calibrator = get_calibrator()
    _temperature = float(get_config().get('model_ensemble.temperature', 1.5))

    # Map "<market_key>_<label>" (lowercased) → calibrator market key
    _CAL_KEY_MAP: Dict[str, str] = {
        "ms_1": "ms_home", "ms_x": "ms_draw", "ms_2": "ms_away",
        "ou15_over": "ou15", "ou15_under": "ou15",
        "ou25_over": "ou25", "ou25_under": "ou25",
        "ou35_over": "ou35", "ou35_under": "ou35",
        "btts_yes": "btts", "btts_no": "btts",
        "ht_1": "ht_home", "ht_x": "ht_draw", "ht_2": "ht_away",
    }

    def _enrich_signal_entry(probs_dict: Dict[str, float], market_key: str = "") -> Dict[str, Any]:
        """Temperature scaling + Isotonic calibration pipeline."""
        scaled_probs = _temperature_scale(probs_dict, temperature=_temperature)

        # Isotonic calibration per outcome (if trained models exist)
        if market_key:
            calibrated = {}
            for label, prob in scaled_probs.items():
                raw_key = f"{market_key}_{label}".lower().replace(" ", "_")
                cal_key = _CAL_KEY_MAP.get(raw_key, raw_key)
                calibrated[label] = calibrator.calibrate(cal_key, prob)
            total = sum(calibrated.values())
            if total > 0:
                # Adopt calibrated values only when they can be normalised;
                # otherwise keep the temperature-scaled distribution.
                scaled_probs = {k: v / total for k, v in calibrated.items()}

        best_label = max(scaled_probs, key=scaled_probs.__getitem__)
        best_prob = float(scaled_probs[best_label])
        return {
            "probs": scaled_probs,
            "raw_probs": probs_dict,
            "pick": best_label,
            "probability": best_prob,
            "confidence": round(best_prob * 100.0, 1),
        }

    # Core markets using dedicated methods (skip if league model already covered them)
    if "MS" not in signal:
        h, d, a = v25.predict_ms(feature_row)
        signal["MS"] = _enrich_signal_entry({"1": h, "X": d, "2": a}, "ms")
        print(f" [V25-SIGNAL] MS → H={h:.4f} D={d:.4f} A={a:.4f}")
    else:
        print(f" [LEAGUE-MODEL] MS → {signal['MS']['probs']}")

    if "OU25" not in signal:
        over25, under25 = v25.predict_ou25(feature_row)
        signal["OU25"] = _enrich_signal_entry({"Over": over25, "Under": under25}, "ou25")
        print(f" [V25-SIGNAL] OU25 → O={over25:.4f} U={under25:.4f}")

    if "BTTS" not in signal:
        btts_y, btts_n = v25.predict_btts(feature_row)
        signal["BTTS"] = _enrich_signal_entry({"Yes": btts_y, "No": btts_n}, "btts")
        print(f" [V25-SIGNAL] BTTS → Y={btts_y:.4f} N={btts_n:.4f}")

    # Additional markets via generic predict_market (skip if league model covered them).
    # Only the label names (in order) and the number of labels of each map are
    # used below; ``None`` as the map marks the 9-way HTFT market.
    for model_key, label_map in [
        ("ou15", {"Over": 0, "Under": None}),
        ("ou35", {"Over": 0, "Under": None}),
        ("ht_result", {"1": 0, "X": 1, "2": 2}),
        ("ht_ou05", {"Over": 0, "Under": None}),
        ("ht_ou15", {"Over": 0, "Under": None}),
        ("htft", None),
        ("cards_ou45", {"Over": 0, "Under": None}),
        ("handicap_ms", {"1": 0, "X": 1, "2": 2}),
        ("odd_even", {"Odd": 0, "Even": None}),
    ]:
        out_key = str(self._V25_KEY_MAP.get(model_key, model_key.upper()))
        if out_key in signal:
            continue  # already predicted by league-specific model
        if not v25.has_market(model_key):
            continue
        raw = v25.predict_market(model_key, feature_row)
        if raw is None:
            continue

        if label_map is None:
            # HTFT — 9 combinations; outcomes missing from raw are padded with 0.0
            htft_labels = ["1/1", "1/X", "1/2", "X/1", "X/X", "X/2", "2/1", "2/X", "2/2"]
            probs_dict = {}
            for i, label in enumerate(htft_labels):
                probs_dict[label] = float(raw[i]) if i < len(raw) else 0.0
            signal[out_key] = _enrich_signal_entry(probs_dict, model_key)
        elif len(label_map) == 2:
            # Binary market: raw[0] belongs to the first label, the second is its complement
            labels = list(label_map.keys())
            p = float(raw[0]) if len(raw) >= 1 else None
            if p is None:
                print(f" [V25-SIGNAL] {out_key} → EMPTY raw output, skipped")
                continue
            signal[out_key] = _enrich_signal_entry({labels[0]: p, labels[1]: 1.0 - p}, model_key)
        elif len(label_map) == 3:
            # 3-class market
            labels = list(label_map.keys())
            probs_dict = {}
            for i, label in enumerate(labels):
                if i >= len(raw):
                    print(f" [V25-SIGNAL] {out_key} → insufficient probabilities in raw output")
                    break
                probs_dict[label] = float(raw[i])
            else:
                # Reached only when all three probabilities were present.
                signal[out_key] = _enrich_signal_entry(probs_dict, model_key)

        if out_key in signal:
            print(f" [V25-SIGNAL] {out_key} → {signal[out_key]['probs']}")

    print(f" [V25-SIGNAL] Total markets with real predictions: {len(signal)}")
    if not signal:
        raise RuntimeError("V25 model produced ZERO market predictions — cannot continue")

    return signal
|
||||
|
||||
@staticmethod
|
||||
def _prob_map(signal: Optional[Dict[str, Any]], market: str, defaults: Dict[str, float]) -> Dict[str, float]:
|
||||
"""Extract normalised probabilities from signal.
|
||||
|
||||
If the signal contains real model output for this market, use it.
|
||||
If the market is missing from the signal, log a warning and return
|
||||
the defaults as a LAST RESORT (so the pipeline doesn't crash).
|
||||
The defaults are ONLY used for non-core / secondary markets that
|
||||
may not have a trained model yet (e.g. CARDS, HCAP, OE).
|
||||
"""
|
||||
market_payload = signal.get(market, {}) if isinstance(signal, dict) else {}
|
||||
probs = market_payload.get("probs", {}) if isinstance(market_payload, dict) else {}
|
||||
if not isinstance(probs, dict) or not probs:
|
||||
print(f" ⚠️ [PROB_MAP] Market '{market}' NOT found in V25 signal — model output missing")
|
||||
return dict(defaults)
|
||||
out = {key: float(probs.get(key, value)) for key, value in defaults.items()}
|
||||
total = sum(out.values())
|
||||
if total <= 0:
|
||||
print(f" ⚠️ [PROB_MAP] Market '{market}' has zero total probability")
|
||||
return dict(defaults)
|
||||
return {key: value / total for key, value in out.items()}
|
||||
|
||||
@staticmethod
|
||||
def _is_cup_game(league_name: str) -> bool:
|
||||
"""Detect cup/knockout competitions where home advantage is significantly weaker."""
|
||||
name = (league_name or "").lower()
|
||||
cup_keywords = (
|
||||
"kupa", "cup", "coupe", "copa", "coppa", "pokal",
|
||||
"trophy", "shield", "challenge",
|
||||
"ziraat", "süper kupa", "super cup",
|
||||
)
|
||||
return any(kw in name for kw in cup_keywords)
|
||||
|
||||
@staticmethod
|
||||
def _best_prob_pick(prob_map: Dict[str, float]) -> Tuple[str, float]:
|
||||
if not prob_map:
|
||||
return "", 0.0
|
||||
pick = max(prob_map, key=prob_map.__getitem__)
|
||||
return pick, float(prob_map[pick])
|
||||
|
||||
@staticmethod
|
||||
def _poisson_score_top5(home_xg: float, away_xg: float, max_goals: int = 5) -> List[Dict[str, Any]]:
|
||||
def poisson_p(lmbda: float, k: int) -> float:
|
||||
return math.exp(-lmbda) * (lmbda ** k) / math.factorial(k)
|
||||
|
||||
scores: List[Tuple[str, float]] = []
|
||||
for home_goals in range(max_goals + 1):
|
||||
for away_goals in range(max_goals + 1):
|
||||
prob = poisson_p(home_xg, home_goals) * poisson_p(away_xg, away_goals)
|
||||
scores.append((f"{home_goals}-{away_goals}", prob))
|
||||
scores.sort(key=lambda item: item[1], reverse=True)
|
||||
return [
|
||||
{"score": score, "prob": round(prob, 4)}
|
||||
for score, prob in scores[:5]
|
||||
]
|
||||
|
||||
def _build_v25_prediction(
    self,
    data: MatchData,
    features: Dict[str, float],
    v25_signal: Dict[str, Any],
) -> FullMatchPrediction:
    """Assemble a FullMatchPrediction from the V25 market signal.

    Steps, in order:
      1. Read each market's probabilities from ``v25_signal`` with
         ``_prob_map`` (flat defaults when a market is missing).
      2. For cup competitions move 8% of the home-win probability to
         draw (40%) and away (60%).
      3. Fill per-market probability / pick / confidence fields.
      4. Predict expected goals and scores: score model first, heuristic
         otherwise; then prefer the most likely Poisson scoreline that
         agrees with the match-result pick.
      5. Derive risk score/level/warnings, upset and surprise fields.
      6. Derive per-engine confidences, cards/corners estimates and
         ``analysis_details``.

    Args:
        data: Match context (teams, averages, lineup/referee info, odds).
        features: Pre-match feature row; a stored ``None`` is treated like a
            missing feature for the values read here.
        v25_signal: Output of ``_get_v25_signal``.

    Returns:
        The populated prediction object.
    """

    def _feature(name: str, default: float) -> float:
        """Numeric feature lookup that treats a stored None like a missing key."""
        value = features.get(name)
        return float(default if value is None else value)

    prediction = FullMatchPrediction(
        match_id=data.match_id,
        home_team=data.home_team_name,
        away_team=data.away_team_name,
    )

    # ── 1. Market probabilities ─────────────────────────────────────
    ms_probs = self._prob_map(v25_signal, "MS", {"1": 0.33, "X": 0.34, "2": 0.33})
    ou15_probs = self._prob_map(v25_signal, "OU15", {"Under": 0.5, "Over": 0.5})
    ou25_probs = self._prob_map(v25_signal, "OU25", {"Under": 0.5, "Over": 0.5})
    ou35_probs = self._prob_map(v25_signal, "OU35", {"Under": 0.5, "Over": 0.5})
    btts_probs = self._prob_map(v25_signal, "BTTS", {"No": 0.5, "Yes": 0.5})
    ht_probs = self._prob_map(v25_signal, "HT", {"1": 0.33, "X": 0.34, "2": 0.33})
    ht_ou05_probs = self._prob_map(v25_signal, "HT_OU05", {"Under": 0.5, "Over": 0.5})
    ht_ou15_probs = self._prob_map(v25_signal, "HT_OU15", {"Under": 0.5, "Over": 0.5})
    htft_probs = self._prob_map(
        v25_signal,
        "HTFT",
        {"1/1": 1 / 9, "1/X": 1 / 9, "1/2": 1 / 9, "X/1": 1 / 9, "X/X": 1 / 9, "X/2": 1 / 9, "2/1": 1 / 9, "2/X": 1 / 9, "2/2": 1 / 9},
    )
    oe_probs = self._prob_map(v25_signal, "OE", {"Even": 0.5, "Odd": 0.5})
    cards_probs = self._prob_map(v25_signal, "CARDS", {"Under": 0.5, "Over": 0.5})
    hcap_probs = self._prob_map(v25_signal, "HCAP", {"1": 0.33, "X": 0.34, "2": 0.33})

    # ── 2. Cup game: dampen home advantage ──────────────────────────
    # Model trained on league data overestimates home edge
    is_cup = self._is_cup_game(getattr(data, "league_name", "") or "")
    if is_cup:
        # Shift 8% of home probability toward away and draw (rotation, neutral venue effect)
        cup_transfer = ms_probs["1"] * 0.08
        ms_probs = {
            "1": ms_probs["1"] - cup_transfer,
            "X": ms_probs["X"] + cup_transfer * 0.4,
            "2": ms_probs["2"] + cup_transfer * 0.6,
        }
        total = sum(ms_probs.values())
        ms_probs = {k: v / total for k, v in ms_probs.items()}

    # ── 3. Market fields ────────────────────────────────────────────
    prediction.ms_home_prob = ms_probs["1"]
    prediction.ms_draw_prob = ms_probs["X"]
    prediction.ms_away_prob = ms_probs["2"]
    prediction.ms_pick, ms_top = self._best_prob_pick(ms_probs)
    prediction.ms_confidence = ms_top * 100.0

    # Double chance is derived from the (possibly cup-adjusted) 1X2 split.
    prediction.dc_1x_prob = prediction.ms_home_prob + prediction.ms_draw_prob
    prediction.dc_x2_prob = prediction.ms_draw_prob + prediction.ms_away_prob
    prediction.dc_12_prob = prediction.ms_home_prob + prediction.ms_away_prob
    dc_probs = {"1X": prediction.dc_1x_prob, "X2": prediction.dc_x2_prob, "12": prediction.dc_12_prob}
    prediction.dc_pick, dc_top = self._best_prob_pick(dc_probs)
    prediction.dc_confidence = dc_top * 100.0

    prediction.over_15_prob = ou15_probs["Over"]
    prediction.under_15_prob = ou15_probs["Under"]
    prediction.ou15_pick = "1.5 Üst" if prediction.over_15_prob >= prediction.under_15_prob else "1.5 Alt"
    prediction.ou15_confidence = max(prediction.over_15_prob, prediction.under_15_prob) * 100.0

    prediction.over_25_prob = ou25_probs["Over"]
    prediction.under_25_prob = ou25_probs["Under"]
    prediction.ou25_pick = "2.5 Üst" if prediction.over_25_prob >= prediction.under_25_prob else "2.5 Alt"
    prediction.ou25_confidence = max(prediction.over_25_prob, prediction.under_25_prob) * 100.0

    prediction.over_35_prob = ou35_probs["Over"]
    prediction.under_35_prob = ou35_probs["Under"]
    prediction.ou35_pick = "3.5 Üst" if prediction.over_35_prob >= prediction.under_35_prob else "3.5 Alt"
    prediction.ou35_confidence = max(prediction.over_35_prob, prediction.under_35_prob) * 100.0

    prediction.btts_yes_prob = btts_probs["Yes"]
    prediction.btts_no_prob = btts_probs["No"]
    prediction.btts_pick = "KG Var" if prediction.btts_yes_prob >= prediction.btts_no_prob else "KG Yok"
    prediction.btts_confidence = max(prediction.btts_yes_prob, prediction.btts_no_prob) * 100.0

    prediction.ht_home_prob = ht_probs["1"]
    prediction.ht_draw_prob = ht_probs["X"]
    prediction.ht_away_prob = ht_probs["2"]
    prediction.ht_pick, ht_top = self._best_prob_pick(ht_probs)
    prediction.ht_confidence = ht_top * 100.0

    prediction.ht_over_05_prob = ht_ou05_probs["Over"]
    prediction.ht_under_05_prob = ht_ou05_probs["Under"]
    prediction.ht_ou_pick = "İY 0.5 Üst" if prediction.ht_over_05_prob >= prediction.ht_under_05_prob else "İY 0.5 Alt"

    prediction.ht_over_15_prob = ht_ou15_probs["Over"]
    prediction.ht_under_15_prob = ht_ou15_probs["Under"]
    prediction.ht_ou15_pick = "İY 1.5 Üst" if prediction.ht_over_15_prob >= prediction.ht_under_15_prob else "İY 1.5 Alt"

    prediction.ht_ft_probs = htft_probs

    prediction.odd_prob = oe_probs["Odd"]
    prediction.even_prob = oe_probs["Even"]
    prediction.odd_even_pick = "Tek" if prediction.odd_prob >= prediction.even_prob else "Çift"

    prediction.cards_over_prob = cards_probs["Over"]
    prediction.cards_under_prob = cards_probs["Under"]
    prediction.card_pick = "4.5 Üst" if prediction.cards_over_prob >= prediction.cards_under_prob else "4.5 Alt"
    prediction.cards_confidence = max(prediction.cards_over_prob, prediction.cards_under_prob) * 100.0

    prediction.handicap_home_prob = hcap_probs["1"]
    prediction.handicap_draw_prob = hcap_probs["X"]
    prediction.handicap_away_prob = hcap_probs["2"]
    prediction.handicap_pick, hcap_top = self._best_prob_pick(hcap_probs)
    prediction.handicap_confidence = hcap_top * 100.0

    # ── 4. Score Prediction: Model-first, heuristic fallback ────────
    ms_edge = prediction.ms_home_prob - prediction.ms_away_prob
    score_result = self._predict_score_with_model(features)
    if score_result is not None:
        # ML model predicted scores
        prediction.home_xg = score_result["ft_home"]
        prediction.away_xg = score_result["ft_away"]
        prediction.total_xg = round(prediction.home_xg + prediction.away_xg, 2)
        ht_home_xg = score_result["ht_home"]
        ht_away_xg = score_result["ht_away"]
        prediction.predicted_ft_score = f"{int(round(prediction.home_xg))}-{int(round(prediction.away_xg))}"
        prediction.predicted_ht_score = f"{int(round(ht_home_xg))}-{int(round(ht_away_xg))}"
    else:
        # Heuristic fallback (original formula): blend each side's scoring
        # average with the opponent's conceded figure, tilt by the 1X2 edge
        # and BTTS lean, then rescale to a clamped total-goals target.
        base_home_xg = max(0.25, (float(data.home_goals_avg or 1.3) + float(features.get("away_xga", data.away_conceded_avg) or 1.2)) / 2.0)
        base_away_xg = max(0.25, (float(data.away_goals_avg or 1.3) + float(features.get("home_xga", data.home_conceded_avg) or 1.2)) / 2.0)
        total_target = max(
            1.4,
            min(
                4.8,
                (_feature("league_avg_goals", 2.7) * 0.55)
                + ((float(data.home_goals_avg or 1.3) + float(data.away_goals_avg or 1.3)) * 0.45)
                + ((prediction.over_25_prob - prediction.under_25_prob) * 1.15),
            ),
        )
        home_xg = max(0.2, base_home_xg + (ms_edge * 0.55) + ((prediction.btts_yes_prob - 0.5) * 0.18))
        away_xg = max(0.2, base_away_xg - (ms_edge * 0.55) + ((prediction.btts_yes_prob - 0.5) * 0.18))
        scale = total_target / max(home_xg + away_xg, 0.1)
        prediction.home_xg = round(home_xg * scale, 2)
        prediction.away_xg = round(away_xg * scale, 2)
        prediction.total_xg = round(prediction.home_xg + prediction.away_xg, 2)

        # Cup game: reduce xG by 20% — rotation + lower motivation + defensive tactics
        # NOTE(review): applied on the heuristic path only; confirm whether
        # model-predicted xG should be dampened for cup games as well.
        if is_cup:
            prediction.home_xg = round(prediction.home_xg * 0.80, 2)
            prediction.away_xg = round(prediction.away_xg * 0.80, 2)
            prediction.total_xg = round(prediction.home_xg + prediction.away_xg, 2)
        prediction.predicted_ft_score = f"{int(round(prediction.home_xg))}-{int(round(prediction.away_xg))}"
        prediction.predicted_ht_score = f"{int(round(prediction.home_xg * 0.45))}-{int(round(prediction.away_xg * 0.45))}"
    prediction.ft_scores_top5 = self._poisson_score_top5(prediction.home_xg, prediction.away_xg)

    # Score prediction: find the most likely scoreline consistent with the MS pick
    # Instead of just rounding xG (misleading), filter Poisson top scores by result direction
    ms_pick = prediction.ms_pick  # "1", "X", or "2"
    top5 = prediction.ft_scores_top5
    if top5 and ms_pick in ("1", "X", "2"):
        def _result_of(score_str: str) -> str:
            """Map an 'H-A' scoreline to '1' / 'X' / '2' ('?' if unparsable)."""
            try:
                h, a = map(int, score_str.split("-"))
                if h > a:
                    return "1"
                if h < a:
                    return "2"
                return "X"
            except Exception:
                return "?"

        # Filter to scorelines matching the predicted result
        matching = [s for s in top5 if _result_of(s["score"]) == ms_pick]
        if matching:
            best = matching[0]  # already sorted by probability desc
            h_str, a_str = best["score"].split("-")
            prediction.predicted_ft_score = best["score"]
            # Recalculate HT score proportionally from the FT pick
            # NOTE(review): this also replaces a model-predicted HT score
            # with the 45% estimate — confirm that is intended.
            h_val, a_val = int(h_str), int(a_str)
            prediction.predicted_ht_score = f"{int(round(h_val * 0.45))}-{int(round(a_val * 0.45))}"

    # ── 5. Risk ─────────────────────────────────────────────────────
    max_market_conf = max(
        prediction.ms_confidence,
        prediction.ou15_confidence,
        prediction.ou25_confidence,
        prediction.ou35_confidence,
        prediction.btts_confidence,
        prediction.ht_confidence,
        prediction.cards_confidence,
        prediction.handicap_confidence,
    )
    lineup_conf = max(0.0, min(1.0, float(getattr(data, "lineup_confidence", 0.0) or 0.0)))
    if data.lineup_source == "none":
        lineup_penalty = 12.0
    elif data.lineup_source == "probable_xi":
        lineup_penalty = max(1.5, (1.0 - lineup_conf) * 8.0)
    else:
        lineup_penalty = 0.0
    referee_penalty = 6.0 if not data.referee_name else 0.0
    parity_penalty = 8.0 if abs(ms_edge) < 0.08 else 0.0
    # Cup game penalty: model trained on league data has lower reliability for cup matches
    cup_penalty = 10.0 if is_cup else 0.0
    # Bookmaker margin penalty: high margin signals that even the market is uncertain
    bm_margin = 0.0
    odds_data = getattr(data, "odds_data", {}) or {}
    _h, _d, _a = float(odds_data.get("ms_h") or 0), float(odds_data.get("ms_d") or 0), float(odds_data.get("ms_a") or 0)
    if _h > 1.01 and _d > 1.01 and _a > 1.01:
        # Overround: sum of implied probabilities minus 1.
        bm_margin = (1 / _h + 1 / _d + 1 / _a) - 1
    if bm_margin > 0.20:
        bookmaker_penalty = 12.0
    elif bm_margin > 0.15:
        bookmaker_penalty = 6.0
    else:
        bookmaker_penalty = 0.0
    prediction.risk_score = round(min(100.0, max(10.0, 100.0 - max_market_conf + lineup_penalty + referee_penalty + parity_penalty + cup_penalty + bookmaker_penalty)), 1)
    if prediction.risk_score >= 78:
        prediction.risk_level = "EXTREME"
    elif prediction.risk_score >= 62:
        prediction.risk_level = "HIGH"
    elif prediction.risk_score >= 40:
        prediction.risk_level = "MEDIUM"
    else:
        prediction.risk_level = "LOW"
    prediction.is_surprise_risk = prediction.risk_level in {"HIGH", "EXTREME"} or prediction.ms_draw_prob >= 0.30
    prediction.surprise_type = "balanced_match_risk" if abs(ms_edge) < 0.08 else "draw_pressure" if prediction.ms_draw_prob >= 0.30 else ""
    prediction.risk_warnings = []
    if is_cup:
        prediction.risk_warnings.append("cup_game_home_advantage_reduced")
    if bookmaker_penalty > 0:
        prediction.risk_warnings.append(f"bookmaker_margin_high_{bm_margin*100:.0f}pct")
    if data.lineup_source == "probable_xi":
        prediction.risk_warnings.append("lineup_probable_not_confirmed")
        # NOTE(review): low-confidence warning is treated as specific to
        # projected lineups — confirm it should not fire for other sources.
        if lineup_conf < 0.65:
            prediction.risk_warnings.append("lineup_projection_low_confidence")
    if data.lineup_source == "none":
        prediction.risk_warnings.append("lineup_unavailable")
    if not data.referee_name:
        prediction.risk_warnings.append("missing_referee")
    if prediction.ms_draw_prob >= 0.30:
        prediction.risk_warnings.append("draw_probability_elevated")

    prediction.upset_score = int(round(max(0.0, min(100.0, (prediction.ms_draw_prob + min(prediction.ms_home_prob, prediction.ms_away_prob)) * 100.0))))
    prediction.upset_level = "HIGH" if prediction.upset_score >= 65 else "MEDIUM" if prediction.upset_score >= 45 else "LOW"
    prediction.upset_reasons = [prediction.surprise_type] if prediction.surprise_type else []
    surprise = self._build_surprise_profile(data, prediction)
    prediction.surprise_score = surprise["score"]
    prediction.surprise_comment = surprise["comment"]
    prediction.surprise_reasons = surprise["reasons"]
    prediction.surprise_breakdown = surprise.get("breakdown", [])
    # Auto-flag is_surprise_risk when score crosses 45 even if other paths didn't fire
    if surprise["score"] >= 45.0:
        prediction.is_surprise_risk = True

    # ── 6. Engine confidences and auxiliary estimates ───────────────
    prediction.team_confidence = round(max(35.0, min(95.0, 45.0 + (abs(ms_edge) * 85.0) + (abs(_feature("form_elo_diff", 0.0)) / 40.0))), 1)
    key_players = _feature("home_key_players", 0.0) + _feature("away_key_players", 0.0)
    missing_impact = _feature("home_missing_impact", 0.0) + _feature("away_missing_impact", 0.0)
    prediction.player_confidence = round(max(20.0, min(95.0, 38.0 + key_players * 2.0 - missing_impact * 22.0)), 1)
    prediction.odds_confidence = round(max(30.0, min(95.0, float(np.mean([prediction.ms_confidence, prediction.ou25_confidence, prediction.btts_confidence])))), 1)
    prediction.referee_confidence = 62.0 if data.referee_name else 35.0

    prediction.total_cards_pred = 4.8 if prediction.cards_over_prob >= prediction.cards_under_prob else 4.1
    prediction.total_corners_pred = round(8.8 + (prediction.over_25_prob - 0.5) * 2.5, 1)
    prediction.corner_pick = "9.5 Üst" if prediction.total_corners_pred >= 9.5 else "9.5 Alt"
    prediction.analysis_details = {
        "primary_model": "v25",
        "features_source": "v25.pre_match",
        "market_count": sum(1 for key in v25_signal if key != "value_bets"),
        "lineup_source": data.lineup_source,
    }
    return prediction
|
||||
|
||||
def _build_engine_breakdown(self, prediction: FullMatchPrediction) -> Dict[str, Any]:
|
||||
"""
|
||||
Engine breakdown with backward-compatible flat scores + rich detail siblings.
|
||||
|
||||
Shape:
|
||||
{
|
||||
team: 74.1, player: 55.7, odds: 55.2, referee: 62.0, # legacy flat scores
|
||||
detail: { team: {score, label, ...}, player: {...}, ... }
|
||||
}
|
||||
"""
|
||||
components = {
|
||||
"team": ("Takım modeli", float(prediction.team_confidence)),
|
||||
"player": ("Oyuncu / kadro modeli", float(prediction.player_confidence)),
|
||||
"odds": ("Oran piyasası", float(prediction.odds_confidence)),
|
||||
"referee": ("Hakem etkisi", float(prediction.referee_confidence)),
|
||||
}
|
||||
flat: Dict[str, Any] = {}
|
||||
detail: Dict[str, Any] = {}
|
||||
for key, (display, raw) in components.items():
|
||||
score = round(raw, 1)
|
||||
label, interpretation = self._confidence_label(score)
|
||||
flat[key] = score
|
||||
detail[key] = {
|
||||
"score": score,
|
||||
"label": label,
|
||||
"display_name": display,
|
||||
"interpretation": interpretation,
|
||||
}
|
||||
flat["detail"] = detail
|
||||
return flat
|
||||
@@ -0,0 +1,469 @@
|
||||
"""Reversal Mixin — HT/FT reversal watchlist and cycle metrics.
|
||||
|
||||
Auto-extracted mixin module — split from services/single_match_orchestrator.py.
|
||||
All methods here are composed into SingleMatchOrchestrator via inheritance.
|
||||
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
|
||||
initialised in the main __init__.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, overload
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from schemas.prediction import FullMatchPrediction
|
||||
from schemas.match_data import MatchData
|
||||
from models.v25_ensemble import V25Predictor, get_v25_predictor
|
||||
try:
|
||||
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
|
||||
except ImportError:
|
||||
class V27Predictor: # type: ignore[no-redef]
|
||||
def __init__(self): self.models = {}
|
||||
def load_models(self): return False
|
||||
def predict_all(self, features): return {}
|
||||
def compute_divergence(*args, **kwargs):
|
||||
return {}
|
||||
def compute_value_edge(*args, **kwargs):
|
||||
return {}
|
||||
from features.odds_band_analyzer import OddsBandAnalyzer
|
||||
try:
|
||||
from models.basketball_v25 import (
|
||||
BasketballMatchPrediction,
|
||||
get_basketball_v25_predictor,
|
||||
)
|
||||
except ImportError:
|
||||
BasketballMatchPrediction = Any # type: ignore[misc]
|
||||
def get_basketball_v25_predictor() -> Any:
|
||||
raise ImportError("Basketball predictor is not available")
|
||||
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
|
||||
from services.feature_enrichment import FeatureEnrichmentService
|
||||
from services.betting_brain import BettingBrain
|
||||
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
|
||||
from services.match_commentary import generate_match_commentary
|
||||
from utils.top_leagues import load_top_league_ids
|
||||
from utils.league_reliability import load_league_reliability
|
||||
from config.config_loader import build_threshold_dict, get_threshold_default
|
||||
from models.calibration import get_calibrator
|
||||
|
||||
|
||||
class ReversalMixin:
    """HT/FT reversal watchlist for upcoming football matches.

    This is a mixin. The host class is expected to provide ``self.dsn``,
    ``self._is_top_league``, ``self._load_match_data``, ``self.analyze_match``
    and ``self._to_float``; none of them are defined here.
    """

    def get_reversal_watchlist(
        self,
        count: int = 20,
        horizon_hours: int = 72,
        min_score: float = 45.0,
        top_leagues_only: bool = False,
    ) -> Dict[str, Any]:
        """Rank upcoming football matches by their HT/FT reversal potential.

        Args:
            count: Maximum number of watchlist entries (clamped to 1..100).
            horizon_hours: Look-ahead window in hours (clamped to 6..168).
            min_score: Minimum ``watch_score`` for an entry to be listed
                (clamped to 0..100).
            top_leagues_only: Keep only candidates accepted by
                ``self._is_top_league``.

        Returns:
            A dict with the request echo, scan counters, the filtered
            ``watchlist`` and an unfiltered ``top_candidates_preview`` (top 5).
        """
        # Function-scope imports: the module header does not import these names.
        from contextlib import closing
        from datetime import datetime, timezone

        safe_count = max(1, min(100, int(count)))
        safe_horizon = max(6, min(168, int(horizon_hours)))
        safe_min_score = max(0.0, min(100.0, float(min_score)))
        now_ms = int(time.time() * 1000)
        horizon_ms = now_ms + (safe_horizon * 60 * 60 * 1000)

        # psycopg2's connection context manager only commits/rolls back the
        # transaction; it does NOT close the connection. closing() guarantees
        # the connection is released, and it is released before the slow
        # per-match analysis below instead of being held idle throughout.
        with closing(psycopg2.connect(self.dsn)) as conn:
            with conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
                cur.execute(
                    """
                    SELECT
                        lm.id,
                        lm.home_team_id,
                        lm.away_team_id,
                        lm.league_id,
                        lm.mst_utc
                    FROM live_matches lm
                    WHERE lm.sport = 'football'
                      AND lm.mst_utc >= %s
                      AND lm.mst_utc <= %s
                    ORDER BY lm.mst_utc ASC
                    LIMIT 200
                    """,
                    (now_ms, horizon_ms),
                )
                raw_candidates = cur.fetchall()

                candidates = [
                    row
                    for row in raw_candidates
                    if row.get("home_team_id") and row.get("away_team_id")
                ]
                if top_leagues_only:
                    candidates = [
                        row
                        for row in candidates
                        if self._is_top_league(row.get("league_id"))
                    ]

                team_ids: Set[str] = set()
                pair_keys: Set[Tuple[str, str]] = set()
                for row in candidates:
                    home_id = str(row["home_team_id"])
                    away_id = str(row["away_team_id"])
                    team_ids.update((home_id, away_id))
                    # Pairs are order-independent: always stored sorted.
                    first, second = sorted((home_id, away_id))
                    pair_keys.add((first, second))

                team_cycle = self._fetch_team_reversal_cycle_metrics(cur, team_ids, now_ms)
                h2h_ctx = self._fetch_h2h_reversal_context(cur, pair_keys, now_ms)

        watch_items_all: List[Dict[str, Any]] = []
        scanned = 0
        for row in candidates:
            match_id = str(row["id"])
            data = self._load_match_data(match_id)
            if data is None:
                continue

            package = self.analyze_match(match_id)
            if not package:
                continue

            # A match counts as scanned once it has been analysed, even if it
            # ends up carrying no reversal probability.
            scanned += 1
            item = self._build_reversal_watch_item(data, package, team_cycle, h2h_ctx)
            if item is not None:
                watch_items_all.append(item)

        watch_items_all.sort(
            key=lambda item: (
                float(item.get("watch_score", 0.0)),
                float(item.get("top_pick_prob", 0.0)),
            ),
            reverse=True,
        )

        selected = [
            item
            for item in watch_items_all
            if float(item.get("watch_score", 0.0)) >= safe_min_score
        ][:safe_count]
        preview = watch_items_all[:5]

        # Same "naive ISO + Z" format as before, built from an aware UTC clock
        # instead of the deprecated datetime.utcnow().
        generated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
        return {
            "engine": "v28.main",
            "generated_at": generated_at,
            "horizon_hours": safe_horizon,
            "min_score": round(safe_min_score, 2),
            "top_leagues_only": bool(top_leagues_only),
            "scanned_matches": scanned,
            "candidate_matches": len(candidates),
            "listed_matches": len(selected),
            "watchlist": selected,
            "top_candidates_preview": preview,
        }

    def _build_reversal_watch_item(
        self,
        data: Any,
        package: Dict[str, Any],
        team_cycle: Dict[str, Dict[str, float]],
        h2h_ctx: Dict[Tuple[str, str], Dict[str, Any]],
    ) -> Optional[Dict[str, Any]]:
        """Score one analysed match; return ``None`` if it has no reversal probability.

        ``data`` is the object returned by ``self._load_match_data``; only its
        ``match_id``, team/league fields, ``match_date_ms`` and ``odds_data``
        attributes are read.
        """
        market_board = package.get("market_board") or {}
        htft_probs = (market_board.get("HTFT") or {}).get("probs") or {}
        prob_12 = self._to_float(htft_probs.get("1/2"), 0.0)
        prob_21 = self._to_float(htft_probs.get("2/1"), 0.0)
        if prob_12 <= 0.0 and prob_21 <= 0.0:
            return None

        overall_htft_pick, overall_htft_prob = max(
            htft_probs.items(),
            key=lambda entry: self._to_float(entry[1], 0.0),
        )

        reversal_sum = prob_12 + prob_21
        reversal_max = max(prob_12, prob_21)
        top_pick = "2/1" if prob_21 >= prob_12 else "1/2"
        top_prob = prob_21 if top_pick == "2/1" else prob_12

        ms_h = self._to_float(data.odds_data.get("ms_h"), 0.0)
        ms_a = self._to_float(data.odds_data.get("ms_a"), 0.0)
        has_both_odds = ms_h > 1.0 and ms_a > 1.0
        gap = abs(ms_h - ms_a) if has_both_odds else 0.0
        favorite_odd = min(ms_h, ms_a) if has_both_odds else 0.0

        # Reversal events are rare (~5% baseline), so convert raw probs to a more useful
        # watchlist scale where p in [0.02, 0.08] becomes meaningfully separable.
        base_score = (reversal_max * 100.0 * 8.0) + (reversal_sum * 100.0 * 4.0)

        balance_bonus = 0.0
        if gap > 0.0:
            balance_bonus = max(0.0, (1.0 - min(gap, 1.2) / 1.2) * 7.0)
        elif has_both_odds:
            # Identical home/away odds (gap == 0) get a small flat bonus.
            balance_bonus = 2.0

        favorite_bonus = 0.0
        if 0.0 < favorite_odd <= 1.70 and reversal_max >= 0.02:
            favorite_bonus = min(8.0, (1.70 - favorite_odd) * 12.0)

        # Both lookup tables are keyed by str(team_id); normalise here so a
        # non-string id on `data` cannot silently miss.
        home_key = str(data.home_team_id)
        away_key = str(data.away_team_id)
        home_metrics = team_cycle.get(home_key, {})
        away_metrics = team_cycle.get(away_key, {})
        home_pressure = float(home_metrics.get("cycle_pressure", 0.0))
        away_pressure = float(away_metrics.get("cycle_pressure", 0.0))
        cycle_pressure = max(home_pressure, away_pressure)
        cycle_bonus = cycle_pressure * 10.0

        first, second = sorted((home_key, away_key))
        pair_ctx = h2h_ctx.get((first, second), {})
        blowout_bonus = 0.0
        last_diff = int(pair_ctx.get("goal_diff", 0))
        if abs(last_diff) >= 3:
            blowout_bonus = 6.0
        if abs(last_diff) >= 5:
            blowout_bonus += 3.0

        ou25_o = self._to_float(data.odds_data.get("ou25_o"), 0.0)
        tempo_bonus = 2.5 if 1.0 < ou25_o <= 1.72 else 0.0

        raw_score = (
            base_score
            + balance_bonus
            + favorite_bonus
            + cycle_bonus
            + blowout_bonus
            + tempo_bonus
        )
        watch_score = max(0.0, min(100.0, raw_score))

        reason_codes: List[str] = []
        if top_prob >= 0.045:
            reason_codes.append("reversal_prob_hot")
        elif top_prob >= 0.030:
            reason_codes.append("reversal_prob_warm")
        if 0.0 < gap <= 0.80:
            reason_codes.append("balanced_matchup")
        if favorite_bonus > 0.0:
            reason_codes.append("strong_favorite_reversal_window")
        if cycle_pressure >= 0.55:
            reason_codes.append("team_reversal_cycle_pressure")
        if blowout_bonus > 0.0:
            reason_codes.append("h2h_blowout_rematch")
        if tempo_bonus > 0.0:
            reason_codes.append("high_tempo_profile")
        if not reason_codes:
            reason_codes.append("model_signal_only")

        return {
            "match_id": data.match_id,
            "match_name": f"{data.home_team_name} vs {data.away_team_name}",
            "match_date_ms": data.match_date_ms,
            "league_id": data.league_id,
            "league": data.league_name,
            "risk_band": self._watchlist_risk_band(watch_score),
            "watch_score": round(watch_score, 2),
            "top_pick": top_pick,
            "top_pick_prob": round(top_prob, 4),
            "top_pick_scope": "reversal_only",
            "overall_htft_pick": overall_htft_pick,
            "overall_htft_pick_prob": round(self._to_float(overall_htft_prob, 0.0), 4),
            "reversal_probs": {
                "1/2": round(prob_12, 4),
                "2/1": round(prob_21, 4),
            },
            "odds_snapshot": {
                "ms_h": round(ms_h, 2) if ms_h > 0 else None,
                "ms_a": round(ms_a, 2) if ms_a > 0 else None,
                "ms_gap": round(gap, 3),
                "favorite_odd": round(favorite_odd, 2) if favorite_odd > 0 else None,
            },
            "pattern_signals": {
                "home_cycle_pressure": round(home_pressure, 3),
                "away_cycle_pressure": round(away_pressure, 3),
                "home_matches_since_last_reversal": int(
                    home_metrics.get("matches_since_last_reversal", 99)
                ),
                "away_matches_since_last_reversal": int(
                    away_metrics.get("matches_since_last_reversal", 99)
                ),
                "h2h_last_goal_diff": last_diff if pair_ctx else None,
                "h2h_last_result": pair_ctx.get("result"),
            },
            "reason_codes": reason_codes,
        }

    def _fetch_team_reversal_cycle_metrics(
        self,
        cur: RealDictCursor,
        team_ids: Set[str],
        now_ms: int,
    ) -> Dict[str, Dict[str, float]]:
        """Compute per-team reversal rhythm from each team's last 80 finished matches.

        A "reversal" is a match the team led at half time and lost, or trailed
        at half time and won. In the SQL, ``ht_state`` is from the team's own
        perspective: 'L' = leading, 'T' = trailing, 'D' = level.

        Returns:
            ``{team_id: {recent_reversal_rate, matches_since_last_reversal,
            avg_gap_matches, cycle_pressure}}`` for every requested team; teams
            with no history get neutral defaults.
        """
        if not team_ids:
            return {}

        id_list = list(team_ids)
        cur.execute(
            """
            WITH team_matches AS (
                SELECT
                    m.home_team_id AS team_id,
                    m.mst_utc,
                    CASE
                        WHEN m.ht_score_home > m.ht_score_away THEN 'L'
                        WHEN m.ht_score_home < m.ht_score_away THEN 'T'
                        ELSE 'D'
                    END AS ht_state,
                    CASE
                        WHEN m.score_home > m.score_away THEN 'W'
                        WHEN m.score_home < m.score_away THEN 'L'
                        ELSE 'D'
                    END AS ft_state
                FROM matches m
                WHERE m.status = 'FT'
                  AND m.score_home IS NOT NULL
                  AND m.score_away IS NOT NULL
                  AND m.ht_score_home IS NOT NULL
                  AND m.ht_score_away IS NOT NULL
                  AND m.home_team_id = ANY(%s)
                  AND m.mst_utc < %s
                UNION ALL
                SELECT
                    m.away_team_id AS team_id,
                    m.mst_utc,
                    CASE
                        WHEN m.ht_score_away > m.ht_score_home THEN 'L'
                        WHEN m.ht_score_away < m.ht_score_home THEN 'T'
                        ELSE 'D'
                    END AS ht_state,
                    CASE
                        WHEN m.score_away > m.score_home THEN 'W'
                        WHEN m.score_away < m.score_home THEN 'L'
                        ELSE 'D'
                    END AS ft_state
                FROM matches m
                WHERE m.status = 'FT'
                  AND m.score_home IS NOT NULL
                  AND m.score_away IS NOT NULL
                  AND m.ht_score_home IS NOT NULL
                  AND m.ht_score_away IS NOT NULL
                  AND m.away_team_id = ANY(%s)
                  AND m.mst_utc < %s
            ),
            ranked AS (
                SELECT
                    team_id,
                    mst_utc,
                    ht_state,
                    ft_state,
                    ROW_NUMBER() OVER (PARTITION BY team_id ORDER BY mst_utc DESC) AS rn
                FROM team_matches
            )
            SELECT team_id, mst_utc, ht_state, ft_state
            FROM ranked
            WHERE rn <= 80
            ORDER BY team_id ASC, mst_utc DESC
            """,
            (id_list, now_ms, id_list, now_ms),
        )
        rows = cur.fetchall()

        by_team: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
        for row in rows:
            by_team[str(row["team_id"])].append(row)

        out: Dict[str, Dict[str, float]] = {}
        for team_id in team_ids:
            team_key = str(team_id)
            team_rows = by_team.get(team_key, [])
            if not team_rows:
                out[team_key] = {
                    "recent_reversal_rate": 0.0,
                    "matches_since_last_reversal": 99.0,
                    "avg_gap_matches": 12.0,
                    "cycle_pressure": 0.0,
                }
                continue

            # Rows are newest-first, so index 1 is the most recent match.
            reversal_indexes: List[int] = []
            recent_reversal = 0
            recent_n = min(15, len(team_rows))
            for idx, row in enumerate(team_rows, start=1):
                ht_state = str(row.get("ht_state") or "")
                ft_state = str(row.get("ft_state") or "")
                is_reversal = (ht_state == "L" and ft_state == "L") or (
                    ht_state == "T" and ft_state == "W"
                )
                if not is_reversal:
                    continue
                reversal_indexes.append(idx)
                if idx <= recent_n:
                    recent_reversal += 1

            recent_rate = (recent_reversal / recent_n) if recent_n > 0 else 0.0
            since_last = float(reversal_indexes[0]) if reversal_indexes else 99.0

            gaps = [
                float(later - earlier)
                for earlier, later in zip(reversal_indexes, reversal_indexes[1:])
            ]
            avg_gap = (sum(gaps) / len(gaps)) if gaps else 12.0
            if avg_gap <= 0:
                avg_gap = 12.0

            # Pressure peaks (1.0) when the distance since the last reversal
            # equals the team's average gap, and decays linearly to 0 at the
            # tolerance boundary.
            cycle_pressure = 0.0
            if reversal_indexes:
                tolerance = max(3.0, avg_gap * 0.7)
                diff = abs(since_last - avg_gap)
                cycle_pressure = max(0.0, 1.0 - (diff / tolerance))

            out[team_key] = {
                "recent_reversal_rate": round(recent_rate, 4),
                "matches_since_last_reversal": round(since_last, 2),
                "avg_gap_matches": round(avg_gap, 2),
                "cycle_pressure": round(cycle_pressure, 4),
            }
        return out

    def _fetch_h2h_reversal_context(
        self,
        cur: RealDictCursor,
        pair_keys: Set[Tuple[str, str]],
        now_ms: int,
    ) -> Dict[Tuple[str, str], Dict[str, Any]]:
        """Return the most recent finished head-to-head result for each team pair.

        Args:
            cur: Open cursor whose rows support ``row["column"]`` access.
            pair_keys: Sorted ``(team_id, team_id)`` tuples to look up.
            now_ms: Only matches with ``mst_utc`` earlier than this are considered.

        Returns:
            ``{pair_key: {goal_diff, result, match_date_ms}}``; ``goal_diff``
            and ``result`` are from that historic match's home-team perspective.
            Pairs with no history are absent.
        """
        if not pair_keys:
            return {}

        team_ids = sorted({team_id for pair in pair_keys for team_id in pair})
        cur.execute(
            """
            SELECT
                m.home_team_id,
                m.away_team_id,
                m.score_home,
                m.score_away,
                m.ht_score_home,
                m.ht_score_away,
                m.mst_utc
            FROM matches m
            WHERE m.status = 'FT'
              AND m.score_home IS NOT NULL
              AND m.score_away IS NOT NULL
              AND m.home_team_id = ANY(%s)
              AND m.away_team_id = ANY(%s)
              AND m.mst_utc < %s
            ORDER BY m.mst_utc DESC
            LIMIT 4000
            """,
            (team_ids, team_ids, now_ms),
        )
        rows = cur.fetchall()

        out: Dict[Tuple[str, str], Dict[str, Any]] = {}
        for row in rows:
            first, second = sorted((str(row["home_team_id"]), str(row["away_team_id"])))
            key = (first, second)
            # Rows are newest-first: the first hit per pair is the latest
            # meeting. The query also returns cross-pairs nobody asked for.
            if key not in pair_keys or key in out:
                continue

            score_home = int(row["score_home"])
            score_away = int(row["score_away"])
            out[key] = {
                "goal_diff": score_home - score_away,
                "result": f"{score_home}-{score_away}",
                "match_date_ms": int(row["mst_utc"] or 0),
            }
            if len(out) >= len(pair_keys):
                break

        return out

    @staticmethod
    def _watchlist_risk_band(score: float) -> str:
        """Map a 0-100 watch score to "HIGH" (>= 68), "MEDIUM" (>= 54) or "LOW"."""
        if score >= 68.0:
            return "HIGH"
        if score >= 54.0:
            return "MEDIUM"
        return "LOW"
|
||||
@@ -0,0 +1,350 @@
|
||||
"""Upper Brain Mixin — V27 cross-check guards and assessments.
|
||||
|
||||
Auto-extracted mixin module — split from services/single_match_orchestrator.py.
|
||||
All methods here are composed into SingleMatchOrchestrator via inheritance.
|
||||
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
|
||||
initialised in the main __init__.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, overload
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from schemas.prediction import FullMatchPrediction
|
||||
from schemas.match_data import MatchData
|
||||
from models.v25_ensemble import V25Predictor, get_v25_predictor
|
||||
try:
|
||||
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
|
||||
except ImportError:
|
||||
class V27Predictor: # type: ignore[no-redef]
|
||||
def __init__(self): self.models = {}
|
||||
def load_models(self): return False
|
||||
def predict_all(self, features): return {}
|
||||
def compute_divergence(*args, **kwargs):
|
||||
return {}
|
||||
def compute_value_edge(*args, **kwargs):
|
||||
return {}
|
||||
from features.odds_band_analyzer import OddsBandAnalyzer
|
||||
try:
|
||||
from models.basketball_v25 import (
|
||||
BasketballMatchPrediction,
|
||||
get_basketball_v25_predictor,
|
||||
)
|
||||
except ImportError:
|
||||
BasketballMatchPrediction = Any # type: ignore[misc]
|
||||
def get_basketball_v25_predictor() -> Any:
|
||||
raise ImportError("Basketball predictor is not available")
|
||||
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
|
||||
from services.feature_enrichment import FeatureEnrichmentService
|
||||
from services.betting_brain import BettingBrain
|
||||
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
|
||||
from services.match_commentary import generate_match_commentary
|
||||
from utils.top_leagues import load_top_league_ids
|
||||
from utils.league_reliability import load_league_reliability
|
||||
from config.config_loader import build_threshold_dict, get_threshold_default
|
||||
from models.calibration import get_calibrator
|
||||
|
||||
|
||||
class UpperBrainMixin:
    """Cross-checks picks against the V27 engine's probabilities and value bands.

    This is a mixin. ``self._safe_float`` is expected to be provided by the
    host class; it is not defined here.
    """

    # Thresholds used by _upper_brain_assessment.
    _UPPER_BRAIN_MIN_BAND_SAMPLE = 8
    _UPPER_BRAIN_VETO_DIVERGENCE = 0.18
    _UPPER_BRAIN_WARN_DIVERGENCE = 0.12

    def _apply_upper_brain_guards(self, package: Dict[str, Any]) -> Dict[str, Any]:
        """Delegate guarding of ``package`` to ``BettingBrain.judge``.

        The previous in-method veto/downgrade/reselection logic sat after an
        unconditional ``return`` and could never run, so it has been removed.
        """
        return BettingBrain().judge(package)

    def _upper_brain_assessment(
        self,
        item: Dict[str, Any],
        package: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Assess one pick against V27 probabilities and triple-value bands.

        Args:
            item: A pick dict; reads ``market``, ``pick`` and optionally
                ``probability``.
            package: The analysis package; reads ``v27_engine`` and
                ``market_board``.

        Returns:
            ``{"applies": False}`` when the pick has no market/pick, otherwise
            a dict with ``applies``, ``veto``, ``downgrade``, ``reason_codes``,
            the rounded ``model_prob`` / ``v27_prob`` / ``divergence`` (``None``
            when unavailable), ``triple_key`` and the raw ``triple_value``.
        """
        market = str(item.get("market") or "")
        pick = str(item.get("pick") or "")
        if not market or not pick:
            return {"applies": False}

        v27_engine = package.get("v27_engine") or {}
        triple_value = v27_engine.get("triple_value") or {}
        model_prob = self._upper_brain_market_probability(item, package)
        v27_prob = self._upper_brain_v27_probability(market, pick, v27_engine)
        triple_key = self._upper_brain_triple_key(market, pick)
        triple = triple_value.get(triple_key) if triple_key else None

        veto = False
        downgrade = False
        reasons: List[str] = []
        divergence = None

        # Only compare the engines when both actually produced a number.
        if model_prob is not None and v27_prob is not None:
            divergence = abs(float(model_prob) - float(v27_prob))
            if divergence >= self._UPPER_BRAIN_VETO_DIVERGENCE:
                veto = True
                reasons.append("upper_brain_v25_v27_divergence")
            elif divergence >= self._UPPER_BRAIN_WARN_DIVERGENCE:
                downgrade = True
                reasons.append("upper_brain_v25_v27_warning")

        if isinstance(triple, dict):
            min_sample = self._UPPER_BRAIN_MIN_BAND_SAMPLE
            band_sample = int(float(triple.get("band_sample", 0) or 0))
            is_value = bool(triple.get("is_value"))
            if market == "DC":
                # Double chance is the only market where the band can veto.
                if band_sample < min_sample:
                    veto = True
                    reasons.append("upper_brain_band_sample_too_low")
                elif not is_value:
                    veto = True
                    reasons.append("upper_brain_triple_value_rejected")
            elif market in {"MS", "OU25"} and 0 < band_sample < min_sample:
                downgrade = True
                reasons.append("upper_brain_band_sample_thin")
            elif market in {"OU15", "HT_OU05"} and band_sample < min_sample:
                downgrade = True
                reasons.append("upper_brain_band_sample_thin")

        consensus = str(v27_engine.get("consensus") or "").upper()
        if consensus == "DISAGREE" and market in {"MS", "DC"} and not veto:
            downgrade = True
            reasons.append("upper_brain_consensus_disagree")

        applies = bool(reasons or triple is not None or v27_prob is not None)
        return {
            "applies": applies,
            "veto": veto,
            "downgrade": downgrade,
            "reason_codes": reasons,
            "model_prob": round(float(model_prob), 4) if model_prob is not None else None,
            "v27_prob": round(float(v27_prob), 4) if v27_prob is not None else None,
            "divergence": round(float(divergence), 4) if divergence is not None else None,
            "triple_key": triple_key,
            "triple_value": triple,
        }

    def _upper_brain_market_probability(
        self,
        item: Dict[str, Any],
        package: Dict[str, Any],
    ) -> Optional[float]:
        """Return the main model's probability for a pick, or ``None`` if unknown.

        Prefers ``item["probability"]``; otherwise looks the pick up in
        ``package["market_board"][market]["probs"]``.
        """
        raw_prob = item.get("probability")
        if raw_prob is not None:
            try:
                return float(raw_prob)
            except (TypeError, ValueError):
                pass  # fall through to the market board lookup

        market = str(item.get("market") or "")
        pick = str(item.get("pick") or "")
        board = package.get("market_board") or {}
        payload = board.get(market) if isinstance(board, dict) else None
        probs = payload.get("probs") if isinstance(payload, dict) else None
        if not isinstance(probs, dict):
            return None

        prob_key = self._upper_brain_prob_key(market, pick)
        if prob_key is None:
            return None
        # Explicit None default: a missing/unparseable value must read as
        # "unknown", not as a 0% probability that would fake a divergence.
        return self._safe_float(probs.get(prob_key), None)

    def _upper_brain_v27_probability(
        self,
        market: str,
        pick: str,
        v27_engine: Dict[str, Any],
    ) -> Optional[float]:
        """Return V27's probability for a pick, or ``None`` when V27 has none.

        Only ``MS``, ``DC`` (derived from the ``ms`` outcomes) and ``OU25`` are
        read from ``v27_engine["predictions"]``. Any other market, an
        unrecognised pick or a missing value yields ``None`` rather than
        ``0.0``, so callers do not mistake "no prediction" for a 0% forecast.
        """
        predictions = v27_engine.get("predictions") or {}
        ms = predictions.get("ms") or {}
        ou25 = predictions.get("ou25") or {}

        def lookup(table: Any, key: Optional[str]) -> Optional[float]:
            if not key or not isinstance(table, dict):
                return None
            return self._safe_float(table.get(key), None)

        if market == "MS":
            return lookup(ms, {"1": "home", "X": "draw", "2": "away"}.get(pick or ""))
        if market == "DC":
            legs = {
                "1X": ("home", "draw"),
                "X2": ("draw", "away"),
                "12": ("home", "away"),
            }.get(str(pick or "").upper())
            if legs is None:
                return None
            first = lookup(ms, legs[0])
            second = lookup(ms, legs[1])
            if first is None or second is None:
                return None
            return first + second
        if market == "OU25":
            return lookup(ou25, self._upper_brain_prob_key(market, pick))
        return None

    @staticmethod
    def _upper_brain_prob_key(market: str, pick: str) -> Optional[str]:
        """Normalise a (market, pick) label to the key used in ``probs`` dicts.

        Accepts English and Turkish labels by substring ("over"/"st",
        "under"/"alt", "yes"/"var", "no"/"yok", "odd"/"tek", "even"/"ift").
        Returns ``None`` for unknown markets or unrecognised picks.
        """
        pick_text = str(pick or "")
        pick_norm = pick_text.strip().casefold()
        if market in {"MS", "HT", "HCAP"}:
            return pick if pick in {"1", "X", "2"} else None
        if market == "DC":
            dc_pick = pick_text.upper()
            return dc_pick if dc_pick in {"1X", "X2", "12"} else None
        if market in {"OU15", "OU25", "OU35", "HT_OU05", "HT_OU15", "CARDS"}:
            if "over" in pick_norm or "st" in pick_norm:
                return "over"
            if "under" in pick_norm or "alt" in pick_norm:
                return "under"
        if market == "BTTS":
            if "yes" in pick_norm or "var" in pick_norm:
                return "yes"
            if "no" in pick_norm or "yok" in pick_norm:
                return "no"
        if market == "OE":
            if "odd" in pick_norm or "tek" in pick_norm:
                return "odd"
            if "even" in pick_norm or "ift" in pick_norm:
                return "even"
        if market == "HTFT" and "/" in pick_text:
            return pick
        return None

    def _upper_brain_triple_key(self, market: str, pick: str) -> Optional[str]:
        """Map a (market, pick) to its key in ``v27_engine["triple_value"]``.

        Only one side of each market has a key here (home/away, over, yes,
        odd); the opposite side and unknown markets return ``None``.
        """
        pick_text = str(pick or "")
        prob_key = self._upper_brain_prob_key(market, pick)
        if market == "MS":
            return {"1": "home", "2": "away"}.get(pick)
        if market == "DC":
            return f"dc_{pick_text.lower()}" if pick_text.upper() in {"1X", "X2", "12"} else None
        if market in {"OU15", "OU25", "OU35"} and prob_key == "over":
            return f"{market.lower()}_over"
        if market == "BTTS" and prob_key == "yes":
            return "btts_yes"
        if market == "HT":
            return {"1": "ht_home", "2": "ht_away"}.get(pick)
        if market in {"HT_OU05", "HT_OU15"} and prob_key == "over":
            return f"{market.lower()}_over"
        if market == "OE" and prob_key == "odd":
            return "oe_odd"
        if market == "CARDS" and prob_key == "over":
            return "cards_over"
        if market == "HTFT" and "/" in pick_text:
            return f"htft_{pick_text.replace('/', '').lower()}"
        return None
|
||||
@@ -0,0 +1,174 @@
|
||||
"""Utility Mixin — generic helpers (safe_float, label normalisation, JSON parsing).
|
||||
|
||||
Auto-extracted mixin module — split from services/single_match_orchestrator.py.
|
||||
All methods here are composed into SingleMatchOrchestrator via inheritance.
|
||||
`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
|
||||
initialised in the main __init__.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, overload
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
from data.db import get_clean_dsn
|
||||
from schemas.prediction import FullMatchPrediction
|
||||
from schemas.match_data import MatchData
|
||||
from models.v25_ensemble import V25Predictor, get_v25_predictor
|
||||
try:
|
||||
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
|
||||
except ImportError:
|
||||
class V27Predictor: # type: ignore[no-redef]
|
||||
def __init__(self): self.models = {}
|
||||
def load_models(self): return False
|
||||
def predict_all(self, features): return {}
|
||||
def compute_divergence(*args, **kwargs):
|
||||
return {}
|
||||
def compute_value_edge(*args, **kwargs):
|
||||
return {}
|
||||
from features.odds_band_analyzer import OddsBandAnalyzer
|
||||
try:
|
||||
from models.basketball_v25 import (
|
||||
BasketballMatchPrediction,
|
||||
get_basketball_v25_predictor,
|
||||
)
|
||||
except ImportError:
|
||||
BasketballMatchPrediction = Any # type: ignore[misc]
|
||||
def get_basketball_v25_predictor() -> Any:
|
||||
raise ImportError("Basketball predictor is not available")
|
||||
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
|
||||
from services.feature_enrichment import FeatureEnrichmentService
|
||||
from services.betting_brain import BettingBrain
|
||||
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
|
||||
from services.match_commentary import generate_match_commentary
|
||||
from utils.top_leagues import load_top_league_ids
|
||||
from utils.league_reliability import load_league_reliability
|
||||
from config.config_loader import build_threshold_dict, get_threshold_default
|
||||
from models.calibration import get_calibrator
|
||||
|
||||
|
||||
class UtilsMixin:
    """Generic helpers: tolerant float parsing, label normalisation, JSON parsing."""

    # NOTE: this class previously defined `_safe_float` twice. The later
    # definition (default=0.0) silently replaced the overloaded one, so the
    # overload stubs advertised a `None` default that never applied at runtime.
    # There is now a single implementation that keeps the effective runtime
    # behaviour, and the overloads describe it.
    @overload
    @staticmethod
    def _safe_float(value: Any, default: float = ...) -> float: ...

    @overload
    @staticmethod
    def _safe_float(value: Any, default: None) -> Optional[float]: ...

    @staticmethod
    def _safe_float(value: Any, default: Optional[float] = 0.0) -> Optional[float]:
        """Convert ``value`` to ``float``; return ``default`` on TypeError/ValueError.

        Pass ``default=None`` explicitly to distinguish "not a number" from 0.0.
        """
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _calibrator_key(market: str, pick: str) -> Optional[str]:
        """Map (market, pick) → trained-calibrator key in models/calibration."""
        m = (market or "").upper()
        p = (pick or "").strip().casefold()
        # "üst"/"ust" are the Turkish spellings of "over".
        is_over = "over" in p or "üst" in p or "ust" in p

        if m == "MS":
            if p == "1":
                return "ms_home"
            if p == "x" or p == "0":
                return "ms_draw"
            if p == "2":
                return "ms_away"
            return None
        if m == "DC":
            return "dc"
        # Totals only have a key for the "over" side.
        if m == "OU15" and is_over:
            return "ou15"
        if m == "OU25" and is_over:
            return "ou25"
        if m == "OU35" and is_over:
            return "ou35"
        if m == "BTTS" and ("yes" in p or "var" in p):
            return "btts"
        if m == "HT":
            if p == "1":
                return "ht_home"
            if p == "x" or p == "0":
                return "ht_draw"
            if p == "2":
                return "ht_away"
            return None
        if m == "HTFT":
            return "ht_ft"
        return None

    @staticmethod
    def _confidence_label(score: float) -> Tuple[str, str]:
        """Turkish UX label + interpretation for a 0-100 confidence score."""
        if score >= 75:
            return "YUKSEK", "Bu sinyal güçlü ve güvenilir"
        if score >= 60:
            return "ORTA", "Sinyal makul, çelişen veri yok"
        if score >= 45:
            return "DUSUK", "Sinyal zayıf, dikkatli yorumla"
        return "COK_DUSUK", "Veri yetersiz veya çelişkili — bu motoru bu maç için ihmal et"

    @staticmethod
    def _to_float(value: Any, default: float) -> float:
        """Convert ``value`` to ``float``; return ``default`` for ``None`` or any failure.

        Unlike ``_safe_float`` this deliberately swallows every exception
        raised by the conversion, not only TypeError/ValueError.
        """
        try:
            if value is None:
                return default
            return float(value)
        except Exception:
            return default

    @staticmethod
    def _normalize_text(value: Any) -> str:
        """Casefold ``value``, fold the dotted Turkish I to "i", collapse whitespace."""
        # "İ".casefold() yields "i" + U+0307 (combining dot above). The escape
        # keeps that invisible character explicit in the source.
        text = str(value or "").casefold().replace("i\u0307", "i")
        return " ".join(text.split())

    def _selection_value(
        self,
        selections: Dict[str, Any],
        aliases: Tuple[str, ...],
        default: float,
    ) -> float:
        """Find a numeric selection value by any of several label aliases.

        Tries an exact match on the normalised key first, then a substring
        match. Returns ``default`` when ``selections`` is not a dict or
        nothing matches.
        """
        if not isinstance(selections, dict):
            return default

        normalized_aliases = {self._normalize_text(alias) for alias in aliases}
        for key, value in selections.items():
            if self._normalize_text(key) in normalized_aliases:
                return self._to_float(value, default)

        # Secondary match for entries like "2,5 Üst" or "Toplam Alt"
        for key, value in selections.items():
            key_norm = self._normalize_text(key)
            if any(alias in key_norm for alias in normalized_aliases):
                return self._to_float(value, default)

        return default

    def _parse_json_dict(self, payload: Any) -> Optional[Dict[str, Any]]:
        """Return ``payload`` as a dict, decoding it first if it is a JSON string.

        Returns ``None`` when decoding fails or the result is not a dict.
        """
        if isinstance(payload, str):
            try:
                payload = json.loads(payload)
            except Exception:
                return None
        return payload if isinstance(payload, dict) else None
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,75 +0,0 @@
|
||||
import sys
|
||||
import unittest
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
AI_ENGINE_ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(AI_ENGINE_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(AI_ENGINE_ROOT))
|
||||
|
||||
from core.engines.odds_predictor import OddsPredictorEngine
|
||||
from features.sidelined_analyzer import SidelinedAnalyzer
|
||||
|
||||
|
||||
class EngineNullSafetyTests(unittest.TestCase):
    """Checks that engines cope with ``Decimal`` and non-numeric input values."""

    def test_odds_predictor_accepts_decimal_inputs_without_crashing(self):
        """``Decimal`` odds must still produce positive home/draw/away probabilities."""
        engine = OddsPredictorEngine()

        prediction = engine.predict(
            odds_data={
                "ms_h": Decimal("2.10"),
                "ms_d": Decimal("3.25"),
                "ms_a": Decimal("3.60"),
                "ou25_o": Decimal("1.90"),
            },
        )

        self.assertGreater(prediction.market_home_prob, 0.0)
        self.assertGreater(prediction.market_draw_prob, 0.0)
        self.assertGreater(prediction.market_away_prob, 0.0)

    def test_sidelined_analyzer_handles_non_numeric_fields(self):
        """Placeholder strings such as "N/A" and "?" must not break ``analyze``."""
        # Build the instance without running __init__, then set the attributes
        # by hand and stub out the player-stats lookup.
        analyzer = SidelinedAnalyzer.__new__(SidelinedAnalyzer)
        analyzer.position_weights = {"K": 0.35, "D": 0.20, "O": 0.25, "F": 0.30}
        analyzer.max_rating = 10
        analyzer.adaptation_threshold = 10
        analyzer.adaptation_discount = 0.5
        analyzer.goalkeeper_penalty = 0.15
        analyzer.confidence_boost = 10
        analyzer.max_impact = 0.85
        analyzer.key_player_threshold = 3
        analyzer.recent_matches_lookback = 15
        analyzer._fetch_player_stats = MagicMock(return_value={})

        result = analyzer.analyze(
            {
                "totalSidelined": 2,
                "players": [
                    {
                        "playerId": "p1",
                        "playerName": "Player One",
                        "positionShort": "O",
                        "matchesMissed": "N/A",
                        "average": "?",
                        "type": "injury",
                    },
                    {
                        "playerId": "p2",
                        "playerName": "Player Two",
                        "positionShort": "K",
                        "matchesMissed": "12",
                        "average": "6.7",
                        "type": "suspension",
                    },
                ],
            },
        )

        self.assertEqual(result.total_sidelined, 2)
        self.assertGreaterEqual(result.impact_score, 0.0)
        # assertGreaterEqual reports the actual length on failure, unlike
        # assertTrue(len(...) >= 2), which only says "False is not true".
        self.assertGreaterEqual(len(result.player_details), 2)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -8,9 +8,10 @@ AI_ENGINE_ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(AI_ENGINE_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(AI_ENGINE_ROOT))
|
||||
|
||||
from models.v20_ensemble import FullMatchPrediction
|
||||
from schemas.prediction import FullMatchPrediction
|
||||
from schemas.match_data import MatchData
|
||||
from models.basketball_v25 import BasketballMatchPrediction
|
||||
from services.single_match_orchestrator import MatchData, SingleMatchOrchestrator
|
||||
from services.single_match_orchestrator import SingleMatchOrchestrator
|
||||
|
||||
|
||||
class _CursorContext:
|
||||
|
||||
@@ -0,0 +1,111 @@
|
||||
# Changelog - 2026-05-20
|
||||
|
||||
Bu dokuman, 20 Mayis 2026 tarihinde yapilan **League Tier Sistemi** calismasini ozetler. Amac: model egitiminde kullanilan ligleri kalite bazli filtreleyerek tahmin dogrulugunu artirmak.
|
||||
|
||||
## 1. Problem Analizi
|
||||
|
||||
- Model 443 lig ile egitiliyordu (DB'de toplam 1142 lig).
|
||||
- Dusuk kaliteli ligler (zayif bahis pazari, eksik veri) modelin sinyalini bozuyordu.
|
||||
- Mevcut performans: **MS %26.9** (rastgeleden kotu), **OU2.5 %57.7**, **BTTS %53.8**
|
||||
- Model hic "X" (draw) tahmini yapmiyordu — agir home bias vardi.
|
||||
- 19 gercek bahisten 5 kazanc, 14 kayip = **%26.3 win rate, -9.68 unit**
|
||||
|
||||
## 2. Cozum: League Tier Sistemi
|
||||
|
||||
Bookmaker margin (overround) analizine dayanarak 48 kaliteli lig secildi:
|
||||
|
||||
| Tier | Isim | Lig Sayisi | Kriter |
|
||||
|------|------|-----------|--------|
|
||||
| 1 | Elmas | 10 | En iyi veri kalitesi, en dusuk margin (Premier League, La Liga, Serie A vb.) |
|
||||
| 2 | Altin | 20 | Iyi veri kalitesi (Eredivisie, Ligue 2, Championship vb.) |
|
||||
| 3 | Gumus | 18 | Kabul edilebilir veri (Ekstraklasa, MLS, Liga MX vb.) |
|
||||
|
||||
## 3. Veritabani Degisiklikleri
|
||||
|
||||
### Yeni tablo: `league_tiers`
|
||||
```sql
|
||||
CREATE TABLE league_tiers (
|
||||
id SERIAL PRIMARY KEY,
|
||||
league_id TEXT NOT NULL UNIQUE REFERENCES leagues(id) ON DELETE CASCADE,
|
||||
tier INT NOT NULL DEFAULT 1,
|
||||
is_active BOOLEAN NOT NULL DEFAULT TRUE,
|
||||
added_by TEXT,
|
||||
notes TEXT,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
CREATE INDEX idx_league_tiers_tier_active ON league_tiers(tier, is_active);
|
||||
```
|
||||
|
||||
### Prisma schema guncellendi
|
||||
- `LeagueTier` modeli eklendi (`prisma/schema.prisma`)
|
||||
- `League` modeline `leagueTier LeagueTier?` iliskisi eklendi
|
||||
- Not: Migration shadow DB hatasi nedeniyle tablo dogrudan SQL ile olusturuldu
|
||||
|
||||
## 4. Backend Degisiklikleri
|
||||
|
||||
### Yeni dosyalar
|
||||
|
||||
**`src/modules/leagues/league-tiers.service.ts`**
|
||||
- `findAll()` — tum tier'lari listele
|
||||
- `findActive()` — aktif tier'lari listele
|
||||
- `findByTier(tier)` — belirli tier'daki ligler
|
||||
- `addLeague(leagueId, tier, notes, addedBy)` — lig ekle/guncelle (upsert), otomatik JSON sync + retrain tetikle
|
||||
- `removeLeague(leagueId)` — ligi pasif yap (soft delete), JSON sync
|
||||
- `updateTier(leagueId, tier)` — tier seviyesi degistir
|
||||
- `deleteLeague(leagueId)` — kalici sil
|
||||
- `syncQualifiedLeagues()` — DB'den `qualified_leagues.json` dosyasini guncelle
|
||||
- `triggerModelRetrain()` — AI engine'e POST `/v1/admin/retrain`
|
||||
- `getStats()` — tier istatistikleri + toplam qualified match sayisi
|
||||
|
||||
**`src/modules/leagues/league-tiers.controller.ts`**
|
||||
- `GET /admin/league-tiers` — listele (`?active=true` filtreli)
|
||||
- `GET /admin/league-tiers/stats` — istatistikler
|
||||
- `GET /admin/league-tiers/tier/:tier` — tier bazli filtre
|
||||
- `POST /admin/league-tiers` — lig ekle (body: `{ leagueId, tier?, notes?, addedBy? }`)
|
||||
- `PUT /admin/league-tiers/:leagueId/tier` — tier guncelle
|
||||
- `PUT /admin/league-tiers/:leagueId/deactivate` — pasif yap
|
||||
- `DELETE /admin/league-tiers/:leagueId` — kalici sil
|
||||
- `POST /admin/league-tiers/sync` — JSON sync tetikle
|
||||
- `POST /admin/league-tiers/retrain` — model retrain tetikle
|
||||
- Tum endpointler `@Roles("superadmin")` ile korumali
|
||||
|
||||
### Guncellenen dosyalar
|
||||
|
||||
**`src/modules/leagues/leagues.module.ts`**
|
||||
- `HttpModule` import eklendi (retrain HTTP cagrisi icin)
|
||||
- `LeagueTiersController` ve `LeagueTiersService` register edildi
|
||||
|
||||
## 5. qualified_leagues.json Sync
|
||||
|
||||
- Onceki durum: **443 lig**
|
||||
- Yeni durum: **48 lig** (DB'deki aktif tier'lardan)
|
||||
- `data-fetcher.task.ts` zaten bu dosyayi okuyor — yeni mac verisi sadece 48 lig icin cekilecek
|
||||
- `extract_training_data.py` zaten bu dosyayi okuyor — egitim verisi sadece 48 ligden gelecek
|
||||
|
||||
## 6. Mevcut Veri Akisi
|
||||
|
||||
```
|
||||
Admin Panel (lig ekle/cikar)
|
||||
|
|
||||
v
|
||||
LeagueTiersService.addLeague() / removeLeague()
|
||||
|
|
||||
+---> DB guncelle (league_tiers tablosu)
|
||||
+---> syncQualifiedLeagues() ---> qualified_leagues.json guncelle
|
||||
+---> triggerModelRetrain() ---> AI Engine POST /v1/admin/retrain
|
||||
|
||||
Data Fetcher (cron):
|
||||
qualified_leagues.json ---> sadece bu liglerden live_matches cek
|
||||
|
||||
Training Pipeline:
|
||||
qualified_leagues.json ---> sadece bu liglerden training_data extract et
|
||||
```
|
||||
|
||||
## 7. Yapilacaklar
|
||||
|
||||
- [ ] AI engine'de `/v1/admin/retrain` endpoint olustur (Python tarafi)
|
||||
- [ ] Modeli yeni 48 lig dataseti ile yeniden egit
|
||||
- [ ] Admin panel frontend'inde league tier yonetim UI'i
|
||||
- [ ] Draw blindness sorununu arastir (model hic X tahmini yapmiyor)
|
||||
- [ ] Prisma migration gecmisini duzelt (manual SQL → migration align)
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user