Files
iddaai-be/ai-engine/models/v20_ensemble.py
T
fahricansecer 9027cc9900
Deploy Iddaai Backend / build-and-deploy (push) Successful in 3m21s
v28
2026-04-24 23:46:28 +03:00

1283 lines
49 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
V20 Ensemble Beast - Main Predictor
Combines 4 prediction engines with surprise detection.
This is the primary interface for V20 predictions.
"""
import os
import sys
import math
import json
import pickle
import time
import psycopg2
import pandas as pd
from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass, field
# Add paths
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from core.engines.team_predictor import get_team_predictor
from core.engines.player_predictor import get_player_predictor
from core.engines.odds_predictor import get_odds_predictor
from core.engines.referee_predictor import get_referee_predictor
from features.upset_engine import get_upset_engine
from features.upset_engine_v2 import get_upset_engine_v2
from features.feature_adapter import get_feature_adapter
from utils.top_leagues import load_top_league_ids
from data.db import get_clean_dsn
import xgboost as xgb
from models.calibration import Calibrator
# New Config & Calculators
from config.config_loader import get_config
from core.calculators.base_calculator import CalculationContext
from core.calculators.match_result_calculator import MatchResultCalculator
from core.calculators.over_under_calculator import OverUnderCalculator
from core.calculators.half_time_calculator import HalfTimeCalculator
from core.calculators.score_calculator import ScoreCalculator, ScorePrediction
from core.calculators.other_markets_calculator import OtherMarketsCalculator
from core.calculators.risk_assessor import RiskAssessor
from core.calculators.bet_recommender import BetRecommender
class _BoosterModelAdapter:
    """Give a raw ``xgboost.Booster`` the ``predict_proba`` call shape.

    The rest of this module calls ``model.predict_proba(features)`` and
    indexes the result as ``result[row][class]``; this wrapper lets a bare
    Booster be used in the same way.
    """

    def __init__(self, booster: xgb.Booster):
        # Kept under ``_booster`` because feature alignment looks the
        # attribute up by that name.
        self._booster = booster

    def predict_proba(self, features: pd.DataFrame):
        """Return per-row class probabilities for ``features``.

        A one-dimensional Booster output is treated as the positive-class
        probability and expanded to ``[P(class0), P(class1)]`` per row; any
        other output shape is returned exactly as the Booster produced it.
        """
        scores = self._booster.predict(xgb.DMatrix(features))
        if len(scores.shape) != 1:
            # multiclass: already (n, k)
            return scores
        # binary: build the two-column form row by row
        return [[float(1.0 - score), float(score)] for score in scores]
@dataclass
class MarketPrediction:
    """One pick on one betting market, plus its recommendation flags."""

    market_type: str
    pick: str
    probability: float
    confidence: float
    odds: float = 0.0
    is_recommended: bool = False
    is_value_bet: bool = False
    edge: float = 0.0  # Expected edge over market

    def to_dict(self) -> dict:
        """Serialise to a plain dict.

        ``probability`` is multiplied by 100 and rounded to one decimal;
        ``confidence`` and ``edge`` are rounded to one decimal; every other
        field is passed through unchanged.
        """
        payload = {}
        payload["market_type"] = self.market_type
        payload["pick"] = self.pick
        payload["probability"] = round(self.probability * 100, 1)
        payload["confidence"] = round(self.confidence, 1)
        payload["odds"] = self.odds
        payload["is_recommended"] = self.is_recommended
        payload["is_value_bet"] = self.is_value_bet
        payload["edge"] = round(self.edge, 1)
        return payload
@dataclass
class FullMatchPrediction:
    """Every market prediction produced for a single match.

    Probabilities are stored as fractions; ``to_dict`` converts them to
    percentages. Only ``match_id``, ``home_team`` and ``away_team`` are
    required, every other field carries a default.
    """

    match_id: str
    home_team: str
    away_team: str
    match_date: str = ""

    # === MATCH RESULT (1X2) ===
    ms_home_prob: float = 0.33
    ms_draw_prob: float = 0.33
    ms_away_prob: float = 0.33
    ms_pick: str = ""
    ms_confidence: float = 0.0

    # === DOUBLE CHANCE ===
    dc_1x_prob: float = 0.66
    dc_x2_prob: float = 0.66
    dc_12_prob: float = 0.66
    dc_pick: str = ""
    dc_confidence: float = 0.0

    # === OVER/UNDER GOALS ===
    # 1.5
    over_15_prob: float = 0.70
    under_15_prob: float = 0.30
    ou15_pick: str = ""
    ou15_confidence: float = 0.0
    # 2.5
    over_25_prob: float = 0.50
    under_25_prob: float = 0.50
    ou25_pick: str = ""
    ou25_confidence: float = 0.0
    # 3.5
    over_35_prob: float = 0.30
    under_35_prob: float = 0.70
    ou35_pick: str = ""
    ou35_confidence: float = 0.0

    # === BOTH TEAMS TO SCORE (BTTS) ===
    btts_yes_prob: float = 0.50
    btts_no_prob: float = 0.50
    btts_pick: str = ""
    btts_confidence: float = 0.0

    # === FIRST-HALF RESULT ===
    ht_home_prob: float = 0.30
    ht_draw_prob: float = 0.40
    ht_away_prob: float = 0.30
    ht_pick: str = ""
    ht_confidence: float = 0.0

    # === SCORE PREDICTIONS ===
    score: ScorePrediction = None
    predicted_ft_score: str = "1-1"
    predicted_ht_score: str = "0-0"
    ft_scores_top5: List[Dict] = field(default_factory=list)

    # === xG (Expected Goals) ===
    home_xg: float = 1.3
    away_xg: float = 1.1
    total_xg: float = 2.4

    # === RISK ASSESSMENT ===
    risk_level: str = "MEDIUM"  # LOW, MEDIUM, HIGH, EXTREME
    risk_score: float = 0.0
    is_surprise_risk: bool = False
    surprise_type: str = ""
    risk_warnings: List[str] = field(default_factory=list)
    ht_ft_probs: Dict[str, float] = field(default_factory=dict)

    # === GLM-5 UPSET SCORE ===
    upset_score: int = 0  # upset score in the 0-100 range
    upset_level: str = "LOW"  # LOW, MEDIUM, HIGH, EXTREME
    upset_reasons: List[str] = field(default_factory=list)

    # === ENGINE CONTRIBUTIONS ===
    team_confidence: float = 0.0
    player_confidence: float = 0.0
    odds_confidence: float = 0.0
    referee_confidence: float = 0.0

    # === CORNERS, CARDS & OTHER ===
    total_corners_pred: float = 9.5
    corner_pick: str = "9.5 Üst"
    total_cards_pred: float = 4.5
    card_pick: str = "4.5 Alt"
    cards_over_prob: float = 0.50
    cards_under_prob: float = 0.50
    cards_confidence: float = 0.0
    handicap_pick: str = ""
    handicap_home_prob: float = 0.33
    handicap_draw_prob: float = 0.34
    handicap_away_prob: float = 0.33
    handicap_confidence: float = 0.0
    ht_over_05_prob: float = 0.65
    ht_under_05_prob: float = 0.35
    ht_over_15_prob: float = 0.30
    ht_under_15_prob: float = 0.70
    ht_ou_pick: str = "İY 0.5 Üst"
    ht_ou15_pick: str = "İY 1.5 Alt"
    odd_even_pick: str = "Çift"
    odd_prob: float = 0.50  # probability of an odd goal total
    even_prob: float = 0.50  # probability of an even goal total

    # === RECOMMENDATIONS ===
    best_bet: Optional[MarketPrediction] = None
    recommended_bets: List[MarketPrediction] = field(default_factory=list)
    alternative_bet: Optional[MarketPrediction] = None
    expert_recommendation: Dict[str, Any] = field(default_factory=dict)

    # === DETAILED ANALYSIS ===
    analysis_details: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialise the prediction into a nested, JSON-friendly dict.

        Probabilities are emitted as percentages rounded to one decimal,
        confidences/scores are rounded to one decimal and xG values to two.
        ``score`` and ``expert_recommendation`` are not part of the output.
        """

        def pct(fraction: float) -> float:
            # fraction -> percentage with one decimal
            return round(fraction * 100, 1)

        def r1(value: float) -> float:
            return round(value, 1)

        match_info = {
            "match_id": self.match_id,
            "home_team": self.home_team,
            "away_team": self.away_team,
            "match_date": self.match_date,
        }

        match_result = {
            "1": pct(self.ms_home_prob),
            "X": pct(self.ms_draw_prob),
            "2": pct(self.ms_away_prob),
            "pick": self.ms_pick,
            "confidence": r1(self.ms_confidence),
        }

        double_chance = {
            "1X": pct(self.dc_1x_prob),
            "X2": pct(self.dc_x2_prob),
            "12": pct(self.dc_12_prob),
            "pick": self.dc_pick,
            "confidence": r1(self.dc_confidence),
        }

        over_under = {
            "1.5": {
                "over": pct(self.over_15_prob),
                "under": pct(self.under_15_prob),
                "pick": self.ou15_pick,
                "confidence": r1(self.ou15_confidence),
            },
            "2.5": {
                "over": pct(self.over_25_prob),
                "under": pct(self.under_25_prob),
                "pick": self.ou25_pick,
                "confidence": r1(self.ou25_confidence),
            },
            "3.5": {
                "over": pct(self.over_35_prob),
                "under": pct(self.under_35_prob),
                "pick": self.ou35_pick,
                "confidence": r1(self.ou35_confidence),
            },
        }

        btts = {
            "yes": pct(self.btts_yes_prob),
            "no": pct(self.btts_no_prob),
            "pick": self.btts_pick,
            "confidence": r1(self.btts_confidence),
        }

        first_half = {
            "1": pct(self.ht_home_prob),
            "X": pct(self.ht_draw_prob),
            "2": pct(self.ht_away_prob),
            "pick": self.ht_pick,
            "confidence": r1(self.ht_confidence),
            "over_under_05": {
                "over": pct(self.ht_over_05_prob),
                "under": pct(self.ht_under_05_prob),
                "pick": self.ht_ou_pick,
            },
            "over_under_15": {
                "over": pct(self.ht_over_15_prob),
                "under": pct(self.ht_under_15_prob),
                "pick": self.ht_ou15_pick,
            },
        }

        scores = {
            "predicted_ft": self.predicted_ft_score,
            "predicted_ht": self.predicted_ht_score,
            "top_5_ft_scores": self.ft_scores_top5,
        }

        others = {
            "handicap": {
                "pick": self.handicap_pick,
                "confidence": r1(self.handicap_confidence),
                "home": pct(self.handicap_home_prob),
                "draw": pct(self.handicap_draw_prob),
                "away": pct(self.handicap_away_prob),
            },
            "corners": {
                "total": r1(self.total_corners_pred),
                "pick": self.corner_pick,
            },
            "cards": {
                "total": r1(self.total_cards_pred),
                "pick": self.card_pick,
                "confidence": r1(self.cards_confidence),
                "over": pct(self.cards_over_prob),
                "under": pct(self.cards_under_prob),
            },
            "odd_even": {
                "pick": self.odd_even_pick,
                "tek": pct(self.odd_prob),
                "cift": pct(self.even_prob),
            },
        }

        xg = {
            "home": round(self.home_xg, 2),
            "away": round(self.away_xg, 2),
            "total": round(self.total_xg, 2),
        }

        # An empty or missing HT/FT mapping serialises as an empty dict.
        htft_pct = {}
        if self.ht_ft_probs:
            htft_pct = {label: pct(prob) for label, prob in self.ht_ft_probs.items()}

        all_recommended = []
        if self.recommended_bets:
            all_recommended = [bet.to_dict() for bet in self.recommended_bets]

        best = self.best_bet.to_dict() if self.best_bet else None
        alternative = self.alternative_bet.to_dict() if self.alternative_bet else None

        return {
            "match_info": match_info,
            "predictions": {
                "match_result": match_result,
                "double_chance": double_chance,
                "over_under": over_under,
                "btts": btts,
                "first_half": first_half,
                "scores": scores,
                "others": others,
                "xg": xg,
            },
            "risk": {
                "level": self.risk_level,
                "score": r1(self.risk_score),
                "is_surprise_risk": self.is_surprise_risk,
                "surprise_type": self.surprise_type,
                "ht_ft_probs": htft_pct,
                "warnings": self.risk_warnings,
            },
            "upset_analysis": {
                "score": self.upset_score,
                "level": self.upset_level,
                "reasons": self.upset_reasons,
            },
            "engine_breakdown": {
                "team_engine": r1(self.team_confidence),
                "player_engine": r1(self.player_confidence),
                "odds_engine": r1(self.odds_confidence),
                "referee_engine": r1(self.referee_confidence),
            },
            "recommendations": {
                "best_bet": best,
                "all_recommended": all_recommended,
                "alternative_bet": alternative,
            },
            "analysis_details": self.analysis_details,
        }
class V20EnsemblePredictor:
# The nine half-time/full-time outcome labels. Every 9-element prior tuple
# below, and the HT/FT probability vectors handled in this class, are indexed
# in this order (so index 2 is "1/2" and index 6 is "2/1").
HTFT_LABELS = ("1/1", "1/X", "1/2", "X/1", "X/X", "X/2", "2/1", "2/X", "2/2")
# Neutral defaults when MS odds are missing: avoid synthetic home-favorite bias.
# Home and away share the same price so neither side is treated as favorite.
DEFAULT_MS_H = 2.65
DEFAULT_MS_D = 3.20
DEFAULT_MS_A = 2.65
# Fallback HT/FT prior for football matches in top leagues; used when no
# league-specific prior is available. Values sum to ~1.0.
FOOTBALL_TOP_PRIOR = (
0.263760,
0.051786,
0.022942,
0.150168,
0.157798,
0.106064,
0.027622,
0.051226,
0.168634,
)
# Fallback HT/FT prior for football matches outside the top leagues.
# Values sum to ~1.0.
FOOTBALL_NON_TOP_PRIOR = (
0.265113,
0.048306,
0.020399,
0.147020,
0.152383,
0.113075,
0.026542,
0.046356,
0.180805,
)
# Top-league football priors conditioned on favorite side from MS (1X2) odds.
# Label order follows HTFT_LABELS.
# Used when the home side is the favorite.
FOOTBALL_TOP_PRIOR_HOME_FAV = (
0.321707,
0.054165,
0.017952,
0.179729,
0.161674,
0.078991,
0.031186,
0.047394,
0.107201,
)
# Used when the away side is the favorite.
FOOTBALL_TOP_PRIOR_AWAY_FAV = (
0.130654,
0.049139,
0.033754,
0.081975,
0.156142,
0.167164,
0.020207,
0.058324,
0.302641,
)
# Used when home and away odds are close enough to count as balanced.
FOOTBALL_TOP_PRIOR_BALANCED = (
0.169429,
0.052486,
0.028545,
0.144567,
0.209024,
0.116943,
0.026703,
0.053407,
0.198895,
)
def __init__(self):
    """Build engines, calculators and XGBoost models for the predictor.

    Progress is reported with ``print``. The ``XGB_MODEL_KEYS`` environment
    variable (comma-separated keys, case-insensitive) restricts which
    XGBoost models are loaded; when unset or blank, all of them are tried.
    A model that is missing or fails to load is reported and skipped.
    """
    print("🚀 Initializing V20 Ensemble Beast...")
    self.config = get_config()

    # Engines
    self.team_engine = get_team_predictor()
    self.player_engine = get_player_predictor()
    self.odds_engine = get_odds_predictor()
    self.referee_engine = get_referee_predictor()
    self.upset_engine = get_upset_engine()
    self.upset_engine_v2 = get_upset_engine_v2()  # GLM-5 enhanced

    # Calculators
    print("⚙️ Loading market calculators...")
    cfg = self.config
    self.match_result_calc = MatchResultCalculator(cfg)
    self.over_under_calc = OverUnderCalculator(cfg)
    self.half_time_calc = HalfTimeCalculator(cfg)
    self.score_calc = ScoreCalculator(cfg)
    print(" ✅ Score Calculator (XGBoost FT+HT) loaded")
    self.other_markets_calc = OtherMarketsCalculator(cfg)
    self.risk_assessor = RiskAssessor(cfg)
    self.bet_recommender = BetRecommender(cfg)

    # Expert Recommender (New Logic) - imported here, as in the original.
    from core.calculators.expert_recommender import ExpertRecommender
    self.expert_recommender = ExpertRecommender(cfg)

    # XGBoost Integration
    print("🤖 Loading XGBoost models...")
    self.feature_adapter = get_feature_adapter()
    self.calibrator = Calibrator()
    self.xgb_models = {}
    self.top_league_ids = load_top_league_ids()
    print(f"📋 Loaded {len(self.top_league_ids)} top leagues for HT/FT tuning")
    self.db_dsn = get_clean_dsn()
    self.league_htft_prior_cache: Dict[Tuple[str, str], Optional[Tuple[float, ...]]] = {}

    here = os.path.dirname(os.path.abspath(__file__))
    xgb_dir = os.path.join(here, "..", "models", "xgboost")

    # Every artifact is named "xgb_<key>"; the tuple order is the load order.
    model_keys = (
        "ms",
        "ou25",
        "btts",
        "ht_ft",
        "ht_result",
        "ht_ou05",
        "ht_ou15",
        "odd_even",
        "ou15",
        "ou35",
        "handicap_ms",
        "cards_ou45",
    )
    model_files = {key: f"xgb_{key}" for key in model_keys}

    # Optional allow-list of model keys from the environment.
    key_filter = os.getenv("XGB_MODEL_KEYS", "").strip()
    if key_filter:
        wanted = {part.strip().lower() for part in key_filter.split(",") if part.strip()}
        model_files = {key: name for key, name in model_files.items() if key in wanted}
        if not model_files:
            print("⚠️ XGB_MODEL_KEYS set but no valid keys matched. Loading none.")
        else:
            active = ', '.join(sorted(model_files))
            print(f"️ XGB_MODEL_KEYS active -> loading only: {active}")

    for key, base_name in model_files.items():
        print(f" ⏳ Loading {key} from {base_name}.pkl/.json...", flush=True)
        model, src, err = self._load_xgb_model(xgb_dir, base_name)
        if model is not None:
            self.xgb_models[key] = model
            print(f" ✅ Loaded {key} ({src})")
            continue
        if err:
            print(f" ⚠️ Failed to load {base_name}: {err}")
            continue
        print(f" ⚠️ Model not found: {base_name}.pkl or {base_name}.json")

    print("✅ V20 Ensemble Beast ready!")
@staticmethod
def _load_xgb_model(xgb_dir: str, base_name: str):
pkl_path = os.path.join(xgb_dir, f"{base_name}.pkl")
json_path = os.path.join(xgb_dir, f"{base_name}.json")
if os.path.exists(pkl_path):
started = time.perf_counter()
with open(pkl_path, "rb") as f:
model = pickle.load(f)
elapsed = time.perf_counter() - started
return model, f"pkl {elapsed:.2f}s", None
if os.path.exists(json_path):
started = time.perf_counter()
# Preferred path: sklearn wrapper with predict_proba
try:
model = xgb.XGBClassifier()
model.load_model(json_path)
elapsed = time.perf_counter() - started
return model, f"json {elapsed:.2f}s", None
except Exception:
# Fallback: raw Booster + adapter
try:
booster = xgb.Booster()
booster.load_model(json_path)
model = _BoosterModelAdapter(booster)
elapsed = time.perf_counter() - started
return model, f"json/booster {elapsed:.2f}s", None
except Exception as e:
return None, "", e
return None, "", None
@staticmethod
def _safe_odd(value: Any) -> float:
try:
odd = float(value)
return odd if odd > 1.01 else 0.0
except (TypeError, ValueError):
return 0.0
@staticmethod
def _align_features(features: pd.DataFrame, model) -> pd.DataFrame:
"""Align DataFrame columns to the model's expected feature set.
Supports:
- sklearn wrappers (XGBClassifier / LGBMClassifier) → feature_names_in_
- raw xgboost.Booster → feature_names
- _BoosterModelAdapter → _booster.feature_names
If the model doesn't expose feature names, returns the DataFrame as-is.
"""
expected: Optional[List[str]] = None
# 1. sklearn wrapper (XGBClassifier, LGBMClassifier, CalibratedClassifierCV)
if hasattr(model, 'feature_names_in_'):
expected = list(model.feature_names_in_)
# 2. _BoosterModelAdapter
elif hasattr(model, '_booster') and hasattr(model._booster, 'feature_names'):
expected = model._booster.feature_names
# 3. raw xgboost.Booster
elif hasattr(model, 'feature_names') and model.feature_names:
expected = list(model.feature_names)
if expected is None:
return features
# Only keep columns that the model expects (order preserved)
available = [col for col in expected if col in features.columns]
if len(available) < len(expected):
missing = set(expected) - set(available)
print(f"⚠️ Feature alignment: {len(missing)} missing features filled with 0: {sorted(missing)[:5]}{'...' if len(missing) > 5 else ''}")
# Add missing columns with 0 (safe neutral default)
for col in expected:
if col not in features.columns:
features = features.copy()
features[col] = 0.0
return features[expected]
def _favorite_profile_from_odds(self, odds_data: Dict[str, float]) -> Tuple[str, float]:
"""
Returns (favorite_side, gap_to_second_favorite).
favorite_side: H, A, D, or U (unknown)
"""
ms_h = self._safe_odd((odds_data or {}).get("ms_h"))
ms_d = self._safe_odd((odds_data or {}).get("ms_d"))
ms_a = self._safe_odd((odds_data or {}).get("ms_a"))
candidates = [(side, odd) for side, odd in (("H", ms_h), ("D", ms_d), ("A", ms_a)) if odd > 0.0]
if len(candidates) < 2:
return "U", 0.0
candidates.sort(key=lambda item: item[1])
favorite_side, favorite_odd = candidates[0]
second_odd = candidates[1][1]
return favorite_side, max(0.0, second_odd - favorite_odd)
def _favorite_side_from_ms_odds(
self,
odds_data: Dict[str, float],
) -> str:
"""
Returns side from MS home/away odds only:
- H: home favorite
- A: away favorite
- B: balanced (home and away near-equal)
- U: unknown
"""
ms_h = self._safe_odd((odds_data or {}).get("ms_h"))
ms_a = self._safe_odd((odds_data or {}).get("ms_a"))
if ms_h <= 0.0 or ms_a <= 0.0:
return "U"
balance_gap = float(self.config.get("risk.htft_favorite_balance_gap", 0.20))
if abs(ms_h - ms_a) <= balance_gap:
return "B"
return "H" if ms_h < ms_a else "A"
def _get_top_odds_conditioned_prior(
self,
odds_data: Dict[str, float],
) -> Optional[Tuple[float, ...]]:
side = self._favorite_side_from_ms_odds(odds_data)
if side == "H":
return self.FOOTBALL_TOP_PRIOR_HOME_FAV
if side == "A":
return self.FOOTBALL_TOP_PRIOR_AWAY_FAV
if side == "B":
return self.FOOTBALL_TOP_PRIOR_BALANCED
return None
def _is_top_league(self, league_id: Optional[str]) -> bool:
if not league_id:
return False
return str(league_id) in self.top_league_ids
def _get_htft_league_prior(
self,
league_id: Optional[str],
sport: str,
) -> Optional[Tuple[float, ...]]:
sport_key = (sport or "").lower().strip()
if sport_key != "football" or not league_id:
return None
cache_key = (sport_key, str(league_id))
if cache_key in self.league_htft_prior_cache:
return self.league_htft_prior_cache[cache_key]
min_samples = int(self.config.get("risk.htft_prior_min_matches", 300))
combo_counts = {label: 0 for label in self.HTFT_LABELS}
try:
with psycopg2.connect(self.db_dsn) as conn:
with conn.cursor() as cur:
cur.execute(
"""
WITH base AS (
SELECT
CASE WHEN ht_score_home > ht_score_away THEN '1'
WHEN ht_score_home = ht_score_away THEN 'X'
ELSE '2' END AS ht,
CASE WHEN score_home > score_away THEN '1'
WHEN score_home = score_away THEN 'X'
ELSE '2' END AS ft
FROM matches
WHERE status = 'FT'
AND sport = %s
AND league_id = %s
AND ht_score_home IS NOT NULL
AND ht_score_away IS NOT NULL
AND score_home IS NOT NULL
AND score_away IS NOT NULL
)
SELECT ht || '/' || ft AS combo, COUNT(*)::bigint AS n
FROM base
GROUP BY combo
""",
(sport_key, str(league_id)),
)
rows = cur.fetchall()
except Exception:
self.league_htft_prior_cache[cache_key] = None
return None
total = 0
for combo, n in rows:
if combo in combo_counts:
combo_counts[combo] = int(n)
total += int(n)
if total < min_samples:
self.league_htft_prior_cache[cache_key] = None
return None
prior = tuple(combo_counts[label] / total for label in self.HTFT_LABELS)
self.league_htft_prior_cache[cache_key] = prior
return prior
def _postprocess_htft_probs(
self,
raw_probs: List[float],
odds_data: Optional[Dict[str, float]] = None,
sport: str = "football",
is_top_league: bool = False,
league_id: Optional[str] = None,
) -> List[float]:
"""
Stabilize HT/FT class probabilities.
Why:
- HT/FT reversals (1/2, 2/1) are rare and can be overestimated.
- We preserve ranking signal but make absolute probabilities conservative.

Steps, in order: floor and calibrate each class, flatten with a
temperature, damp the two reversal classes, and (football only) blend
with a prior and cap the reversal classes; finally renormalise.

Args:
raw_probs: model output in HTFT_LABELS order; only the first nine
values are used.
odds_data: MS (1X2) odds used to find the favorite side; optional.
sport: "basketball" selects the basketball settings, anything else
uses the football settings; prior blending runs for "football" only.
is_top_league: selects the top-league or non-top-league settings.
league_id: league used to look up a league-specific prior.

Returns:
Nine probabilities in HTFT_LABELS order that sum to 1. A uniform
distribution is returned when fewer than nine inputs are given or
the processed values do not have a positive sum.
"""
# Keep at most nine values and floor each at 1e-9.
probs = [max(1e-9, float(p)) for p in raw_probs[:9]]
if len(probs) != 9:
return [1.0 / 9.0] * 9
# Global calibration pass for HT/FT market.
probs = [self.calibrator.calibrate("ht_ft", p) for p in probs]
sport_key = (sport or "football").lower().strip()
# Temperature > 1.0 flattens over-confident distributions.
# The top-league keys fall back to the generic key before the literal default.
if sport_key == "basketball":
if is_top_league:
temperature = float(
self.config.get("risk.htft_temperature_basketball_top", self.config.get("risk.htft_temperature_basketball", 1.08)),
)
else:
temperature = float(
self.config.get("risk.htft_temperature_basketball_non_top", 1.15),
)
else:
if is_top_league:
temperature = float(
self.config.get("risk.htft_temperature_top", self.config.get("risk.htft_temperature", 1.25)),
)
else:
temperature = float(
self.config.get("risk.htft_temperature_non_top", 1.35),
)
# Temperatures of 1.0 or below leave the values unchanged.
if temperature > 1.0:
inv_t = 1.0 / temperature
probs = [p**inv_t for p in probs]
# Extra damping for reversal classes: 1/2 (idx 2), 2/1 (idx 6).
# Four multipliers are read: a base one, one for a reversal won by the
# favorite, one for a reversal won by the underdog, and one for basketball.
if is_top_league:
base_reversal_multiplier = float(
self.config.get("risk.htft_reversal_multiplier_top", self.config.get("risk.htft_reversal_multiplier", 0.60)),
)
favorite_reversal_multiplier = float(
self.config.get(
"risk.htft_reversal_multiplier_favorite_top",
self.config.get("risk.htft_reversal_multiplier_favorite", 0.72),
),
)
underdog_reversal_multiplier = float(
self.config.get(
"risk.htft_reversal_multiplier_underdog_top",
self.config.get("risk.htft_reversal_multiplier_underdog", 0.45),
),
)
basketball_reversal_multiplier = float(
self.config.get(
"risk.htft_reversal_multiplier_basketball_top",
self.config.get("risk.htft_reversal_multiplier_basketball", 0.90),
),
)
else:
base_reversal_multiplier = float(self.config.get("risk.htft_reversal_multiplier_non_top", 0.45))
favorite_reversal_multiplier = float(
self.config.get("risk.htft_reversal_multiplier_favorite_non_top", 0.55),
)
underdog_reversal_multiplier = float(
self.config.get("risk.htft_reversal_multiplier_underdog_non_top", 0.30),
)
basketball_reversal_multiplier = float(
self.config.get("risk.htft_reversal_multiplier_basketball_non_top", 0.75),
)
# Odds-gap thresholds (favorite vs. second favorite) for the extra underdog penalty.
gap_medium = float(self.config.get("risk.htft_reversal_gap_medium", 0.50))
gap_strong = float(self.config.get("risk.htft_reversal_gap_strong", 1.00))
favorite_side, favorite_gap = self._favorite_profile_from_odds(odds_data or {})
# Multiplier for the reversal class whose full-time winner is winner_side
# ("H" or "A"); the result is always clamped to [0.20, 1.10].
def _reversal_multiplier(winner_side: str) -> float:
if sport_key == "basketball":
return basketball_reversal_multiplier
multiplier = base_reversal_multiplier
if favorite_side in ("H", "A"):
multiplier = (
favorite_reversal_multiplier
if winner_side == favorite_side
else underdog_reversal_multiplier
)
# If market heavily favors one side, penalize underdog-reversal harder.
if winner_side != favorite_side and favorite_gap >= gap_strong:
multiplier *= 0.80
elif winner_side != favorite_side and favorite_gap >= gap_medium:
multiplier *= 0.90
return max(0.20, min(1.10, multiplier))
# 1/2 => winner is Away, 2/1 => winner is Home
probs[2] *= _reversal_multiplier("A")
probs[6] *= _reversal_multiplier("H")
# Prior blend for football (league-specific if sufficient sample size).
if sport_key == "football":
league_prior = self._get_htft_league_prior(league_id=league_id, sport=sport_key)
if league_prior is not None:
prior = league_prior
blend = float(self.config.get("risk.htft_prior_blend_league", 0.65))
else:
# No league-specific prior: use the static top / non-top prior.
prior = self.FOOTBALL_TOP_PRIOR if is_top_league else self.FOOTBALL_NON_TOP_PRIOR
blend = float(
self.config.get(
"risk.htft_prior_blend_top" if is_top_league else "risk.htft_prior_blend_non_top",
0.50 if is_top_league else 0.58,
),
)
# Top leagues only: mix the favorite-conditioned prior into the prior itself.
if is_top_league:
side_prior = self._get_top_odds_conditioned_prior(odds_data or {})
if side_prior is not None:
if league_prior is not None:
odds_prior_blend = float(
self.config.get("risk.htft_prior_odds_blend_top_with_league", 0.22),
)
else:
odds_prior_blend = float(
self.config.get("risk.htft_prior_odds_blend_top", 0.35),
)
# The odds-conditioned share of the prior is limited to [0.0, 0.80].
odds_prior_blend = max(0.0, min(0.80, odds_prior_blend))
prior = tuple(
((1.0 - odds_prior_blend) * prior[idx]) + (odds_prior_blend * side_prior[idx])
for idx in range(9)
)
# The prior's weight against the model output is limited to [0.0, 0.95].
blend = max(0.0, min(0.95, blend))
probs = [((1.0 - blend) * p) + (blend * prior[idx]) for idx, p in enumerate(probs)]
# Hard cap reversal classes by prior factor to avoid unrealistic spikes.
cap_factor = float(self.config.get("risk.htft_reversal_cap_factor", 2.3))
# A factor below 1.0 is raised to 1.0, so the cap is never below the prior value.
cap_factor = max(1.0, cap_factor)
for idx in (2, 6):
cap_val = prior[idx] * cap_factor
if probs[idx] > cap_val:
probs[idx] = cap_val
# Renormalise so the nine values sum to 1.
total = sum(probs)
if total <= 0:
return [1.0 / 9.0] * 9
return [p / total for p in probs]
def predict(self,
match_id: str,
home_team_id: str,
away_team_id: str,
home_team_name: str,
away_team_name: str,
match_date_ms: int,
odds_data: Dict[str, float] = None,
home_lineup: List[str] = None,
away_lineup: List[str] = None,
referee_name: str = None,
home_goals_avg: float = 1.5,
home_conceded_avg: float = 1.2,
away_goals_avg: float = 1.2,
away_conceded_avg: float = 1.4,
home_position: int = 10,
away_position: int = 10,
league_name: str = "",
league_id: str = None,
sport: str = "football",
sidelined_data: Dict = None) -> FullMatchPrediction:
"""
Generate complete V20 ensemble prediction.
Returns FullMatchPrediction with ALL markets.
"""
# Default odds if not provided
if odds_data is None:
odds_data = {
"ms_h": self.DEFAULT_MS_H,
"ms_d": self.DEFAULT_MS_D,
"ms_a": self.DEFAULT_MS_A,
}
# === 1. COLLECT ALL ENGINE PREDICTIONS ===
team_pred = self.team_engine.predict(
home_team_id=home_team_id,
away_team_id=away_team_id,
match_date_ms=match_date_ms,
home_team_name=home_team_name,
away_team_name=away_team_name
)
player_pred = self.player_engine.predict(
match_id=match_id,
home_team_id=home_team_id,
away_team_id=away_team_id,
home_lineup=home_lineup,
away_lineup=away_lineup,
sidelined_data=sidelined_data
)
odds_pred = self.odds_engine.predict(
odds_data=odds_data,
home_goals_avg=home_goals_avg,
home_conceded_avg=home_conceded_avg,
away_goals_avg=away_goals_avg,
away_conceded_avg=away_conceded_avg
)
referee_pred = self.referee_engine.predict(
match_id=match_id,
referee_name=referee_name,
league_id=league_id
)
upset_factors = self.upset_engine.calculate_upset_potential(
home_team_name=home_team_name,
home_team_id=home_team_id,
away_team_name=away_team_name,
league_name=league_name,
home_position=home_position,
away_position=away_position,
match_date_ms=match_date_ms
)
# GLM-5 Enhanced Upset Detection v2
# Determine favorite from odds
favorite_side = "home"
favorite_odds = odds_data.get("ms_h", 2.0) if odds_data else 2.0
if odds_data:
ms_h = odds_data.get("ms_h", 999)
ms_a = odds_data.get("ms_a", 999)
if ms_a < ms_h:
favorite_side = "away"
favorite_odds = ms_a
elif ms_h < ms_a:
favorite_side = "home"
favorite_odds = ms_h
else:
favorite_side = "draw"
favorite_odds = odds_data.get("ms_d", 3.0)
upset_factors_v2 = self.upset_engine_v2.calculate_upset_potential(
home_team_name=home_team_name,
home_team_id=home_team_id,
away_team_name=away_team_name,
league_name=league_name,
home_position=home_position,
away_position=away_position,
match_date_ms=match_date_ms,
odds_data=odds_data,
referee_name=referee_name,
home_form_score=team_pred.home_form_score if hasattr(team_pred, 'home_form_score') else 50.0,
away_form_score=team_pred.away_form_score if hasattr(team_pred, 'away_form_score') else 50.0,
favorite_side=favorite_side,
favorite_odds=favorite_odds
)
# === 2. DYNAMIC ENGINE WEIGHTS ===
w_team = self.config.get("engine_weights.team", 0.30)
w_player = self.config.get("engine_weights.player", 0.25)
w_odds = self.config.get("engine_weights.odds", 0.30)
w_referee = self.config.get("engine_weights.referee", 0.15)
# Redistribution Logic
if not player_pred.lineup_available:
min_w = self.config.get("engine_weights.min_weight", 0.05)
surplus = w_player - min_w
w_player = min_w
w_team += surplus * self.config.get("weight_redistribution.player_missing_to_team", 0.5)
w_odds += surplus * self.config.get("weight_redistribution.player_missing_to_odds", 0.5)
min_ref_matches = self.config.get("weight_redistribution.referee_min_matches", 5)
if referee_pred.matches_officiated < min_ref_matches:
min_w = self.config.get("engine_weights.min_weight", 0.05)
surplus = w_referee - min_w
w_referee = min_w
w_team += surplus * self.config.get("weight_redistribution.referee_missing_to_team", 0.4)
w_odds += surplus * self.config.get("weight_redistribution.referee_missing_to_odds", 0.6)
# Normalize
w_total = w_team + w_player + w_odds + w_referee
weights = {
"team": w_team / w_total,
"player": w_player / w_total,
"odds": w_odds / w_total,
"referee": w_referee / w_total
}
# Get Modifiers
player_mods = self.player_engine.get_1x2_modifier(player_pred)
referee_mods = self.referee_engine.get_modifiers(referee_pred)
# Calculate xG (Used by multiple calculators)
home_xg = (team_pred.home_xg + odds_pred.poisson_home_xg) / 2
away_xg = (team_pred.away_xg + odds_pred.poisson_away_xg) / 2
# === 3. CREATE CONTEXT ===
ctx = CalculationContext(
team_pred=team_pred,
player_pred=player_pred,
odds_pred=odds_pred,
referee_pred=referee_pred,
upset_factors=upset_factors,
weights=weights,
player_mods=player_mods,
referee_mods=referee_mods,
match_id=match_id,
home_team_name=home_team_name,
away_team_name=away_team_name,
odds_data=odds_data,
home_xg=home_xg,
away_xg=away_xg,
total_xg=home_xg + away_xg,
league_id=league_id,
sport=(sport or "football").lower().strip(),
is_top_league=self._is_top_league(league_id),
)
# === 4. XGBOOST INFERENCE ===
try:
# Prepare features (1 row DataFrame)
xgb_features = self.feature_adapter.get_features(ctx)
# Predict — per-model feature alignment
for key, model in self.xgb_models.items():
try:
model_features = self._align_features(xgb_features, model)
raw_pred = model.predict_proba(model_features)
except Exception as model_err:
print(f"⚠️ XGBoost {key} inference failed: {model_err}")
continue
# Handle multi-class (MS, HT_RESULT, HT/FT) vs binary
if key in ("ms", "ht_result"):
# raw_pred is (1, 3)
probs = raw_pred[0] # [Home, Draw, Away]
ctx.xgboost_preds[key] = {
"home": float(probs[0]),
"draw": float(probs[1]),
"away": float(probs[2])
}
elif key == "handicap_ms":
probs = raw_pred[0] # [H1, HX, H2]
ctx.xgboost_preds[key] = {
"h1": float(probs[0]),
"hx": float(probs[1]),
"h2": float(probs[2])
}
elif key == "ht_ft":
# raw_pred is (1, 9)
raw_probs = [float(p) for p in raw_pred[0]]
probs = self._postprocess_htft_probs(
raw_probs,
odds_data=odds_data,
sport=sport,
is_top_league=ctx.is_top_league,
league_id=league_id,
)
ctx.xgboost_preds[key] = {
label: float(probs[idx]) for idx, label in enumerate(self.HTFT_LABELS)
}
# Keep raw vector for optional calculators/debug consumers.
ctx.xgboost_preds["ht_ft_raw"] = raw_probs
else:
# Binary (OU/BTTS) - index 1 is the positive class probability
prob = float(raw_pred[0][1])
ctx.xgboost_preds[key] = prob
except Exception as e:
print(f"⚠️ XGBoost Inference Failed: {e}")
import traceback
traceback.print_exc()
# === 5. RUN CALCULATORS ===
ms_result = self.match_result_calc.calculate(ctx)
ou_result = self.over_under_calc.calculate(ctx)
ht_result = self.half_time_calc.calculate(ctx)
score_result = self.score_calc.calculate(ctx, ms_result)
other_result = self.other_markets_calc.calculate(ctx, ms_result)
risk_result = self.risk_assessor.calculate(ctx, ms_result)
# Use Reconciled Result
final_ms = score_result.reconciled_ms if score_result.reconciled_ms else ms_result
# Expert Recommendation (New Logic)
expert_result = self.expert_recommender.calculate(ctx, final_ms, ou_result, risk_result)
expert_data = {}
if expert_result:
expert_data = {
"main_pick": expert_result.main_pick,
"safe_alternative": expert_result.safe_alternative,
"value_picks": expert_result.value_picks,
"surprise_picks": expert_result.surprise_picks,
"market_summary": expert_result.market_summary
}
# Update context with risk info for recommender
ctx.risk_level = risk_result.risk_level
ctx.is_surprise = risk_result.is_surprise_risk
rec_result = self.bet_recommender.calculate(ctx, final_ms, ou_result, risk_result)
# === 5. ASSEMBLE PREDICTION ===
# Map MarketPredictionDTO to internal MarketPrediction
def _map_dto(dto):
if not dto: return None
return MarketPrediction(
market_type=dto.market_type,
pick=dto.pick,
probability=dto.probability,
confidence=dto.confidence,
odds=dto.odds,
is_recommended=dto.is_recommended,
is_value_bet=dto.is_value_bet,
edge=dto.edge
)
best_bet = _map_dto(rec_result.best_bet)
alt_bet = _map_dto(rec_result.alternative_bet)
recommended = [_map_dto(r) for r in rec_result.recommended_bets]
# Analysis Details
analysis_details = {
"home_form": f"Form Score: {round(0.5 + team_pred.form_advantage/2, 2)}",
"away_form": f"Form Score: {round(0.5 - team_pred.form_advantage/2, 2)}",
"key_players_missing": self._get_missing_desc(player_pred),
"referee_notes": f"{referee_name}: {round(referee_pred.avg_yellow_cards, 1)} Yellow Cards/Avg",
"market_trend": "Market data analyzed"
}
return FullMatchPrediction(
match_id=match_id,
home_team=home_team_name,
away_team=away_team_name,
# Match Result (Using Reconciled Final MS)
ms_home_prob=final_ms.ms_home_prob,
ms_draw_prob=final_ms.ms_draw_prob,
ms_away_prob=final_ms.ms_away_prob,
ms_pick=final_ms.ms_pick,
ms_confidence=final_ms.ms_confidence,
# Double Chance (Using Reconciled Final MS)
dc_1x_prob=final_ms.dc_1x_prob,
dc_x2_prob=final_ms.dc_x2_prob,
dc_12_prob=final_ms.dc_12_prob,
dc_pick=final_ms.dc_pick,
dc_confidence=final_ms.dc_confidence,
# Over/Under
over_15_prob=ou_result.over_15_prob,
under_15_prob=ou_result.under_15_prob,
ou15_pick=ou_result.ou15_pick,
ou15_confidence=ou_result.ou15_confidence,
over_25_prob=ou_result.over_25_prob,
under_25_prob=ou_result.under_25_prob,
ou25_pick=ou_result.ou25_pick,
ou25_confidence=ou_result.ou25_confidence,
over_35_prob=ou_result.over_35_prob,
under_35_prob=ou_result.under_35_prob,
ou35_pick=ou_result.ou35_pick,
ou35_confidence=ou_result.ou35_confidence,
# BTTS
btts_yes_prob=ou_result.btts_yes_prob,
btts_no_prob=ou_result.btts_no_prob,
btts_pick=ou_result.btts_pick,
btts_confidence=ou_result.btts_confidence,
# Half Time
ht_home_prob=ht_result.ht_home_prob,
ht_draw_prob=ht_result.ht_draw_prob,
ht_away_prob=ht_result.ht_away_prob,
ht_pick=ht_result.ht_pick,
ht_confidence=ht_result.ht_confidence,
# Score
score=score_result,
# HT O/U
ht_over_05_prob=ht_result.ht_over_05_prob,
ht_under_05_prob=ht_result.ht_under_05_prob,
ht_over_15_prob=ht_result.ht_over_15_prob,
ht_under_15_prob=ht_result.ht_under_15_prob,
ht_ou_pick=ht_result.ht_ou_pick,
ht_ou15_pick=ht_result.ht_ou15_pick,
# Scores (Reconciled check usually happens in ScoreCalc)
predicted_ft_score=score_result.predicted_ft_score,
predicted_ht_score=score_result.predicted_ht_score,
ft_scores_top5=score_result.ft_scores_top5,
# xG
home_xg=home_xg,
away_xg=away_xg,
total_xg=home_xg + away_xg,
# Others
total_corners_pred=other_result.total_corners_pred,
corner_pick=other_result.corner_pick,
total_cards_pred=other_result.total_cards_pred,
card_pick=other_result.card_pick,
cards_over_prob=other_result.cards_over_prob,
cards_under_prob=other_result.cards_under_prob,
cards_confidence=other_result.cards_confidence,
handicap_pick=other_result.handicap_pick,
handicap_home_prob=other_result.handicap_home_prob,
handicap_draw_prob=other_result.handicap_draw_prob,
handicap_away_prob=other_result.handicap_away_prob,
handicap_confidence=other_result.handicap_confidence,
odd_even_pick=other_result.odd_even_pick,
odd_prob=other_result.odd_prob,
even_prob=other_result.even_prob,
# Risk
risk_level=risk_result.risk_level,
risk_score=risk_result.risk_score,
is_surprise_risk=risk_result.is_surprise_risk,
surprise_type=risk_result.surprise_type,
ht_ft_probs=ctx.xgboost_preds.get("ht_ft", {}),
analysis_details=analysis_details,
risk_warnings=risk_result.risk_warnings,
# GLM-5 Sürpriz Skoru
upset_score=upset_factors_v2.upset_score,
upset_level=upset_factors_v2.upset_level,
upset_reasons=upset_factors_v2.reasoning,
# Engines
team_confidence=team_pred.confidence,
player_confidence=player_pred.confidence,
odds_confidence=odds_pred.confidence,
referee_confidence=referee_pred.confidence,
# Recs
best_bet=best_bet,
recommended_bets=recommended,
alternative_bet=alt_bet,
# Expert Recommendation (New)
expert_recommendation=expert_data
)
def _get_missing_desc(self, player_pred) -> List[str]:
if not player_pred.lineup_available:
return ["Lineups not confirmed"]
missing = []
if player_pred.home_missing_impact > 0.1:
missing.append(f"Home missing impact: {int(player_pred.home_missing_impact*100)}%")
if player_pred.away_missing_impact > 0.1:
missing.append(f"Away missing impact: {int(player_pred.away_missing_impact*100)}%")
return missing if missing else ["No significant missing players"]
# Singleton: module-level cache for the one shared predictor instance.
# Stays None until get_v20_predictor() constructs it on first use.
_predictor: Optional[V20EnsemblePredictor] = None
def get_v20_predictor() -> V20EnsemblePredictor:
    """Return the shared V20EnsemblePredictor, constructing it on first call.

    Subsequent calls hand back the same instance, which is stored in the
    module-level ``_predictor`` variable.
    """
    global _predictor
    # Fast path: an instance has already been built.
    if _predictor is not None:
        return _predictor
    _predictor = V20EnsemblePredictor()
    return _predictor
if __name__ == "__main__":
    # Manual smoke test: run a single prediction with placeholder IDs and
    # sample odds, then dump the result as pretty-printed JSON.
    predictor = get_v20_predictor()
    # Use "\n" (not "\\n") so a blank line is printed rather than a literal
    # backslash followed by "n".
    print("\n🧪 V20 Ensemble Beast Test")
    print("=" * 60)
    result = predictor.predict(
        match_id="test_match",
        home_team_id="test_home",
        away_team_id="test_away",
        home_team_name="Beşiktaş",
        away_team_name="Galatasaray",
        match_date_ms=1707393600000,
        odds_data={
            "ms_h": 2.50,
            "ms_d": 3.20,
            "ms_a": 2.80,
            "ou25_o": 1.85
        },
        home_position=3,
        away_position=1,
        league_name="Süper Lig"
    )
    # ensure_ascii=False keeps the Turkish team/league names readable.
    print(json.dumps(result.to_dict(), indent=2, ensure_ascii=False))