""" V20 Ensemble Beast - Main Predictor Combines 4 prediction engines with surprise detection. This is the primary interface for V20 predictions. """ import os import sys import math import json import pickle import time import psycopg2 import pandas as pd from typing import Dict, List, Optional, Tuple, Any from dataclasses import dataclass, field # Add paths sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from core.engines.team_predictor import get_team_predictor from core.engines.player_predictor import get_player_predictor from core.engines.odds_predictor import get_odds_predictor from core.engines.referee_predictor import get_referee_predictor from features.upset_engine import get_upset_engine from features.upset_engine_v2 import get_upset_engine_v2 from features.feature_adapter import get_feature_adapter from utils.top_leagues import load_top_league_ids from data.db import get_clean_dsn import xgboost as xgb from models.calibration import Calibrator # New Config & Calculators from config.config_loader import get_config from core.calculators.base_calculator import CalculationContext from core.calculators.match_result_calculator import MatchResultCalculator from core.calculators.over_under_calculator import OverUnderCalculator from core.calculators.half_time_calculator import HalfTimeCalculator from core.calculators.score_calculator import ScoreCalculator, ScorePrediction from core.calculators.other_markets_calculator import OtherMarketsCalculator from core.calculators.risk_assessor import RiskAssessor from core.calculators.bet_recommender import BetRecommender class _BoosterModelAdapter: """Adapter to provide predict_proba interface for raw xgboost.Booster models.""" def __init__(self, booster: xgb.Booster): self._booster = booster def predict_proba(self, features: pd.DataFrame): dmat = xgb.DMatrix(features) preds = self._booster.predict(dmat) if len(preds.shape) == 1: # binary: return [P(class0), P(class1)] return [[float(1.0 - p), float(p)] for p in preds] # multiclass: already (n, k) return preds @dataclass class MarketPrediction: """Prediction for a single betting market.""" market_type: str pick: str probability: float confidence: float odds: float = 0.0 is_recommended: bool = False is_value_bet: bool = False edge: float = 0.0 # Expected edge over market def to_dict(self) -> dict: return { "market_type": self.market_type, "pick": self.pick, "probability": round(self.probability * 100, 1), "confidence": round(self.confidence, 1), "odds": self.odds, "is_recommended": self.is_recommended, "is_value_bet": self.is_value_bet, "edge": round(self.edge, 1) } @dataclass class FullMatchPrediction: """Complete prediction for a match with ALL markets.""" match_id: str home_team: str away_team: str match_date: str = "" # === MAÇ SONUCU (1X2) === ms_home_prob: float = 0.33 ms_draw_prob: float = 0.33 ms_away_prob: float = 0.33 ms_pick: str = "" ms_confidence: float = 0.0 # === ÇİFTE ŞANS === dc_1x_prob: float = 0.66 dc_x2_prob: float = 0.66 dc_12_prob: float = 0.66 dc_pick: str = "" dc_confidence: float = 0.0 # === ALT/ÜST GOLLER === # 1.5 over_15_prob: float = 0.70 under_15_prob: float = 0.30 ou15_pick: str = "" ou15_confidence: float = 0.0 # 2.5 over_25_prob: float = 0.50 under_25_prob: float = 0.50 ou25_pick: str = "" ou25_confidence: float = 0.0 # 3.5 over_35_prob: float = 0.30 under_35_prob: float = 0.70 ou35_pick: str = "" ou35_confidence: float = 0.0 # === KARŞILIKLI GOL (BTTS) === btts_yes_prob: float = 0.50 btts_no_prob: float = 0.50 btts_pick: str = "" btts_confidence: float = 0.0 # === İLK YARI SONUCU === ht_home_prob: float = 0.30 ht_draw_prob: float = 0.40 ht_away_prob: float = 0.30 ht_pick: str = "" ht_confidence: float = 0.0 # === SKOR TAHMİNLERİ === score: Optional[ScorePrediction] = None predicted_ft_score: str = "1-1" predicted_ht_score: str = "0-0" ft_scores_top5: List[Dict] = field(default_factory=list) # === xG (Expected Goals) === home_xg: float = 1.3 away_xg: float = 1.1 total_xg: float = 2.4 # === RISK DEĞERLENDİRMESİ === risk_level: str = "MEDIUM" # LOW, MEDIUM, HIGH, EXTREME risk_score: float = 0.0 is_surprise_risk: bool = False surprise_type: str = "" risk_warnings: List[str] = field(default_factory=list) ht_ft_probs: Dict[str, float] = field(default_factory=dict) # === GLM-5 SÜRPRİZ SKORU === upset_score: int = 0 # 0-100 arası sürpriz skoru upset_level: str = "LOW" # LOW, MEDIUM, HIGH, EXTREME upset_reasons: List[str] = field(default_factory=list) # === SÜRPRİZ PROFİLİ === surprise_score: float = 0.0 # 0-100 overall surprise risk score surprise_comment: str = "" # Human-readable surprise commentary surprise_reasons: List[str] = field(default_factory=list) # Flagged risk reasons surprise_breakdown: List[Dict[str, Any]] = field(default_factory=list) # Per-factor {code, points, label} # === ENGINE KATKILARI === team_confidence: float = 0.0 player_confidence: float = 0.0 odds_confidence: float = 0.0 referee_confidence: float = 0.0 # === KORNER & KART & DİĞER === total_corners_pred: float = 9.5 corner_pick: str = "9.5 Üst" total_cards_pred: float = 4.5 card_pick: str = "4.5 Alt" cards_over_prob: float = 0.50 cards_under_prob: float = 0.50 cards_confidence: float = 0.0 handicap_pick: str = "" handicap_home_prob: float = 0.33 handicap_draw_prob: float = 0.34 handicap_away_prob: float = 0.33 handicap_confidence: float = 0.0 ht_over_05_prob: float = 0.65 ht_under_05_prob: float = 0.35 ht_over_15_prob: float = 0.30 ht_under_15_prob: float = 0.70 ht_ou_pick: str = "İY 0.5 Üst" ht_ou15_pick: str = "İY 1.5 Alt" odd_even_pick: str = "Çift" odd_prob: float = 0.50 # Tek olasılığı even_prob: float = 0.50 # Çift olasılığı # === TAVSİYELER (RECOMMENDATIONS) === best_bet: Optional[MarketPrediction] = None recommended_bets: List[MarketPrediction] = field(default_factory=list) alternative_bet: Optional[MarketPrediction] = None expert_recommendation: Dict[str, Any] = field(default_factory=dict) # === DETAILED ANALYSIS === analysis_details: Dict[str, Any] = field(default_factory=dict) def to_dict(self) -> dict: return { "match_info": { "match_id": self.match_id, "home_team": self.home_team, "away_team": self.away_team, "match_date": self.match_date }, "predictions": { "match_result": { "1": round(self.ms_home_prob * 100, 1), "X": round(self.ms_draw_prob * 100, 1), "2": round(self.ms_away_prob * 100, 1), "pick": self.ms_pick, "confidence": round(self.ms_confidence, 1) }, "double_chance": { "1X": round(self.dc_1x_prob * 100, 1), "X2": round(self.dc_x2_prob * 100, 1), "12": round(self.dc_12_prob * 100, 1), "pick": self.dc_pick, "confidence": round(self.dc_confidence, 1) }, "over_under": { "1.5": { "over": round(self.over_15_prob * 100, 1), "under": round(self.under_15_prob * 100, 1), "pick": self.ou15_pick, "confidence": round(self.ou15_confidence, 1) }, "2.5": { "over": round(self.over_25_prob * 100, 1), "under": round(self.under_25_prob * 100, 1), "pick": self.ou25_pick, "confidence": round(self.ou25_confidence, 1) }, "3.5": { "over": round(self.over_35_prob * 100, 1), "under": round(self.under_35_prob * 100, 1), "pick": self.ou35_pick, "confidence": round(self.ou35_confidence, 1) } }, "btts": { "yes": round(self.btts_yes_prob * 100, 1), "no": round(self.btts_no_prob * 100, 1), "pick": self.btts_pick, "confidence": round(self.btts_confidence, 1) }, "first_half": { "1": round(self.ht_home_prob * 100, 1), "X": round(self.ht_draw_prob * 100, 1), "2": round(self.ht_away_prob * 100, 1), "pick": self.ht_pick, "confidence": round(self.ht_confidence, 1), "over_under_05": { "over": round(self.ht_over_05_prob * 100, 1), "under": round(self.ht_under_05_prob * 100, 1), "pick": self.ht_ou_pick }, "over_under_15": { "over": round(self.ht_over_15_prob * 100, 1), "under": round(self.ht_under_15_prob * 100, 1), "pick": self.ht_ou15_pick } }, "scores": { "predicted_ft": self.predicted_ft_score, "predicted_ht": self.predicted_ht_score, "top_5_ft_scores": self.ft_scores_top5 }, "others": { "handicap": { "pick": self.handicap_pick, "confidence": round(self.handicap_confidence, 1), "home": round(self.handicap_home_prob * 100, 1), "draw": round(self.handicap_draw_prob * 100, 1), "away": round(self.handicap_away_prob * 100, 1) }, "corners": { "total": round(self.total_corners_pred, 1), "pick": self.corner_pick }, "cards": { "total": round(self.total_cards_pred, 1), "pick": self.card_pick, "confidence": round(self.cards_confidence, 1), "over": round(self.cards_over_prob * 100, 1), "under": round(self.cards_under_prob * 100, 1) }, "odd_even": { "pick": self.odd_even_pick, "tek": round(self.odd_prob * 100, 1), "cift": round(self.even_prob * 100, 1) } }, "xg": { "home": round(self.home_xg, 2), "away": round(self.away_xg, 2), "total": round(self.total_xg, 2) } }, "risk": { "level": self.risk_level, "score": round(self.risk_score, 1), "is_surprise_risk": self.is_surprise_risk, "surprise_type": self.surprise_type, "ht_ft_probs": {k: round(v * 100, 1) for k, v in self.ht_ft_probs.items()} if self.ht_ft_probs else {}, "warnings": self.risk_warnings }, "upset_analysis": { "score": self.upset_score, "level": self.upset_level, "reasons": self.upset_reasons }, "engine_breakdown": { "team_engine": round(self.team_confidence, 1), "player_engine": round(self.player_confidence, 1), "odds_engine": round(self.odds_confidence, 1), "referee_engine": round(self.referee_confidence, 1) }, "recommendations": { "best_bet": self.best_bet.to_dict() if self.best_bet else None, "all_recommended": [b.to_dict() for b in self.recommended_bets] if self.recommended_bets else [], "alternative_bet": self.alternative_bet.to_dict() if self.alternative_bet else None }, "analysis_details": self.analysis_details } class V20EnsemblePredictor: HTFT_LABELS = ("1/1", "1/X", "1/2", "X/1", "X/X", "X/2", "2/1", "2/X", "2/2") # Neutral defaults when MS odds are missing: avoid synthetic home-favorite bias. DEFAULT_MS_H = 2.65 DEFAULT_MS_D = 3.20 DEFAULT_MS_A = 2.65 FOOTBALL_TOP_PRIOR = ( 0.263760, 0.051786, 0.022942, 0.150168, 0.157798, 0.106064, 0.027622, 0.051226, 0.168634, ) FOOTBALL_NON_TOP_PRIOR = ( 0.265113, 0.048306, 0.020399, 0.147020, 0.152383, 0.113075, 0.026542, 0.046356, 0.180805, ) # Top-league football priors conditioned on favorite side from MS (1X2) odds. # Label order follows HTFT_LABELS. FOOTBALL_TOP_PRIOR_HOME_FAV = ( 0.321707, 0.054165, 0.017952, 0.179729, 0.161674, 0.078991, 0.031186, 0.047394, 0.107201, ) FOOTBALL_TOP_PRIOR_AWAY_FAV = ( 0.130654, 0.049139, 0.033754, 0.081975, 0.156142, 0.167164, 0.020207, 0.058324, 0.302641, ) FOOTBALL_TOP_PRIOR_BALANCED = ( 0.169429, 0.052486, 0.028545, 0.144567, 0.209024, 0.116943, 0.026703, 0.053407, 0.198895, ) def __init__(self): print("🚀 Initializing V20 Ensemble Beast...") self.config = get_config() # Engines self.team_engine = get_team_predictor() self.player_engine = get_player_predictor() self.odds_engine = get_odds_predictor() self.referee_engine = get_referee_predictor() self.upset_engine = get_upset_engine() self.upset_engine_v2 = get_upset_engine_v2() # GLM-5 enhanced # Calculators print("⚙️ Loading market calculators...") cfg: Any = self.config self.match_result_calc = MatchResultCalculator(cfg) self.over_under_calc = OverUnderCalculator(cfg) self.half_time_calc = HalfTimeCalculator(cfg) self.score_calc = ScoreCalculator(cfg) print(" ✅ Score Calculator (XGBoost FT+HT) loaded") self.other_markets_calc = OtherMarketsCalculator(cfg) self.risk_assessor = RiskAssessor(cfg) self.bet_recommender = BetRecommender(cfg) # Expert Recommender (New Logic) from core.calculators.expert_recommender import ExpertRecommender self.expert_recommender = ExpertRecommender(cfg) # XGBoost Integration print("🤖 Loading XGBoost models...") self.feature_adapter = get_feature_adapter() self.calibrator = Calibrator() self.xgb_models = {} self.top_league_ids = load_top_league_ids() print(f"📋 Loaded {len(self.top_league_ids)} top leagues for HT/FT tuning") self.db_dsn = get_clean_dsn() self.league_htft_prior_cache: Dict[Tuple[str, str], Optional[Tuple[float, ...]]] = {} xgb_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "models", "xgboost") model_files = { "ms": "xgb_ms", "ou25": "xgb_ou25", "btts": "xgb_btts", "ht_ft": "xgb_ht_ft", "ht_result": "xgb_ht_result", "ht_ou05": "xgb_ht_ou05", "ht_ou15": "xgb_ht_ou15", "odd_even": "xgb_odd_even", "ou15": "xgb_ou15", "ou35": "xgb_ou35", "handicap_ms": "xgb_handicap_ms", "cards_ou45": "xgb_cards_ou45", } only_keys = os.getenv("XGB_MODEL_KEYS", "").strip() if only_keys: selected_keys = {k.strip().lower() for k in only_keys.split(",") if k.strip()} model_files = {k: v for k, v in model_files.items() if k in selected_keys} if model_files: print(f"ℹ️ XGB_MODEL_KEYS active -> loading only: {', '.join(sorted(model_files.keys()))}") else: print("⚠️ XGB_MODEL_KEYS set but no valid keys matched. Loading none.") for key, base_name in model_files.items(): print(f" ⏳ Loading {key} from {base_name}.pkl/.json...", flush=True) model, src, err = self._load_xgb_model(xgb_dir, base_name) if model is not None: self.xgb_models[key] = model print(f" ✅ Loaded {key} ({src})") elif err: print(f" ⚠️ Failed to load {base_name}: {err}") else: print(f" ⚠️ Model not found: {base_name}.pkl or {base_name}.json") print("✅ V20 Ensemble Beast ready!") @staticmethod def _load_xgb_model(xgb_dir: str, base_name: str): pkl_path = os.path.join(xgb_dir, f"{base_name}.pkl") json_path = os.path.join(xgb_dir, f"{base_name}.json") if os.path.exists(pkl_path): started = time.perf_counter() with open(pkl_path, "rb") as f: model = pickle.load(f) elapsed = time.perf_counter() - started return model, f"pkl {elapsed:.2f}s", None if os.path.exists(json_path): started = time.perf_counter() # Preferred path: sklearn wrapper with predict_proba try: model = xgb.XGBClassifier() model.load_model(json_path) elapsed = time.perf_counter() - started return model, f"json {elapsed:.2f}s", None except Exception: # Fallback: raw Booster + adapter try: booster = xgb.Booster() booster.load_model(json_path) model = _BoosterModelAdapter(booster) elapsed = time.perf_counter() - started return model, f"json/booster {elapsed:.2f}s", None except Exception as e: return None, "", e return None, "", None @staticmethod def _safe_odd(value: Any) -> float: try: odd = float(value) return odd if odd > 1.01 else 0.0 except (TypeError, ValueError): return 0.0 @staticmethod def _align_features(features: pd.DataFrame, model) -> pd.DataFrame: """Align DataFrame columns to the model's expected feature set. Supports: - sklearn wrappers (XGBClassifier / LGBMClassifier) → feature_names_in_ - raw xgboost.Booster → feature_names - _BoosterModelAdapter → _booster.feature_names If the model doesn't expose feature names, returns the DataFrame as-is. """ expected: Optional[List[str]] = None # 1. sklearn wrapper (XGBClassifier, LGBMClassifier, CalibratedClassifierCV) if hasattr(model, 'feature_names_in_'): expected = list(model.feature_names_in_) # 2. _BoosterModelAdapter elif hasattr(model, '_booster') and hasattr(model._booster, 'feature_names'): expected = model._booster.feature_names # 3. raw xgboost.Booster elif hasattr(model, 'feature_names') and model.feature_names: expected = list(model.feature_names) if expected is None: return features # Only keep columns that the model expects (order preserved) available = [col for col in expected if col in features.columns] if len(available) < len(expected): missing = set(expected) - set(available) print(f"⚠️ Feature alignment: {len(missing)} missing features filled with 0: {sorted(missing)[:5]}{'...' if len(missing) > 5 else ''}") # Add missing columns with 0 (safe neutral default) for col in expected: if col not in features.columns: features = features.copy() features[col] = 0.0 return features[expected] # type: ignore[return-value] def _favorite_profile_from_odds(self, odds_data: Dict[str, float]) -> Tuple[str, float]: """ Returns (favorite_side, gap_to_second_favorite). favorite_side: H, A, D, or U (unknown) """ ms_h = self._safe_odd((odds_data or {}).get("ms_h")) ms_d = self._safe_odd((odds_data or {}).get("ms_d")) ms_a = self._safe_odd((odds_data or {}).get("ms_a")) candidates = [(side, odd) for side, odd in (("H", ms_h), ("D", ms_d), ("A", ms_a)) if odd > 0.0] if len(candidates) < 2: return "U", 0.0 candidates.sort(key=lambda item: item[1]) favorite_side, favorite_odd = candidates[0] second_odd = candidates[1][1] return favorite_side, max(0.0, second_odd - favorite_odd) def _favorite_side_from_ms_odds( self, odds_data: Dict[str, float], ) -> str: """ Returns side from MS home/away odds only: - H: home favorite - A: away favorite - B: balanced (home and away near-equal) - U: unknown """ ms_h = self._safe_odd((odds_data or {}).get("ms_h")) ms_a = self._safe_odd((odds_data or {}).get("ms_a")) if ms_h <= 0.0 or ms_a <= 0.0: return "U" balance_gap = float(self.config.get("risk.htft_favorite_balance_gap", 0.20)) if abs(ms_h - ms_a) <= balance_gap: return "B" return "H" if ms_h < ms_a else "A" def _get_top_odds_conditioned_prior( self, odds_data: Dict[str, float], ) -> Optional[Tuple[float, ...]]: side = self._favorite_side_from_ms_odds(odds_data) if side == "H": return self.FOOTBALL_TOP_PRIOR_HOME_FAV if side == "A": return self.FOOTBALL_TOP_PRIOR_AWAY_FAV if side == "B": return self.FOOTBALL_TOP_PRIOR_BALANCED return None def _is_top_league(self, league_id: Optional[str]) -> bool: if not league_id: return False return str(league_id) in self.top_league_ids def _get_htft_league_prior( self, league_id: Optional[str], sport: str, ) -> Optional[Tuple[float, ...]]: sport_key = (sport or "").lower().strip() if sport_key != "football" or not league_id: return None cache_key = (sport_key, str(league_id)) if cache_key in self.league_htft_prior_cache: return self.league_htft_prior_cache[cache_key] min_samples = int(self.config.get("risk.htft_prior_min_matches", 300)) combo_counts = {label: 0 for label in self.HTFT_LABELS} try: with psycopg2.connect(self.db_dsn) as conn: with conn.cursor() as cur: cur.execute( """ WITH base AS ( SELECT CASE WHEN ht_score_home > ht_score_away THEN '1' WHEN ht_score_home = ht_score_away THEN 'X' ELSE '2' END AS ht, CASE WHEN score_home > score_away THEN '1' WHEN score_home = score_away THEN 'X' ELSE '2' END AS ft FROM matches WHERE status = 'FT' AND sport = %s AND league_id = %s AND ht_score_home IS NOT NULL AND ht_score_away IS NOT NULL AND score_home IS NOT NULL AND score_away IS NOT NULL ) SELECT ht || '/' || ft AS combo, COUNT(*)::bigint AS n FROM base GROUP BY combo """, (sport_key, str(league_id)), ) rows = cur.fetchall() except Exception: self.league_htft_prior_cache[cache_key] = None return None total = 0 for combo, n in rows: if combo in combo_counts: combo_counts[combo] = int(n) total += int(n) if total < min_samples: self.league_htft_prior_cache[cache_key] = None return None prior = tuple(combo_counts[label] / total for label in self.HTFT_LABELS) self.league_htft_prior_cache[cache_key] = prior return prior def _postprocess_htft_probs( self, raw_probs: List[float], odds_data: Optional[Dict[str, float]] = None, sport: str = "football", is_top_league: bool = False, league_id: Optional[str] = None, ) -> List[float]: """ Stabilize HT/FT class probabilities. Why: - HT/FT reversals (1/2, 2/1) are rare and can be overestimated. - We preserve ranking signal but make absolute probabilities conservative. """ probs = [max(1e-9, float(p)) for p in raw_probs[:9]] if len(probs) != 9: return [1.0 / 9.0] * 9 # Global calibration pass for HT/FT market. probs = [self.calibrator.calibrate("ht_ft", p) for p in probs] sport_key = (sport or "football").lower().strip() # Temperature > 1.0 flattens over-confident distributions. if sport_key == "basketball": if is_top_league: temperature = float( self.config.get("risk.htft_temperature_basketball_top", self.config.get("risk.htft_temperature_basketball", 1.08)), ) else: temperature = float( self.config.get("risk.htft_temperature_basketball_non_top", 1.15), ) else: if is_top_league: temperature = float( self.config.get("risk.htft_temperature_top", self.config.get("risk.htft_temperature", 1.25)), ) else: temperature = float( self.config.get("risk.htft_temperature_non_top", 1.35), ) if temperature > 1.0: inv_t = 1.0 / temperature probs = [p**inv_t for p in probs] # Extra damping for reversal classes: 1/2 (idx 2), 2/1 (idx 6). if is_top_league: base_reversal_multiplier = float( self.config.get("risk.htft_reversal_multiplier_top", self.config.get("risk.htft_reversal_multiplier", 0.60)), ) favorite_reversal_multiplier = float( self.config.get( "risk.htft_reversal_multiplier_favorite_top", self.config.get("risk.htft_reversal_multiplier_favorite", 0.72), ), ) underdog_reversal_multiplier = float( self.config.get( "risk.htft_reversal_multiplier_underdog_top", self.config.get("risk.htft_reversal_multiplier_underdog", 0.45), ), ) basketball_reversal_multiplier = float( self.config.get( "risk.htft_reversal_multiplier_basketball_top", self.config.get("risk.htft_reversal_multiplier_basketball", 0.90), ), ) else: base_reversal_multiplier = float(self.config.get("risk.htft_reversal_multiplier_non_top", 0.45)) favorite_reversal_multiplier = float( self.config.get("risk.htft_reversal_multiplier_favorite_non_top", 0.55), ) underdog_reversal_multiplier = float( self.config.get("risk.htft_reversal_multiplier_underdog_non_top", 0.30), ) basketball_reversal_multiplier = float( self.config.get("risk.htft_reversal_multiplier_basketball_non_top", 0.75), ) gap_medium = float(self.config.get("risk.htft_reversal_gap_medium", 0.50)) gap_strong = float(self.config.get("risk.htft_reversal_gap_strong", 1.00)) favorite_side, favorite_gap = self._favorite_profile_from_odds(odds_data or {}) def _reversal_multiplier(winner_side: str) -> float: if sport_key == "basketball": return basketball_reversal_multiplier multiplier = base_reversal_multiplier if favorite_side in ("H", "A"): multiplier = ( favorite_reversal_multiplier if winner_side == favorite_side else underdog_reversal_multiplier ) # If market heavily favors one side, penalize underdog-reversal harder. if winner_side != favorite_side and favorite_gap >= gap_strong: multiplier *= 0.80 elif winner_side != favorite_side and favorite_gap >= gap_medium: multiplier *= 0.90 return max(0.20, min(1.10, multiplier)) # 1/2 => winner is Away, 2/1 => winner is Home probs[2] *= _reversal_multiplier("A") probs[6] *= _reversal_multiplier("H") # Prior blend for football (league-specific if sufficient sample size). if sport_key == "football": league_prior = self._get_htft_league_prior(league_id=league_id, sport=sport_key) if league_prior is not None: prior = league_prior blend = float(self.config.get("risk.htft_prior_blend_league", 0.65)) else: prior = self.FOOTBALL_TOP_PRIOR if is_top_league else self.FOOTBALL_NON_TOP_PRIOR blend = float( self.config.get( "risk.htft_prior_blend_top" if is_top_league else "risk.htft_prior_blend_non_top", 0.50 if is_top_league else 0.58, ), ) if is_top_league: side_prior = self._get_top_odds_conditioned_prior(odds_data or {}) if side_prior is not None: if league_prior is not None: odds_prior_blend = float( self.config.get("risk.htft_prior_odds_blend_top_with_league", 0.22), ) else: odds_prior_blend = float( self.config.get("risk.htft_prior_odds_blend_top", 0.35), ) odds_prior_blend = max(0.0, min(0.80, odds_prior_blend)) prior = tuple( ((1.0 - odds_prior_blend) * prior[idx]) + (odds_prior_blend * side_prior[idx]) for idx in range(9) ) blend = max(0.0, min(0.95, blend)) probs = [((1.0 - blend) * p) + (blend * prior[idx]) for idx, p in enumerate(probs)] # Hard cap reversal classes by prior factor to avoid unrealistic spikes. cap_factor = float(self.config.get("risk.htft_reversal_cap_factor", 2.3)) cap_factor = max(1.0, cap_factor) for idx in (2, 6): cap_val = prior[idx] * cap_factor if probs[idx] > cap_val: probs[idx] = cap_val total = sum(probs) if total <= 0: return [1.0 / 9.0] * 9 return [p / total for p in probs] def predict(self, match_id: str, home_team_id: str, away_team_id: str, home_team_name: str, away_team_name: str, match_date_ms: int, odds_data: Optional[Dict[str, float]] = None, home_lineup: Optional[List[str]] = None, away_lineup: Optional[List[str]] = None, referee_name: Optional[str] = None, home_goals_avg: float = 1.5, home_conceded_avg: float = 1.2, away_goals_avg: float = 1.2, away_conceded_avg: float = 1.4, home_position: int = 10, away_position: int = 10, league_name: str = "", league_id: Optional[str] = None, sport: str = "football", sidelined_data: Optional[Dict] = None) -> FullMatchPrediction: """ Generate complete V20 ensemble prediction. Returns FullMatchPrediction with ALL markets. """ # Default odds if not provided if odds_data is None: odds_data = { "ms_h": self.DEFAULT_MS_H, "ms_d": self.DEFAULT_MS_D, "ms_a": self.DEFAULT_MS_A, } # === 1. COLLECT ALL ENGINE PREDICTIONS === team_pred = self.team_engine.predict( home_team_id=home_team_id, away_team_id=away_team_id, match_date_ms=match_date_ms, home_team_name=home_team_name, away_team_name=away_team_name ) player_pred = self.player_engine.predict( match_id=match_id, home_team_id=home_team_id, away_team_id=away_team_id, home_lineup=home_lineup, away_lineup=away_lineup, sidelined_data=sidelined_data ) odds_pred = self.odds_engine.predict( odds_data=odds_data, home_goals_avg=home_goals_avg, home_conceded_avg=home_conceded_avg, away_goals_avg=away_goals_avg, away_conceded_avg=away_conceded_avg ) referee_pred = self.referee_engine.predict( match_id=match_id, referee_name=referee_name or "", league_id=league_id or "" ) upset_factors = self.upset_engine.calculate_upset_potential( home_team_name=home_team_name, home_team_id=home_team_id, away_team_name=away_team_name, league_name=league_name, home_position=home_position, away_position=away_position, match_date_ms=match_date_ms ) # GLM-5 Enhanced Upset Detection v2 # Determine favorite from odds favorite_side = "home" favorite_odds = odds_data.get("ms_h", 2.0) if odds_data else 2.0 if odds_data: ms_h = odds_data.get("ms_h", 999) ms_a = odds_data.get("ms_a", 999) if ms_a < ms_h: favorite_side = "away" favorite_odds = ms_a elif ms_h < ms_a: favorite_side = "home" favorite_odds = ms_h else: favorite_side = "draw" favorite_odds = odds_data.get("ms_d", 3.0) upset_factors_v2 = self.upset_engine_v2.calculate_upset_potential( home_team_name=home_team_name, home_team_id=home_team_id, away_team_name=away_team_name, league_name=league_name, home_position=home_position, away_position=away_position, match_date_ms=match_date_ms, odds_data=odds_data, referee_name=referee_name or "", home_form_score=getattr(team_pred, 'home_form_score', 50.0), away_form_score=getattr(team_pred, 'away_form_score', 50.0), favorite_side=favorite_side, favorite_odds=favorite_odds ) # === 2. DYNAMIC ENGINE WEIGHTS === w_team = self.config.get("engine_weights.team", 0.30) w_player = self.config.get("engine_weights.player", 0.25) w_odds = self.config.get("engine_weights.odds", 0.30) w_referee = self.config.get("engine_weights.referee", 0.15) # Redistribution Logic if not player_pred.lineup_available: min_w = self.config.get("engine_weights.min_weight", 0.05) surplus = w_player - min_w w_player = min_w w_team += surplus * self.config.get("weight_redistribution.player_missing_to_team", 0.5) w_odds += surplus * self.config.get("weight_redistribution.player_missing_to_odds", 0.5) min_ref_matches = self.config.get("weight_redistribution.referee_min_matches", 5) if referee_pred.matches_officiated < min_ref_matches: min_w = self.config.get("engine_weights.min_weight", 0.05) surplus = w_referee - min_w w_referee = min_w w_team += surplus * self.config.get("weight_redistribution.referee_missing_to_team", 0.4) w_odds += surplus * self.config.get("weight_redistribution.referee_missing_to_odds", 0.6) # Normalize w_total = w_team + w_player + w_odds + w_referee weights = { "team": w_team / w_total, "player": w_player / w_total, "odds": w_odds / w_total, "referee": w_referee / w_total } # Get Modifiers player_mods = self.player_engine.get_1x2_modifier(player_pred) referee_mods = self.referee_engine.get_modifiers(referee_pred) # Calculate xG (Used by multiple calculators) home_xg = (team_pred.home_xg + odds_pred.poisson_home_xg) / 2 away_xg = (team_pred.away_xg + odds_pred.poisson_away_xg) / 2 # === 3. CREATE CONTEXT === ctx = CalculationContext( team_pred=team_pred, player_pred=player_pred, odds_pred=odds_pred, referee_pred=referee_pred, upset_factors=upset_factors, weights=weights, player_mods=player_mods, referee_mods=referee_mods, match_id=match_id, home_team_name=home_team_name, away_team_name=away_team_name, odds_data=odds_data, home_xg=home_xg, away_xg=away_xg, total_xg=home_xg + away_xg, league_id=league_id, sport=(sport or "football").lower().strip(), is_top_league=self._is_top_league(league_id), ) # === 4. XGBOOST INFERENCE === try: # Prepare features (1 row DataFrame) xgb_features = self.feature_adapter.get_features(ctx) # Predict — per-model feature alignment for key, model in self.xgb_models.items(): try: model_features = self._align_features(xgb_features, model) raw_pred = model.predict_proba(model_features) except Exception as model_err: print(f"⚠️ XGBoost {key} inference failed: {model_err}") continue # Handle multi-class (MS, HT_RESULT, HT/FT) vs binary if key in ("ms", "ht_result"): # raw_pred is (1, 3) probs = raw_pred[0] # [Home, Draw, Away] ctx.xgboost_preds[key] = { "home": float(probs[0]), "draw": float(probs[1]), "away": float(probs[2]) } elif key == "handicap_ms": probs = raw_pred[0] # [H1, HX, H2] ctx.xgboost_preds[key] = { "h1": float(probs[0]), "hx": float(probs[1]), "h2": float(probs[2]) } elif key == "ht_ft": # raw_pred is (1, 9) raw_probs = [float(p) for p in raw_pred[0]] probs = self._postprocess_htft_probs( raw_probs, odds_data=odds_data, sport=sport, is_top_league=ctx.is_top_league, league_id=league_id, ) ctx.xgboost_preds[key] = { label: float(probs[idx]) for idx, label in enumerate(self.HTFT_LABELS) } # Keep raw vector for optional calculators/debug consumers. ctx.xgboost_preds["ht_ft_raw"] = raw_probs else: # Binary (OU/BTTS) - index 1 is the positive class probability prob = float(raw_pred[0][1]) ctx.xgboost_preds[key] = prob except Exception as e: print(f"⚠️ XGBoost Inference Failed: {e}") import traceback traceback.print_exc() # === 5. RUN CALCULATORS === ms_result = self.match_result_calc.calculate(ctx) ou_result = self.over_under_calc.calculate(ctx) ht_result = self.half_time_calc.calculate(ctx) score_result = self.score_calc.calculate(ctx, ms_result) other_result = self.other_markets_calc.calculate(ctx, ms_result) risk_result = self.risk_assessor.calculate(ctx, ms_result) # Use Reconciled Result final_ms = score_result.reconciled_ms if score_result.reconciled_ms else ms_result # Expert Recommendation (New Logic) expert_result = self.expert_recommender.calculate(ctx, final_ms, ou_result, risk_result) expert_data = {} if expert_result: expert_data = { "main_pick": expert_result.main_pick, "safe_alternative": expert_result.safe_alternative, "value_picks": expert_result.value_picks, "surprise_picks": expert_result.surprise_picks, "market_summary": expert_result.market_summary } # Update context with risk info for recommender ctx.risk_level = risk_result.risk_level ctx.is_surprise = risk_result.is_surprise_risk rec_result = self.bet_recommender.calculate(ctx, final_ms, ou_result, risk_result) # === 5. ASSEMBLE PREDICTION === # Map MarketPredictionDTO to internal MarketPrediction def _map_dto(dto): if not dto: return None return MarketPrediction( market_type=dto.market_type, pick=dto.pick, probability=dto.probability, confidence=dto.confidence, odds=dto.odds, is_recommended=dto.is_recommended, is_value_bet=dto.is_value_bet, edge=dto.edge ) best_bet = _map_dto(rec_result.best_bet) alt_bet = _map_dto(rec_result.alternative_bet) recommended = [m for m in (_map_dto(r) for r in rec_result.recommended_bets) if m is not None] # Analysis Details analysis_details = { "home_form": f"Form Score: {round(0.5 + team_pred.form_advantage/2, 2)}", "away_form": f"Form Score: {round(0.5 - team_pred.form_advantage/2, 2)}", "key_players_missing": self._get_missing_desc(player_pred), "referee_notes": f"{referee_name}: {round(referee_pred.avg_yellow_cards, 1)} Yellow Cards/Avg", "market_trend": "Market data analyzed" } return FullMatchPrediction( match_id=match_id, home_team=home_team_name, away_team=away_team_name, # Match Result (Using Reconciled Final MS) ms_home_prob=final_ms.ms_home_prob, ms_draw_prob=final_ms.ms_draw_prob, ms_away_prob=final_ms.ms_away_prob, ms_pick=final_ms.ms_pick, ms_confidence=final_ms.ms_confidence, # Double Chance (Using Reconciled Final MS) dc_1x_prob=final_ms.dc_1x_prob, dc_x2_prob=final_ms.dc_x2_prob, dc_12_prob=final_ms.dc_12_prob, dc_pick=final_ms.dc_pick, dc_confidence=final_ms.dc_confidence, # Over/Under over_15_prob=ou_result.over_15_prob, under_15_prob=ou_result.under_15_prob, ou15_pick=ou_result.ou15_pick, ou15_confidence=ou_result.ou15_confidence, over_25_prob=ou_result.over_25_prob, under_25_prob=ou_result.under_25_prob, ou25_pick=ou_result.ou25_pick, ou25_confidence=ou_result.ou25_confidence, over_35_prob=ou_result.over_35_prob, under_35_prob=ou_result.under_35_prob, ou35_pick=ou_result.ou35_pick, ou35_confidence=ou_result.ou35_confidence, # BTTS btts_yes_prob=ou_result.btts_yes_prob, btts_no_prob=ou_result.btts_no_prob, btts_pick=ou_result.btts_pick, btts_confidence=ou_result.btts_confidence, # Half Time ht_home_prob=ht_result.ht_home_prob, ht_draw_prob=ht_result.ht_draw_prob, ht_away_prob=ht_result.ht_away_prob, ht_pick=ht_result.ht_pick, ht_confidence=ht_result.ht_confidence, # Score score=score_result, # HT O/U ht_over_05_prob=ht_result.ht_over_05_prob, ht_under_05_prob=ht_result.ht_under_05_prob, ht_over_15_prob=ht_result.ht_over_15_prob, ht_under_15_prob=ht_result.ht_under_15_prob, ht_ou_pick=ht_result.ht_ou_pick, ht_ou15_pick=ht_result.ht_ou15_pick, # Scores (Reconciled check usually happens in ScoreCalc) predicted_ft_score=score_result.predicted_ft_score, predicted_ht_score=score_result.predicted_ht_score, ft_scores_top5=score_result.ft_scores_top5, # xG home_xg=home_xg, away_xg=away_xg, total_xg=home_xg + away_xg, # Others total_corners_pred=other_result.total_corners_pred, corner_pick=other_result.corner_pick or "", total_cards_pred=other_result.total_cards_pred, card_pick=other_result.card_pick or "", cards_over_prob=other_result.cards_over_prob, cards_under_prob=other_result.cards_under_prob, cards_confidence=other_result.cards_confidence, handicap_pick=other_result.handicap_pick or "", handicap_home_prob=other_result.handicap_home_prob, handicap_draw_prob=other_result.handicap_draw_prob, handicap_away_prob=other_result.handicap_away_prob, handicap_confidence=other_result.handicap_confidence, odd_even_pick=other_result.odd_even_pick, odd_prob=other_result.odd_prob, even_prob=other_result.even_prob, # Risk risk_level=risk_result.risk_level, risk_score=risk_result.risk_score, is_surprise_risk=risk_result.is_surprise_risk, surprise_type=risk_result.surprise_type, ht_ft_probs=ctx.xgboost_preds.get("ht_ft", {}), analysis_details=analysis_details, risk_warnings=risk_result.risk_warnings, # GLM-5 Sürpriz Skoru upset_score=upset_factors_v2.upset_score, upset_level=upset_factors_v2.upset_level, upset_reasons=upset_factors_v2.reasoning, # Engines team_confidence=team_pred.confidence, player_confidence=player_pred.confidence, odds_confidence=odds_pred.confidence, referee_confidence=referee_pred.confidence, # Recs best_bet=best_bet, recommended_bets=recommended, alternative_bet=alt_bet, # Expert Recommendation (New) expert_recommendation=expert_data ) def _get_missing_desc(self, player_pred) -> List[str]: if not player_pred.lineup_available: return ["Lineups not confirmed"] missing = [] if player_pred.home_missing_impact > 0.1: missing.append(f"Home missing impact: {int(player_pred.home_missing_impact*100)}%") if player_pred.away_missing_impact > 0.1: missing.append(f"Away missing impact: {int(player_pred.away_missing_impact*100)}%") return missing if missing else ["No significant missing players"] # Singleton _predictor: Optional[V20EnsemblePredictor] = None def get_v20_predictor() -> V20EnsemblePredictor: global _predictor if _predictor is None: _predictor = V20EnsemblePredictor() return _predictor if __name__ == "__main__": predictor = get_v20_predictor() print("\\n🧪 V20 Ensemble Beast Test") print("=" * 60) result = predictor.predict( match_id="test_match", home_team_id="test_home", away_team_id="test_away", home_team_name="Beşiktaş", away_team_name="Galatasaray", match_date_ms=1707393600000, odds_data={ "ms_h": 2.50, "ms_d": 3.20, "ms_a": 2.80, "ou25_o": 1.85 }, home_position=3, away_position=1, league_name="Süper Lig" ) print(json.dumps(result.to_dict(), indent=2, ensure_ascii=False))