From 9027cc9900be15a94ab9e9b1b00f0951bba49d1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fahri=20Can=20Se=C3=A7er?= Date: Fri, 24 Apr 2026 23:46:28 +0300 Subject: [PATCH] v28 --- .gitignore | 6 +- ai-engine/features/odds_band_analyzer.py | 32 +- ai-engine/main.py | 22 +- ai-engine/models/calibration.py | 413 ++++++ ai-engine/models/v20_ensemble.py | 1282 +++++++++++++++++ ai-engine/models/v25_ensemble.py | 645 +++++++++ ai-engine/models/v27_predictor.py | 291 ++++ ai-engine/services/betting_brain.py | 497 +++++++ .../services/single_match_orchestrator.py | 780 +++++++++- package.json | 2 +- .../feeder/feeder-persistence.service.ts | 43 +- src/modules/feeder/feeder.service.ts | 14 +- src/modules/matches/matches.service.ts | 269 ++++ .../predictions/predictions.controller.ts | 9 +- .../predictions/predictions.service.ts | 79 +- src/scripts/run-feeder-previous-day.ts | 39 + src/scripts/run-feeder.ts | 14 +- 17 files changed, 4315 insertions(+), 122 deletions(-) create mode 100644 ai-engine/models/calibration.py create mode 100644 ai-engine/models/v20_ensemble.py create mode 100644 ai-engine/models/v25_ensemble.py create mode 100644 ai-engine/models/v27_predictor.py create mode 100644 ai-engine/services/betting_brain.py create mode 100644 src/scripts/run-feeder-previous-day.ts diff --git a/.gitignore b/.gitignore index 415def7..6085122 100644 --- a/.gitignore +++ b/.gitignore @@ -42,7 +42,9 @@ uploads/ public/uploads/ # Large Datasets and ML Models -ai-engine/models/ -models/ +ai-engine/models/* +!ai-engine/models/*.py +models/* +!models/*.py colab_export/ diff --git a/ai-engine/features/odds_band_analyzer.py b/ai-engine/features/odds_band_analyzer.py index ba4aacc..5cadc10 100644 --- a/ai-engine/features/odds_band_analyzer.py +++ b/ai-engine/features/odds_band_analyzer.py @@ -323,8 +323,8 @@ class OddsBandAnalyzer: m.home_team_id, m.away_team_id, CASE - WHEN m.home_team_id = %(team_id)s THEN os_sel.odd_value - ELSE os_sel2.odd_value + WHEN m.home_team_id = %(team_id)s 
THEN os_sel.odd_value::numeric + ELSE os_sel2.odd_value::numeric END AS team_odds FROM matches m JOIN odd_categories oc @@ -344,7 +344,7 @@ class OddsBandAnalyzer: AND m.score_home IS NOT NULL AND m.score_away IS NOT NULL AND m.mst_utc < %(before_ts)s - AND COALESCE(os_sel.odd_value, os_sel2.odd_value) + AND COALESCE(os_sel.odd_value::numeric, os_sel2.odd_value::numeric) BETWEEN %(band_low)s AND %(band_high)s ORDER BY m.mst_utc DESC LIMIT %(max_lookback)s @@ -432,7 +432,7 @@ class OddsBandAnalyzer: AND m.score_home IS NOT NULL AND m.score_away IS NOT NULL AND m.mst_utc < %(before_ts)s - AND os_h.odd_value BETWEEN %(band_low)s AND %(band_high)s + AND os_h.odd_value::numeric BETWEEN %(band_low)s AND %(band_high)s ORDER BY m.mst_utc DESC LIMIT %(max_lookback)s ) @@ -508,7 +508,7 @@ class OddsBandAnalyzer: f"İlk Yarı {line_str} Alt/Üst", f"Ilk Yari {line_str} Alt/Ust", ] - score_expr = "COALESCE(m.score_ht_home, 0) + COALESCE(m.score_ht_away, 0)" + score_expr = "COALESCE(m.ht_score_home, 0) + COALESCE(m.ht_score_away, 0)" else: cat_names = [ f"{line_str} Alt/Üst", @@ -535,7 +535,7 @@ class OddsBandAnalyzer: AND m.status = 'FT' AND m.score_home IS NOT NULL AND m.mst_utc < %(before_ts)s - AND os_over.odd_value BETWEEN %(band_low)s AND %(band_high)s + AND os_over.odd_value::numeric BETWEEN %(band_low)s AND %(band_high)s ORDER BY m.mst_utc DESC LIMIT %(max_lookback)s ) @@ -620,7 +620,7 @@ class OddsBandAnalyzer: AND m.status = 'FT' AND m.score_home IS NOT NULL AND m.mst_utc < %(before_ts)s - AND os_yes.odd_value BETWEEN %(band_low)s AND %(band_high)s + AND os_yes.odd_value::numeric BETWEEN %(band_low)s AND %(band_high)s ORDER BY m.mst_utc DESC LIMIT %(max_lookback)s ) @@ -696,7 +696,7 @@ class OddsBandAnalyzer: AND m.sport = 'football' AND m.status = 'FT' AND m.score_home IS NOT NULL AND m.mst_utc < %(before_ts)s - AND os_sel.odd_value BETWEEN %(bl)s AND %(bh)s + AND os_sel.odd_value::numeric BETWEEN %(bl)s AND %(bh)s ORDER BY m.mst_utc DESC LIMIT %(ml)s ) SELECT COUNT(*) 
AS ss, @@ -748,7 +748,7 @@ class OddsBandAnalyzer: try: cur.execute(""" WITH ht_matches AS ( - SELECT m.score_ht_home, m.score_ht_away, + SELECT m.ht_score_home, m.ht_score_away, m.home_team_id, m.away_team_id FROM matches m JOIN odd_categories oc ON oc.match_id = m.id @@ -761,18 +761,18 @@ class OddsBandAnalyzer: AND os2.name = '2' AND m.away_team_id = %(tid)s WHERE (m.home_team_id = %(tid)s OR m.away_team_id = %(tid)s) AND m.sport = 'football' AND m.status = 'FT' - AND m.score_ht_home IS NOT NULL + AND m.ht_score_home IS NOT NULL AND m.mst_utc < %(before_ts)s - AND COALESCE(os1.odd_value, os2.odd_value) + AND COALESCE(os1.odd_value::numeric, os2.odd_value::numeric) BETWEEN %(bl)s AND %(bh)s ORDER BY m.mst_utc DESC LIMIT %(ml)s ) SELECT COUNT(*) AS ss, COALESCE(AVG(CASE - WHEN (home_team_id = %(tid)s AND score_ht_home > score_ht_away) - OR (away_team_id = %(tid)s AND score_ht_away > score_ht_home) + WHEN (home_team_id = %(tid)s AND ht_score_home > ht_score_away) + OR (away_team_id = %(tid)s AND ht_score_away > ht_score_home) THEN 1.0 ELSE 0.0 END), 0.33) AS win_rate, - COALESCE(AVG(CASE WHEN score_ht_home = score_ht_away + COALESCE(AVG(CASE WHEN ht_score_home = ht_score_away THEN 1.0 ELSE 0.0 END), 0.40) AS draw_rate FROM ht_matches """, { @@ -824,7 +824,7 @@ class OddsBandAnalyzer: AND m.sport = 'football' AND m.status = 'FT' AND m.score_home IS NOT NULL AND m.mst_utc < %(before_ts)s - AND os_odd.odd_value BETWEEN %(bl)s AND %(bh)s + AND os_odd.odd_value::numeric BETWEEN %(bl)s AND %(bh)s ORDER BY m.mst_utc DESC LIMIT %(ml)s ) SELECT COUNT(*) AS ss, @@ -1185,7 +1185,7 @@ class OddsBandAnalyzer: 'IY/MS' ) JOIN odd_selections os ON os.odd_category_db_id = oc.db_id - AND os.odd_value BETWEEN %(bl)s AND %(bh)s + AND os.odd_value::numeric BETWEEN %(bl)s AND %(bh)s WHERE m.sport = 'football' AND m.status = 'FT' AND m.score_home IS NOT NULL diff --git a/ai-engine/main.py b/ai-engine/main.py index c643355..0d19682 100755 --- a/ai-engine/main.py +++ b/ai-engine/main.py @@ 
-14,10 +14,13 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse from pydantic import BaseModel -from models.basketball_v25 import get_basketball_v25_predictor +try: + from models.basketball_v25 import get_basketball_v25_predictor + HAS_BASKETBALL = True +except ImportError: + HAS_BASKETBALL = False from services.single_match_orchestrator import get_single_match_orchestrator from services.v26_shadow_engine import get_v26_shadow_engine -from data.database import dispose_engine load_dotenv() @@ -49,9 +52,6 @@ async def lifespan(_: FastAPI): yield - # Cleanup async DB connections on shutdown - await dispose_engine() - app = FastAPI( title="Suggest-Bet AI Engine", @@ -123,9 +123,15 @@ def health_check() -> dict[str, Any]: try: orchestrator = get_single_match_orchestrator() shadow_engine = get_v26_shadow_engine() - basketball_predictor = get_basketball_v25_predictor() - basketball_readiness = basketball_predictor.readiness_summary() - ready = bool(basketball_readiness["fully_loaded"]) + + if HAS_BASKETBALL: + basketball_predictor = get_basketball_v25_predictor() + basketball_readiness = basketball_predictor.readiness_summary() + ready = bool(basketball_readiness.get("fully_loaded", True)) + else: + basketball_readiness = {"fully_loaded": False, "error": "Basketball module not found"} + ready = True + return { "status": "healthy" if ready else "degraded", "engine": "v28.main", diff --git a/ai-engine/models/calibration.py b/ai-engine/models/calibration.py new file mode 100644 index 0000000..cc5ff15 --- /dev/null +++ b/ai-engine/models/calibration.py @@ -0,0 +1,413 @@ +""" +Calibration Module for XGBoost Models +===================================== +Calibrates raw probabilities from XGBoost models using Isotonic Regression. +Ensures that a predicted probability of 70% actually corresponds to a 70% win rate. 
+ +Usage: + from ai_engine.models.calibration import Calibrator + calibrator = Calibrator() + calibrated_prob = calibrator.calibrate("ms", raw_prob) + + # Training new calibration models: + calibrator.train_calibration(valid_df, market="ms") +""" + +import os +import pickle +import json +import numpy as np +import pandas as pd +from datetime import datetime +from typing import Dict, List, Optional, Tuple, Any +from sklearn.isotonic import IsotonicRegression +from sklearn.calibration import calibration_curve +from sklearn.metrics import brier_score_loss + +AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +CALIBRATION_DIR = os.path.join(AI_ENGINE_DIR, "models", "calibration") + +os.makedirs(CALIBRATION_DIR, exist_ok=True) + +# Supported markets for calibration +SUPPORTED_MARKETS = [ + "ms", # Match Result (1X2) - multi-class, calibrated per class + "ms_home", # Standard Home win probability + "ms_home_heavy_fav", # Context: home odds <= 1.40 + "ms_home_fav", # Context: 1.40 < home odds <= 1.80 + "ms_home_balanced", # Context: 1.80 < home odds <= 2.50 + "ms_home_underdog", # Context: home odds > 2.50 + "ms_draw", # Draw probability + "ms_away", # Away win probability + "ou15", # Over/Under 1.5 + "ou25", # Over/Under 2.5 + "ou35", # Over/Under 3.5 + "btts", # Both Teams to Score + "ht_ft", # Half-Time/Full-Time + "dc", # Double Chance + "ht", # Half-Time Result +] + + +class CalibrationMetrics: + """Stores calibration quality metrics for a market.""" + + def __init__(self): + self.brier_score: float = 0.0 + self.calibration_error: float = 0.0 + self.sample_count: int = 0 + self.last_trained: str = "" + self.mean_predicted: float = 0.0 + self.mean_actual: float = 0.0 + + def to_dict(self) -> Dict: + return { + "brier_score": round(self.brier_score, 4), + "calibration_error": round(self.calibration_error, 4), + "sample_count": self.sample_count, + "last_trained": self.last_trained, + "mean_predicted": round(self.mean_predicted, 4), + 
"mean_actual": round(self.mean_actual, 4), + } + + +class Calibrator: + """ + Probability calibration using Isotonic Regression. + + Isotonic Regression is a non-parametric method that fits a piecewise + constant function that is monotonically increasing. It's ideal for + calibrating probabilities because: + + 1. It preserves ranking (if P(A) > P(B) before, P(A) > P(B) after) + 2. It doesn't assume a specific distribution shape + 3. It can correct systematic over/under-confidence + + Example: + # Before calibration: model predicts 70% but actual win rate is 60% + # After calibration: model predicts 70% → calibrated to 60% + """ + + def __init__(self): + self.calibrators: Dict[str, IsotonicRegression] = {} + self.metrics: Dict[str, CalibrationMetrics] = {} + self.heuristic_fallback: Dict[str, float] = { + "ms": 0.90, + "ms_home": 0.90, + "ms_home_heavy_fav": 0.95, + "ms_home_fav": 0.90, + "ms_home_balanced": 0.85, + "ms_home_underdog": 0.80, + "ms_draw": 0.90, + "ms_away": 0.90, + "ou15": 0.90, + "ou25": 0.90, + "ou35": 0.90, + "btts": 0.90, + "ht_ft": 0.85, + "dc": 0.93, + "ht": 0.85, + } + self._load_calibrators() + + def _load_calibrators(self): + """Load trained calibrators for each market from disk.""" + for market in SUPPORTED_MARKETS: + model_path = os.path.join(CALIBRATION_DIR, f"{market}_calibrator.pkl") + metrics_path = os.path.join(CALIBRATION_DIR, f"{market}_metrics.json") + + if os.path.exists(model_path): + try: + with open(model_path, "rb") as f: + self.calibrators[market] = pickle.load(f) + print(f"[Calibrator] Loaded calibration model for {market}") + except Exception as e: + print(f"[Calibrator] Warning: Failed to load {market}: {e}") + + if os.path.exists(metrics_path): + try: + with open(metrics_path, "r") as f: + data = json.load(f) + metrics = CalibrationMetrics() + metrics.brier_score = data.get("brier_score", 0.0) + metrics.calibration_error = data.get("calibration_error", 0.0) + metrics.sample_count = data.get("sample_count", 0) + 
metrics.last_trained = data.get("last_trained", "") + metrics.mean_predicted = data.get("mean_predicted", 0.0) + metrics.mean_actual = data.get("mean_actual", 0.0) + self.metrics[market] = metrics + except Exception as e: + print(f"[Calibrator] Warning: Failed to load metrics for {market}: {e}") + + def calibrate(self, market_type: str, raw_prob: float, odds_val: Optional[float] = None) -> float: + """ + Calibrate a raw probability using Isotonic Regression. + + Args: + market_type (str): 'ms_home', 'ou25', 'btts', 'ht_ft', etc. + raw_prob (float): The raw probability from XGBoost (0.0 - 1.0) + odds_val (float, optional): The pre-match odds, used for context-aware bucket mapping + + Returns: + float: Calibrated probability (0.0 - 1.0) + """ + # Normalize market type + market_key = market_type.lower().replace("-", "_") + + # Route to bucket if ms_home and odds provided + if market_key == "ms_home" and odds_val is not None and odds_val > 1.0: + if odds_val <= 1.40: + bucket_key = "ms_home_heavy_fav" + elif odds_val <= 1.80: + bucket_key = "ms_home_fav" + elif odds_val <= 2.50: + bucket_key = "ms_home_balanced" + else: + bucket_key = "ms_home_underdog" + + if bucket_key in self.calibrators: + market_key = bucket_key + + # If we have a trained Isotonic Regression model, use it + if market_key in self.calibrators: + try: + calibrated = self.calibrators[market_key].predict([raw_prob])[0] + # Ensure output is valid probability + return float(np.clip(calibrated, 0.01, 0.99)) + except Exception as e: + print(f"[Calibrator] Warning: Isotonic failed for {market_key}: {e}") + # Fall through to heuristic + + # Fallback to heuristic calibration + return self._heuristic_calibrate(market_key, raw_prob) + + def _heuristic_calibrate(self, market_type: str, raw_prob: float) -> float: + """ + Heuristic calibration fallback when no trained model exists. 
+ + This applies a conservative shrinkage towards the mean: + - Binary markets (OU, BTTS): shrink towards 0.5 + - Multi-class (MS): shrink towards 0.33 + - HT/FT: stronger shrinkage due to higher variance + """ + # Get shrinkage factor for this market + shrinkage = self.heuristic_fallback.get(market_type, 0.90) + + if market_type in ["ms", "ms_home", "ms_home_heavy_fav", "ms_home_fav", "ms_home_balanced", "ms_home_underdog", "ms_draw", "ms_away"]: + # Pull towards 0.33 (uniform for 3-class) + return (raw_prob * shrinkage) + (0.33 * (1.0 - shrinkage)) + + elif market_type in ["ou15", "ou25", "ou35", "btts"]: + # Pull towards 0.5 (uniform for binary) + return (raw_prob * shrinkage) + (0.5 * (1.0 - shrinkage)) + + elif market_type in ["ht_ft", "ht"]: + # Stronger shrinkage for high-variance markets + return raw_prob * shrinkage + + elif market_type == "dc": + # Double chance is more reliable + return (raw_prob * shrinkage) + (0.66 * (1.0 - shrinkage)) + + return raw_prob + + def train_calibration( + self, + df: pd.DataFrame, + market: str, + prob_col: str, + actual_col: str, + min_samples: int = 100, + save: bool = True, + ) -> CalibrationMetrics: + """ + Train an Isotonic Regression calibration model for a specific market. 
+ + Args: + df: DataFrame with predictions and actual outcomes + market: Market identifier (e.g., 'ms_home', 'ou25', 'btts') + prob_col: Column name for raw probabilities + actual_col: Column name for actual outcomes (0 or 1) + min_samples: Minimum samples required to train + save: Whether to save the model to disk + + Returns: + CalibrationMetrics with quality metrics + """ + # Filter valid data + valid_df = df[[prob_col, actual_col]].dropna() + n_samples = len(valid_df) + + if n_samples < min_samples: + print(f"[Calibrator] Warning: Only {n_samples} samples for {market}, " + f"need at least {min_samples}") + metrics = CalibrationMetrics() + metrics.sample_count = n_samples + return metrics + + # Extract arrays + raw_probs = valid_df[prob_col].values + actuals = valid_df[actual_col].values + + # Train Isotonic Regression + iso = IsotonicRegression(out_of_bounds="clip", increasing=True) + iso.fit(raw_probs, actuals) + + # Calculate calibrated probabilities + calibrated_probs = iso.predict(raw_probs) + + # Calculate metrics + metrics = CalibrationMetrics() + metrics.sample_count = n_samples + metrics.last_trained = datetime.utcnow().isoformat() + metrics.brier_score = brier_score_loss(actuals, calibrated_probs) + metrics.mean_predicted = np.mean(raw_probs) + metrics.mean_actual = np.mean(actuals) + + # Calculate Expected Calibration Error (ECE) + metrics.calibration_error = self._calculate_ece( + calibrated_probs, actuals, n_bins=10 + ) + + # Store in memory + self.calibrators[market] = iso + self.metrics[market] = metrics + + # Save to disk + if save: + self._save_calibration(market, iso, metrics) + + print(f"[Calibrator] Trained {market}: " + f"Brier={metrics.brier_score:.4f}, " + f"ECE={metrics.calibration_error:.4f}, " + f"n={n_samples}") + + return metrics + + def train_all_markets( + self, + df: pd.DataFrame, + market_config: Dict[str, Tuple[str, str]], + min_samples: int = 100, + ) -> Dict[str, CalibrationMetrics]: + """ + Train calibration models for 
multiple markets at once. + + Args: + df: DataFrame with all predictions and outcomes + market_config: Dict mapping market -> (prob_col, actual_col) + e.g., {'ou25': ('ou25_over_prob', 'ou25_over_actual')} + min_samples: Minimum samples per market + + Returns: + Dict of market -> CalibrationMetrics + """ + results = {} + + for market, (prob_col, actual_col) in market_config.items(): + print(f"\n[Calibrator] Training {market}...") + try: + metrics = self.train_calibration( + df=df, + market=market, + prob_col=prob_col, + actual_col=actual_col, + min_samples=min_samples, + save=True, + ) + results[market] = metrics + except Exception as e: + print(f"[Calibrator] Failed to train {market}: {e}") + + return results + + def _calculate_ece( + self, + probs: np.ndarray, + actuals: np.ndarray, + n_bins: int = 10 + ) -> float: + """ + Calculate Expected Calibration Error (ECE). + + ECE = sum(|bin_accuracy - bin_confidence| * bin_weight) + + Lower is better. Perfect calibration = 0. + """ + bin_boundaries = np.linspace(0, 1, n_bins + 1) + ece = 0.0 + + for i in range(n_bins): + in_bin = (probs >= bin_boundaries[i]) & (probs < bin_boundaries[i + 1]) + prop_in_bin = np.mean(in_bin) + + if prop_in_bin > 0: + accuracy_in_bin = np.mean(actuals[in_bin]) + avg_confidence_in_bin = np.mean(probs[in_bin]) + ece += np.abs(accuracy_in_bin - avg_confidence_in_bin) * prop_in_bin + + return ece + + def _save_calibration( + self, + market: str, + calibrator: IsotonicRegression, + metrics: CalibrationMetrics + ): + """Save calibration model and metrics to disk.""" + # Save model + model_path = os.path.join(CALIBRATION_DIR, f"{market}_calibrator.pkl") + with open(model_path, "wb") as f: + pickle.dump(calibrator, f) + + # Save metrics + metrics_path = os.path.join(CALIBRATION_DIR, f"{market}_metrics.json") + with open(metrics_path, "w") as f: + json.dump(metrics.to_dict(), f, indent=2) + + print(f"[Calibrator] Saved {market} to {CALIBRATION_DIR}") + + def get_calibration_report(self) -> 
Dict[str, Any]: + """Generate a summary report of all calibration models.""" + report = { + "trained_markets": list(self.calibrators.keys()), + "metrics": {}, + "heuristic_only": [], + } + + for market in SUPPORTED_MARKETS: + if market in self.metrics: + report["metrics"][market] = self.metrics[market].to_dict() + elif market not in self.calibrators: + report["heuristic_only"].append(market) + + return report + + def get_calibrated_probabilities( + self, + market: str, + raw_probs: np.ndarray + ) -> np.ndarray: + """ + Batch calibration for array of probabilities. + + Args: + market: Market type + raw_probs: Array of raw probabilities + + Returns: + Array of calibrated probabilities + """ + return np.array([self.calibrate(market, p) for p in raw_probs]) + + +# Singleton instance +_calibrator_instance: Optional[Calibrator] = None + + +def get_calibrator() -> Calibrator: + """Get or create the global Calibrator instance.""" + global _calibrator_instance + if _calibrator_instance is None: + _calibrator_instance = Calibrator() + return _calibrator_instance diff --git a/ai-engine/models/v20_ensemble.py b/ai-engine/models/v20_ensemble.py new file mode 100644 index 0000000..b890fc4 --- /dev/null +++ b/ai-engine/models/v20_ensemble.py @@ -0,0 +1,1282 @@ +""" +V20 Ensemble Beast - Main Predictor +Combines 4 prediction engines with surprise detection. + +This is the primary interface for V20 predictions. 
+""" + +import os +import sys +import math +import json +import pickle +import time +import psycopg2 +import pandas as pd +from typing import Dict, List, Optional, Tuple, Any +from dataclasses import dataclass, field + +# Add paths +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from core.engines.team_predictor import get_team_predictor +from core.engines.player_predictor import get_player_predictor +from core.engines.odds_predictor import get_odds_predictor +from core.engines.referee_predictor import get_referee_predictor +from features.upset_engine import get_upset_engine +from features.upset_engine_v2 import get_upset_engine_v2 +from features.feature_adapter import get_feature_adapter +from utils.top_leagues import load_top_league_ids +from data.db import get_clean_dsn +import xgboost as xgb +from models.calibration import Calibrator + +# New Config & Calculators +from config.config_loader import get_config +from core.calculators.base_calculator import CalculationContext +from core.calculators.match_result_calculator import MatchResultCalculator +from core.calculators.over_under_calculator import OverUnderCalculator +from core.calculators.half_time_calculator import HalfTimeCalculator +from core.calculators.score_calculator import ScoreCalculator, ScorePrediction +from core.calculators.other_markets_calculator import OtherMarketsCalculator +from core.calculators.risk_assessor import RiskAssessor +from core.calculators.bet_recommender import BetRecommender + + +class _BoosterModelAdapter: + """Adapter to provide predict_proba interface for raw xgboost.Booster models.""" + + def __init__(self, booster: xgb.Booster): + self._booster = booster + + def predict_proba(self, features: pd.DataFrame): + dmat = xgb.DMatrix(features) + preds = self._booster.predict(dmat) + if len(preds.shape) == 1: + # binary: return [P(class0), P(class1)] + return [[float(1.0 - p), float(p)] for p in preds] + # multiclass: already (n, k) + return preds + 
+ +@dataclass +class MarketPrediction: + """Prediction for a single betting market.""" + market_type: str + pick: str + probability: float + confidence: float + odds: float = 0.0 + is_recommended: bool = False + is_value_bet: bool = False + edge: float = 0.0 # Expected edge over market + + def to_dict(self) -> dict: + return { + "market_type": self.market_type, + "pick": self.pick, + "probability": round(self.probability * 100, 1), + "confidence": round(self.confidence, 1), + "odds": self.odds, + "is_recommended": self.is_recommended, + "is_value_bet": self.is_value_bet, + "edge": round(self.edge, 1) + } + + +@dataclass +class FullMatchPrediction: + """Complete prediction for a match with ALL markets.""" + match_id: str + home_team: str + away_team: str + match_date: str = "" + + # === MAÇ SONUCU (1X2) === + ms_home_prob: float = 0.33 + ms_draw_prob: float = 0.33 + ms_away_prob: float = 0.33 + ms_pick: str = "" + ms_confidence: float = 0.0 + + # === ÇİFTE ŞANS === + dc_1x_prob: float = 0.66 + dc_x2_prob: float = 0.66 + dc_12_prob: float = 0.66 + dc_pick: str = "" + dc_confidence: float = 0.0 + + # === ALT/ÜST GOLLER === + # 1.5 + over_15_prob: float = 0.70 + under_15_prob: float = 0.30 + ou15_pick: str = "" + ou15_confidence: float = 0.0 + + # 2.5 + over_25_prob: float = 0.50 + under_25_prob: float = 0.50 + ou25_pick: str = "" + ou25_confidence: float = 0.0 + + # 3.5 + over_35_prob: float = 0.30 + under_35_prob: float = 0.70 + ou35_pick: str = "" + ou35_confidence: float = 0.0 + + # === KARŞILIKLI GOL (BTTS) === + btts_yes_prob: float = 0.50 + btts_no_prob: float = 0.50 + btts_pick: str = "" + btts_confidence: float = 0.0 + + # === İLK YARI SONUCU === + ht_home_prob: float = 0.30 + ht_draw_prob: float = 0.40 + ht_away_prob: float = 0.30 + ht_pick: str = "" + ht_confidence: float = 0.0 + + # === SKOR TAHMİNLERİ === + score: ScorePrediction = None + predicted_ft_score: str = "1-1" + predicted_ht_score: str = "0-0" + ft_scores_top5: List[Dict] = 
field(default_factory=list) + + # === xG (Expected Goals) === + home_xg: float = 1.3 + away_xg: float = 1.1 + total_xg: float = 2.4 + + # === RISK DEĞERLENDİRMESİ === + risk_level: str = "MEDIUM" # LOW, MEDIUM, HIGH, EXTREME + risk_score: float = 0.0 + is_surprise_risk: bool = False + surprise_type: str = "" + risk_warnings: List[str] = field(default_factory=list) + ht_ft_probs: Dict[str, float] = field(default_factory=dict) + + # === GLM-5 SÜRPRİZ SKORU === + upset_score: int = 0 # 0-100 arası sürpriz skoru + upset_level: str = "LOW" # LOW, MEDIUM, HIGH, EXTREME + upset_reasons: List[str] = field(default_factory=list) + + # === ENGINE KATKILARI === + team_confidence: float = 0.0 + player_confidence: float = 0.0 + odds_confidence: float = 0.0 + referee_confidence: float = 0.0 + + # === KORNER & KART & DİĞER === + total_corners_pred: float = 9.5 + corner_pick: str = "9.5 Üst" + + total_cards_pred: float = 4.5 + card_pick: str = "4.5 Alt" + cards_over_prob: float = 0.50 + cards_under_prob: float = 0.50 + cards_confidence: float = 0.0 + + handicap_pick: str = "" + handicap_home_prob: float = 0.33 + handicap_draw_prob: float = 0.34 + handicap_away_prob: float = 0.33 + handicap_confidence: float = 0.0 + + ht_over_05_prob: float = 0.65 + ht_under_05_prob: float = 0.35 + ht_over_15_prob: float = 0.30 + ht_under_15_prob: float = 0.70 + ht_ou_pick: str = "İY 0.5 Üst" + ht_ou15_pick: str = "İY 1.5 Alt" + + odd_even_pick: str = "Çift" + odd_prob: float = 0.50 # Tek olasılığı + even_prob: float = 0.50 # Çift olasılığı + + # === TAVSİYELER (RECOMMENDATIONS) === + best_bet: Optional[MarketPrediction] = None + recommended_bets: List[MarketPrediction] = field(default_factory=list) + alternative_bet: Optional[MarketPrediction] = None + expert_recommendation: Dict[str, Any] = field(default_factory=dict) + + # === DETAILED ANALYSIS === + analysis_details: Dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> dict: + return { + "match_info": { + "match_id": 
self.match_id, + "home_team": self.home_team, + "away_team": self.away_team, + "match_date": self.match_date + }, + "predictions": { + "match_result": { + "1": round(self.ms_home_prob * 100, 1), + "X": round(self.ms_draw_prob * 100, 1), + "2": round(self.ms_away_prob * 100, 1), + "pick": self.ms_pick, + "confidence": round(self.ms_confidence, 1) + }, + "double_chance": { + "1X": round(self.dc_1x_prob * 100, 1), + "X2": round(self.dc_x2_prob * 100, 1), + "12": round(self.dc_12_prob * 100, 1), + "pick": self.dc_pick, + "confidence": round(self.dc_confidence, 1) + }, + "over_under": { + "1.5": { + "over": round(self.over_15_prob * 100, 1), + "under": round(self.under_15_prob * 100, 1), + "pick": self.ou15_pick, + "confidence": round(self.ou15_confidence, 1) + }, + "2.5": { + "over": round(self.over_25_prob * 100, 1), + "under": round(self.under_25_prob * 100, 1), + "pick": self.ou25_pick, + "confidence": round(self.ou25_confidence, 1) + }, + "3.5": { + "over": round(self.over_35_prob * 100, 1), + "under": round(self.under_35_prob * 100, 1), + "pick": self.ou35_pick, + "confidence": round(self.ou35_confidence, 1) + } + }, + "btts": { + "yes": round(self.btts_yes_prob * 100, 1), + "no": round(self.btts_no_prob * 100, 1), + "pick": self.btts_pick, + "confidence": round(self.btts_confidence, 1) + }, + "first_half": { + "1": round(self.ht_home_prob * 100, 1), + "X": round(self.ht_draw_prob * 100, 1), + "2": round(self.ht_away_prob * 100, 1), + "pick": self.ht_pick, + "confidence": round(self.ht_confidence, 1), + "over_under_05": { + "over": round(self.ht_over_05_prob * 100, 1), + "under": round(self.ht_under_05_prob * 100, 1), + "pick": self.ht_ou_pick + }, + "over_under_15": { + "over": round(self.ht_over_15_prob * 100, 1), + "under": round(self.ht_under_15_prob * 100, 1), + "pick": self.ht_ou15_pick + } + }, + "scores": { + "predicted_ft": self.predicted_ft_score, + "predicted_ht": self.predicted_ht_score, + "top_5_ft_scores": self.ft_scores_top5 + }, + "others": { + 
"handicap": { + "pick": self.handicap_pick, + "confidence": round(self.handicap_confidence, 1), + "home": round(self.handicap_home_prob * 100, 1), + "draw": round(self.handicap_draw_prob * 100, 1), + "away": round(self.handicap_away_prob * 100, 1) + }, + "corners": { + "total": round(self.total_corners_pred, 1), + "pick": self.corner_pick + }, + "cards": { + "total": round(self.total_cards_pred, 1), + "pick": self.card_pick, + "confidence": round(self.cards_confidence, 1), + "over": round(self.cards_over_prob * 100, 1), + "under": round(self.cards_under_prob * 100, 1) + }, + "odd_even": { + "pick": self.odd_even_pick, + "tek": round(self.odd_prob * 100, 1), + "cift": round(self.even_prob * 100, 1) + } + }, + "xg": { + "home": round(self.home_xg, 2), + "away": round(self.away_xg, 2), + "total": round(self.total_xg, 2) + } + }, + "risk": { + "level": self.risk_level, + "score": round(self.risk_score, 1), + "is_surprise_risk": self.is_surprise_risk, + "surprise_type": self.surprise_type, + "ht_ft_probs": {k: round(v * 100, 1) for k, v in self.ht_ft_probs.items()} if self.ht_ft_probs else {}, + "warnings": self.risk_warnings + }, + "upset_analysis": { + "score": self.upset_score, + "level": self.upset_level, + "reasons": self.upset_reasons + }, + "engine_breakdown": { + "team_engine": round(self.team_confidence, 1), + "player_engine": round(self.player_confidence, 1), + "odds_engine": round(self.odds_confidence, 1), + "referee_engine": round(self.referee_confidence, 1) + }, + "recommendations": { + "best_bet": self.best_bet.to_dict() if self.best_bet else None, + "all_recommended": [b.to_dict() for b in self.recommended_bets] if self.recommended_bets else [], + "alternative_bet": self.alternative_bet.to_dict() if self.alternative_bet else None + }, + "analysis_details": self.analysis_details + } + + +class V20EnsemblePredictor: + HTFT_LABELS = ("1/1", "1/X", "1/2", "X/1", "X/X", "X/2", "2/1", "2/X", "2/2") + # Neutral defaults when MS odds are missing: avoid synthetic 
home-favorite bias. + DEFAULT_MS_H = 2.65 + DEFAULT_MS_D = 3.20 + DEFAULT_MS_A = 2.65 + FOOTBALL_TOP_PRIOR = ( + 0.263760, + 0.051786, + 0.022942, + 0.150168, + 0.157798, + 0.106064, + 0.027622, + 0.051226, + 0.168634, + ) + FOOTBALL_NON_TOP_PRIOR = ( + 0.265113, + 0.048306, + 0.020399, + 0.147020, + 0.152383, + 0.113075, + 0.026542, + 0.046356, + 0.180805, + ) + # Top-league football priors conditioned on favorite side from MS (1X2) odds. + # Label order follows HTFT_LABELS. + FOOTBALL_TOP_PRIOR_HOME_FAV = ( + 0.321707, + 0.054165, + 0.017952, + 0.179729, + 0.161674, + 0.078991, + 0.031186, + 0.047394, + 0.107201, + ) + FOOTBALL_TOP_PRIOR_AWAY_FAV = ( + 0.130654, + 0.049139, + 0.033754, + 0.081975, + 0.156142, + 0.167164, + 0.020207, + 0.058324, + 0.302641, + ) + FOOTBALL_TOP_PRIOR_BALANCED = ( + 0.169429, + 0.052486, + 0.028545, + 0.144567, + 0.209024, + 0.116943, + 0.026703, + 0.053407, + 0.198895, + ) + + def __init__(self): + print("🚀 Initializing V20 Ensemble Beast...") + self.config = get_config() + + # Engines + self.team_engine = get_team_predictor() + self.player_engine = get_player_predictor() + self.odds_engine = get_odds_predictor() + self.referee_engine = get_referee_predictor() + self.upset_engine = get_upset_engine() + self.upset_engine_v2 = get_upset_engine_v2() # GLM-5 enhanced + + # Calculators + print("⚙️ Loading market calculators...") + self.match_result_calc = MatchResultCalculator(self.config) + self.over_under_calc = OverUnderCalculator(self.config) + self.half_time_calc = HalfTimeCalculator(self.config) + self.score_calc = ScoreCalculator(self.config) + print(" ✅ Score Calculator (XGBoost FT+HT) loaded") + self.other_markets_calc = OtherMarketsCalculator(self.config) + self.risk_assessor = RiskAssessor(self.config) + self.bet_recommender = BetRecommender(self.config) + + # Expert Recommender (New Logic) + from core.calculators.expert_recommender import ExpertRecommender + self.expert_recommender = ExpertRecommender(self.config) + + # 
XGBoost Integration + print("🤖 Loading XGBoost models...") + self.feature_adapter = get_feature_adapter() + self.calibrator = Calibrator() + self.xgb_models = {} + self.top_league_ids = load_top_league_ids() + print(f"📋 Loaded {len(self.top_league_ids)} top leagues for HT/FT tuning") + self.db_dsn = get_clean_dsn() + self.league_htft_prior_cache: Dict[Tuple[str, str], Optional[Tuple[float, ...]]] = {} + + xgb_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "models", "xgboost") + model_files = { + "ms": "xgb_ms", + "ou25": "xgb_ou25", + "btts": "xgb_btts", + "ht_ft": "xgb_ht_ft", + "ht_result": "xgb_ht_result", + "ht_ou05": "xgb_ht_ou05", + "ht_ou15": "xgb_ht_ou15", + "odd_even": "xgb_odd_even", + "ou15": "xgb_ou15", + "ou35": "xgb_ou35", + "handicap_ms": "xgb_handicap_ms", + "cards_ou45": "xgb_cards_ou45", + } + + only_keys = os.getenv("XGB_MODEL_KEYS", "").strip() + if only_keys: + selected_keys = {k.strip().lower() for k in only_keys.split(",") if k.strip()} + model_files = {k: v for k, v in model_files.items() if k in selected_keys} + if model_files: + print(f"ℹ️ XGB_MODEL_KEYS active -> loading only: {', '.join(sorted(model_files.keys()))}") + else: + print("⚠️ XGB_MODEL_KEYS set but no valid keys matched. 
Loading none.") + + for key, base_name in model_files.items(): + print(f" ⏳ Loading {key} from {base_name}.pkl/.json...", flush=True) + model, src, err = self._load_xgb_model(xgb_dir, base_name) + if model is not None: + self.xgb_models[key] = model + print(f" ✅ Loaded {key} ({src})") + elif err: + print(f" ⚠️ Failed to load {base_name}: {err}") + else: + print(f" ⚠️ Model not found: {base_name}.pkl or {base_name}.json") + + print("✅ V20 Ensemble Beast ready!") + + @staticmethod + def _load_xgb_model(xgb_dir: str, base_name: str): + pkl_path = os.path.join(xgb_dir, f"{base_name}.pkl") + json_path = os.path.join(xgb_dir, f"{base_name}.json") + + if os.path.exists(pkl_path): + started = time.perf_counter() + with open(pkl_path, "rb") as f: + model = pickle.load(f) + elapsed = time.perf_counter() - started + return model, f"pkl {elapsed:.2f}s", None + + if os.path.exists(json_path): + started = time.perf_counter() + # Preferred path: sklearn wrapper with predict_proba + try: + model = xgb.XGBClassifier() + model.load_model(json_path) + elapsed = time.perf_counter() - started + return model, f"json {elapsed:.2f}s", None + except Exception: + # Fallback: raw Booster + adapter + try: + booster = xgb.Booster() + booster.load_model(json_path) + model = _BoosterModelAdapter(booster) + elapsed = time.perf_counter() - started + return model, f"json/booster {elapsed:.2f}s", None + except Exception as e: + return None, "", e + + return None, "", None + + @staticmethod + def _safe_odd(value: Any) -> float: + try: + odd = float(value) + return odd if odd > 1.01 else 0.0 + except (TypeError, ValueError): + return 0.0 + + @staticmethod + def _align_features(features: pd.DataFrame, model) -> pd.DataFrame: + """Align DataFrame columns to the model's expected feature set. 
+ + Supports: + - sklearn wrappers (XGBClassifier / LGBMClassifier) → feature_names_in_ + - raw xgboost.Booster → feature_names + - _BoosterModelAdapter → _booster.feature_names + + If the model doesn't expose feature names, returns the DataFrame as-is. + """ + expected: Optional[List[str]] = None + + # 1. sklearn wrapper (XGBClassifier, LGBMClassifier, CalibratedClassifierCV) + if hasattr(model, 'feature_names_in_'): + expected = list(model.feature_names_in_) + # 2. _BoosterModelAdapter + elif hasattr(model, '_booster') and hasattr(model._booster, 'feature_names'): + expected = model._booster.feature_names + # 3. raw xgboost.Booster + elif hasattr(model, 'feature_names') and model.feature_names: + expected = list(model.feature_names) + + if expected is None: + return features + + # Only keep columns that the model expects (order preserved) + available = [col for col in expected if col in features.columns] + if len(available) < len(expected): + missing = set(expected) - set(available) + print(f"⚠️ Feature alignment: {len(missing)} missing features filled with 0: {sorted(missing)[:5]}{'...' if len(missing) > 5 else ''}") + # Add missing columns with 0 (safe neutral default) + for col in expected: + if col not in features.columns: + features = features.copy() + features[col] = 0.0 + + return features[expected] + + def _favorite_profile_from_odds(self, odds_data: Dict[str, float]) -> Tuple[str, float]: + """ + Returns (favorite_side, gap_to_second_favorite). 
+ favorite_side: H, A, D, or U (unknown) + """ + ms_h = self._safe_odd((odds_data or {}).get("ms_h")) + ms_d = self._safe_odd((odds_data or {}).get("ms_d")) + ms_a = self._safe_odd((odds_data or {}).get("ms_a")) + + candidates = [(side, odd) for side, odd in (("H", ms_h), ("D", ms_d), ("A", ms_a)) if odd > 0.0] + if len(candidates) < 2: + return "U", 0.0 + + candidates.sort(key=lambda item: item[1]) + favorite_side, favorite_odd = candidates[0] + second_odd = candidates[1][1] + return favorite_side, max(0.0, second_odd - favorite_odd) + + def _favorite_side_from_ms_odds( + self, + odds_data: Dict[str, float], + ) -> str: + """ + Returns side from MS home/away odds only: + - H: home favorite + - A: away favorite + - B: balanced (home and away near-equal) + - U: unknown + """ + ms_h = self._safe_odd((odds_data or {}).get("ms_h")) + ms_a = self._safe_odd((odds_data or {}).get("ms_a")) + if ms_h <= 0.0 or ms_a <= 0.0: + return "U" + + balance_gap = float(self.config.get("risk.htft_favorite_balance_gap", 0.20)) + if abs(ms_h - ms_a) <= balance_gap: + return "B" + return "H" if ms_h < ms_a else "A" + + def _get_top_odds_conditioned_prior( + self, + odds_data: Dict[str, float], + ) -> Optional[Tuple[float, ...]]: + side = self._favorite_side_from_ms_odds(odds_data) + if side == "H": + return self.FOOTBALL_TOP_PRIOR_HOME_FAV + if side == "A": + return self.FOOTBALL_TOP_PRIOR_AWAY_FAV + if side == "B": + return self.FOOTBALL_TOP_PRIOR_BALANCED + return None + + def _is_top_league(self, league_id: Optional[str]) -> bool: + if not league_id: + return False + return str(league_id) in self.top_league_ids + + def _get_htft_league_prior( + self, + league_id: Optional[str], + sport: str, + ) -> Optional[Tuple[float, ...]]: + sport_key = (sport or "").lower().strip() + if sport_key != "football" or not league_id: + return None + + cache_key = (sport_key, str(league_id)) + if cache_key in self.league_htft_prior_cache: + return self.league_htft_prior_cache[cache_key] + + min_samples 
= int(self.config.get("risk.htft_prior_min_matches", 300)) + combo_counts = {label: 0 for label in self.HTFT_LABELS} + try: + with psycopg2.connect(self.db_dsn) as conn: + with conn.cursor() as cur: + cur.execute( + """ + WITH base AS ( + SELECT + CASE WHEN ht_score_home > ht_score_away THEN '1' + WHEN ht_score_home = ht_score_away THEN 'X' + ELSE '2' END AS ht, + CASE WHEN score_home > score_away THEN '1' + WHEN score_home = score_away THEN 'X' + ELSE '2' END AS ft + FROM matches + WHERE status = 'FT' + AND sport = %s + AND league_id = %s + AND ht_score_home IS NOT NULL + AND ht_score_away IS NOT NULL + AND score_home IS NOT NULL + AND score_away IS NOT NULL + ) + SELECT ht || '/' || ft AS combo, COUNT(*)::bigint AS n + FROM base + GROUP BY combo + """, + (sport_key, str(league_id)), + ) + rows = cur.fetchall() + except Exception: + self.league_htft_prior_cache[cache_key] = None + return None + + total = 0 + for combo, n in rows: + if combo in combo_counts: + combo_counts[combo] = int(n) + total += int(n) + + if total < min_samples: + self.league_htft_prior_cache[cache_key] = None + return None + + prior = tuple(combo_counts[label] / total for label in self.HTFT_LABELS) + self.league_htft_prior_cache[cache_key] = prior + return prior + + def _postprocess_htft_probs( + self, + raw_probs: List[float], + odds_data: Optional[Dict[str, float]] = None, + sport: str = "football", + is_top_league: bool = False, + league_id: Optional[str] = None, + ) -> List[float]: + """ + Stabilize HT/FT class probabilities. + + Why: + - HT/FT reversals (1/2, 2/1) are rare and can be overestimated. + - We preserve ranking signal but make absolute probabilities conservative. + """ + probs = [max(1e-9, float(p)) for p in raw_probs[:9]] + if len(probs) != 9: + return [1.0 / 9.0] * 9 + + # Global calibration pass for HT/FT market. 
+ probs = [self.calibrator.calibrate("ht_ft", p) for p in probs] + + sport_key = (sport or "football").lower().strip() + + # Temperature > 1.0 flattens over-confident distributions. + if sport_key == "basketball": + if is_top_league: + temperature = float( + self.config.get("risk.htft_temperature_basketball_top", self.config.get("risk.htft_temperature_basketball", 1.08)), + ) + else: + temperature = float( + self.config.get("risk.htft_temperature_basketball_non_top", 1.15), + ) + else: + if is_top_league: + temperature = float( + self.config.get("risk.htft_temperature_top", self.config.get("risk.htft_temperature", 1.25)), + ) + else: + temperature = float( + self.config.get("risk.htft_temperature_non_top", 1.35), + ) + if temperature > 1.0: + inv_t = 1.0 / temperature + probs = [p**inv_t for p in probs] + + # Extra damping for reversal classes: 1/2 (idx 2), 2/1 (idx 6). + if is_top_league: + base_reversal_multiplier = float( + self.config.get("risk.htft_reversal_multiplier_top", self.config.get("risk.htft_reversal_multiplier", 0.60)), + ) + favorite_reversal_multiplier = float( + self.config.get( + "risk.htft_reversal_multiplier_favorite_top", + self.config.get("risk.htft_reversal_multiplier_favorite", 0.72), + ), + ) + underdog_reversal_multiplier = float( + self.config.get( + "risk.htft_reversal_multiplier_underdog_top", + self.config.get("risk.htft_reversal_multiplier_underdog", 0.45), + ), + ) + basketball_reversal_multiplier = float( + self.config.get( + "risk.htft_reversal_multiplier_basketball_top", + self.config.get("risk.htft_reversal_multiplier_basketball", 0.90), + ), + ) + else: + base_reversal_multiplier = float(self.config.get("risk.htft_reversal_multiplier_non_top", 0.45)) + favorite_reversal_multiplier = float( + self.config.get("risk.htft_reversal_multiplier_favorite_non_top", 0.55), + ) + underdog_reversal_multiplier = float( + self.config.get("risk.htft_reversal_multiplier_underdog_non_top", 0.30), + ) + basketball_reversal_multiplier = float( + 
self.config.get("risk.htft_reversal_multiplier_basketball_non_top", 0.75), + ) + gap_medium = float(self.config.get("risk.htft_reversal_gap_medium", 0.50)) + gap_strong = float(self.config.get("risk.htft_reversal_gap_strong", 1.00)) + + favorite_side, favorite_gap = self._favorite_profile_from_odds(odds_data or {}) + + def _reversal_multiplier(winner_side: str) -> float: + if sport_key == "basketball": + return basketball_reversal_multiplier + + multiplier = base_reversal_multiplier + if favorite_side in ("H", "A"): + multiplier = ( + favorite_reversal_multiplier + if winner_side == favorite_side + else underdog_reversal_multiplier + ) + + # If market heavily favors one side, penalize underdog-reversal harder. + if winner_side != favorite_side and favorite_gap >= gap_strong: + multiplier *= 0.80 + elif winner_side != favorite_side and favorite_gap >= gap_medium: + multiplier *= 0.90 + + return max(0.20, min(1.10, multiplier)) + + # 1/2 => winner is Away, 2/1 => winner is Home + probs[2] *= _reversal_multiplier("A") + probs[6] *= _reversal_multiplier("H") + + # Prior blend for football (league-specific if sufficient sample size). 
+ if sport_key == "football": + league_prior = self._get_htft_league_prior(league_id=league_id, sport=sport_key) + if league_prior is not None: + prior = league_prior + blend = float(self.config.get("risk.htft_prior_blend_league", 0.65)) + else: + prior = self.FOOTBALL_TOP_PRIOR if is_top_league else self.FOOTBALL_NON_TOP_PRIOR + blend = float( + self.config.get( + "risk.htft_prior_blend_top" if is_top_league else "risk.htft_prior_blend_non_top", + 0.50 if is_top_league else 0.58, + ), + ) + + if is_top_league: + side_prior = self._get_top_odds_conditioned_prior(odds_data or {}) + if side_prior is not None: + if league_prior is not None: + odds_prior_blend = float( + self.config.get("risk.htft_prior_odds_blend_top_with_league", 0.22), + ) + else: + odds_prior_blend = float( + self.config.get("risk.htft_prior_odds_blend_top", 0.35), + ) + odds_prior_blend = max(0.0, min(0.80, odds_prior_blend)) + prior = tuple( + ((1.0 - odds_prior_blend) * prior[idx]) + (odds_prior_blend * side_prior[idx]) + for idx in range(9) + ) + + blend = max(0.0, min(0.95, blend)) + probs = [((1.0 - blend) * p) + (blend * prior[idx]) for idx, p in enumerate(probs)] + + # Hard cap reversal classes by prior factor to avoid unrealistic spikes. 
+ cap_factor = float(self.config.get("risk.htft_reversal_cap_factor", 2.3)) + cap_factor = max(1.0, cap_factor) + for idx in (2, 6): + cap_val = prior[idx] * cap_factor + if probs[idx] > cap_val: + probs[idx] = cap_val + + total = sum(probs) + if total <= 0: + return [1.0 / 9.0] * 9 + + return [p / total for p in probs] + + def predict(self, + match_id: str, + home_team_id: str, + away_team_id: str, + home_team_name: str, + away_team_name: str, + match_date_ms: int, + odds_data: Dict[str, float] = None, + home_lineup: List[str] = None, + away_lineup: List[str] = None, + referee_name: str = None, + home_goals_avg: float = 1.5, + home_conceded_avg: float = 1.2, + away_goals_avg: float = 1.2, + away_conceded_avg: float = 1.4, + home_position: int = 10, + away_position: int = 10, + league_name: str = "", + league_id: str = None, + sport: str = "football", + sidelined_data: Dict = None) -> FullMatchPrediction: + """ + Generate complete V20 ensemble prediction. + + Returns FullMatchPrediction with ALL markets. + """ + + # Default odds if not provided + if odds_data is None: + odds_data = { + "ms_h": self.DEFAULT_MS_H, + "ms_d": self.DEFAULT_MS_D, + "ms_a": self.DEFAULT_MS_A, + } + + # === 1. 
COLLECT ALL ENGINE PREDICTIONS === + + team_pred = self.team_engine.predict( + home_team_id=home_team_id, + away_team_id=away_team_id, + match_date_ms=match_date_ms, + home_team_name=home_team_name, + away_team_name=away_team_name + ) + + player_pred = self.player_engine.predict( + match_id=match_id, + home_team_id=home_team_id, + away_team_id=away_team_id, + home_lineup=home_lineup, + away_lineup=away_lineup, + sidelined_data=sidelined_data + ) + + odds_pred = self.odds_engine.predict( + odds_data=odds_data, + home_goals_avg=home_goals_avg, + home_conceded_avg=home_conceded_avg, + away_goals_avg=away_goals_avg, + away_conceded_avg=away_conceded_avg + ) + + referee_pred = self.referee_engine.predict( + match_id=match_id, + referee_name=referee_name, + league_id=league_id + ) + + upset_factors = self.upset_engine.calculate_upset_potential( + home_team_name=home_team_name, + home_team_id=home_team_id, + away_team_name=away_team_name, + league_name=league_name, + home_position=home_position, + away_position=away_position, + match_date_ms=match_date_ms + ) + + # GLM-5 Enhanced Upset Detection v2 + # Determine favorite from odds + favorite_side = "home" + favorite_odds = odds_data.get("ms_h", 2.0) if odds_data else 2.0 + if odds_data: + ms_h = odds_data.get("ms_h", 999) + ms_a = odds_data.get("ms_a", 999) + if ms_a < ms_h: + favorite_side = "away" + favorite_odds = ms_a + elif ms_h < ms_a: + favorite_side = "home" + favorite_odds = ms_h + else: + favorite_side = "draw" + favorite_odds = odds_data.get("ms_d", 3.0) + + upset_factors_v2 = self.upset_engine_v2.calculate_upset_potential( + home_team_name=home_team_name, + home_team_id=home_team_id, + away_team_name=away_team_name, + league_name=league_name, + home_position=home_position, + away_position=away_position, + match_date_ms=match_date_ms, + odds_data=odds_data, + referee_name=referee_name, + home_form_score=team_pred.home_form_score if hasattr(team_pred, 'home_form_score') else 50.0, + 
away_form_score=team_pred.away_form_score if hasattr(team_pred, 'away_form_score') else 50.0, + favorite_side=favorite_side, + favorite_odds=favorite_odds + ) + + # === 2. DYNAMIC ENGINE WEIGHTS === + w_team = self.config.get("engine_weights.team", 0.30) + w_player = self.config.get("engine_weights.player", 0.25) + w_odds = self.config.get("engine_weights.odds", 0.30) + w_referee = self.config.get("engine_weights.referee", 0.15) + + # Redistribution Logic + if not player_pred.lineup_available: + min_w = self.config.get("engine_weights.min_weight", 0.05) + surplus = w_player - min_w + w_player = min_w + w_team += surplus * self.config.get("weight_redistribution.player_missing_to_team", 0.5) + w_odds += surplus * self.config.get("weight_redistribution.player_missing_to_odds", 0.5) + + min_ref_matches = self.config.get("weight_redistribution.referee_min_matches", 5) + if referee_pred.matches_officiated < min_ref_matches: + min_w = self.config.get("engine_weights.min_weight", 0.05) + surplus = w_referee - min_w + w_referee = min_w + w_team += surplus * self.config.get("weight_redistribution.referee_missing_to_team", 0.4) + w_odds += surplus * self.config.get("weight_redistribution.referee_missing_to_odds", 0.6) + + # Normalize + w_total = w_team + w_player + w_odds + w_referee + weights = { + "team": w_team / w_total, + "player": w_player / w_total, + "odds": w_odds / w_total, + "referee": w_referee / w_total + } + + # Get Modifiers + player_mods = self.player_engine.get_1x2_modifier(player_pred) + referee_mods = self.referee_engine.get_modifiers(referee_pred) + + # Calculate xG (Used by multiple calculators) + home_xg = (team_pred.home_xg + odds_pred.poisson_home_xg) / 2 + away_xg = (team_pred.away_xg + odds_pred.poisson_away_xg) / 2 + + # === 3. 
CREATE CONTEXT === + ctx = CalculationContext( + team_pred=team_pred, + player_pred=player_pred, + odds_pred=odds_pred, + referee_pred=referee_pred, + upset_factors=upset_factors, + weights=weights, + player_mods=player_mods, + referee_mods=referee_mods, + match_id=match_id, + home_team_name=home_team_name, + away_team_name=away_team_name, + odds_data=odds_data, + home_xg=home_xg, + away_xg=away_xg, + total_xg=home_xg + away_xg, + league_id=league_id, + sport=(sport or "football").lower().strip(), + is_top_league=self._is_top_league(league_id), + ) + + # === 4. XGBOOST INFERENCE === + try: + # Prepare features (1 row DataFrame) + xgb_features = self.feature_adapter.get_features(ctx) + + # Predict — per-model feature alignment + for key, model in self.xgb_models.items(): + try: + model_features = self._align_features(xgb_features, model) + raw_pred = model.predict_proba(model_features) + except Exception as model_err: + print(f"⚠️ XGBoost {key} inference failed: {model_err}") + continue + + # Handle multi-class (MS, HT_RESULT, HT/FT) vs binary + if key in ("ms", "ht_result"): + # raw_pred is (1, 3) + probs = raw_pred[0] # [Home, Draw, Away] + ctx.xgboost_preds[key] = { + "home": float(probs[0]), + "draw": float(probs[1]), + "away": float(probs[2]) + } + elif key == "handicap_ms": + probs = raw_pred[0] # [H1, HX, H2] + ctx.xgboost_preds[key] = { + "h1": float(probs[0]), + "hx": float(probs[1]), + "h2": float(probs[2]) + } + elif key == "ht_ft": + # raw_pred is (1, 9) + raw_probs = [float(p) for p in raw_pred[0]] + probs = self._postprocess_htft_probs( + raw_probs, + odds_data=odds_data, + sport=sport, + is_top_league=ctx.is_top_league, + league_id=league_id, + ) + ctx.xgboost_preds[key] = { + label: float(probs[idx]) for idx, label in enumerate(self.HTFT_LABELS) + } + # Keep raw vector for optional calculators/debug consumers. 
+ ctx.xgboost_preds["ht_ft_raw"] = raw_probs + else: + # Binary (OU/BTTS) - index 1 is the positive class probability + prob = float(raw_pred[0][1]) + ctx.xgboost_preds[key] = prob + + except Exception as e: + print(f"⚠️ XGBoost Inference Failed: {e}") + import traceback + traceback.print_exc() + + # === 5. RUN CALCULATORS === + ms_result = self.match_result_calc.calculate(ctx) + ou_result = self.over_under_calc.calculate(ctx) + ht_result = self.half_time_calc.calculate(ctx) + score_result = self.score_calc.calculate(ctx, ms_result) + other_result = self.other_markets_calc.calculate(ctx, ms_result) + risk_result = self.risk_assessor.calculate(ctx, ms_result) + + # Use Reconciled Result + final_ms = score_result.reconciled_ms if score_result.reconciled_ms else ms_result + + # Expert Recommendation (New Logic) + expert_result = self.expert_recommender.calculate(ctx, final_ms, ou_result, risk_result) + expert_data = {} + if expert_result: + expert_data = { + "main_pick": expert_result.main_pick, + "safe_alternative": expert_result.safe_alternative, + "value_picks": expert_result.value_picks, + "surprise_picks": expert_result.surprise_picks, + "market_summary": expert_result.market_summary + } + + # Update context with risk info for recommender + ctx.risk_level = risk_result.risk_level + ctx.is_surprise = risk_result.is_surprise_risk + + rec_result = self.bet_recommender.calculate(ctx, final_ms, ou_result, risk_result) + + # === 5. 
ASSEMBLE PREDICTION === + + # Map MarketPredictionDTO to internal MarketPrediction + def _map_dto(dto): + if not dto: return None + return MarketPrediction( + market_type=dto.market_type, + pick=dto.pick, + probability=dto.probability, + confidence=dto.confidence, + odds=dto.odds, + is_recommended=dto.is_recommended, + is_value_bet=dto.is_value_bet, + edge=dto.edge + ) + + best_bet = _map_dto(rec_result.best_bet) + alt_bet = _map_dto(rec_result.alternative_bet) + recommended = [_map_dto(r) for r in rec_result.recommended_bets] + + # Analysis Details + analysis_details = { + "home_form": f"Form Score: {round(0.5 + team_pred.form_advantage/2, 2)}", + "away_form": f"Form Score: {round(0.5 - team_pred.form_advantage/2, 2)}", + "key_players_missing": self._get_missing_desc(player_pred), + "referee_notes": f"{referee_name}: {round(referee_pred.avg_yellow_cards, 1)} Yellow Cards/Avg", + "market_trend": "Market data analyzed" + } + + return FullMatchPrediction( + match_id=match_id, + home_team=home_team_name, + away_team=away_team_name, + + # Match Result (Using Reconciled Final MS) + ms_home_prob=final_ms.ms_home_prob, + ms_draw_prob=final_ms.ms_draw_prob, + ms_away_prob=final_ms.ms_away_prob, + ms_pick=final_ms.ms_pick, + ms_confidence=final_ms.ms_confidence, + + # Double Chance (Using Reconciled Final MS) + dc_1x_prob=final_ms.dc_1x_prob, + dc_x2_prob=final_ms.dc_x2_prob, + dc_12_prob=final_ms.dc_12_prob, + dc_pick=final_ms.dc_pick, + dc_confidence=final_ms.dc_confidence, + + # Over/Under + over_15_prob=ou_result.over_15_prob, + under_15_prob=ou_result.under_15_prob, + ou15_pick=ou_result.ou15_pick, + ou15_confidence=ou_result.ou15_confidence, + + over_25_prob=ou_result.over_25_prob, + under_25_prob=ou_result.under_25_prob, + ou25_pick=ou_result.ou25_pick, + ou25_confidence=ou_result.ou25_confidence, + + over_35_prob=ou_result.over_35_prob, + under_35_prob=ou_result.under_35_prob, + ou35_pick=ou_result.ou35_pick, + ou35_confidence=ou_result.ou35_confidence, + + # BTTS + 
btts_yes_prob=ou_result.btts_yes_prob, + btts_no_prob=ou_result.btts_no_prob, + btts_pick=ou_result.btts_pick, + btts_confidence=ou_result.btts_confidence, + + # Half Time + ht_home_prob=ht_result.ht_home_prob, + ht_draw_prob=ht_result.ht_draw_prob, + ht_away_prob=ht_result.ht_away_prob, + ht_pick=ht_result.ht_pick, + ht_confidence=ht_result.ht_confidence, + + # Score + score=score_result, + + # HT O/U + ht_over_05_prob=ht_result.ht_over_05_prob, + ht_under_05_prob=ht_result.ht_under_05_prob, + ht_over_15_prob=ht_result.ht_over_15_prob, + ht_under_15_prob=ht_result.ht_under_15_prob, + ht_ou_pick=ht_result.ht_ou_pick, + ht_ou15_pick=ht_result.ht_ou15_pick, + + # Scores (Reconciled check usually happens in ScoreCalc) + predicted_ft_score=score_result.predicted_ft_score, + predicted_ht_score=score_result.predicted_ht_score, + ft_scores_top5=score_result.ft_scores_top5, + + # xG + home_xg=home_xg, + away_xg=away_xg, + total_xg=home_xg + away_xg, + + # Others + total_corners_pred=other_result.total_corners_pred, + corner_pick=other_result.corner_pick, + total_cards_pred=other_result.total_cards_pred, + card_pick=other_result.card_pick, + cards_over_prob=other_result.cards_over_prob, + cards_under_prob=other_result.cards_under_prob, + cards_confidence=other_result.cards_confidence, + handicap_pick=other_result.handicap_pick, + handicap_home_prob=other_result.handicap_home_prob, + handicap_draw_prob=other_result.handicap_draw_prob, + handicap_away_prob=other_result.handicap_away_prob, + handicap_confidence=other_result.handicap_confidence, + odd_even_pick=other_result.odd_even_pick, + odd_prob=other_result.odd_prob, + even_prob=other_result.even_prob, + + # Risk + risk_level=risk_result.risk_level, + risk_score=risk_result.risk_score, + is_surprise_risk=risk_result.is_surprise_risk, + surprise_type=risk_result.surprise_type, + ht_ft_probs=ctx.xgboost_preds.get("ht_ft", {}), + analysis_details=analysis_details, + risk_warnings=risk_result.risk_warnings, + + # GLM-5 Sürpriz 
Skoru + upset_score=upset_factors_v2.upset_score, + upset_level=upset_factors_v2.upset_level, + upset_reasons=upset_factors_v2.reasoning, + + # Engines + team_confidence=team_pred.confidence, + player_confidence=player_pred.confidence, + odds_confidence=odds_pred.confidence, + referee_confidence=referee_pred.confidence, + + # Recs + best_bet=best_bet, + recommended_bets=recommended, + alternative_bet=alt_bet, + + # Expert Recommendation (New) + expert_recommendation=expert_data + ) + + def _get_missing_desc(self, player_pred) -> List[str]: + if not player_pred.lineup_available: + return ["Lineups not confirmed"] + + missing = [] + if player_pred.home_missing_impact > 0.1: + missing.append(f"Home missing impact: {int(player_pred.home_missing_impact*100)}%") + if player_pred.away_missing_impact > 0.1: + missing.append(f"Away missing impact: {int(player_pred.away_missing_impact*100)}%") + + return missing if missing else ["No significant missing players"] + + +# Singleton +_predictor: Optional[V20EnsemblePredictor] = None + + +def get_v20_predictor() -> V20EnsemblePredictor: + global _predictor + if _predictor is None: + _predictor = V20EnsemblePredictor() + return _predictor + + +if __name__ == "__main__": + predictor = get_v20_predictor() + + print("\\n🧪 V20 Ensemble Beast Test") + print("=" * 60) + + result = predictor.predict( + match_id="test_match", + home_team_id="test_home", + away_team_id="test_away", + home_team_name="Beşiktaş", + away_team_name="Galatasaray", + match_date_ms=1707393600000, + odds_data={ + "ms_h": 2.50, + "ms_d": 3.20, + "ms_a": 2.80, + "ou25_o": 1.85 + }, + home_position=3, + away_position=1, + league_name="Süper Lig" + ) + + print(json.dumps(result.to_dict(), indent=2, ensure_ascii=False)) diff --git a/ai-engine/models/v25_ensemble.py b/ai-engine/models/v25_ensemble.py new file mode 100644 index 0000000..968745a --- /dev/null +++ b/ai-engine/models/v25_ensemble.py @@ -0,0 +1,645 @@ +""" +V25 Ensemble Predictor - NO TARGET LEAKAGE 
+=========================================== +Multi-model ensemble for match prediction using XGBoost and LightGBM. + +Features: +- 73 engineered features (NO target leakage) +- Market-specific models (MS, OU25, BTTS) +- Weighted ensemble predictions +- Value bet detection +""" + +import os +import json +import numpy as np +import pandas as pd +from typing import Dict, List, Optional, Any +from dataclasses import dataclass, field + +import xgboost as xgb +import lightgbm as lgb + +# CatBoost is optional +try: + from catboost import CatBoostClassifier + CATBOOST_AVAILABLE = True +except ImportError: + CatBoostClassifier = None + CATBOOST_AVAILABLE = False + +# Paths +MODELS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'v25') + + +@dataclass +class MarketPrediction: + """Prediction for a single betting market.""" + market_type: str + pick: str + probability: float + confidence: float + odds: float = 0.0 + is_value_bet: bool = False + edge: float = 0.0 + + def to_dict(self) -> dict: + return { + 'market_type': self.market_type, + 'pick': self.pick, + 'probability': round(self.probability * 100, 1), + 'confidence': round(self.confidence, 1), + 'odds': self.odds, + 'is_value_bet': self.is_value_bet, + 'edge': round(self.edge * 100, 1), + } + + +@dataclass +class ValueBet: + """Detected value bet opportunity.""" + market_type: str + pick: str + probability: float + odds: float + edge: float + confidence: float + + def to_dict(self) -> dict: + return { + 'market_type': self.market_type, + 'pick': self.pick, + 'probability': round(self.probability * 100, 1), + 'odds': self.odds, + 'edge': round(self.edge * 100, 1), + 'confidence': round(self.confidence, 1), + } + + +@dataclass +class MatchPrediction: + """Complete match prediction with all markets.""" + match_id: str + home_team: str + away_team: str + + # MS predictions + home_prob: float = 0.0 + draw_prob: float = 0.0 + away_prob: float = 0.0 + ms_pick: str = '' + ms_confidence: float = 0.0 + + # OU25 
predictions + over_prob: float = 0.0 + under_prob: float = 0.0 + ou25_pick: str = '' + ou25_confidence: float = 0.0 + + # BTTS predictions + btts_yes_prob: float = 0.0 + btts_no_prob: float = 0.0 + btts_pick: str = '' + btts_confidence: float = 0.0 + + # Value bets + value_bets: List[ValueBet] = field(default_factory=list) + + def to_dict(self) -> dict: + return { + 'match_id': self.match_id, + 'home_team': self.home_team, + 'away_team': self.away_team, + 'ms': { + 'home_prob': round(self.home_prob * 100, 1), + 'draw_prob': round(self.draw_prob * 100, 1), + 'away_prob': round(self.away_prob * 100, 1), + 'pick': self.ms_pick, + 'confidence': round(self.ms_confidence, 1), + }, + 'ou25': { + 'over_prob': round(self.over_prob * 100, 1), + 'under_prob': round(self.under_prob * 100, 1), + 'pick': self.ou25_pick, + 'confidence': round(self.ou25_confidence, 1), + }, + 'btts': { + 'yes_prob': round(self.btts_yes_prob * 100, 1), + 'no_prob': round(self.btts_no_prob * 100, 1), + 'pick': self.btts_pick, + 'confidence': round(self.btts_confidence, 1), + }, + 'value_bets': [vb.to_dict() for vb in self.value_bets], + } + + +class V25Predictor: + """ + V25 Ensemble Predictor - NO TARGET LEAKAGE + + Uses market-specific XGBoost and LightGBM models. + Each market (MS, OU25, BTTS) has its own trained models. 
+ """ + + # Feature columns (82 features, NO target leakage) + FEATURE_COLS = [ + # ELO Features (8) + 'home_overall_elo', 'away_overall_elo', 'elo_diff', + 'home_home_elo', 'away_away_elo', + 'home_form_elo', 'away_form_elo', 'form_elo_diff', + + # Form Features (12) + 'home_goals_avg', 'home_conceded_avg', + 'away_goals_avg', 'away_conceded_avg', + 'home_clean_sheet_rate', 'away_clean_sheet_rate', + 'home_scoring_rate', 'away_scoring_rate', + 'home_winning_streak', 'away_winning_streak', + 'home_unbeaten_streak', 'away_unbeaten_streak', + + # H2H Features (6) + 'h2h_total_matches', 'h2h_home_win_rate', 'h2h_draw_rate', + 'h2h_avg_goals', 'h2h_btts_rate', 'h2h_over25_rate', + + # Team Stats Features (8) + 'home_avg_possession', 'away_avg_possession', + 'home_avg_shots_on_target', 'away_avg_shots_on_target', + 'home_shot_conversion', 'away_shot_conversion', + 'home_avg_corners', 'away_avg_corners', + + # Odds Features (24) + 'odds_ms_h', 'odds_ms_d', 'odds_ms_a', + 'implied_home', 'implied_draw', 'implied_away', + 'odds_ht_ms_h', 'odds_ht_ms_d', 'odds_ht_ms_a', + 'odds_ou05_o', 'odds_ou05_u', + 'odds_ou15_o', 'odds_ou15_u', + 'odds_ou25_o', 'odds_ou25_u', + 'odds_ou35_o', 'odds_ou35_u', + 'odds_ht_ou05_o', 'odds_ht_ou05_u', + 'odds_ht_ou15_o', 'odds_ht_ou15_u', + 'odds_btts_y', 'odds_btts_n', + + # League Features (4) + 'home_xga', 'away_xga', + 'league_avg_goals', 'league_zero_goal_rate', + + # Upset Engine (4) + 'upset_atmosphere', 'upset_motivation', 'upset_fatigue', 'upset_potential', + + # Referee Engine (5) + 'referee_home_bias', 'referee_avg_goals', 'referee_cards_total', + 'referee_avg_yellow', 'referee_experience', + + # Momentum Engine (3) + 'home_momentum_score', 'away_momentum_score', 'momentum_diff', + + # Squad Features (9) + 'home_squad_quality', 'away_squad_quality', 'squad_diff', + 'home_key_players', 'away_key_players', + 'home_missing_impact', 'away_missing_impact', + 'home_goals_form', 'away_goals_form', + ] + + # Model weights for ensemble + 
DEFAULT_WEIGHTS = { + 'xgb': 0.50, + 'lgb': 0.50, + } + + def __init__(self, models_dir: str = None): + """ + Initialize V25 Predictor. + + Args: + models_dir: Directory containing model files. Defaults to v25/ directory. + """ + self.models_dir = models_dir or MODELS_DIR + self.models = {} # market -> {'xgb': model, 'lgb': model} + self._loaded = False + + # All trained market models available in V25 + ALL_MARKETS = [ + 'ms', 'ou25', 'btts', # Core markets + 'ou15', 'ou35', # Additional OU lines + 'ht_result', 'ht_ou05', 'ht_ou15', # HT markets + 'htft', # HT/FT combo + 'cards_ou45', # Cards market + 'handicap_ms', # Handicap + 'odd_even', # Odd/Even goals + ] + + # Multi-class markets (output > 2 classes) + MULTICLASS_MARKETS = {'ms', 'ht_result', 'htft', 'handicap_ms'} + + def load_models(self) -> bool: + """Load all market-specific models from disk.""" + try: + loaded_count = 0 + + for market in self.ALL_MARKETS: + self.models[market] = {} + + # Load XGBoost (read content in Python to avoid non-ASCII path issues) + xgb_path = os.path.join(self.models_dir, f'xgb_v25_{market}.json') + if os.path.exists(xgb_path) and os.path.getsize(xgb_path) > 0: + with open(xgb_path, 'r', encoding='utf-8') as f: + xgb_content = f.read() + booster = xgb.Booster() + booster.load_model(bytearray(xgb_content, 'utf-8')) + self.models[market]['xgb'] = booster + loaded_count += 1 + + # Load LightGBM (read content in Python to avoid non-ASCII path issues) + lgb_path = os.path.join(self.models_dir, f'lgb_v25_{market}.txt') + if os.path.exists(lgb_path) and os.path.getsize(lgb_path) > 0: + with open(lgb_path, 'r', encoding='utf-8') as f: + model_str = f.read() + self.models[market]['lgb'] = lgb.Booster(model_str=model_str) + loaded_count += 1 + + # Remove empty entries + if not self.models[market]: + del self.models[market] + + print(f"[V25] Loaded {loaded_count} model files across {len(self.models)} markets: {list(self.models.keys())}") + self._loaded = loaded_count > 0 + return 
def predict_ms(self, features: Dict[str, float]) -> tuple:
    """
    Predict match result (1X2) with the weighted XGBoost/LightGBM ensemble.

    Both model outputs are normalised the same way: the first sample's row
    is taken from a 2-D result, a 1-D result is used as-is, and anything
    that is not a 3-element vector is ignored instead of crashing the
    ensemble or being indexed out of range.

    Args:
        features: Feature dictionary; passed through ``_prepare_features``.

    Returns:
        (home_prob, draw_prob, away_prob). Falls back to
        ``(0.33, 0.33, 0.33)`` when no usable model output is available.
    """
    self._ensure_loaded()

    X = self._prepare_features(features)
    ms_models = self.models.get('ms', {})
    # Kept identical to the historical fallback so callers see no change.
    fallback = (0.33, 0.33, 0.33)

    def _class_row(raw):
        """Return the first sample's 3-class vector, or None if malformed."""
        arr = np.asarray(raw, dtype=float)
        if arr.ndim == 2 and arr.shape[0] > 0:
            arr = arr[0]
        if arr.ndim != 1 or arr.shape[0] != 3:
            return None
        return arr

    weighted = []

    # XGBoost
    if 'xgb' in ms_models:
        row = _class_row(ms_models['xgb'].predict(xgb.DMatrix(X)))
        if row is not None:
            weighted.append(row * self.DEFAULT_WEIGHTS['xgb'])

    # LightGBM
    if 'lgb' in ms_models:
        row = _class_row(ms_models['lgb'].predict(X))
        if row is not None:
            weighted.append(row * self.DEFAULT_WEIGHTS['lgb'])

    if not weighted:
        return fallback

    ensemble = np.sum(weighted, axis=0)
    total = float(ensemble.sum())
    # Guard the renormalisation: a zero or non-finite sum would yield NaNs.
    if not np.isfinite(total) or total <= 0.0:
        return fallback
    ensemble = ensemble / total

    return float(ensemble[0]), float(ensemble[1]), float(ensemble[2])
def predict_market(self, market: str, features: Dict[str, float]) -> Optional[np.ndarray]:
    """
    Generic prediction for any loaded market.

    Args:
        market: Market key (e.g. 'ht_result', 'htft', 'cards_ou45').
        features: Feature dictionary.

    Returns:
        numpy array of probabilities, or ``None`` when the market has no
        loaded model or no model produced a usable output.
        For binary markets: [positive_prob]
        For multi-class markets: [class0_prob, class1_prob, ...]
    """
    self._ensure_loaded()

    if market not in self.models:
        return None

    X = self._prepare_features(features)
    is_multiclass = market in self.MULTICLASS_MARKETS
    market_models = self.models[market]

    def _to_vector(raw):
        """Reduce a raw model output to the first sample's probability vector."""
        if not isinstance(raw, np.ndarray) or raw.size == 0:
            return None
        if is_multiclass and raw.ndim == 2:
            return np.asarray(raw[0], dtype=float)
        if is_multiclass and raw.ndim == 1:
            return np.asarray(raw, dtype=float)
        return np.array([float(np.ravel(raw)[0])])

    # Each entry pairs a probability vector with the weight of the model
    # that produced it, so the two can never drift out of alignment.
    contributions = []

    # XGBoost
    if 'xgb' in market_models:
        vector = _to_vector(market_models['xgb'].predict(xgb.DMatrix(X)))
        if vector is not None:
            contributions.append((vector, self.DEFAULT_WEIGHTS['xgb']))

    # LightGBM
    if 'lgb' in market_models:
        vector = _to_vector(market_models['lgb'].predict(X))
        if vector is not None:
            contributions.append((vector, self.DEFAULT_WEIGHTS['lgb']))

    if not contributions:
        return None

    # Models that disagree on the number of classes cannot be averaged;
    # keep those matching the first output rather than raising.
    reference_shape = contributions[0][0].shape
    usable = [(vec, w) for vec, w in contributions if vec.shape == reference_shape]
    if len(usable) < len(contributions):
        print(f"[V25] {market}: ignoring model output with mismatched shape")

    if len(usable) == 1:
        return usable[0][0]

    total_w = sum(w for _, w in usable)
    if total_w <= 0:
        # Degenerate weights: fall back to a plain average.
        usable = [(vec, 1.0) for vec, _ in usable]
        total_w = float(len(usable))

    result = np.zeros(reference_shape, dtype=float)
    for vec, w in usable:
        result += vec * (w / total_w)

    # Normalize multi-class
    if is_multiclass and result.sum() > 0:
        result = result / result.sum()

    return result
def _detect_value_bets(
    self,
    prediction: MatchPrediction,
    odds: Dict[str, float],
    home_prob: float,
    draw_prob: float,
    away_prob: float,
    over_prob: float,
    under_prob: float,
    btts_yes_prob: float,
    btts_no_prob: float,
) -> List[ValueBet]:
    """
    Detect value bets based on model vs market odds.

    For each supported outcome the implied probability ``1 / odds`` is
    compared with the model probability; an outcome is reported when the
    model probability exceeds the implied one by more than 5 percentage
    points.

    Args:
        prediction: The prediction being evaluated (not read here; kept for
            interface compatibility).
        odds: Odds keyed by ``ms_h``, ``ms_d``, ``ms_a``, ``ou25_o``,
            ``ou25_u``, ``btts_y``, ``btts_n``. Missing, ``None`` or
            non-positive entries are skipped.
        home_prob, draw_prob, away_prob: 1X2 model probabilities.
        over_prob, under_prob: Over/Under 2.5 model probabilities.
        btts_yes_prob, btts_no_prob: BTTS model probabilities.

    Returns:
        List of ValueBet objects in the fixed order MS (1, X, 2),
        OU25 (Over, Under), BTTS (Yes, No).
    """
    min_edge = 0.05  # 5% minimum edge

    # (odds key, market type, pick label, model probability)
    candidates = (
        ('ms_h', 'MS', '1', home_prob),
        ('ms_d', 'MS', 'X', draw_prob),
        ('ms_a', 'MS', '2', away_prob),
        ('ou25_o', 'OU25', 'Over', over_prob),
        ('ou25_u', 'OU25', 'Under', under_prob),
        ('btts_y', 'BTTS', 'Yes', btts_yes_prob),
        ('btts_n', 'BTTS', 'No', btts_no_prob),
    )

    value_bets = []
    for odds_key, market_type, pick, probability in candidates:
        price = odds.get(odds_key)
        # A key present with a None value used to raise TypeError on `> 0`.
        if price is None or not price > 0:
            continue

        implied = 1 / price
        edge = probability - implied
        if edge > min_edge:
            value_bets.append(ValueBet(
                market_type=market_type,
                pick=pick,
                probability=probability,
                odds=price,
                edge=edge,
                confidence=probability * 100,
            ))

    return value_bets
V25Predictor: + """Get or create V25 predictor instance.""" + global _v25_predictor + if _v25_predictor is None: + _v25_predictor = V25Predictor() + _v25_predictor.load_models() + return _v25_predictor \ No newline at end of file diff --git a/ai-engine/models/v27_predictor.py b/ai-engine/models/v27_predictor.py new file mode 100644 index 0000000..462b7e3 --- /dev/null +++ b/ai-engine/models/v27_predictor.py @@ -0,0 +1,291 @@ +""" +V27 Pro Predictor — Odds-Free Fundamentals + Value Edge Detection + +This module loads V27 ensemble models (XGBoost, LightGBM, CatBoost) +and produces market-independent probability estimates. + +The key insight: V27 is trained WITHOUT odds features, so it produces +"true" probabilities unbiased by market pricing. The divergence between +V25 (odds-aware) and V27 (odds-free) predictions signals market mispricing. +""" + +import json +import logging +import os +import pickle +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import numpy as np + +logger = logging.getLogger(__name__) + +V27_DIR = Path(__file__).parent / "v27" + + +class V27Predictor: + """ + Loads V27 ensemble models and provides predictions using the + 82-feature odds-free vector. 
def load_models(self) -> bool:
    """
    Load the V27 feature-column spec and all available ensemble models.

    For every market in ``self.MARKETS`` and every model type (xgb, lgb, cb)
    the market-specific pickle ``v27_<market>_<type>.pkl`` is tried first.
    The generic files (``v27_xgboost.pkl`` etc.) are used as a fallback for
    the ``ms`` market only: loading them for another market would register
    a model trained for a different target under that market.

    Returns:
        True when at least one model was loaded (or models were already
        loaded), False when the feature-column file is missing/unreadable
        or no model could be loaded.
    """
    if self._loaded:
        return True

    # Feature columns
    cols_path = V27_DIR / "v27_feature_cols.json"
    if not cols_path.exists():
        logger.error("[V27] Feature columns file not found: %s", cols_path)
        return False

    try:
        with open(cols_path, "r", encoding="utf-8") as f:
            self.feature_cols = json.load(f)
        logger.info("[V27] Loaded %d feature columns", len(self.feature_cols))
    except Exception as e:
        logger.error("[V27] Failed to load feature columns: %s", e)
        return False

    # Load models per market
    model_types = {"xgb": "xgb", "lgb": "lgb", "cb": "cb"}
    generic_names = {
        "xgb": "v27_xgboost.pkl",
        "lgb": "v27_lightgbm.pkl",
        "cb": "v27_catboost.pkl",
    }

    for market in self.MARKETS:
        self.models[market] = {}
        for short, label in model_types.items():
            # Try market-specific file first: v27_ms_xgb.pkl
            path = V27_DIR / f"v27_{market}_{short}.pkl"
            if not path.exists() and market == "ms":
                # Generic files hold the MS models; never reuse them for
                # other markets.
                path = V27_DIR / generic_names[short]
            if not path.exists():
                logger.warning("[V27] Model file not found for %s/%s", market, short)
                continue

            try:
                # SECURITY: pickle executes arbitrary code on load. These
                # files must only ever be trusted, locally produced
                # training artifacts.
                with open(path, "rb") as f:
                    model = pickle.load(f)
                self.models[market][label] = model
                logger.info("[V27] ✓ Loaded %s/%s from %s", market, label, path.name)
            except Exception as e:
                logger.error("[V27] ✗ Failed to load %s/%s: %s", market, label, e)

    loaded_count = sum(len(v) for v in self.models.values())
    if loaded_count == 0:
        logger.error("[V27] No models loaded!")
        return False

    self._loaded = True
    logger.info("[V27] Total models loaded: %d across %d markets", loaded_count, len(self.models))
    return True
Build ordered feature array from the full feature dict. + V27 uses only its 82 features (odds-free subset). + """ + row = [] + for col in self.feature_cols: + row.append(float(features.get(col, 0.0))) + return np.array([row]) + + def _predict_with_model(self, model, X: np.ndarray, label: str, expected_classes: int) -> Optional[np.ndarray]: + """ + Predict probabilities from a model, handling both sklearn wrappers + (predict_proba) and raw Booster objects (predict). + + For raw XGBoost Boosters, DMatrix is created WITH feature_names + to match the training schema. + """ + import xgboost as xgb + import lightgbm as lgbm + import pandas as pd + + # 1. Try sklearn-style predict_proba first + if hasattr(model, 'predict_proba'): + try: + proba = model.predict_proba(X)[0] + if len(proba) == expected_classes: + return proba + logger.warning("[V27] %s predict_proba returned %d classes, expected %d", label, len(proba), expected_classes) + except Exception: + pass # Fall through to raw predict + + # 2. Raw xgboost.Booster — MUST pass feature_names + if isinstance(model, xgb.Booster): + try: + feature_names = self.feature_cols if self.feature_cols else None + dmat = xgb.DMatrix(X, feature_names=feature_names) + raw = model.predict(dmat) + if isinstance(raw, np.ndarray): + if raw.ndim == 2 and raw.shape[1] == expected_classes: + return raw[0] + elif raw.ndim == 1 and expected_classes == 2: + p = float(raw[0]) + return np.array([1.0 - p, p]) + elif raw.ndim == 1 and len(raw) == expected_classes: + return raw + except Exception as e: + logger.warning("[V27] %s xgb.Booster predict failed: %s", label, e) + return None + + # 3. 
Raw lightgbm.Booster — pass as DataFrame with column names + if isinstance(model, lgbm.Booster): + try: + if self.feature_cols: + X_named = pd.DataFrame(X, columns=self.feature_cols) + raw = model.predict(X_named) + else: + raw = model.predict(X) + if isinstance(raw, np.ndarray): + if raw.ndim == 2 and raw.shape[1] == expected_classes: + return raw[0] + elif raw.ndim == 1 and expected_classes == 2: + p = float(raw[0]) + return np.array([1.0 - p, p]) + elif raw.ndim == 1 and len(raw) == expected_classes: + return raw + except Exception as e: + logger.warning("[V27] %s lgb.Booster predict failed: %s", label, e) + return None + + # 4. Generic fallback (CatBoost, etc.) + try: + if hasattr(model, 'predict'): + raw = model.predict(X) + if isinstance(raw, np.ndarray): + if raw.ndim == 2 and raw.shape[1] == expected_classes: + return raw[0] + elif raw.ndim == 1 and expected_classes == 2: + p = float(raw[0]) + return np.array([1.0 - p, p]) + elif raw.ndim == 1 and len(raw) == expected_classes: + return raw + except Exception as e: + logger.warning("[V27] %s generic predict failed: %s", label, e) + + return None + + def predict_ms(self, features: Dict[str, float]) -> Optional[Dict[str, float]]: + """ + Predict Match Score probabilities (Home/Draw/Away). + Returns dict with keys: home, draw, away. + """ + if not self._loaded or "ms" not in self.models or not self.models["ms"]: + return None + + X = self._build_feature_array(features) + probs_list = [] + + for label, model in self.models["ms"].items(): + proba = self._predict_with_model(model, X, f"MS/{label}", expected_classes=3) + if proba is not None and len(proba) == 3: + probs_list.append(proba) + + if not probs_list: + return None + + # Ensemble average + avg = np.mean(probs_list, axis=0) + return { + "home": float(avg[0]), + "draw": float(avg[1]), + "away": float(avg[2]), + } + + def predict_ou25(self, features: Dict[str, float]) -> Optional[Dict[str, float]]: + """ + Predict Over/Under 2.5 probabilities. 
def compute_value_edge(
    v25_probs: Dict[str, float],
    v27_probs: Dict[str, float],
    odds: Dict[str, float],
) -> Dict[str, Dict]:
    """
    Detect value bets by combining V25/V27 divergence with odds.

    An outcome is flagged as value when both hold:
    1. The V27 probability exceeds the odds-implied probability by more
       than 0.05.
    2. The V27 probability exceeds the V25 probability by more than 0.02.

    Args:
        v25_probs: V25 probabilities per outcome; a missing or ``None``
            entry is treated as 0.33.
        v27_probs: V27 probabilities per outcome; its keys drive the result.
        odds: Decimal odds per outcome. Missing, ``None``, non-numeric or
            <= 1.01 odds give an implied probability of 0.0 and an edge of
            0.0, so the outcome is never flagged.

    Returns:
        Per outcome: ``{v27_prob, v25_prob, implied_prob, divergence, edge,
        is_value}``, with the numeric fields rounded to 4 decimals.
    """
    results = {}
    for key, v27_p in v27_probs.items():
        v25_raw = v25_probs.get(key)
        v25_p = 0.33 if v25_raw is None else v25_raw

        # Odds feeds may carry None or string values; treat anything
        # unusable as "no price" instead of raising.
        try:
            odds_val = float(odds.get(key) or 0.0)
        except (TypeError, ValueError):
            odds_val = 0.0

        implied_p = (1.0 / odds_val) if odds_val > 1.01 else 0.0
        divergence = v27_p - v25_p
        edge = v27_p - implied_p if implied_p > 0 else 0.0

        results[key] = {
            "v27_prob": round(v27_p, 4),
            "v25_prob": round(v25_p, 4),
            "implied_prob": round(implied_p, 4),
            "divergence": round(divergence, 4),
            "edge": round(edge, 4),
            "is_value": edge > 0.05 and divergence > 0.02,  # 5% edge + 2% divergence
        }

    return results
def judge(self, package: Dict[str, Any]) -> Dict[str, Any]:
    """
    Re-evaluate every pick in a prediction package and decide BET / WATCHLIST / NO_BET.

    The package is returned untouched when it has no ``v27_engine`` dict.
    Otherwise a shallow copy is made, every candidate row is scored by
    ``_judge_row`` and the copy's ``main_pick``, ``value_pick``,
    ``supporting_picks``, ``bet_summary``, ``bet_advice``, ``betting_brain``,
    ``upper_brain`` and ``analysis_details`` entries are rewritten.

    Nested dicts that are modified (``bet_advice``, ``analysis_details``)
    are copied first so the caller's package is never mutated.

    Args:
        package: Prediction package produced by the model layer.

    Returns:
        The original package (no V27 data) or the guarded copy.
    """
    v27_engine = package.get("v27_engine")
    if not isinstance(v27_engine, dict):
        return package

    guarded = dict(package)
    rows = self._collect_rows(guarded)
    if not rows:
        return guarded

    judged_rows: Dict[str, Dict[str, Any]] = {}
    decisions: List[Dict[str, Any]] = []
    for row in rows:
        key = self._row_key(row)
        judged = self._judge_row(dict(row), guarded)
        judged_rows[key] = judged
        decisions.append(judged["betting_brain"])

    approved = [
        row for row in judged_rows.values()
        if row.get("betting_brain", {}).get("action") == "BET"
    ]
    watchlist = [
        row for row in judged_rows.values()
        if row.get("betting_brain", {}).get("action") == "WATCH"
    ]
    approved.sort(key=self._candidate_sort_key, reverse=True)
    watchlist.sort(key=self._candidate_sort_key, reverse=True)

    original_main = guarded.get("main_pick") or {}
    main_pick = None
    decision = "NO_BET"
    decision_reason = "No candidate passed the betting brain evidence gates."

    if approved:
        # Best approved candidate becomes the main pick.
        main_pick = dict(approved[0])
        main_pick["is_guaranteed"] = bool(main_pick.get("betting_brain", {}).get("score", 0.0) >= 82.0)
        main_pick["pick_reason"] = "betting_brain_approved"
        decision = "BET"
        decision_reason = main_pick.get("betting_brain", {}).get("summary", "Evidence is aligned.")
    elif watchlist:
        # Nothing approved: surface the best watch candidate, but unplayable.
        main_pick = dict(watchlist[0])
        self._force_no_bet(main_pick, "betting_brain_watchlist")
        decision = "WATCHLIST"
        decision_reason = main_pick.get("betting_brain", {}).get("summary", "Interesting but not clean enough.")
    elif original_main:
        # Everything rejected: keep the original main pick for display only.
        main_pick = dict(judged_rows.get(self._row_key(original_main), original_main))
        self._force_no_bet(main_pick, "betting_brain_no_safe_pick")

    main_key = self._row_key(main_pick) if main_pick else ""
    supporting = [
        dict(row)
        for row in judged_rows.values()
        if self._row_key(row) != main_key
    ]
    supporting.sort(key=self._candidate_sort_key, reverse=True)

    bet_summary = [
        self._summary_item(row)
        for row in sorted(judged_rows.values(), key=self._candidate_sort_key, reverse=True)
    ]

    guarded["main_pick"] = main_pick
    guarded["value_pick"] = self._pick_value_candidate(judged_rows, main_key)
    guarded["supporting_picks"] = supporting[:6]
    guarded["bet_summary"] = bet_summary

    playable = decision == "BET" and bool(main_pick and main_pick.get("playable"))
    advice = dict(guarded.get("bet_advice") or {})
    advice["playable"] = playable
    # `or 0.0` keeps a present-but-None stake from raising in float().
    advice["suggested_stake_units"] = float(main_pick.get("stake_units", 0.0) or 0.0) if playable else 0.0
    advice["reason"] = "betting_brain_approved" if playable else "betting_brain_no_bet"
    advice["decision"] = decision
    advice["confidence_band"] = self._decision_band(main_pick)
    guarded["bet_advice"] = advice

    rejected = [d for d in decisions if d.get("action") == "REJECT"]
    guarded["betting_brain"] = {
        "version": "judge-v1",
        "decision": decision,
        "reason": decision_reason,
        "main_pick_key": main_key or None,
        "approved_count": len(approved),
        "watchlist_count": len(watchlist),
        "rejected_count": len(rejected),
        "top_candidates": self._top_decisions(decisions),
        "rules": {
            "min_bet_score": self.MIN_BET_SCORE,
            "min_watch_score": self.MIN_WATCH_SCORE,
            "min_band_sample": self.MIN_BAND_SAMPLE,
            "hard_divergence": self.HARD_DIVERGENCE,
            "soft_divergence": self.SOFT_DIVERGENCE,
            "extreme_model_probability": self.EXTREME_MODEL_PROB,
            "extreme_model_market_gap": self.EXTREME_GAP,
        },
    }
    guarded["upper_brain"] = guarded["betting_brain"]

    # `guarded` is only a shallow copy: copy the nested dict before writing
    # so the caller's package is not modified, and tolerate a None value.
    details = dict(guarded.get("analysis_details") or {})
    details["betting_brain_applied"] = True
    details["betting_brain_decision"] = decision
    guarded["analysis_details"] = details
    return guarded
row.get("playable"): + score += 18.0 + positives.append("base_model_playable") + else: + score -= 18.0 + issues.append("base_model_not_playable") + + score += max(0.0, min(20.0, calibrated_conf * 0.22)) + score += max(-8.0, min(16.0, ev_edge * 45.0)) + score += max(0.0, min(14.0, play_score * 0.12)) + score += self.MARKET_PRIORS.get(market, -3.0) + + data_quality = package.get("data_quality") or {} + quality_score = self._safe_float(data_quality.get("score"), 0.6) or 0.6 + score += max(-8.0, min(6.0, (quality_score - 0.55) * 16.0)) + risk = str((package.get("risk") or {}).get("level") or "MEDIUM").upper() + score += {"LOW": 5.0, "MEDIUM": 0.0, "HIGH": -12.0, "EXTREME": -22.0}.get(risk, -4.0) + + if odds < self.MIN_ODDS: + vetoes.append("odds_below_minimum") + if calibrated_conf < 38.0: + vetoes.append("calibrated_confidence_too_low") + if play_score < 50.0: + vetoes.append("play_score_too_low") + + if divergence is not None: + if divergence >= self.HARD_DIVERGENCE: + score -= 42.0 + vetoes.append("v25_v27_hard_disagreement") + elif divergence >= self.SOFT_DIVERGENCE: + score -= 18.0 + issues.append("v25_v27_soft_disagreement") + else: + score += 11.0 + positives.append("v25_v27_aligned") + + if isinstance(triple, dict): + if triple_is_value: + score += 18.0 + positives.append("triple_value_confirmed") + elif market in {"DC", "MS", "OU25", "BTTS"}: + score -= 18.0 + issues.append("triple_value_not_confirmed") + + if band_sample >= 25: + score += 8.0 + positives.append("strong_historical_sample") + elif band_sample >= self.MIN_BAND_SAMPLE: + score += 3.0 + positives.append("usable_historical_sample") + else: + score -= 16.0 + issues.append("historical_sample_too_low") + if market == "DC": + vetoes.append("dc_without_historical_sample") + elif market in {"MS", "DC", "OU25"}: + score -= 10.0 + issues.append("missing_triple_value_evidence") + + if consensus == "DISAGREE" and market in {"MS", "DC"}: + score -= 12.0 + issues.append("engine_consensus_disagree") + + if ( + 
model_prob is not None + and model_gap is not None + and model_prob >= self.EXTREME_MODEL_PROB + and model_gap >= self.EXTREME_GAP + and not triple_is_value + ): + score -= 24.0 + vetoes.append("extreme_probability_without_evidence") + + if market in {"HT", "HTFT", "OE"} and score < 86.0: + vetoes.append("volatile_market_requires_exceptional_evidence") + + score = max(0.0, min(100.0, score)) + action = "BET" + if vetoes: + action = "REJECT" + elif score < self.MIN_WATCH_SCORE: + action = "REJECT" + elif score < self.MIN_BET_SCORE: + action = "WATCH" + + row["betting_brain"] = { + "action": action, + "score": round(score, 1), + "summary": self._summary(action, market, pick, positives, issues, vetoes), + "positives": positives[:5], + "issues": issues[:6], + "vetoes": vetoes[:6], + "model_prob": round(model_prob, 4) if model_prob is not None else None, + "implied_prob": round(implied, 4), + "model_market_gap": round(model_gap, 4) if model_gap is not None else None, + "v27_prob": round(v27_prob, 4) if v27_prob is not None else None, + "divergence": round(divergence, 4) if divergence is not None else None, + "triple_key": triple_key, + "triple_value": triple, + } + + if action != "BET": + self._force_no_bet(row, f"betting_brain_{action.lower()}") + else: + row["is_guaranteed"] = bool(score >= 82.0) + row["pick_reason"] = "betting_brain_approved" + row["stake_units"] = self._brain_stake(row, score) + row["bet_grade"] = "A" if score >= 82.0 else "B" + row["playable"] = True + + self._append_reason(row, f"betting_brain_{action.lower()}_{round(score)}") + return row + + def _collect_rows(self, package: Dict[str, Any]) -> List[Dict[str, Any]]: + rows: Dict[str, Dict[str, Any]] = {} + for source in ("main_pick", "value_pick"): + item = package.get(source) + if isinstance(item, dict) and item.get("market"): + rows[self._row_key(item)] = dict(item) + + for source in ("supporting_picks", "bet_summary"): + for item in package.get(source) or []: + if isinstance(item, dict) and 
item.get("market"): + key = self._row_key(item) + rows[key] = self._merge_row(rows.get(key), item) + return list(rows.values()) + + @staticmethod + def _merge_row(existing: Optional[Dict[str, Any]], incoming: Dict[str, Any]) -> Dict[str, Any]: + if existing is None: + return dict(incoming) + merged = dict(incoming) + merged.update({k: v for k, v in existing.items() if v is not None}) + for key in ("decision_reasons", "reasons"): + reasons = list(existing.get(key) or []) + list(incoming.get(key) or []) + if reasons: + merged[key] = list(dict.fromkeys(reasons)) + return merged + + def _pick_value_candidate(self, rows: Dict[str, Dict[str, Any]], main_key: str) -> Optional[Dict[str, Any]]: + candidates = [ + row for key, row in rows.items() + if key != main_key + and row.get("betting_brain", {}).get("action") in {"BET", "WATCH"} + and (self._safe_float(row.get("odds"), 0.0) or 0.0) >= 1.60 + ] + candidates.sort(key=self._candidate_sort_key, reverse=True) + return dict(candidates[0]) if candidates else None + + def _summary_item(self, row: Dict[str, Any]) -> Dict[str, Any]: + reasons = list(row.get("decision_reasons") or row.get("reasons") or []) + return { + "market": row.get("market"), + "pick": row.get("pick"), + "raw_confidence": row.get("raw_confidence", row.get("confidence")), + "calibrated_confidence": row.get("calibrated_confidence", row.get("confidence")), + "bet_grade": row.get("bet_grade", "PASS"), + "playable": bool(row.get("playable")), + "stake_units": float(row.get("stake_units", 0.0) or 0.0), + "play_score": row.get("play_score", 0.0), + "ev_edge": row.get("ev_edge", row.get("edge", 0.0)), + "implied_prob": row.get("implied_prob", 0.0), + "odds_reliability": row.get("odds_reliability", 0.35), + "odds": row.get("odds", 0.0), + "reasons": reasons[:6], + "betting_brain": row.get("betting_brain"), + } + + @staticmethod + def _candidate_sort_key(row: Dict[str, Any]) -> Tuple[float, float, float]: + brain = row.get("betting_brain") or {} + action_boost = 
{"BET": 2.0, "WATCH": 1.0, "REJECT": 0.0}.get(str(brain.get("action")), 0.0) + return ( + action_boost, + float(brain.get("score", 0.0) or 0.0), + float(row.get("play_score", 0.0) or 0.0), + ) + + @staticmethod + def _row_key(row: Optional[Dict[str, Any]]) -> str: + if not isinstance(row, dict): + return "" + return f"{row.get('market')}:{row.get('pick')}" + + def _force_no_bet(self, row: Dict[str, Any], reason: str) -> None: + row["playable"] = False + row["stake_units"] = 0.0 + row["bet_grade"] = "PASS" + row["is_guaranteed"] = False + row["pick_reason"] = reason + if row.get("signal_tier") == "CORE": + row["signal_tier"] = "PASS" + self._append_reason(row, reason) + + @staticmethod + def _append_reason(row: Dict[str, Any], reason: str) -> None: + key = "decision_reasons" if "decision_reasons" in row else "reasons" + reasons = list(row.get(key) or []) + if reason not in reasons: + reasons.append(reason) + row[key] = reasons[:6] + + def _brain_stake(self, row: Dict[str, Any], score: float) -> float: + existing = self._safe_float(row.get("stake_units"), 0.0) or 0.0 + odds = self._safe_float(row.get("odds"), 0.0) or 0.0 + if odds <= 1.0: + return 0.0 + cap = 2.0 if score >= 82.0 else 1.2 + if score < 78.0: + cap = 0.8 + return round(max(0.25, min(existing if existing > 0 else cap, cap)), 1) + + @staticmethod + def _decision_band(main_pick: Optional[Dict[str, Any]]) -> str: + if not main_pick: + return "LOW" + score = float((main_pick.get("betting_brain") or {}).get("score", 0.0) or 0.0) + if score >= 82.0: + return "HIGH" + if score >= 72.0: + return "MEDIUM" + return "LOW" + + @staticmethod + def _top_decisions(decisions: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + ordered = sorted(decisions, key=lambda d: float(d.get("score", 0.0) or 0.0), reverse=True) + return [ + { + "action": item.get("action"), + "score": item.get("score"), + "summary": item.get("summary"), + "vetoes": item.get("vetoes", []), + "issues": item.get("issues", []), + } + for item in 
ordered[:5] + ] + + @staticmethod + def _summary(action: str, market: str, pick: str, positives: List[str], issues: List[str], vetoes: List[str]) -> str: + if action == "BET": + return f"{market} {pick} approved: evidence is aligned enough for a controlled stake." + if action == "WATCH": + return f"{market} {pick} is interesting but not clean enough for stake." + if vetoes: + return f"{market} {pick} rejected: {', '.join(vetoes[:3])}." + if issues: + return f"{market} {pick} rejected: {', '.join(issues[:3])}." + return f"{market} {pick} rejected by evidence score." + + def _market_probability(self, row: Dict[str, Any], package: Dict[str, Any]) -> Optional[float]: + direct = self._safe_float(row.get("probability")) + if direct is not None: + return direct + board = package.get("market_board") or {} + payload = board.get(str(row.get("market") or "")) if isinstance(board, dict) else None + probs = payload.get("probs") if isinstance(payload, dict) else None + if not isinstance(probs, dict): + return None + key = self._prob_key(str(row.get("market") or ""), str(row.get("pick") or "")) + return self._safe_float(probs.get(key)) if key else None + + def _v27_probability(self, market: str, pick: str, v27_engine: Dict[str, Any]) -> Optional[float]: + predictions = v27_engine.get("predictions") or {} + ms = predictions.get("ms") or {} + ou25 = predictions.get("ou25") or {} + if market == "MS": + return self._safe_float(ms.get({"1": "home", "X": "draw", "2": "away"}.get(pick, ""))) + if market == "DC": + home = self._safe_float(ms.get("home"), 0.0) or 0.0 + draw = self._safe_float(ms.get("draw"), 0.0) or 0.0 + away = self._safe_float(ms.get("away"), 0.0) or 0.0 + return {"1X": home + draw, "X2": draw + away, "12": home + away}.get(pick) + if market == "OU25": + key = self._prob_key(market, pick) + return self._safe_float(ou25.get(key)) if key else None + return None + + def _triple_value(self, package: Dict[str, Any], key: Optional[str]) -> Optional[Dict[str, Any]]: + if not 
key: + return None + value = ((package.get("v27_engine") or {}).get("triple_value") or {}).get(key) + return value if isinstance(value, dict) else None + + def _triple_key(self, market: str, pick: str) -> Optional[str]: + prob_key = self._prob_key(market, pick) + if market == "MS": + return {"1": "home", "2": "away"}.get(pick) + if market == "DC" and pick.upper() in {"1X", "X2", "12"}: + return f"dc_{pick.lower()}" + if market in {"OU15", "OU25", "OU35"} and prob_key == "over": + return f"{market.lower()}_over" + if market == "BTTS" and prob_key == "yes": + return "btts_yes" + if market == "HT": + return {"1": "ht_home", "2": "ht_away"}.get(pick) + if market in {"HT_OU05", "HT_OU15"} and prob_key == "over": + return f"{market.lower()}_over" + if market == "OE" and prob_key == "odd": + return "oe_odd" + if market == "CARDS" and prob_key == "over": + return "cards_over" + if market == "HTFT" and "/" in pick: + return f"htft_{pick.replace('/', '').lower()}" + return None + + @staticmethod + def _prob_key(market: str, pick: str) -> Optional[str]: + norm = str(pick or "").strip().casefold() + if market in {"MS", "HT", "HCAP"}: + return pick if pick in {"1", "X", "2"} else None + if market == "DC": + return pick.upper() if pick.upper() in {"1X", "X2", "12"} else None + if market in {"OU15", "OU25", "OU35", "HT_OU05", "HT_OU15", "CARDS"}: + if "over" in norm or "ust" in norm or "üst" in norm: + return "over" + if "under" in norm or "alt" in norm: + return "under" + if market == "BTTS": + if "yes" in norm or "var" in norm: + return "yes" + if "no" in norm or "yok" in norm: + return "no" + if market == "OE": + if "odd" in norm or "tek" in norm: + return "odd" + if "even" in norm or "cift" in norm or "çift" in norm: + return "even" + if market == "HTFT" and "/" in pick: + return pick + return None + + @staticmethod + def _safe_float(value: Any, default: Optional[float] = None) -> Optional[float]: + try: + return float(value) + except (TypeError, ValueError): + return default 
diff --git a/ai-engine/services/single_match_orchestrator.py b/ai-engine/services/single_match_orchestrator.py index 20d6d7f..3db1e44 100755 --- a/ai-engine/services/single_match_orchestrator.py +++ b/ai-engine/services/single_match_orchestrator.py @@ -30,12 +30,18 @@ from models.v20_ensemble import FullMatchPrediction from models.v25_ensemble import V25Predictor, get_v25_predictor from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge from features.odds_band_analyzer import OddsBandAnalyzer -from models.basketball_v25 import ( - BasketballMatchPrediction, - get_basketball_v25_predictor, -) +try: + from models.basketball_v25 import ( + BasketballMatchPrediction, + get_basketball_v25_predictor, + ) +except ImportError: + BasketballMatchPrediction = Any + def get_basketball_v25_predictor(): + raise ImportError("Basketball predictor is not available") from core.engines.player_predictor import PlayerPrediction, get_player_predictor from services.feature_enrichment import FeatureEnrichmentService +from services.betting_brain import BettingBrain from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine from utils.top_leagues import load_top_league_ids from utils.league_reliability import load_league_reliability @@ -69,6 +75,7 @@ class MatchData: substate: Optional[str] = None current_score_home: Optional[int] = None current_score_away: Optional[int] = None + lineup_confidence: float = 0.0 class SingleMatchOrchestrator: @@ -144,7 +151,7 @@ class SingleMatchOrchestrator: self.v26_shadow_engine: Optional[V26ShadowEngine] = None self.basketball_predictor: Optional[Any] = None self.dsn = get_clean_dsn() - self.engine_mode = str(os.getenv("AI_ENGINE_MODE", "v25")).strip().lower() + self.engine_mode = str(os.getenv("AI_ENGINE_MODE", "v28-pro-max")).strip().lower() self.top_league_ids = load_top_league_ids() self.league_reliability = load_league_reliability() self.enrichment = FeatureEnrichmentService() @@ -527,12 +534,18 @@ 
class SingleMatchOrchestrator: } def _get_squad_features(self, data: MatchData) -> Dict[str, float]: - """Non-fatal squad analysis. Returns zero-defaults on failure.""" + """Non-fatal squad analysis. Returns neutral-average defaults on failure. + + Design note (V32-fix): Previous 0.0 defaults caused the model to treat + missing lineups as 'both teams have zero quality', producing overly + conservative predictions (e.g. static 1.5 Under). Neutral averages let + the model fall back on stronger signals (odds, ELO, form, H2H). + """ defaults = { - 'home_squad_quality': 0.0, 'away_squad_quality': 0.0, 'squad_diff': 0.0, - 'home_key_players': 0.0, 'away_key_players': 0.0, + 'home_squad_quality': 0.50, 'away_squad_quality': 0.50, 'squad_diff': 0.0, + 'home_key_players': 3.0, 'away_key_players': 3.0, 'home_missing_impact': 0.0, 'away_missing_impact': 0.0, - 'home_goals_form': 0.0, 'away_goals_form': 0.0, + 'home_goals_form': 1.3, 'away_goals_form': 1.3, } try: engine = get_player_predictor() @@ -559,27 +572,186 @@ class SingleMatchOrchestrator: print(f"⚠️ Squad features failed: {e}") return defaults + # ── V25 internal key → _build_v25_prediction key mapping ── + _V25_KEY_MAP = { + "ms": "MS", + "ou15": "OU15", + "ou25": "OU25", + "ou35": "OU35", + "btts": "BTTS", + "ht_result": "HT", + "ht_ou05": "HT_OU05", + "ht_ou15": "HT_OU15", + "htft": "HTFT", + "cards_ou45": "CARDS", + "handicap_ms": "HCAP", + "odd_even": "OE", + } + def _get_v25_signal( self, data: MatchData, features: Optional[Dict[str, float]] = None, ) -> Dict[str, Any]: + """ + Get V25 ensemble predictions for all available markets. + Returns a dict keyed by UPPERCASE market name (MS, OU25, BTTS, etc.) + each with a 'probs' sub-dict that _prob_map can consume. + + CRITICAL: Keys MUST be uppercase to match _build_v25_prediction lookups. 
+ """ v25 = self._get_v25_predictor() feature_row = features or self._build_v25_features(data) - return v25.predict_market_bundle( - features=feature_row, - odds=self._sanitize_v25_odds(data.odds_data or {}), - ) + + signal: Dict[str, Any] = {} + + def _temperature_scale(probs_dict: Dict[str, float], temperature: float = 2.5) -> Dict[str, float]: + """ + Apply temperature scaling to soften overconfident model outputs. + + LightGBM often produces extreme probabilities (e.g., 0.999 / 0.001). + Temperature scaling converts to log-odds, divides by T, then re-normalizes. + T=1.0 → no change, T>1 → softer probabilities. + + Standard approach for post-hoc model calibration (Guo et al., 2017). + """ + import math + eps = 1e-7 # numerical stability + n = len(probs_dict) + + # Determine appropriate temperature based on market type + # Binary markets (2-class) tend to be more overconfident in LGB + if n <= 2: + T = max(temperature, 2.0) + elif n == 3: + T = max(temperature * 0.8, 1.5) # 3-way slightly less aggressive + else: + T = max(temperature * 0.6, 1.3) # 9-way (HTFT) already spread + + # Convert to log-odds and apply temperature + labels = list(probs_dict.keys()) + log_odds = [] + for label in labels: + p = max(eps, min(1.0 - eps, float(probs_dict[label]))) + log_odds.append(math.log(p) / T) + + # Softmax re-normalization + max_lo = max(log_odds) + exp_vals = [math.exp(lo - max_lo) for lo in log_odds] + total = sum(exp_vals) + + scaled = {} + for i, label in enumerate(labels): + scaled[label] = exp_vals[i] / total + + return scaled + + def _enrich_signal_entry(probs_dict: Dict[str, float]) -> Dict[str, Any]: + """Add pick, probability, confidence to a signal entry from its probs. + + Applies temperature scaling to convert overconfident LightGBM outputs + into realistic, calibrated probabilities. 
+ """ + # Apply temperature scaling to soften extreme probabilities + scaled_probs = _temperature_scale(probs_dict, temperature=2.5) + + best_label = max(scaled_probs, key=scaled_probs.get) + best_prob = float(scaled_probs[best_label]) + return { + "probs": scaled_probs, + "raw_probs": probs_dict, # keep originals for debugging + "pick": best_label, + "probability": best_prob, + "confidence": round(best_prob * 100.0, 1), + } + + # Core markets using dedicated methods + h, d, a = v25.predict_ms(feature_row) + signal["MS"] = _enrich_signal_entry({"1": h, "X": d, "2": a}) + print(f" [V25-SIGNAL] MS → H={h:.4f} D={d:.4f} A={a:.4f}") + + over25, under25 = v25.predict_ou25(feature_row) + signal["OU25"] = _enrich_signal_entry({"Over": over25, "Under": under25}) + print(f" [V25-SIGNAL] OU25 → O={over25:.4f} U={under25:.4f}") + + btts_y, btts_n = v25.predict_btts(feature_row) + signal["BTTS"] = _enrich_signal_entry({"Yes": btts_y, "No": btts_n}) + print(f" [V25-SIGNAL] BTTS → Y={btts_y:.4f} N={btts_n:.4f}") + + # Additional markets via generic predict_market + for model_key, label_map in [ + ("ou15", {"Over": 0, "Under": None}), + ("ou35", {"Over": 0, "Under": None}), + ("ht_result", {"1": 0, "X": 1, "2": 2}), + ("ht_ou05", {"Over": 0, "Under": None}), + ("ht_ou15", {"Over": 0, "Under": None}), + ("htft", None), + ("cards_ou45", {"Over": 0, "Under": None}), + ("handicap_ms", {"1": 0, "X": 1, "2": 2}), + ("odd_even", {"Odd": 0, "Even": None}), + ]: + out_key = self._V25_KEY_MAP.get(model_key, model_key.upper()) + if not v25.has_market(model_key): + continue + raw = v25.predict_market(model_key, feature_row) + if raw is None: + continue + + if label_map is None: + # HTFT — 9 combinations + htft_labels = ["1/1", "1/X", "1/2", "X/1", "X/X", "X/2", "2/1", "2/X", "2/2"] + probs_dict = {} + for i, label in enumerate(htft_labels): + probs_dict[label] = float(raw[i]) if i < len(raw) else 0.0 + signal[out_key] = _enrich_signal_entry(probs_dict) + elif len(label_map) == 2: + # Binary 
market + labels = list(label_map.keys()) + p = float(raw[0]) if len(raw) >= 1 else None + if p is None: + print(f" [V25-SIGNAL] {out_key} → EMPTY raw output, skipped") + continue + signal[out_key] = _enrich_signal_entry({labels[0]: p, labels[1]: 1.0 - p}) + elif len(label_map) == 3: + # 3-class market + labels = list(label_map.keys()) + probs_dict = {} + for i, label in enumerate(labels): + if i >= len(raw): + print(f" [V25-SIGNAL] {out_key} → insufficient probabilities in raw output") + break + probs_dict[label] = float(raw[i]) + else: + signal[out_key] = _enrich_signal_entry(probs_dict) + + if out_key in signal: + print(f" [V25-SIGNAL] {out_key} → {signal[out_key]['probs']}") + + print(f" [V25-SIGNAL] Total markets with real predictions: {len(signal)}") + if not signal: + raise RuntimeError("V25 model produced ZERO market predictions — cannot continue") + + return signal @staticmethod def _prob_map(signal: Optional[Dict[str, Any]], market: str, defaults: Dict[str, float]) -> Dict[str, float]: + """Extract normalised probabilities from signal. + + If the signal contains real model output for this market, use it. + If the market is missing from the signal, log a warning and return + the defaults as a LAST RESORT (so the pipeline doesn't crash). + The defaults are ONLY used for non-core / secondary markets that + may not have a trained model yet (e.g. CARDS, HCAP, OE). 
+ """ market_payload = signal.get(market, {}) if isinstance(signal, dict) else {} probs = market_payload.get("probs", {}) if isinstance(market_payload, dict) else {} if not isinstance(probs, dict) or not probs: + print(f" ⚠️ [PROB_MAP] Market '{market}' NOT found in V25 signal — model output missing") return dict(defaults) out = {key: float(probs.get(key, value)) for key, value in defaults.items()} total = sum(out.values()) if total <= 0: + print(f" ⚠️ [PROB_MAP] Market '{market}' has zero total probability") return dict(defaults) return {key: value / total for key, value in out.items()} @@ -730,7 +902,8 @@ class SingleMatchOrchestrator: prediction.cards_confidence, prediction.handicap_confidence, ) - lineup_penalty = 12.0 if data.lineup_source == "none" else 7.0 if data.lineup_source == "probable_xi" else 0.0 + lineup_conf = max(0.0, min(1.0, float(getattr(data, "lineup_confidence", 0.0) or 0.0))) + lineup_penalty = 12.0 if data.lineup_source == "none" else max(1.5, (1.0 - lineup_conf) * 8.0) if data.lineup_source == "probable_xi" else 0.0 referee_penalty = 6.0 if not data.referee_name else 0.0 parity_penalty = 8.0 if abs(ms_edge) < 0.08 else 0.0 prediction.risk_score = round(min(100.0, max(10.0, 100.0 - max_market_conf + lineup_penalty + referee_penalty + parity_penalty)), 1) @@ -747,6 +920,8 @@ class SingleMatchOrchestrator: prediction.risk_warnings = [] if data.lineup_source == "probable_xi": prediction.risk_warnings.append("lineup_probable_not_confirmed") + if lineup_conf < 0.65: + prediction.risk_warnings.append("lineup_projection_low_confidence") if data.lineup_source == "none": prediction.risk_warnings.append("lineup_unavailable") if not data.referee_name: @@ -1142,7 +1317,9 @@ class SingleMatchOrchestrator: if band_val.get("is_value"): boost = min(8.0, boost + 3.0) # Triple confirmation extra boost prediction.ms_confidence = min(95.0, prediction.ms_confidence + boost) - base_package["prediction"]["ms_confidence"] = prediction.ms_confidence + market_board = 
base_package.get("market_board") + if isinstance(market_board, dict) and isinstance(market_board.get("MS"), dict): + market_board["MS"]["confidence"] = round(float(prediction.ms_confidence), 1) base_package["v27_engine"]["consensus"] = "AGREE" else: base_package["v27_engine"]["consensus"] = "DISAGREE" @@ -1157,8 +1334,10 @@ class SingleMatchOrchestrator: base_package.setdefault("analysis_details", {}) base_package["analysis_details"]["v27_loaded"] = False - mode = str(getattr(self, "engine_mode", "v25") or "v25").lower() - if mode not in {"v25", "v26", "dual"}: + base_package = self._apply_upper_brain_guards(base_package) + + mode = str(getattr(self, "engine_mode", "v28-pro-max") or "v28-pro-max").lower() + if mode not in {"v25", "v26", "dual", "v28", "v28-pro-max"}: mode = "v25" quality = base_package.get("data_quality", self._compute_data_quality(data)) @@ -1185,6 +1364,304 @@ class SingleMatchOrchestrator: return merged return base_package + def _apply_upper_brain_guards(self, package: Dict[str, Any]) -> Dict[str, Any]: + return BettingBrain().judge(package) + + v27_engine = package.get("v27_engine") + if not isinstance(v27_engine, dict) or not v27_engine.get("triple_value"): + return package + + guarded = dict(package) + vetoed_keys = set() + guarded_keys = set() + + def mark_guard(item: Dict[str, Any]) -> Dict[str, Any]: + if not isinstance(item, dict): + return item + + out = dict(item) + assessment = self._upper_brain_assessment(out, guarded) + if not assessment.get("applies"): + return out + + key = f"{out.get('market')}:{out.get('pick')}" + guarded_keys.add(key) + out["upper_brain"] = assessment + + reason_key = "decision_reasons" if "decision_reasons" in out else "reasons" + reasons = list(out.get(reason_key) or []) + for reason in assessment.get("reason_codes", []): + if reason not in reasons: + reasons.append(reason) + out[reason_key] = reasons[:6] + + if assessment.get("veto"): + vetoed_keys.add(key) + out["playable"] = False + out["stake_units"] = 0.0 
+ out["bet_grade"] = "PASS" + out["is_guaranteed"] = False + out["pick_reason"] = "upper_brain_veto" + if "signal_tier" in out: + out["signal_tier"] = "PASS" + elif assessment.get("downgrade"): + out["is_guaranteed"] = False + if out.get("signal_tier") == "CORE": + out["signal_tier"] = "LEAN" + if out.get("pick_reason") == "high_accuracy_market": + out["pick_reason"] = "upper_brain_downgraded" + + return out + + main_pick = mark_guard(guarded.get("main_pick") or {}) + value_pick = mark_guard(guarded.get("value_pick") or {}) if guarded.get("value_pick") else None + supporting = [ + mark_guard(row) + for row in list(guarded.get("supporting_picks") or []) + if isinstance(row, dict) + ] + bet_summary = [ + mark_guard(row) + for row in list(guarded.get("bet_summary") or []) + if isinstance(row, dict) + ] + + main_safe = bool(main_pick and main_pick.get("playable") and not main_pick.get("upper_brain", {}).get("veto")) + if not main_safe: + candidates = [ + row for row in supporting + if row.get("playable") + and not row.get("upper_brain", {}).get("veto") + and float(row.get("odds", 0.0) or 0.0) >= 1.30 + ] + candidates.sort(key=lambda row: float(row.get("play_score", 0.0) or 0.0), reverse=True) + if candidates: + main_pick = dict(candidates[0]) + main_pick["is_guaranteed"] = False + main_pick["pick_reason"] = "upper_brain_reselected" + reasons = list(main_pick.get("decision_reasons") or []) + if "upper_brain_reselected_after_veto" not in reasons: + reasons.append("upper_brain_reselected_after_veto") + main_pick["decision_reasons"] = reasons[:6] + elif main_pick: + main_pick["is_guaranteed"] = False + main_pick["pick_reason"] = "upper_brain_no_safe_pick" + + if main_pick: + supporting = [ + row for row in supporting + if not ( + row.get("market") == main_pick.get("market") + and row.get("pick") == main_pick.get("pick") + ) + ][:6] + + guarded["main_pick"] = main_pick if main_pick else None + guarded["value_pick"] = value_pick + guarded["supporting_picks"] = supporting + 
guarded["bet_summary"] = bet_summary + + playable = bool(main_pick and main_pick.get("playable") and not main_pick.get("upper_brain", {}).get("veto")) + advice = dict(guarded.get("bet_advice") or {}) + advice["playable"] = playable + advice["suggested_stake_units"] = float(main_pick.get("stake_units", 0.0)) if playable else 0.0 + if playable: + advice["reason"] = "playable_pick_found" + elif vetoed_keys: + advice["reason"] = "upper_brain_no_safe_pick" + else: + advice["reason"] = "no_bet_conditions_met" + guarded["bet_advice"] = advice + + guarded["upper_brain"] = { + "applied": True, + "guarded_count": len(guarded_keys), + "vetoed_count": len(vetoed_keys), + "vetoed": sorted(vetoed_keys)[:8], + "rules": { + "min_band_sample": 8, + "max_v25_v27_divergence": 0.18, + "dc_requires_triple_value": True, + }, + } + guarded.setdefault("analysis_details", {}) + guarded["analysis_details"]["upper_brain_guards_applied"] = True + guarded["analysis_details"]["upper_brain_vetoed_count"] = len(vetoed_keys) + return guarded + + def _upper_brain_assessment( + self, + item: Dict[str, Any], + package: Dict[str, Any], + ) -> Dict[str, Any]: + market = str(item.get("market") or "") + pick = str(item.get("pick") or "") + if not market or not pick: + return {"applies": False} + + v27_engine = package.get("v27_engine") or {} + triple_value = v27_engine.get("triple_value") or {} + model_prob = self._upper_brain_market_probability(item, package) + v27_prob = self._upper_brain_v27_probability(market, pick, v27_engine) + triple_key = self._upper_brain_triple_key(market, pick) + triple = triple_value.get(triple_key) if triple_key else None + + veto = False + downgrade = False + reasons: List[str] = [] + divergence = None + + if model_prob is not None and v27_prob is not None: + divergence = abs(float(model_prob) - float(v27_prob)) + if divergence >= 0.18: + veto = True + reasons.append("upper_brain_v25_v27_divergence") + elif divergence >= 0.12: + downgrade = True + 
reasons.append("upper_brain_v25_v27_warning") + + if isinstance(triple, dict): + band_sample = int(float(triple.get("band_sample", 0) or 0)) + is_value = bool(triple.get("is_value")) + if market == "DC": + if band_sample < 8: + veto = True + reasons.append("upper_brain_band_sample_too_low") + elif not is_value: + veto = True + reasons.append("upper_brain_triple_value_rejected") + elif market in {"MS", "OU25"} and band_sample > 0 and band_sample < 8: + downgrade = True + reasons.append("upper_brain_band_sample_thin") + elif market in {"OU15", "HT_OU05"} and band_sample < 8: + downgrade = True + reasons.append("upper_brain_band_sample_thin") + + consensus = str(v27_engine.get("consensus") or "").upper() + if consensus == "DISAGREE" and market in {"MS", "DC"} and not veto: + downgrade = True + reasons.append("upper_brain_consensus_disagree") + + applies = bool(reasons or triple is not None or v27_prob is not None) + return { + "applies": applies, + "veto": veto, + "downgrade": downgrade, + "reason_codes": reasons, + "model_prob": round(float(model_prob), 4) if model_prob is not None else None, + "v27_prob": round(float(v27_prob), 4) if v27_prob is not None else None, + "divergence": round(float(divergence), 4) if divergence is not None else None, + "triple_key": triple_key, + "triple_value": triple, + } + + def _upper_brain_market_probability( + self, + item: Dict[str, Any], + package: Dict[str, Any], + ) -> Optional[float]: + raw_prob = item.get("probability") + if raw_prob is not None: + try: + return float(raw_prob) + except (TypeError, ValueError): + pass + + market = str(item.get("market") or "") + pick = str(item.get("pick") or "") + board = package.get("market_board") or {} + payload = board.get(market) if isinstance(board, dict) else None + probs = payload.get("probs") if isinstance(payload, dict) else None + if not isinstance(probs, dict): + return None + + prob_key = self._upper_brain_prob_key(market, pick) + if prob_key is None: + return None + try: + 
return float(probs.get(prob_key)) + except (TypeError, ValueError): + return None + + def _upper_brain_v27_probability( + self, + market: str, + pick: str, + v27_engine: Dict[str, Any], + ) -> Optional[float]: + predictions = v27_engine.get("predictions") or {} + ms = predictions.get("ms") or {} + ou25 = predictions.get("ou25") or {} + + if market == "MS": + return self._safe_float(ms.get({"1": "home", "X": "draw", "2": "away"}.get(pick, ""))) + if market == "DC": + if pick == "1X": + return self._safe_float(ms.get("home"), 0.0) + self._safe_float(ms.get("draw"), 0.0) + if pick == "X2": + return self._safe_float(ms.get("draw"), 0.0) + self._safe_float(ms.get("away"), 0.0) + if pick == "12": + return self._safe_float(ms.get("home"), 0.0) + self._safe_float(ms.get("away"), 0.0) + if market == "OU25": + prob_key = self._upper_brain_prob_key(market, pick) + return self._safe_float(ou25.get(prob_key)) if prob_key else None + return None + + @staticmethod + def _upper_brain_prob_key(market: str, pick: str) -> Optional[str]: + pick_norm = str(pick or "").strip().casefold() + if market in {"MS", "HT", "HCAP"}: + return pick if pick in {"1", "X", "2"} else None + if market == "DC": + return pick.upper() if pick.upper() in {"1X", "X2", "12"} else None + if market in {"OU15", "OU25", "OU35", "HT_OU05", "HT_OU15", "CARDS"}: + if "over" in pick_norm or "st" in pick_norm: + return "over" + if "under" in pick_norm or "alt" in pick_norm: + return "under" + if market == "BTTS": + if "yes" in pick_norm or "var" in pick_norm: + return "yes" + if "no" in pick_norm or "yok" in pick_norm: + return "no" + if market == "OE": + if "odd" in pick_norm or "tek" in pick_norm: + return "odd" + if "even" in pick_norm or "ift" in pick_norm: + return "even" + if market == "HTFT" and "/" in pick: + return pick + return None + + def _upper_brain_triple_key(self, market: str, pick: str) -> Optional[str]: + prob_key = self._upper_brain_prob_key(market, pick) + if market == "MS": + return {"1": "home", 
"2": "away"}.get(pick) + if market == "DC": + return f"dc_{pick.lower()}" if pick.upper() in {"1X", "X2", "12"} else None + if market in {"OU15", "OU25", "OU35"} and prob_key == "over": + return f"{market.lower()}_over" + if market == "BTTS" and prob_key == "yes": + return "btts_yes" + if market == "HT": + return {"1": "ht_home", "2": "ht_away"}.get(pick) + if market in {"HT_OU05", "HT_OU15"} and prob_key == "over": + return f"{market.lower()}_over" + if market == "OE" and prob_key == "odd": + return "oe_odd" + if market == "CARDS" and prob_key == "over": + return "cards_over" + if market == "HTFT" and "/" in pick: + return f"htft_{pick.replace('/', '').lower()}" + return None + + @staticmethod + def _safe_float(value: Any, default: Optional[float] = None) -> Optional[float]: + try: + return float(value) + except (TypeError, ValueError): + return default + def analyze_match_htms(self, match_id: str) -> Optional[Dict[str, Any]]: """ HT/MS focused response for upset-hunting workflows. @@ -2104,7 +2581,7 @@ class SingleMatchOrchestrator: return None odds_data = self._extract_odds(cur, row) - home_lineup, away_lineup, lineup_source = self._extract_lineups(cur, row) + home_lineup, away_lineup, lineup_source, lineup_confidence = self._extract_lineups(cur, row) sidelined = self._parse_json_dict(row.get("sidelined")) match_date_ms = int(row.get("match_date_ms") or 0) league_id = str(row.get("league_id")) if row.get("league_id") else None @@ -2159,6 +2636,7 @@ class SingleMatchOrchestrator: status=str(row.get("status") or ""), state=row.get("state"), substate=row.get("substate"), + lineup_confidence=lineup_confidence, current_score_home=( int(row.get("score_home")) if row.get("score_home") is not None @@ -2291,48 +2769,78 @@ class SingleMatchOrchestrator: self, cur: RealDictCursor, row: Dict[str, Any], - ) -> Tuple[Optional[List[str]], Optional[List[str]], str]: + ) -> Tuple[Optional[List[str]], Optional[List[str]], str, float]: live_lineups = row.get("lineups") - home, 
away = self._parse_lineups_json(live_lineups) - if (home and len(home) >= 9) and (away and len(away) >= 9): - return home, away, "confirmed_live" - - # fallback 1: current match participation table - cur.execute( - """ - SELECT team_id, player_id - FROM match_player_participation - WHERE match_id = %s - AND is_starting = true - """, - (row["match_id"],), + status_upper = str(row.get("status") or "").upper() + state_upper = str(row.get("state") or "").upper() + substate_upper = str(row.get("substate") or "").upper() + can_trust_feed_lineups = ( + status_upper in {"LIVE", "1H", "2H", "HT", "FT", "FINISHED"} + or state_upper in {"LIVE", "FIRSTHALF", "SECONDHALF", "POSTGAME", "POST_GAME"} + or substate_upper in {"LIVE", "FIRSTHALF", "SECONDHALF"} ) + home, away = self._parse_lineups_json(live_lineups) if can_trust_feed_lineups else (None, None) + if (home and len(home) >= 9) and (away and len(away) >= 9): + return home, away, "confirmed_live", 1.0 + home_id = str(row["home_team_id"]) away_id = str(row["away_team_id"]) - rows = cur.fetchall() - if rows: - home_players = [str(r["player_id"]) for r in rows if str(r["team_id"]) == home_id] - away_players = [str(r["player_id"]) for r in rows if str(r["team_id"]) == away_id] - if not home and home_players: - home = home_players - if not away and away_players: - away = away_players - if (home and len(home) >= 9) and (away and len(away) >= 9): - return home, away, "confirmed_participation" + + # fallback 1: current match participation table. + # Trust this only for live/finished matches; pre-match rows can be stale feed snapshots. 
+ if can_trust_feed_lineups: + cur.execute( + """ + SELECT team_id, player_id + FROM match_player_participation + WHERE match_id = %s + AND is_starting = true + """, + (row["match_id"],), + ) + rows = cur.fetchall() + if rows: + home_players = [str(r["player_id"]) for r in rows if str(r["team_id"]) == home_id] + away_players = [str(r["player_id"]) for r in rows if str(r["team_id"]) == away_id] + if not home and home_players: + home = home_players + if not away and away_players: + away = away_players + if (home and len(home) >= 9) and (away and len(away) >= 9): + return home, away, "confirmed_participation", 0.98 # fallback 2: probable XI from historical starts before match date before_date_ms = int(row.get("match_date_ms") or 0) + sidelined = self._parse_json_dict(row.get("sidelined")) or {} + home_excluded = self._sidelined_player_ids(sidelined.get("homeTeam")) + away_excluded = self._sidelined_player_ids(sidelined.get("awayTeam")) used_probable = False - if not home: - home = self._build_probable_xi(cur, home_id, before_date_ms) + home_conf = 0.0 + away_conf = 0.0 + if not home or len(home) < 9: + home, home_conf = self._build_probable_xi( + cur, + home_id, + before_date_ms, + excluded_player_ids=home_excluded, + ) used_probable = used_probable or bool(home) - if not away: - away = self._build_probable_xi(cur, away_id, before_date_ms) + if not away or len(away) < 9: + away, away_conf = self._build_probable_xi( + cur, + away_id, + before_date_ms, + excluded_player_ids=away_excluded, + ) used_probable = used_probable or bool(away) if used_probable: - return home, away, "probable_xi" - return home, away, "none" + inferred_conf = min( + home_conf if home else 0.0, + away_conf if away else 0.0, + ) + return home, away, "probable_xi", inferred_conf + return home, away, "none", 0.0 def _calculate_team_form( self, @@ -2445,35 +2953,172 @@ class SingleMatchOrchestrator: cur: RealDictCursor, team_id: str, before_date_ms: int, - max_days: int = 30, - ) -> 
Optional[List[str]]: + match_limit: int = 5, + lookback_days: int = 370, + max_staleness_days: int = 120, + excluded_player_ids: Optional[Set[str]] = None, + ) -> Tuple[Optional[List[str]], float]: if not team_id: - return None + return None, 0.0 + min_date_ms = max(0, before_date_ms - (lookback_days * 24 * 60 * 60 * 1000)) - min_date_ms = max(0, before_date_ms - (max_days * 24 * 60 * 60 * 1000)) cur.execute( """ SELECT mpp.player_id, - COUNT(*) AS starts, - MAX(m.mst_utc) AS last_start_ms + m.id AS match_id, + m.mst_utc, + m.home_team_id, + m.away_team_id FROM match_player_participation mpp JOIN matches m ON m.id = mpp.match_id WHERE mpp.team_id = %s AND mpp.is_starting = true - AND m.status = 'FT' - AND m.mst_utc < %s - AND m.mst_utc >= %s - GROUP BY mpp.player_id - ORDER BY starts DESC, last_start_ms DESC - LIMIT 11 + AND NOT EXISTS ( + SELECT 1 + FROM match_player_participation later_mpp + JOIN matches later_m ON later_m.id = later_mpp.match_id + WHERE later_mpp.player_id = mpp.player_id + AND later_mpp.team_id <> %s + AND later_m.mst_utc > m.mst_utc + AND later_m.mst_utc < %s + AND ( + later_m.status = 'FT' + OR later_m.state = 'postGame' + OR (later_m.score_home IS NOT NULL AND later_m.score_away IS NOT NULL) + ) + ) + AND m.id IN ( + SELECT m2.id + FROM matches m2 + JOIN match_player_participation recent_mpp + ON recent_mpp.match_id = m2.id + AND recent_mpp.team_id = %s + AND recent_mpp.is_starting = true + WHERE (m2.home_team_id = %s OR m2.away_team_id = %s) + AND ( + m2.status = 'FT' + OR m2.state = 'postGame' + OR (m2.score_home IS NOT NULL AND m2.score_away IS NOT NULL) + ) + AND m2.mst_utc < %s + AND m2.mst_utc >= %s + GROUP BY m2.id + HAVING COUNT(recent_mpp.*) >= 9 + ORDER BY MAX(m2.mst_utc) DESC + LIMIT %s + ) + ORDER BY m.mst_utc DESC """, - (team_id, before_date_ms, min_date_ms), + ( + team_id, + team_id, + before_date_ms, + team_id, + team_id, + team_id, + before_date_ms, + min_date_ms, + match_limit, + ), ) rows = cur.fetchall() if not rows: - 
return None - return [str(r["player_id"]) for r in rows] + return None, 0.0 + + latest_mst = max(int(row.get("mst_utc") or 0) for row in rows) + age_days = (before_date_ms - latest_mst) / (24 * 60 * 60 * 1000) + stale_projection = age_days > max_staleness_days + + excluded = {str(pid) for pid in (excluded_player_ids or set()) if pid} + match_order: Dict[str, int] = {} + for row in rows: + match_id = str(row["match_id"]) + if match_id not in match_order: + match_order[match_id] = len(match_order) + + player_scores: Dict[str, Dict[str, float]] = {} + for row in rows: + player_id = str(row["player_id"]) + if player_id in excluded: + continue + + idx = match_order.get(str(row["match_id"]), match_limit) + recency_weight = max(1.0, float(match_limit - idx)) + score = recency_weight + if idx == 0: + score += 3.0 + elif idx == 1: + score += 1.5 + + stats = player_scores.setdefault( + player_id, + { + "score": 0.0, + "starts": 0.0, + "last_seen_rank": float(idx), + }, + ) + stats["score"] += score + stats["starts"] += 1.0 + stats["last_seen_rank"] = min(stats["last_seen_rank"], float(idx)) + + if not player_scores: + return None, 0.0 + + ranked = sorted( + player_scores.items(), + key=lambda item: ( + item[1]["score"], + item[1]["starts"], + -item[1]["last_seen_rank"], + ), + reverse=True, + ) + lineup = [player_id for player_id, _ in ranked[:11]] + + coverage = min(1.0, len(lineup) / 11.0) + available_matches = max(1, len(match_order)) + history_score = min(1.0, available_matches / float(match_limit)) + core_stability = 0.0 + if ranked: + stable_core = sum(1 for _, stats in ranked[:11] if stats["starts"] >= 2.0) + core_stability = stable_core / 11.0 + + staleness_factor = max( + 0.35, + min(1.0, float(max_staleness_days) / max(age_days, 1.0)), + ) + confidence = ( + (coverage * 0.45) + (history_score * 0.25) + (core_stability * 0.30) + ) * staleness_factor + if excluded: + confidence *= 0.92 + + confidence_cap = 0.58 if stale_projection else 0.88 + return lineup or None, 
round(max(0.0, min(confidence_cap, confidence)), 3) + + @staticmethod + def _sidelined_player_ids(team_data: Any) -> Set[str]: + if not isinstance(team_data, dict): + return set() + players = team_data.get("players") + if not isinstance(players, list): + return set() + + ids: Set[str] = set() + for player in players: + if not isinstance(player, dict): + continue + player_id = ( + player.get("playerId") + or player.get("player_id") + or player.get("id") + or player.get("personId") + ) + if player_id: + ids.add(str(player_id)) + return ids def _parse_odds_json(self, odds_json: Any) -> Dict[str, float]: odds_json = self._parse_json_dict(odds_json) @@ -4267,7 +4912,8 @@ class SingleMatchOrchestrator: lineup_sensitive = market in ("MS", "BTTS", "HT", "HTFT") lineup_penalty = 5.0 if lineup_missing and lineup_sensitive else 0.0 if data.lineup_source == "probable_xi" and lineup_sensitive: - lineup_penalty += 4.0 + lineup_conf = max(0.0, min(1.0, float(getattr(data, "lineup_confidence", 0.0) or 0.0))) + lineup_penalty += max(1.0, (1.0 - lineup_conf) * 5.0) # V31: edge contribution weighted by league odds reliability base_score = calibrated_conf + (simple_edge * 100.0 * edge_multiplier) @@ -4438,8 +5084,11 @@ class SingleMatchOrchestrator: away_n = len(data.away_lineup or []) lineup_score = min(home_n, away_n) / 11.0 if min(home_n, away_n) > 0 else 0.0 if data.lineup_source == "probable_xi": - lineup_score *= 0.55 + lineup_conf = max(0.0, min(1.0, float(getattr(data, "lineup_confidence", 0.0) or 0.0))) + lineup_score *= max(0.45, min(0.88, lineup_conf)) flags.append("lineup_probable_not_confirmed") + if lineup_conf < 0.65: + flags.append("lineup_projection_low_confidence") elif data.lineup_source == "none": flags.append("lineup_unavailable") if lineup_score < 0.7: @@ -4464,6 +5113,7 @@ class SingleMatchOrchestrator: "home_lineup_count": home_n, "away_lineup_count": away_n, "lineup_source": data.lineup_source, + "lineup_confidence": round(float(getattr(data, 
"lineup_confidence", 0.0) or 0.0), 3), "flags": flags, } diff --git a/package.json b/package.json index 951e062..2eb682e 100755 --- a/package.json +++ b/package.json @@ -22,7 +22,7 @@ "ai:backtest": "python ai-engine/scripts/backtest_v2_runtime.py", "ai:train:vqwen": "python ai-engine/scripts/train_vqwen_v3.py", "feeder:historical": "ts-node -r tsconfig-paths/register src/scripts/run-feeder.ts", - "feeder:previous-day": "ts-node -r tsconfig-paths/register src/scripts/run-feeder.ts", + "feeder:previous-day": "ts-node -r tsconfig-paths/register src/scripts/run-feeder-previous-day.ts", "feeder:fill-gaps": "ts-node -r tsconfig-paths/register src/scripts/run-feeder-filtered.ts", "feeder:basketball": "ts-node -r tsconfig-paths/register src/scripts/run-feeder-basketball.ts", "feeder:live": "ts-node -r tsconfig-paths/register src/scripts/run-live-feeder.ts", diff --git a/src/modules/feeder/feeder-persistence.service.ts b/src/modules/feeder/feeder-persistence.service.ts index f35f7cb..cd96cd8 100755 --- a/src/modules/feeder/feeder-persistence.service.ts +++ b/src/modules/feeder/feeder-persistence.service.ts @@ -856,19 +856,46 @@ export class FeederPersistenceService { const matches = await this.prisma.match.findMany({ where: { id: { in: matchIds }, - AND: [ - { oddCategories: { some: {} } }, + oddCategories: { some: {} }, + OR: [ { - OR: [ - { footballTeamStats: { some: {} } }, - { basketballTeamStats: { some: {} } }, - ], + sport: "football", + footballTeamStats: { some: {} }, + playerParticipations: { some: { isStarting: true } }, + }, + { + sport: "basketball", + basketballTeamStats: { some: {} }, + basketballPlayerStats: { some: {} }, }, ], }, - select: { id: true }, + select: { id: true, sport: true }, }); - return matches.map((m) => m.id); + + const footballIds = matches + .filter((m) => m.sport === "football") + .map((m) => m.id); + const completeFootballIds = new Set(); + + if (footballIds.length > 0) { + const starterCounts = await 
this.prisma.matchPlayerParticipation.groupBy({ + by: ["matchId"], + where: { + matchId: { in: footballIds }, + isStarting: true, + }, + _count: { _all: true }, + }); + + for (const row of starterCounts) { + if (row._count._all >= 18) completeFootballIds.add(row.matchId); + } + } + + return matches + .filter((m) => m.sport !== "football" || completeFootballIds.has(m.id)) + .map((m) => m.id); } async hasOdds(matchId: string): Promise { diff --git a/src/modules/feeder/feeder.service.ts b/src/modules/feeder/feeder.service.ts index d861074..f804f4d 100755 --- a/src/modules/feeder/feeder.service.ts +++ b/src/modules/feeder/feeder.service.ts @@ -168,7 +168,7 @@ export class FeederService { // writing to live_matches. Historical scan should only fill matches table. endDate.setDate(endDate.getDate() - 2); - const stateKey = `historical_scan_state_${sports.join("_")}${targetLeagueIds.length > 0 ? "_filtered" : ""}_desc`; + const stateKey = `historical_full_data_v2_state_${sports.join("_")}${targetLeagueIds.length > 0 ? "_filtered" : ""}_desc`; let currentDate: Date | null = null; // Resume from saved state @@ -753,10 +753,7 @@ export class FeederService { } // Starting Formation & Substitutes (Always for lineups or all) - // V20 OPTIMIZATION: Disabled to speed up feeder and reduce 502 errors. - // We only use Team Stats for V20 model. 
- /* - if (scope === 'all' || scope === 'lineups') { + if (scope === "all" || scope === "lineups") { // Starting Formation try { const formationData = @@ -780,7 +777,7 @@ export class FeederService { ); } } catch (e: any) { - if (e.message?.includes('502')) hasCriticalError = true; + if (e.message?.includes("502")) hasCriticalError = true; this.logger.warn(`[${matchId}] Formation failed: ${e.message}`); } @@ -807,11 +804,10 @@ export class FeederService { ); } } catch (e: any) { - if (e.message?.includes('502')) hasCriticalError = true; + if (e.message?.includes("502")) hasCriticalError = true; this.logger.warn(`[${matchId}] Subs failed: ${e.message}`); } } - */ // Game Stats & Officials if (scope === "all") { @@ -935,6 +931,8 @@ export class FeederService { const missingParts: string[] = []; if (scope === "all" && completedMatch) { if (sport === "football" && !stats) missingParts.push("Stats"); + if (sport === "football" && participationData.length < 18) + missingParts.push("Lineups"); if (sport === "basketball" && !basketballTeamStats) missingParts.push("BoxScore"); if (oddsArray.length === 0) missingParts.push("Odds"); diff --git a/src/modules/matches/matches.service.ts b/src/modules/matches/matches.service.ts index f7be42f..8e4b790 100755 --- a/src/modules/matches/matches.service.ts +++ b/src/modules/matches/matches.service.ts @@ -588,6 +588,10 @@ export class MatchesService { teamStats: [], playerParticipations: (() => { const parsed: Array<{ teamId: string; isStarting: boolean; shirtNumber: string | number | null; position: string | null; player: { id: string; name: string } }> = []; + const canTrustFeedLineups = displayStatus === "LIVE" || displayStatus === "Finished"; + if (!canTrustFeedLineups) { + return parsed; + } if (liveMatch.lineups && typeof liveMatch.lineups === 'object') { const lu = liveMatch.lineups as Record; const addPlayers = (teamLu: any, teamId: string | null) => { @@ -630,6 +634,64 @@ export class MatchesService { if (!match) return null; 
+ const detailDisplayStatus = getDisplayMatchStatus({ + state: match.state, + status: match.status, + substate: match.substate, + scoreHome: match.scoreHome, + scoreAway: match.scoreAway, + }); + const canTrustStoredLineups = this.canTrustStoredLineups(detailDisplayStatus); + + if (Array.isArray(match.playerParticipations)) { + if (!canTrustStoredLineups) { + match.playerParticipations = []; + } + + const hasHomeLineup = match.playerParticipations.some( + (p: any) => p.teamId === match.homeTeamId && p.isStarting, + ); + const hasAwayLineup = match.playerParticipations.some( + (p: any) => p.teamId === match.awayTeamId && p.isStarting, + ); + + if (!hasHomeLineup || !hasAwayLineup) { + const sidelined = + match.sidelined && typeof match.sidelined === "object" + ? (match.sidelined as Record) + : {}; + const matchDateMs = Number(match.mstUtc || Date.now()); + const probableLineups: any[] = []; + + if (!hasHomeLineup && match.homeTeamId) { + probableLineups.push( + ...(await this.buildProbableLineupForTeam({ + teamId: match.homeTeamId, + beforeDateMs: matchDateMs, + sidelinedTeamData: sidelined.homeTeam, + })), + ); + } + + if (!hasAwayLineup && match.awayTeamId) { + probableLineups.push( + ...(await this.buildProbableLineupForTeam({ + teamId: match.awayTeamId, + beforeDateMs: matchDateMs, + sidelinedTeamData: sidelined.awayTeam, + })), + ); + } + + if (probableLineups.length > 0) { + match.playerParticipations = canTrustStoredLineups + ? [...match.playerParticipations, ...probableLineups] + : probableLineups; + match.lineupSource = "probable_xi"; + } + } + } + // Structure odds const odds: Record< string, @@ -732,4 +794,211 @@ export class MatchesService { return team?.id || null; } + + private async buildProbableLineupForTeam(params: { + teamId: string; + beforeDateMs: number; + sidelinedTeamData?: any; + matchLimit?: number; + lookbackDays?: number; + maxStalenessDays?: number; + }) { + const matchLimit = params.matchLimit ?? 
5; + const lookbackDays = params.lookbackDays ?? 370; + const maxStalenessDays = params.maxStalenessDays ?? 120; + const beforeDateMs = params.beforeDateMs || Date.now(); + const minDateMs = Math.max( + 0, + beforeDateMs - lookbackDays * 24 * 60 * 60 * 1000, + ); + const excluded = this.extractSidelinedPlayerIds(params.sidelinedTeamData); + + const rows = await this.prisma.$queryRaw` + SELECT + mpp.player_id AS "playerId", + p.name AS "playerName", + mpp.position AS "position", + mpp.shirt_number AS "shirtNumber", + m.id AS "matchId", + m.mst_utc AS "mstUtc" + FROM match_player_participation mpp + JOIN matches m ON m.id = mpp.match_id + JOIN players p ON p.id = mpp.player_id + WHERE mpp.team_id = ${params.teamId} + AND mpp.is_starting = true + AND NOT EXISTS ( + SELECT 1 + FROM match_player_participation later_mpp + JOIN matches later_m ON later_m.id = later_mpp.match_id + WHERE later_mpp.player_id = mpp.player_id + AND later_mpp.team_id <> ${params.teamId} + AND later_m.mst_utc > m.mst_utc + AND later_m.mst_utc < ${BigInt(beforeDateMs)} + AND ( + later_m.status = 'FT' + OR later_m.state = 'postGame' + OR (later_m.score_home IS NOT NULL AND later_m.score_away IS NOT NULL) + ) + ) + AND m.id IN ( + SELECT m2.id + FROM matches m2 + JOIN match_player_participation recent_mpp + ON recent_mpp.match_id = m2.id + AND recent_mpp.team_id = ${params.teamId} + AND recent_mpp.is_starting = true + WHERE (m2.home_team_id = ${params.teamId} OR m2.away_team_id = ${params.teamId}) + AND ( + m2.status = 'FT' + OR m2.state = 'postGame' + OR (m2.score_home IS NOT NULL AND m2.score_away IS NOT NULL) + ) + AND m2.mst_utc < ${BigInt(beforeDateMs)} + AND m2.mst_utc >= ${BigInt(minDateMs)} + GROUP BY m2.id + HAVING COUNT(recent_mpp.*) >= 9 + ORDER BY MAX(m2.mst_utc) DESC + LIMIT ${matchLimit} + ) + ORDER BY m.mst_utc DESC + `; + + if (!rows.length) return []; + + const latestMst = Math.max( + ...rows.map((row) => Number(row.mstUtc || 0)), + ); + const ageDays = + latestMst > 0 + ? 
(beforeDateMs - latestMst) / (24 * 60 * 60 * 1000) + : Number.POSITIVE_INFINITY; + const staleProjection = ageDays > maxStalenessDays; + + const matchOrder = new Map(); + for (const row of rows) { + const matchId = String(row.matchId); + if (!matchOrder.has(matchId)) { + matchOrder.set(matchId, matchOrder.size); + } + } + + const playerMap = new Map< + string, + { + playerId: string; + playerName: string; + position: string | null; + shirtNumber: number | null; + score: number; + starts: number; + lastSeenRank: number; + } + >(); + + for (const row of rows) { + const playerId = String(row.playerId); + if (excluded.has(playerId)) continue; + + const rank = matchOrder.get(String(row.matchId)) ?? matchLimit; + const recencyWeight = Math.max(1, matchLimit - rank); + const score = + recencyWeight + (rank === 0 ? 3 : rank === 1 ? 1.5 : 0); + const existing = playerMap.get(playerId); + + if (!existing) { + playerMap.set(playerId, { + playerId, + playerName: row.playerName || "Bilinmiyor", + position: row.position ?? null, + shirtNumber: + row.shirtNumber === null || row.shirtNumber === undefined + ? null + : Number(row.shirtNumber), + score, + starts: 1, + lastSeenRank: rank, + }); + } else { + existing.score += score; + existing.starts += 1; + existing.lastSeenRank = Math.min(existing.lastSeenRank, rank); + existing.position = existing.position || row.position || null; + existing.shirtNumber = + existing.shirtNumber ?? + (row.shirtNumber === null || row.shirtNumber === undefined + ? 
null + : Number(row.shirtNumber)); + } + } + + const ranked = [...playerMap.values()] + .sort((a, b) => { + if (b.score !== a.score) return b.score - a.score; + if (b.starts !== a.starts) return b.starts - a.starts; + return a.lastSeenRank - b.lastSeenRank; + }) + .slice(0, 11); + + const coverage = Math.min(1, ranked.length / 11); + const historyScore = Math.min(1, matchOrder.size / matchLimit); + const stableCore = ranked.filter((p) => p.starts >= 2).length / 11; + const stalenessFactor = Math.max( + 0.35, + Math.min(1, maxStalenessDays / Math.max(ageDays, 1)), + ); + const confidence = Math.max( + 0, + Math.min( + staleProjection ? 0.58 : 0.88, + (coverage * 0.45 + historyScore * 0.25 + stableCore * 0.3) * + stalenessFactor, + ), + ); + + return ranked.map((p) => ({ + teamId: params.teamId, + isStarting: true, + shirtNumber: p.shirtNumber, + position: p.position, + isProbable: true, + lineupSource: "probable_xi", + projectionConfidence: Number(confidence.toFixed(3)), + projectionAgeDays: Number(ageDays.toFixed(1)), + projectionStale: staleProjection, + projectionMatchLimit: matchLimit, + projectionLookbackDays: lookbackDays, + projectionMaxStalenessDays: maxStalenessDays, + player: { + id: p.playerId, + name: p.playerName, + }, + })); + } + + private extractSidelinedPlayerIds(teamData: any): Set { + if (!teamData || typeof teamData !== "object") return new Set(); + const players = Array.isArray(teamData.players) ? teamData.players : []; + return new Set( + players + .map((player: any) => + String( + player?.playerId ?? + player?.player_id ?? + player?.id ?? + player?.personId ?? 
+ "", + ), + ) + .filter(Boolean), + ); + } + + private canTrustStoredLineups(displayStatus?: string): boolean { + const normalized = String(displayStatus || "").toLowerCase(); + return ( + normalized === "live" || + normalized === "finished" || + normalized === "ft" + ); + } } diff --git a/src/modules/predictions/predictions.controller.ts b/src/modules/predictions/predictions.controller.ts index ca7087d..8cc80d8 100755 --- a/src/modules/predictions/predictions.controller.ts +++ b/src/modules/predictions/predictions.controller.ts @@ -96,11 +96,10 @@ export class PredictionsController { async getPrediction( @Param("matchId") matchId: string, ): Promise { - // Check cache first - DISABLED per user request to always fetch from scratch - // const cached = await this.predictionsService.getCachedPrediction(matchId); - // if (cached) { - // return cached; - // } + const cached = await this.predictionsService.getCachedPrediction(matchId); + if (cached) { + return cached; + } // Get from AI Engine const prediction = await this.predictionsService.getPredictionById(matchId); diff --git a/src/modules/predictions/predictions.service.ts b/src/modules/predictions/predictions.service.ts index b601b12..be06bbe 100755 --- a/src/modules/predictions/predictions.service.ts +++ b/src/modules/predictions/predictions.service.ts @@ -223,11 +223,13 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy { `/v20plus/analyze/${matchId}`, { simulate: true, is_simulation: true, pre_match_only: true }, ); - await this.recordPredictionRun(matchId, response.data); - return this.enrichPredictionResponse( - response.data as MatchPredictionDto, + const prediction = this.enrichPredictionResponse( + response.data, matchContext, ); + await this.recordPredictionRun(matchId, response.data); + await this.cachePrediction(matchId, prediction); + return prediction; } catch (e: unknown) { const requestError = e instanceof AiEngineRequestError @@ -235,6 +237,20 @@ export class 
PredictionsService implements OnModuleInit, OnModuleDestroy { : new AiEngineRequestError("AI Engine request failed"); const status = requestError.status; const detail = requestError.detail || requestError.message; + + if ( + status === HttpStatus.SERVICE_UNAVAILABLE && + this.hasCooldown(detail) + ) { + const storedPrediction = await this.getStoredPrediction(matchId); + if (storedPrediction) { + this.logger.warn( + `AI Engine cooldown for ${matchId}; returning stored prediction`, + ); + return this.enrichPredictionResponse(storedPrediction, matchContext); + } + } + this.logger.error( `Direct AI Engine call failed for ${matchId}: status=${status}, detail=${JSON.stringify(detail)}`, ); @@ -674,6 +690,11 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy { odds: this.normalizeDisplayOdds(odds, impliedProb), implied_prob: impliedProb, ev_edge: evEdge, + playable: Boolean(record.playable) && interval.threshold_met, + stake_units: + Boolean(record.playable) && interval.threshold_met + ? this.asNumber(record.stake_units) + : 0, reasons: Array.isArray(record.reasons) ? 
record.reasons.map((reason) => this.translateReason(String(reason))) : [], @@ -919,15 +940,39 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy { return 0; } - const normalizedPick = pickName.toUpperCase(); + const normalizedPick = this.normalizePickKey(pickName); for (const [key, value] of Object.entries(probabilities)) { - if (key.toUpperCase() === normalizedPick) { + if (this.normalizePickKey(key) === normalizedPick) { return this.asNumber(value); } } return 0; } + private normalizePickKey(value: string): string { + const normalized = value.trim().toUpperCase(); + const aliases: Record = { + ÜST: "OVER", + UST: "OVER", + OVER: "OVER", + ALT: "UNDER", + UNDER: "UNDER", + "KG VAR": "YES", + VAR: "YES", + YES: "YES", + "KG YOK": "NO", + YOK: "NO", + NO: "NO", + TEK: "ODD", + ODD: "ODD", + ÇİFT: "EVEN", + CIFT: "EVEN", + EVEN: "EVEN", + }; + + return aliases[normalized] ?? normalized; + } + private impliedProbabilityFromOdds(odds: number): number { if (odds <= 1) { return 0; @@ -1132,6 +1177,30 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy { return prediction.predictionJson as unknown as MatchPredictionDto; } + private async getStoredPrediction( + matchId: string, + ): Promise { + const prediction = await this.prisma.prediction.findUnique({ + where: { matchId }, + }); + + return prediction + ? 
(prediction.predictionJson as unknown as MatchPredictionDto) + : null; + } + + private hasCooldown(detail: unknown): boolean { + if (typeof detail === "string") { + return detail.includes("cooldownRemainingMs"); + } + + if (detail && typeof detail === "object") { + return "cooldownRemainingMs" in detail; + } + + return false; + } + private async ensureSmartCouponDataReady(matchIds: string[]): Promise { const uniqueMatchIds = [...new Set(matchIds.filter((id) => !!id))]; if (uniqueMatchIds.length === 0) { diff --git a/src/scripts/run-feeder-previous-day.ts b/src/scripts/run-feeder-previous-day.ts new file mode 100644 index 0000000..d25a514 --- /dev/null +++ b/src/scripts/run-feeder-previous-day.ts @@ -0,0 +1,39 @@ +/** + * Run Previous-Day Completed Match Sync + * Usage: npm run feeder:previous-day + */ + +import { NestFactory } from "@nestjs/core"; +import { FeederService } from "../modules/feeder/feeder.service"; +import { Logger } from "@nestjs/common"; + +async function bootstrap() { + process.env.FEEDER_MODE = "historical"; + + const logger = new Logger("FeederPreviousDayScript"); + + logger.log("🚀 Starting previous-day completed match sync..."); + + // Load AppModule after FEEDER_MODE is set so cron imports can be disabled. 
+ // eslint-disable-next-line @typescript-eslint/no-require-imports + const { AppModule } = require("../app.module"); + const app = await NestFactory.createApplicationContext(AppModule, { + logger: ["log", "error", "warn"], + }); + + try { + const feederService = app.get(FeederService); + await feederService.runPreviousDayCompletedMatchesScan(); + logger.log("✅ Previous-day completed match sync completed successfully!"); + } catch (error: any) { + logger.error(`❌ Feeder failed: ${error.message}`); + logger.error(error.stack); + process.exit(1); + } finally { + await app.close(); + } + + process.exit(0); +} + +void bootstrap(); diff --git a/src/scripts/run-feeder.ts b/src/scripts/run-feeder.ts index 4e0d1c0..b238bb4 100755 --- a/src/scripts/run-feeder.ts +++ b/src/scripts/run-feeder.ts @@ -1,5 +1,5 @@ /** - * Run Previous-Day Completed Match Sync + * Run Full Historical Feeder * Usage: npm run feeder:historical */ @@ -12,7 +12,7 @@ async function bootstrap() { const logger = new Logger("FeederScript"); - logger.log("🚀 Starting previous-day completed match sync..."); + logger.log("🚀 Starting full historical feeder..."); // Load AppModule after FEEDER_MODE is set so cron imports can be disabled. // eslint-disable-next-line @typescript-eslint/no-require-imports @@ -23,8 +23,14 @@ async function bootstrap() { try { const feederService = app.get(FeederService); - await feederService.runPreviousDayCompletedMatchesScan(); - logger.log("✅ Previous-day completed match sync completed successfully!"); + const startDate = process.env.FEEDER_START_DATE || "2023-06-01"; + const sports = (process.env.FEEDER_SPORTS || "football,basketball") + .split(",") + .map((sport) => sport.trim()) + .filter(Boolean) as Array<"football" | "basketball">; + + await feederService.runHistoricalScan(sports, startDate); + logger.log("✅ Full historical feeder completed successfully!"); } catch (error: any) { logger.error(`❌ Feeder failed: ${error.message}`); logger.error(error.stack);