v28

2026-04-24 23:46:28 +03:00
parent 3875f2a512
commit 9027cc9900
17 changed files with 4315 additions and 122 deletions
@@ -0,0 +1,413 @@
+"""
+Calibration Module for XGBoost Models
+=====================================
+Calibrates raw probabilities from XGBoost models using Isotonic Regression.
+Ensures that a predicted probability of 70% actually corresponds to a 70% win rate.
+
+Usage:
+    from ai_engine.models.calibration import Calibrator
+    calibrator = Calibrator()
+    calibrated_prob = calibrator.calibrate("ms", raw_prob)
+    
+    # Training new calibration models:
+    calibrator.train_calibration(valid_df, market="ms", prob_col="raw_prob", actual_col="actual")  # column names are examples
+"""
+
+import os
+import pickle
+import json
+import numpy as np
+import pandas as pd
+from datetime import datetime
+from typing import Dict, List, Optional, Tuple, Any
+from sklearn.isotonic import IsotonicRegression
+from sklearn.calibration import calibration_curve
+from sklearn.metrics import brier_score_loss
+
+# Directory two levels above this file's absolute path (dirname applied twice).
+AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+# Folder holding the per-market "<market>_calibrator.pkl" and "<market>_metrics.json" files.
+CALIBRATION_DIR = os.path.join(AI_ENGINE_DIR, "models", "calibration")
+
+# NOTE: import-time side effect -- the folder is created as soon as this module is imported.
+os.makedirs(CALIBRATION_DIR, exist_ok=True)
+
+# Supported markets for calibration.
+# These keys drive which calibrator/metrics files are looked up on disk and
+# which markets appear in the calibration report.
+SUPPORTED_MARKETS = [
+    "ms",        # Match Result (1X2) - multi-class, calibrated per class
+    "ms_home",   # Standard Home win probability
+    "ms_home_heavy_fav", # Context: home odds <= 1.40
+    "ms_home_fav",       # Context: 1.40 < home odds <= 1.80
+    "ms_home_balanced",  # Context: 1.80 < home odds <= 2.50
+    "ms_home_underdog",  # Context: home odds > 2.50
+    "ms_draw",   # Draw probability
+    "ms_away",   # Away win probability
+    "ou15",      # Over/Under 1.5
+    "ou25",      # Over/Under 2.5
+    "ou35",      # Over/Under 3.5
+    "btts",      # Both Teams to Score
+    "ht_ft",     # Half-Time/Full-Time
+    "dc",        # Double Chance
+    "ht",        # Half-Time Result
+]
+
+
+class CalibrationMetrics:
+    """Container for the calibration quality numbers recorded for one market."""
+
+    def __init__(self):
+        # Error measures (lower is better).
+        self.brier_score: float = 0.0
+        self.calibration_error: float = 0.0
+        # Bookkeeping about the training run.
+        self.sample_count: int = 0
+        self.last_trained: str = ""
+        # Mean of the raw predictions and of the observed outcomes.
+        self.mean_predicted: float = 0.0
+        self.mean_actual: float = 0.0
+
+    def to_dict(self) -> Dict:
+        """Return the metrics as a plain dict; float fields are rounded to 4 decimals."""
+        payload = {}
+        for name in ("brier_score", "calibration_error"):
+            payload[name] = round(getattr(self, name), 4)
+        payload["sample_count"] = self.sample_count
+        payload["last_trained"] = self.last_trained
+        for name in ("mean_predicted", "mean_actual"):
+            payload[name] = round(getattr(self, name), 4)
+        return payload
+
+
+class Calibrator:
+    """
+    Probability calibration using Isotonic Regression.
+    
+    Isotonic Regression is a non-parametric method that fits a piecewise
+    constant function that is monotonically increasing. It's ideal for
+    calibrating probabilities because:
+    
+    1. It preserves ranking up to ties (if P(A) > P(B) before, P(A) >= P(B) after)
+    2. It doesn't assume a specific distribution shape
+    3. It can correct systematic over/under-confidence
+    
+    Example:
+        # Before calibration: model predicts 70% but actual win rate is 60%
+        # After calibration: model predicts 70% → calibrated to 60%
+    """
+    
+    def __init__(self):
+        """Set up empty model/metric registries, the heuristic shrinkage table, then load from disk."""
+        self.calibrators: Dict[str, IsotonicRegression] = {}
+        self.metrics: Dict[str, CalibrationMetrics] = {}
+
+        # Shrinkage factors used by the heuristic fallback: start every market
+        # at 0.90, then override the markets that use a different factor.
+        shrinkage_table: Dict[str, float] = dict.fromkeys(
+            (
+                "ms",
+                "ms_home",
+                "ms_home_heavy_fav",
+                "ms_home_fav",
+                "ms_home_balanced",
+                "ms_home_underdog",
+                "ms_draw",
+                "ms_away",
+                "ou15",
+                "ou25",
+                "ou35",
+                "btts",
+                "ht_ft",
+                "dc",
+                "ht",
+            ),
+            0.90,
+        )
+        shrinkage_table.update(
+            {
+                "ms_home_heavy_fav": 0.95,
+                "ms_home_balanced": 0.85,
+                "ms_home_underdog": 0.80,
+                "ht_ft": 0.85,
+                "dc": 0.93,
+                "ht": 0.85,
+            }
+        )
+        self.heuristic_fallback: Dict[str, float] = shrinkage_table
+
+        self._load_calibrators()
+        
+    def _load_calibrators(self):
+        """
+        Read the saved calibrator and metrics files for every supported market.
+
+        For each market, "<market>_calibrator.pkl" is unpickled into
+        ``self.calibrators`` and "<market>_metrics.json" is parsed into a
+        CalibrationMetrics stored in ``self.metrics``. Missing files are
+        skipped; a file that fails to load only produces a printed warning.
+        """
+        # (attribute, default used when the key is absent from the JSON file)
+        metric_defaults = (
+            ("brier_score", 0.0),
+            ("calibration_error", 0.0),
+            ("sample_count", 0),
+            ("last_trained", ""),
+            ("mean_predicted", 0.0),
+            ("mean_actual", 0.0),
+        )
+
+        for market in SUPPORTED_MARKETS:
+            model_path = os.path.join(CALIBRATION_DIR, f"{market}_calibrator.pkl")
+            metrics_path = os.path.join(CALIBRATION_DIR, f"{market}_metrics.json")
+
+            if os.path.exists(model_path):
+                try:
+                    # NOTE(review): unpickling runs arbitrary code, so these
+                    # files must only ever come from a trusted source.
+                    with open(model_path, "rb") as handle:
+                        self.calibrators[market] = pickle.load(handle)
+                    print(f"[Calibrator] Loaded calibration model for {market}")
+                except Exception as err:
+                    print(f"[Calibrator] Warning: Failed to load {market}: {err}")
+
+            if os.path.exists(metrics_path):
+                try:
+                    with open(metrics_path, "r") as handle:
+                        data = json.load(handle)
+                        loaded = CalibrationMetrics()
+                        for attr, default in metric_defaults:
+                            setattr(loaded, attr, data.get(attr, default))
+                        self.metrics[market] = loaded
+                except Exception as err:
+                    print(f"[Calibrator] Warning: Failed to load metrics for {market}: {err}")
+
+    def calibrate(self, market_type: str, raw_prob: float, odds_val: Optional[float] = None) -> float:
+        """
+        Map a raw model probability to a calibrated one.
+
+        A trained isotonic model is used when one exists for the market
+        (its output is clipped to [0.01, 0.99]); otherwise, or when the
+        model raises, the heuristic shrinkage fallback is applied.
+
+        Args:
+            market_type (str): Market name such as 'ms_home', 'ou25', 'btts';
+                it is lower-cased and '-' is replaced by '_' before lookup.
+            raw_prob (float): Raw probability produced by the model.
+            odds_val (float, optional): Pre-match odds. Only consulted for
+                'ms_home', where odds above 1.0 select an odds bucket.
+
+        Returns:
+            float: The calibrated probability.
+        """
+        key = market_type.lower().replace("-", "_")
+
+        # Odds-aware routing: only switch to the bucket when a calibrator has
+        # actually been trained for it, otherwise stay on plain "ms_home".
+        if key == "ms_home" and odds_val is not None and odds_val > 1.0:
+            odds_buckets = (
+                (1.40, "ms_home_heavy_fav"),
+                (1.80, "ms_home_fav"),
+                (2.50, "ms_home_balanced"),
+            )
+            bucket = next(
+                (name for upper, name in odds_buckets if odds_val <= upper),
+                "ms_home_underdog",
+            )
+            if bucket in self.calibrators:
+                key = bucket
+
+        if key not in self.calibrators:
+            return self._heuristic_calibrate(key, raw_prob)
+
+        try:
+            prediction = self.calibrators[key].predict([raw_prob])
+            # Keep the result strictly inside (0, 1).
+            return float(np.clip(prediction[0], 0.01, 0.99))
+        except Exception as err:
+            print(f"[Calibrator] Warning: Isotonic failed for {key}: {err}")
+
+        # The trained model failed: use the heuristic instead.
+        return self._heuristic_calibrate(key, raw_prob)
+    
+    def _heuristic_calibrate(self, market_type: str, raw_prob: float) -> float:
+        """
+        Shrinkage-based fallback used when no trained calibrator is available.
+
+        The raw probability is scaled by the market's shrinkage factor
+        (0.90 when the market has no entry) and blended with an anchor:
+        - match-result markets: anchor 0.33
+        - over/under and BTTS markets: anchor 0.5
+        - double chance: anchor 0.66
+        - HT/FT and HT: scaled only, no anchor term
+        Any other market returns ``raw_prob`` unchanged.
+        """
+        shrinkage = self.heuristic_fallback.get(market_type, 0.90)
+
+        # High-variance markets are only scaled down.
+        if market_type in ("ht_ft", "ht"):
+            return raw_prob * shrinkage
+
+        anchors = {
+            "ms": 0.33,
+            "ms_home": 0.33,
+            "ms_home_heavy_fav": 0.33,
+            "ms_home_fav": 0.33,
+            "ms_home_balanced": 0.33,
+            "ms_home_underdog": 0.33,
+            "ms_draw": 0.33,
+            "ms_away": 0.33,
+            "ou15": 0.5,
+            "ou25": 0.5,
+            "ou35": 0.5,
+            "btts": 0.5,
+            "dc": 0.66,
+        }
+        anchor = anchors.get(market_type)
+        if anchor is None:
+            return raw_prob
+
+        return (raw_prob * shrinkage) + (anchor * (1.0 - shrinkage))
+
+    def train_calibration(
+        self,
+        df: pd.DataFrame,
+        market: str,
+        prob_col: str,
+        actual_col: str,
+        min_samples: int = 100,
+        save: bool = True,
+    ) -> CalibrationMetrics:
+        """
+        Fit an Isotonic Regression calibrator for one market.
+
+        Rows with a missing value in either column are dropped first. When
+        fewer than ``min_samples`` rows remain, nothing is trained and a
+        CalibrationMetrics carrying only ``sample_count`` is returned.
+
+        Note that the Brier score and ECE are computed on the same rows the
+        calibrator was fitted on, so they are in-sample figures.
+
+        Args:
+            df: DataFrame with predictions and actual outcomes
+            market: Market identifier (e.g., 'ms_home', 'ou25', 'btts')
+            prob_col: Column name for raw probabilities
+            actual_col: Column name for actual outcomes (0 or 1)
+            min_samples: Minimum samples required to train
+            save: Whether to save the model and metrics to disk
+
+        Returns:
+            CalibrationMetrics with quality metrics
+        """
+        # Local import: the module only imports `datetime` at the top.
+        from datetime import timezone
+
+        # Filter valid data
+        valid_df = df[[prob_col, actual_col]].dropna()
+        n_samples = len(valid_df)
+
+        if n_samples < min_samples:
+            print(f"[Calibrator] Warning: Only {n_samples} samples for {market}, "
+                  f"need at least {min_samples}")
+            metrics = CalibrationMetrics()
+            metrics.sample_count = n_samples
+            return metrics
+
+        # Extract arrays
+        raw_probs = valid_df[prob_col].values
+        actuals = valid_df[actual_col].values
+
+        # Train Isotonic Regression; inputs outside the fitted range are
+        # clipped to the nearest fitted value at prediction time.
+        iso = IsotonicRegression(out_of_bounds="clip", increasing=True)
+        iso.fit(raw_probs, actuals)
+
+        # Calibrated probabilities for the training rows (used for metrics)
+        calibrated_probs = iso.predict(raw_probs)
+
+        metrics = CalibrationMetrics()
+        metrics.sample_count = n_samples
+        # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
+        # timestamp and drop the tzinfo so the stored ISO string keeps the
+        # same naive format as before.
+        metrics.last_trained = (
+            datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
+        )
+        # Store plain Python floats rather than NumPy scalars.
+        metrics.brier_score = float(brier_score_loss(actuals, calibrated_probs))
+        metrics.mean_predicted = float(np.mean(raw_probs))
+        metrics.mean_actual = float(np.mean(actuals))
+
+        # Expected Calibration Error (ECE) of the calibrated output
+        metrics.calibration_error = float(
+            self._calculate_ece(calibrated_probs, actuals, n_bins=10)
+        )
+
+        # Store in memory
+        self.calibrators[market] = iso
+        self.metrics[market] = metrics
+
+        # Save to disk
+        if save:
+            self._save_calibration(market, iso, metrics)
+
+        print(f"[Calibrator] Trained {market}: "
+              f"Brier={metrics.brier_score:.4f}, "
+              f"ECE={metrics.calibration_error:.4f}, "
+              f"n={n_samples}")
+
+        return metrics
+    
+    def train_all_markets(
+        self,
+        df: pd.DataFrame,
+        market_config: Dict[str, Tuple[str, str]],
+        min_samples: int = 100,
+    ) -> Dict[str, CalibrationMetrics]:
+        """
+        Run train_calibration for every market listed in ``market_config``.
+
+        A market whose training raises is reported with a printed message
+        and left out of the result; the remaining markets are still trained.
+
+        Args:
+            df: DataFrame with all predictions and outcomes
+            market_config: Dict mapping market -> (prob_col, actual_col)
+                          e.g., {'ou25': ('ou25_over_prob', 'ou25_over_actual')}
+            min_samples: Minimum samples per market
+
+        Returns:
+            Dict of market -> CalibrationMetrics for the markets that did not raise
+        """
+        trained = {}
+
+        for market, columns in market_config.items():
+            prob_col, actual_col = columns
+            print(f"\n[Calibrator] Training {market}...")
+            try:
+                trained[market] = self.train_calibration(
+                    df=df,
+                    market=market,
+                    prob_col=prob_col,
+                    actual_col=actual_col,
+                    min_samples=min_samples,
+                    save=True,
+                )
+            except Exception as err:
+                print(f"[Calibrator] Failed to train {market}: {err}")
+
+        return trained
+    
+    def _calculate_ece(
+        self,
+        probs: np.ndarray,
+        actuals: np.ndarray,
+        n_bins: int = 10
+    ) -> float:
+        """
+        Calculate Expected Calibration Error (ECE).
+
+        ECE = sum(|bin_accuracy - bin_confidence| * bin_weight)
+
+        Probabilities are grouped into ``n_bins`` equal-width bins over
+        [0, 1]. Every bin is half-open ``[lower, upper)`` except the last,
+        which is closed so that a probability of exactly 1.0 is counted.
+
+        Args:
+            probs: Predicted probabilities.
+            actuals: Observed outcomes aligned with ``probs``.
+            n_bins: Number of equal-width bins.
+
+        Returns:
+            The ECE as a float. Lower is better, perfect calibration = 0.
+            Empty input yields 0.0.
+        """
+        probs = np.asarray(probs, dtype=float)
+        actuals = np.asarray(actuals, dtype=float)
+
+        # Nothing to score; also avoids taking the mean of an empty array.
+        if probs.size == 0:
+            return 0.0
+
+        bin_boundaries = np.linspace(0, 1, n_bins + 1)
+        last_bin = n_bins - 1
+        ece = 0.0
+
+        for i in range(n_bins):
+            lower, upper = bin_boundaries[i], bin_boundaries[i + 1]
+            if i == last_bin:
+                # Closed upper edge: isotonic output is frequently exactly 1.0
+                in_bin = (probs >= lower) & (probs <= upper)
+            else:
+                in_bin = (probs >= lower) & (probs < upper)
+
+            prop_in_bin = np.mean(in_bin)
+            if prop_in_bin > 0:
+                accuracy_in_bin = np.mean(actuals[in_bin])
+                avg_confidence_in_bin = np.mean(probs[in_bin])
+                ece += np.abs(accuracy_in_bin - avg_confidence_in_bin) * prop_in_bin
+
+        return float(ece)
+    
+    def _save_calibration(
+        self,
+        market: str,
+        calibrator: IsotonicRegression,
+        metrics: CalibrationMetrics
+    ):
+        """Write the market's calibrator (pickle) and its metrics (JSON) into CALIBRATION_DIR."""
+        model_path = os.path.join(CALIBRATION_DIR, f"{market}_calibrator.pkl")
+        metrics_path = os.path.join(CALIBRATION_DIR, f"{market}_metrics.json")
+
+        # Model first, then its metrics.
+        with open(model_path, "wb") as model_file:
+            pickle.dump(calibrator, model_file)
+
+        with open(metrics_path, "w") as metrics_file:
+            json.dump(metrics.to_dict(), metrics_file, indent=2)
+
+        print(f"[Calibrator] Saved {market} to {CALIBRATION_DIR}")
+    
+    def get_calibration_report(self) -> Dict[str, Any]:
+        """
+        Summarise the calibration state of all supported markets.
+
+        Returns a dict with:
+        - "trained_markets": keys of the calibrators currently held in memory
+        - "metrics": market -> metrics dict, for supported markets with metrics
+        - "heuristic_only": supported markets with neither metrics nor a calibrator
+        """
+        metrics_by_market = {
+            market: self.metrics[market].to_dict()
+            for market in SUPPORTED_MARKETS
+            if market in self.metrics
+        }
+        heuristic_only = [
+            market
+            for market in SUPPORTED_MARKETS
+            if market not in self.metrics and market not in self.calibrators
+        ]
+
+        return {
+            "trained_markets": list(self.calibrators.keys()),
+            "metrics": metrics_by_market,
+            "heuristic_only": heuristic_only,
+        }
+    
+    def get_calibrated_probabilities(
+        self,
+        market: str,
+        raw_probs: np.ndarray
+    ) -> np.ndarray:
+        """
+        Calibrate a batch of probabilities one element at a time.
+
+        Each element is passed to ``calibrate`` without odds, so no
+        odds-bucket routing takes place here.
+
+        Args:
+            market: Market type
+            raw_probs: Array of raw probabilities
+
+        Returns:
+            Array of calibrated probabilities, in input order
+        """
+        calibrated = []
+        for prob in raw_probs:
+            calibrated.append(self.calibrate(market, prob))
+        return np.array(calibrated)
+
+
+# Module-wide Calibrator, created lazily (None until first requested)
+_calibrator_instance: Optional[Calibrator] = None
+
+
+def get_calibrator() -> Calibrator:
+    """Return the shared Calibrator, constructing it on the first call."""
+    global _calibrator_instance
+    if _calibrator_instance is not None:
+        return _calibrator_instance
+    _calibrator_instance = Calibrator()
+    return _calibrator_instance
@@ -0,0 +1,645 @@
+"""
+V25 Ensemble Predictor - NO TARGET LEAKAGE
+===========================================
+Multi-model ensemble for match prediction using XGBoost and LightGBM.
+
+Features:
+- 82 engineered features (NO target leakage)
+- Market-specific models (MS, OU25, BTTS)
+- Weighted ensemble predictions
+- Value bet detection
+"""
+
+import os
+import json
+import numpy as np
+import pandas as pd
+from typing import Dict, List, Optional, Any
+from dataclasses import dataclass, field
+
+import xgboost as xgb
+import lightgbm as lgb
+
+# CatBoost is optional: when the import fails, the class name is bound to
+# None and the availability flag is set to False instead of raising.
+try:
+    from catboost import CatBoostClassifier
+    CATBOOST_AVAILABLE = True
+except ImportError:
+    CatBoostClassifier = None
+    CATBOOST_AVAILABLE = False
+
+# Paths
+# Default model directory: the 'v25' folder next to this module.
+MODELS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'v25')
+
+
+@dataclass
+class MarketPrediction:
+    """A pick for one betting market with its probability, confidence and odds."""
+    market_type: str
+    pick: str
+    probability: float
+    confidence: float
+    odds: float = 0.0
+    is_value_bet: bool = False
+    edge: float = 0.0
+
+    def to_dict(self) -> dict:
+        """Serialise to a dict; probability and edge are multiplied by 100, and they and confidence are rounded to 1 decimal."""
+        payload = {'market_type': self.market_type, 'pick': self.pick}
+        payload['probability'] = round(self.probability * 100, 1)
+        payload['confidence'] = round(self.confidence, 1)
+        payload['odds'] = self.odds
+        payload['is_value_bet'] = self.is_value_bet
+        payload['edge'] = round(self.edge * 100, 1)
+        return payload
+
+
+@dataclass
+class ValueBet:
+    """A value-bet candidate: market, pick, probability, odds, edge and confidence."""
+    market_type: str
+    pick: str
+    probability: float
+    odds: float
+    edge: float
+    confidence: float
+
+    def to_dict(self) -> dict:
+        """Serialise to a dict; probability and edge are multiplied by 100, and they and confidence are rounded to 1 decimal."""
+        probability_pct = round(self.probability * 100, 1)
+        edge_pct = round(self.edge * 100, 1)
+        confidence = round(self.confidence, 1)
+        return dict(
+            market_type=self.market_type,
+            pick=self.pick,
+            probability=probability_pct,
+            odds=self.odds,
+            edge=edge_pct,
+            confidence=confidence,
+        )
+
+
+@dataclass
+class MatchPrediction:
+    """Prediction bundle for one match: MS, OU25 and BTTS results plus value bets."""
+    match_id: str
+    home_team: str
+    away_team: str
+
+    # Match result (1X2)
+    home_prob: float = 0.0
+    draw_prob: float = 0.0
+    away_prob: float = 0.0
+    ms_pick: str = ''
+    ms_confidence: float = 0.0
+
+    # Over/Under 2.5
+    over_prob: float = 0.0
+    under_prob: float = 0.0
+    ou25_pick: str = ''
+    ou25_confidence: float = 0.0
+
+    # Both teams to score
+    btts_yes_prob: float = 0.0
+    btts_no_prob: float = 0.0
+    btts_pick: str = ''
+    btts_confidence: float = 0.0
+
+    # Value bets attached to this prediction
+    value_bets: List[ValueBet] = field(default_factory=list)
+
+    def to_dict(self) -> dict:
+        """Serialise to a nested dict; probabilities become percentages rounded to 1 decimal."""
+        ms_section = {
+            'home_prob': round(self.home_prob * 100, 1),
+            'draw_prob': round(self.draw_prob * 100, 1),
+            'away_prob': round(self.away_prob * 100, 1),
+            'pick': self.ms_pick,
+            'confidence': round(self.ms_confidence, 1),
+        }
+        ou25_section = {
+            'over_prob': round(self.over_prob * 100, 1),
+            'under_prob': round(self.under_prob * 100, 1),
+            'pick': self.ou25_pick,
+            'confidence': round(self.ou25_confidence, 1),
+        }
+        btts_section = {
+            'yes_prob': round(self.btts_yes_prob * 100, 1),
+            'no_prob': round(self.btts_no_prob * 100, 1),
+            'pick': self.btts_pick,
+            'confidence': round(self.btts_confidence, 1),
+        }
+        serialised_bets = []
+        for bet in self.value_bets:
+            serialised_bets.append(bet.to_dict())
+
+        return {
+            'match_id': self.match_id,
+            'home_team': self.home_team,
+            'away_team': self.away_team,
+            'ms': ms_section,
+            'ou25': ou25_section,
+            'btts': btts_section,
+            'value_bets': serialised_bets,
+        }
+
+
+class V25Predictor:
+    """
+    V25 Ensemble Predictor - NO TARGET LEAKAGE
+    
+    Uses market-specific XGBoost and LightGBM models.
+    Each market (MS, OU25, BTTS) has its own trained models.
+    """
+    
+    # Feature columns (82 features, NO target leakage)
+    FEATURE_COLS = [
+        # ELO Features (8)
+        'home_overall_elo', 'away_overall_elo', 'elo_diff',
+        'home_home_elo', 'away_away_elo',
+        'home_form_elo', 'away_form_elo', 'form_elo_diff',
+        
+        # Form Features (12)
+        'home_goals_avg', 'home_conceded_avg',
+        'away_goals_avg', 'away_conceded_avg',
+        'home_clean_sheet_rate', 'away_clean_sheet_rate',
+        'home_scoring_rate', 'away_scoring_rate',
+        'home_winning_streak', 'away_winning_streak',
+        'home_unbeaten_streak', 'away_unbeaten_streak',
+        
+        # H2H Features (6)
+        'h2h_total_matches', 'h2h_home_win_rate', 'h2h_draw_rate',
+        'h2h_avg_goals', 'h2h_btts_rate', 'h2h_over25_rate',
+        
+        # Team Stats Features (8)
+        'home_avg_possession', 'away_avg_possession',
+        'home_avg_shots_on_target', 'away_avg_shots_on_target',
+        'home_shot_conversion', 'away_shot_conversion',
+        'home_avg_corners', 'away_avg_corners',
+        
+        # Odds Features (23)
+        'odds_ms_h', 'odds_ms_d', 'odds_ms_a',
+        'implied_home', 'implied_draw', 'implied_away',
+        'odds_ht_ms_h', 'odds_ht_ms_d', 'odds_ht_ms_a',
+        'odds_ou05_o', 'odds_ou05_u',
+        'odds_ou15_o', 'odds_ou15_u',
+        'odds_ou25_o', 'odds_ou25_u',
+        'odds_ou35_o', 'odds_ou35_u',
+        'odds_ht_ou05_o', 'odds_ht_ou05_u',
+        'odds_ht_ou15_o', 'odds_ht_ou15_u',
+        'odds_btts_y', 'odds_btts_n',
+        
+        # League Features (4)
+        'home_xga', 'away_xga',
+        'league_avg_goals', 'league_zero_goal_rate',
+        
+        # Upset Engine (4)
+        'upset_atmosphere', 'upset_motivation', 'upset_fatigue', 'upset_potential',
+        
+        # Referee Engine (5)
+        'referee_home_bias', 'referee_avg_goals', 'referee_cards_total',
+        'referee_avg_yellow', 'referee_experience',
+        
+        # Momentum Engine (3)
+        'home_momentum_score', 'away_momentum_score', 'momentum_diff',
+
+        # Squad Features (9)
+        'home_squad_quality', 'away_squad_quality', 'squad_diff',
+        'home_key_players', 'away_key_players',
+        'home_missing_impact', 'away_missing_impact',
+        'home_goals_form', 'away_goals_form',
+    ]
+    
+    # Model weights for ensemble
+    DEFAULT_WEIGHTS = {
+        'xgb': 0.50,
+        'lgb': 0.50,
+    }
+    
+    def __init__(self, models_dir: str = None):
+        """
+        Create a predictor; no model files are read until load_models() runs.
+
+        Args:
+            models_dir: Directory containing model files. A falsy value
+                (None or empty string) selects the default v25/ directory.
+        """
+        self.models_dir = models_dir if models_dir else MODELS_DIR
+        # market -> {'xgb': model, 'lgb': model}
+        self.models = {}
+        self._loaded = False
+    
+    # All trained market models available in V25
+    ALL_MARKETS = [
+        'ms', 'ou25', 'btts',           # Core markets
+        'ou15', 'ou35',                  # Additional OU lines
+        'ht_result', 'ht_ou05', 'ht_ou15',  # HT markets
+        'htft',                          # HT/FT combo
+        'cards_ou45',                    # Cards market
+        'handicap_ms',                   # Handicap
+        'odd_even',                      # Odd/Even goals
+    ]
+    
+    # Multi-class markets (output > 2 classes)
+    MULTICLASS_MARKETS = {'ms', 'ht_result', 'htft', 'handicap_ms'}
+    
+    def load_models(self) -> bool:
+        """
+        Load all market-specific models from disk.
+
+        For every market in ALL_MARKETS this looks for
+        'xgb_v25_<market>.json' and 'lgb_v25_<market>.txt' in the models
+        directory; missing or zero-byte files are skipped.
+
+        Models are staged in a local dict and written to ``self.models``
+        only after every file has loaded. If any file fails, ``self.models``
+        is left exactly as it was, so ``has_market`` never reports a market
+        whose entry is empty or half-loaded.
+
+        Returns:
+            True when at least one model file was loaded, False when none
+            were found or an error occurred (the error is printed).
+        """
+        try:
+            loaded_count = 0
+            staged = {}
+
+            for market in self.ALL_MARKETS:
+                market_models = {}
+
+                # Load XGBoost (read content in Python to avoid non-ASCII path issues)
+                xgb_path = os.path.join(self.models_dir, f'xgb_v25_{market}.json')
+                if os.path.exists(xgb_path) and os.path.getsize(xgb_path) > 0:
+                    with open(xgb_path, 'r', encoding='utf-8') as f:
+                        xgb_content = f.read()
+                    booster = xgb.Booster()
+                    booster.load_model(bytearray(xgb_content, 'utf-8'))
+                    market_models['xgb'] = booster
+                    loaded_count += 1
+
+                # Load LightGBM (read content in Python to avoid non-ASCII path issues)
+                lgb_path = os.path.join(self.models_dir, f'lgb_v25_{market}.txt')
+                if os.path.exists(lgb_path) and os.path.getsize(lgb_path) > 0:
+                    with open(lgb_path, 'r', encoding='utf-8') as f:
+                        model_str = f.read()
+                    market_models['lgb'] = lgb.Booster(model_str=model_str)
+                    loaded_count += 1
+
+                # Markets with no model file are simply not registered
+                if market_models:
+                    staged[market] = market_models
+
+            # Commit: replace the entries of the known markets in place, so
+            # the dict object (and any unrelated keys) stays the same.
+            for market in self.ALL_MARKETS:
+                self.models.pop(market, None)
+            self.models.update(staged)
+
+            print(f"[V25] Loaded {loaded_count} model files across {len(self.models)} markets: {list(self.models.keys())}")
+            self._loaded = loaded_count > 0
+            return self._loaded
+
+        except Exception as e:
+            print(f"[ERROR] Error loading models: {e}")
+            import traceback
+            traceback.print_exc()
+            return False
+    
+    def _ensure_loaded(self):
+        """Load the models on first use; raise RuntimeError when loading fails."""
+        if self._loaded or self.load_models():
+            return
+        raise RuntimeError("Failed to load V25 models")
+    
+    def _prepare_features(self, features: Dict[str, float]) -> pd.DataFrame:
+        """Build a one-row DataFrame in FEATURE_COLS order; features missing from the input become 0.0."""
+        row = {}
+        for name in self.FEATURE_COLS:
+            row[name] = features.get(name, 0.0)
+        return pd.DataFrame([row])
+    
+    def predict_ms(self, features: Dict[str, float]) -> tuple:
+        """
+        Predict match result (1X2) by combining the weighted model outputs.
+
+        Each available model's class probabilities are multiplied by its
+        ensemble weight, summed, and renormalised to sum to 1. With no
+        usable model output, (0.33, 0.33, 0.33) is returned.
+
+        Returns:
+            (home_prob, draw_prob, away_prob)
+        """
+        self._ensure_loaded()
+
+        X = self._prepare_features(features)
+        ms_models = self.models.get('ms', {})
+        weighted = []
+
+        if 'xgb' in ms_models:
+            xgb_out = ms_models['xgb'].predict(xgb.DMatrix(X))
+            # A flat output is treated as a single row of class probabilities
+            if len(xgb_out.shape) == 1:
+                xgb_out = np.array([xgb_out])
+            weighted.append(xgb_out[0] * self.DEFAULT_WEIGHTS['xgb'])
+
+        if 'lgb' in ms_models:
+            lgb_out = ms_models['lgb'].predict(X)
+            # Only a 2-D (rows x classes) output is used
+            if len(lgb_out.shape) == 2:
+                weighted.append(lgb_out[0] * self.DEFAULT_WEIGHTS['lgb'])
+
+        if len(weighted) == 0:
+            return 0.33, 0.33, 0.33
+
+        combined = np.sum(weighted, axis=0)
+        normalised = combined / combined.sum()
+
+        return tuple(float(normalised[idx]) for idx in range(3))
+    
+    def predict_ou25(self, features: Dict[str, float]) -> tuple:
+        """
+        Predict Over/Under 2.5 goals as the plain mean of the model outputs.
+
+        With no usable model output, (0.5, 0.5) is returned.
+
+        Returns:
+            (over_prob, under_prob)
+        """
+        self._ensure_loaded()
+
+        X = self._prepare_features(features)
+        market_models = self.models.get('ou25', {})
+        estimates = []
+
+        if 'xgb' in market_models:
+            xgb_out = market_models['xgb'].predict(xgb.DMatrix(X))
+            # Only a 1-D array output is used
+            if isinstance(xgb_out, np.ndarray) and xgb_out.ndim == 1:
+                estimates.append(xgb_out[0])
+
+        if 'lgb' in market_models:
+            lgb_out = market_models['lgb'].predict(X)
+            if isinstance(lgb_out, np.ndarray):
+                estimates.append(lgb_out[0])
+
+        if not estimates:
+            return 0.5, 0.5
+
+        over = np.mean(estimates)
+        return float(over), float(1 - over)
+    
+    def predict_btts(self, features: Dict[str, float]) -> tuple:
+        """
+        Predict Both Teams To Score as the plain mean of the model outputs.
+
+        With no usable model output, (0.5, 0.5) is returned.
+
+        Returns:
+            (yes_prob, no_prob)
+        """
+        self._ensure_loaded()
+
+        X = self._prepare_features(features)
+        btts_models = self.models.get('btts', {})
+        yes_estimates = []
+
+        if 'xgb' in btts_models:
+            xgb_out = btts_models['xgb'].predict(xgb.DMatrix(X))
+            # Only a 1-D array output is used
+            xgb_usable = isinstance(xgb_out, np.ndarray) and len(xgb_out.shape) == 1
+            if xgb_usable:
+                yes_estimates.append(xgb_out[0])
+
+        if 'lgb' in btts_models:
+            lgb_out = btts_models['lgb'].predict(X)
+            if isinstance(lgb_out, np.ndarray):
+                yes_estimates.append(lgb_out[0])
+
+        if len(yes_estimates) == 0:
+            return 0.5, 0.5
+
+        yes_prob = np.mean(yes_estimates)
+        no_prob = 1 - yes_prob
+        return float(yes_prob), float(no_prob)
+    
+    def predict_market(self, market: str, features: Dict[str, float]) -> np.ndarray:
+        """
+        Generic prediction for any loaded market.
+
+        Args:
+            market: Market key (e.g. 'ht_result', 'htft', 'cards_ou45')
+            features: Feature dictionary.
+
+        Returns:
+            numpy array of probabilities, or None when the market is not
+            loaded or no model produced an array.
+            For binary markets: [positive_prob]
+            For multi-class markets: [class0_prob, class1_prob, ...]
+            A single usable model's output is returned as-is; two outputs
+            are combined by weight and, for multi-class markets,
+            renormalised to sum to 1.
+        """
+        self._ensure_loaded()
+
+        if market not in self.models:
+            return None
+
+        X = self._prepare_features(features)
+        multiclass = market in self.MULTICLASS_MARKETS
+        market_models = self.models[market]
+
+        def to_vector(raw):
+            # Reduce a raw model output to one probability vector (None if unusable)
+            if not isinstance(raw, np.ndarray):
+                return None
+            if multiclass and len(raw.shape) == 2:
+                return raw[0]
+            if multiclass and len(raw.shape) == 1:
+                return raw
+            return np.array([raw[0]])
+
+        outputs = []
+        weights = []
+
+        if 'xgb' in market_models:
+            vector = to_vector(market_models['xgb'].predict(xgb.DMatrix(X)))
+            if vector is not None:
+                outputs.append(vector)
+            weights.append(self.DEFAULT_WEIGHTS['xgb'])
+
+        if 'lgb' in market_models:
+            vector = to_vector(market_models['lgb'].predict(X))
+            if vector is not None:
+                outputs.append(vector)
+            weights.append(self.DEFAULT_WEIGHTS['lgb'])
+
+        if not outputs:
+            return None
+
+        # One usable output: nothing to blend
+        if len(outputs) == 1:
+            return outputs[0]
+
+        weight_total = sum(weights[:len(outputs)])
+        blended = np.zeros_like(outputs[0])
+        for vector, weight in zip(outputs, weights):
+            blended += vector * (weight / weight_total)
+
+        if multiclass and blended.sum() > 0:
+            blended = blended / blended.sum()
+
+        return blended
+    
+    def has_market(self, market: str) -> bool:
+        """Return True when ``market`` has an entry in the loaded models dict."""
+        loaded_markets = self.models
+        return market in loaded_markets
+    
+    def predict_match(
+        self,
+        match_id: str,
+        home_team: str,
+        away_team: str,
+        features: Dict[str, float],
+        odds: Optional[Dict[str, float]] = None,
+    ) -> MatchPrediction:
+        """
+        Run the MS, OU25 and BTTS predictors for one match and bundle the results.
+
+        Args:
+            match_id: Match identifier, stored on the result as-is.
+            home_team: Home team name, stored on the result as-is.
+            away_team: Away team name, stored on the result as-is.
+            features: Feature dictionary handed to each market predictor.
+            odds: Optional odds dictionary; when truthy, value bets are
+                detected and attached to the result.
+
+        Returns:
+            MatchPrediction holding the per-market probabilities, the most
+            likely pick per market and its confidence (probability * 100).
+        """
+        def _top_pick(outcomes):
+            # max() keeps the first entry on ties, so the dict order decides.
+            label, prob = max(outcomes.items(), key=lambda item: item[1])
+            return label, prob * 100
+
+        # Raw probabilities per market
+        home_prob, draw_prob, away_prob = self.predict_ms(features)
+        over_prob, under_prob = self.predict_ou25(features)
+        btts_yes_prob, btts_no_prob = self.predict_btts(features)
+
+        # Most likely outcome and its confidence per market
+        ms_pick, ms_confidence = _top_pick(
+            {'1': home_prob, 'X': draw_prob, '2': away_prob}
+        )
+        ou25_pick, ou25_confidence = _top_pick(
+            {'Over': over_prob, 'Under': under_prob}
+        )
+        btts_pick, btts_confidence = _top_pick(
+            {'Yes': btts_yes_prob, 'No': btts_no_prob}
+        )
+
+        prediction = MatchPrediction(
+            match_id=match_id,
+            home_team=home_team,
+            away_team=away_team,
+            home_prob=home_prob,
+            draw_prob=draw_prob,
+            away_prob=away_prob,
+            ms_pick=ms_pick,
+            ms_confidence=ms_confidence,
+            over_prob=over_prob,
+            under_prob=under_prob,
+            ou25_pick=ou25_pick,
+            ou25_confidence=ou25_confidence,
+            btts_yes_prob=btts_yes_prob,
+            btts_no_prob=btts_no_prob,
+            btts_pick=btts_pick,
+            btts_confidence=btts_confidence,
+        )
+
+        # Without odds there is nothing to compare the model against.
+        if not odds:
+            return prediction
+
+        prediction.value_bets = self._detect_value_bets(
+            prediction, odds, home_prob, draw_prob, away_prob,
+            over_prob, under_prob, btts_yes_prob, btts_no_prob
+        )
+        return prediction
+    
+    def _detect_value_bets(
+        self,
+        prediction: MatchPrediction,
+        odds: Dict[str, float],
+        home_prob: float,
+        draw_prob: float,
+        away_prob: float,
+        over_prob: float,
+        under_prob: float,
+        btts_yes_prob: float,
+        btts_no_prob: float,
+    ) -> List[ValueBet]:
+        """
+        Detect value bets by comparing model probabilities with market odds.
+
+        For every outcome whose odds are present and positive, the implied
+        probability is ``1 / odds`` and the edge is ``model_prob - implied``.
+        An outcome is reported when its edge exceeds 5 percentage points.
+
+        Args:
+            prediction: Not read by this method; kept so the call signature
+                stays unchanged for existing callers.
+            odds: Decimal odds keyed by 'ms_h', 'ms_d', 'ms_a', 'ou25_o',
+                'ou25_u', 'btts_y', 'btts_n'. Missing keys, ``None`` values
+                and non-positive odds are skipped.
+            home_prob: Model probability for pick '1'.
+            draw_prob: Model probability for pick 'X'.
+            away_prob: Model probability for pick '2'.
+            over_prob: Model probability for OU25 'Over'.
+            under_prob: Model probability for OU25 'Under'.
+            btts_yes_prob: Model probability for BTTS 'Yes'.
+            btts_no_prob: Model probability for BTTS 'No'.
+
+        Returns:
+            List of ValueBet objects in the order MS (1, X, 2), OU25
+            (Over, Under), BTTS (Yes, No); empty when nothing qualifies.
+        """
+        min_edge = 0.05  # 5% minimum edge
+
+        # (odds key, market type, pick label, model probability), listed in
+        # the order the bets are reported.
+        candidates = (
+            ('ms_h', 'MS', '1', home_prob),
+            ('ms_d', 'MS', 'X', draw_prob),
+            ('ms_a', 'MS', '2', away_prob),
+            ('ou25_o', 'OU25', 'Over', over_prob),
+            ('ou25_u', 'OU25', 'Under', under_prob),
+            ('btts_y', 'BTTS', 'Yes', btts_yes_prob),
+            ('btts_n', 'BTTS', 'No', btts_no_prob),
+        )
+
+        value_bets = []
+        for odds_key, market_type, pick, probability in candidates:
+            price = odds.get(odds_key)
+            # A missing, None or non-positive price has no implied probability.
+            if price is None or not price > 0:
+                continue
+
+            edge = probability - 1 / price
+            if edge > min_edge:
+                value_bets.append(ValueBet(
+                    market_type=market_type,
+                    pick=pick,
+                    probability=probability,
+                    odds=price,
+                    edge=edge,
+                    confidence=probability * 100,
+                ))
+
+        return value_bets
+
+
+# Process-wide V25Predictor, created lazily by get_v25_predictor()
+_v25_predictor: Optional[V25Predictor] = None
+
+
+def get_v25_predictor() -> V25Predictor:
+    """Return the shared V25Predictor, building it and loading its models on first use."""
+    global _v25_predictor
+    if _v25_predictor is not None:
+        return _v25_predictor
+
+    # Store the instance before loading so later calls reuse this same object.
+    _v25_predictor = V25Predictor()
+    _v25_predictor.load_models()
+    return _v25_predictor
@@ -0,0 +1,291 @@
+"""
+V27 Pro Predictor — Odds-Free Fundamentals + Value Edge Detection
+
+This module loads V27 ensemble models (XGBoost, LightGBM, CatBoost)
+and produces market-independent probability estimates.
+
+The key insight: V27 is trained WITHOUT odds features, so it produces
+"true" probabilities unbiased by market pricing. The divergence between
+V25 (odds-aware) and V27 (odds-free) predictions signals market mispricing.
+"""
+
+import json
+import logging
+import os
+import pickle
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+
+import numpy as np
+
+# Module-level logger; messages emitted in this module are prefixed "[V27]".
+logger = logging.getLogger(__name__)
+
+# Directory next to this file from which the feature-column spec and the
+# pickled V27 models are read.
+V27_DIR = Path(__file__).parent / "v27"
+
+
+class V27Predictor:
+    """
+    Ensemble predictor for the V27 odds-free models.
+
+    Loads the pickled models found in ``V27_DIR`` together with the ordered
+    feature-column list, and averages the class probabilities of every model
+    available for a market.
+
+    Attributes:
+        models: ``{market: {label: model}}`` for each successfully loaded model.
+        feature_cols: Ordered feature names read from ``v27_feature_cols.json``.
+    """
+
+    MARKETS = ["ms", "ou25"]
+
+    # Short model label -> legacy, market-agnostic file name.
+    _GENERIC_MODEL_FILES = {
+        "xgb": "v27_xgboost.pkl",
+        "lgb": "v27_lightgbm.pkl",
+        "cb": "v27_catboost.pkl",
+    }
+    # The legacy generic files are a valid fallback for this market only;
+    # loading them for another market would pair a model with the wrong task.
+    _GENERIC_FALLBACK_MARKET = "ms"
+
+    def __init__(self):
+        self.models: Dict[str, Dict[str, object]] = {}
+        self.feature_cols: List[str] = []
+        self._loaded = False
+
+    def load_models(self) -> bool:
+        """
+        Load the feature-column spec and every available V27 model.
+
+        Returns:
+            True when at least one model was loaded (or models were already
+            loaded by an earlier call); False when the feature-column file is
+            missing or unreadable, or when no model could be loaded.
+        """
+        if self._loaded:
+            return True
+
+        # Feature columns
+        cols_path = V27_DIR / "v27_feature_cols.json"
+        if not cols_path.exists():
+            logger.error("[V27] Feature columns file not found: %s", cols_path)
+            return False
+
+        try:
+            with open(cols_path, "r", encoding="utf-8") as f:
+                self.feature_cols = json.load(f)
+            logger.info("[V27] Loaded %d feature columns", len(self.feature_cols))
+        except Exception as e:
+            logger.error("[V27] Failed to load feature columns: %s", e)
+            return False
+
+        # Load models per market
+        for market in self.MARKETS:
+            self.models[market] = {}
+            for label in self._GENERIC_MODEL_FILES:
+                path = self._resolve_model_path(market, label)
+                if path is None:
+                    logger.warning("[V27] Model file not found for %s/%s", market, label)
+                    continue
+
+                try:
+                    # NOTE(security): pickle.load can execute arbitrary code;
+                    # V27_DIR must only ever contain trusted artifacts.
+                    with open(path, "rb") as f:
+                        model = pickle.load(f)
+                    self.models[market][label] = model
+                    logger.info("[V27] ✓ Loaded %s/%s from %s", market, label, path.name)
+                except Exception as e:
+                    # One broken artifact must not prevent the others from loading.
+                    logger.error("[V27] ✗ Failed to load %s/%s: %s", market, label, e)
+
+        loaded_count = sum(len(v) for v in self.models.values())
+        if loaded_count == 0:
+            logger.error("[V27] No models loaded!")
+            return False
+
+        self._loaded = True
+        logger.info("[V27] Total models loaded: %d across %d markets", loaded_count, len(self.models))
+        return True
+
+    def _resolve_model_path(self, market: str, label: str) -> Optional[Path]:
+        """Return the model file for ``market``/``label``, or None when absent."""
+        # Preferred: market-specific file, e.g. v27_ms_xgb.pkl
+        specific = V27_DIR / f"v27_{market}_{label}.pkl"
+        if specific.exists():
+            return specific
+
+        # Fallback to the generic file, e.g. v27_xgboost.pkl (MS only)
+        if market == self._GENERIC_FALLBACK_MARKET:
+            generic = V27_DIR / self._GENERIC_MODEL_FILES[label]
+            if generic.exists():
+                return generic
+        return None
+
+    def _build_feature_array(self, features: Dict[str, float]) -> np.ndarray:
+        """
+        Build the ordered ``(1, len(feature_cols))`` array from a feature dict.
+
+        Only the columns listed in ``self.feature_cols`` are used; a column
+        that is missing from ``features`` or mapped to ``None`` becomes 0.0.
+        """
+        row = []
+        for col in self.feature_cols:
+            value = features.get(col)
+            row.append(0.0 if value is None else float(value))
+        return np.array([row])
+
+    @staticmethod
+    def _coerce_proba(raw, expected_classes: int) -> Optional[np.ndarray]:
+        """
+        Turn a raw ``predict`` output into a 1-D vector of class probabilities.
+
+        Accepted shapes: ``(n, expected_classes)`` (first row is used), a 1-D
+        array holding the positive-class probability of a binary model, or a
+        1-D array that already has ``expected_classes`` entries.
+
+        Returns:
+            The probability vector, or None when the output cannot be
+            interpreted (wrong type/shape, or a binary score outside [0, 1]).
+        """
+        if not isinstance(raw, np.ndarray):
+            return None
+        if raw.ndim == 2 and raw.shape[1] == expected_classes:
+            return raw[0]
+        if raw.ndim != 1 or raw.size == 0:
+            return None
+        if expected_classes == 2:
+            p = float(raw[0])
+            # A value outside [0, 1] is a label or a raw margin, not a probability.
+            if not 0.0 <= p <= 1.0:
+                return None
+            return np.array([1.0 - p, p])
+        if len(raw) == expected_classes:
+            return raw
+        return None
+
+    def _predict_with_model(self, model, X: np.ndarray, label: str, expected_classes: int) -> Optional[np.ndarray]:
+        """
+        Predict class probabilities with one model of the ensemble.
+
+        Handles sklearn-style wrappers (``predict_proba``), raw
+        ``xgboost.Booster`` / ``lightgbm.Booster`` objects and, as a last
+        resort, any other object that only exposes ``predict``.
+
+        Args:
+            model: Loaded model object.
+            X: Feature array as built by ``_build_feature_array``.
+            label: Human-readable tag used in log messages.
+            expected_classes: Number of classes the market has.
+
+        Returns:
+            1-D probability vector of length ``expected_classes``, or None
+            when the model fails or produces an unusable output.
+        """
+        # Each backend is optional: a missing library must not disable the
+        # models of the other libraries.
+        try:
+            import xgboost as xgb
+        except ImportError:
+            xgb = None
+        try:
+            import lightgbm as lgbm
+        except ImportError:
+            lgbm = None
+
+        # 1. sklearn-style predict_proba first
+        has_predict_proba = hasattr(model, 'predict_proba')
+        if has_predict_proba:
+            try:
+                proba = np.asarray(model.predict_proba(X)[0])
+            except Exception as e:
+                logger.warning("[V27] %s predict_proba failed: %s", label, e)
+            else:
+                if proba.ndim == 1 and len(proba) == expected_classes:
+                    return proba
+                logger.warning("[V27] %s predict_proba returned %d classes, expected %d", label, proba.size, expected_classes)
+
+        # 2. Pick the raw prediction backend
+        if xgb is not None and isinstance(model, xgb.Booster):
+            backend = "xgb.Booster"
+        elif lgbm is not None and isinstance(model, lgbm.Booster):
+            backend = "lgb.Booster"
+        elif hasattr(model, 'predict') and not has_predict_proba:
+            backend = "generic"
+        else:
+            # A classifier's predict() returns class labels, not probabilities,
+            # so it is never used as a substitute for predict_proba.
+            return None
+
+        try:
+            if backend == "xgb.Booster":
+                # DMatrix MUST carry feature_names to match the training schema
+                dmat = xgb.DMatrix(X, feature_names=self.feature_cols or None)
+                raw = model.predict(dmat)
+            elif backend == "lgb.Booster" and self.feature_cols:
+                # Pass a DataFrame so the column names travel with the data
+                import pandas as pd
+                raw = model.predict(pd.DataFrame(X, columns=self.feature_cols))
+            else:
+                raw = model.predict(X)
+        except Exception as e:
+            logger.warning("[V27] %s %s predict failed: %s", label, backend, e)
+            return None
+
+        proba = self._coerce_proba(raw, expected_classes)
+        if proba is None:
+            logger.warning("[V27] %s %s returned an unusable output", label, backend)
+        return proba
+
+    def _ensemble_average(self, market: str, features: Dict[str, float], expected_classes: int) -> Optional[np.ndarray]:
+        """
+        Average the probability vectors of every usable model of ``market``.
+
+        Returns None when models are not loaded, the market has no models, or
+        no model produced a vector of length ``expected_classes``.
+        """
+        market_models = self.models.get(market)
+        if not self._loaded or not market_models:
+            return None
+
+        X = self._build_feature_array(features)
+        tag = market.upper()
+        probs_list = []
+        for label, model in market_models.items():
+            proba = self._predict_with_model(model, X, f"{tag}/{label}", expected_classes)
+            if proba is not None and len(proba) == expected_classes:
+                probs_list.append(proba)
+
+        if not probs_list:
+            return None
+        # Unweighted ensemble average
+        return np.mean(probs_list, axis=0)
+
+    def predict_ms(self, features: Dict[str, float]) -> Optional[Dict[str, float]]:
+        """
+        Predict match-result probabilities (home / draw / away).
+
+        Returns:
+            Dict with keys ``home``, ``draw``, ``away`` taken from class
+            indices 0, 1, 2 of the ensemble average, or None when no
+            prediction is available.
+        """
+        avg = self._ensemble_average("ms", features, expected_classes=3)
+        if avg is None:
+            return None
+        return {
+            "home": float(avg[0]),
+            "draw": float(avg[1]),
+            "away": float(avg[2]),
+        }
+
+    def predict_ou25(self, features: Dict[str, float]) -> Optional[Dict[str, float]]:
+        """
+        Predict Over/Under 2.5 probabilities.
+
+        Returns:
+            Dict with keys ``under`` (class index 0) and ``over`` (class
+            index 1), or None when no prediction is available.
+        """
+        avg = self._ensemble_average("ou25", features, expected_classes=2)
+        if avg is None:
+            return None
+        return {
+            "under": float(avg[0]),
+            "over": float(avg[1]),
+        }
+
+    def predict_all(self, features: Dict[str, float]) -> Dict[str, Optional[Dict[str, float]]]:
+        """Run predictions for all supported markets, keyed by market name."""
+        return {
+            "ms": self.predict_ms(features),
+            "ou25": self.predict_ou25(features),
+        }
+
+
+def compute_divergence(
+    v25_probs: Dict[str, float],
+    v27_probs: Dict[str, float],
+    missing_prob: float = 0.33,
+) -> Dict[str, float]:
+    """
+    Compute the divergence signal between V25 (odds-aware) and V27 (odds-free).
+
+    Positive divergence = V27 rates the outcome MORE likely than V25 does
+    Negative divergence = V27 rates the outcome LESS likely than V25 does
+
+    Args:
+        v25_probs: V25 probabilities keyed by outcome.
+        v27_probs: V27 probabilities keyed by outcome; its keys define the
+            outcomes that are reported.
+        missing_prob: V25 probability assumed for an outcome absent from
+            ``v25_probs``. The default 0.33 suits a three-way market; pass
+            0.5 for a two-way market such as over/under.
+
+    Returns:
+        ``{outcome: v27 - v25}`` rounded to 4 decimals.
+    """
+    return {
+        key: round(v27_val - v25_probs.get(key, missing_prob), 4)
+        for key, v27_val in v27_probs.items()
+    }
+
+
+def compute_value_edge(
+    v25_probs: Dict[str, float],
+    v27_probs: Dict[str, float],
+    odds: Dict[str, float],
+    min_edge: float = 0.05,
+    min_divergence: float = 0.02,
+    missing_prob: float = 0.33,
+) -> Dict[str, Dict]:
+    """
+    Detect value bets by combining V25/V27 divergence with odds.
+
+    A value bet exists when both hold:
+    1. V27 (odds-free) probability exceeds the odds-implied probability by
+       more than ``min_edge``
+    2. V27 exceeds V25 by more than ``min_divergence``
+
+    Args:
+        v25_probs: V25 probabilities keyed by outcome.
+        v27_probs: V27 probabilities keyed by outcome; its keys define the
+            outcomes that are reported.
+        odds: Decimal odds keyed by the same outcome names. Missing, ``None``
+            or <= 1.01 odds are treated as unavailable (implied_prob and edge
+            become 0.0, so the outcome is never flagged).
+        min_edge: Minimum edge over the implied probability (default 5%).
+        min_divergence: Minimum V27-over-V25 divergence (default 2%).
+        missing_prob: V25 probability assumed for an outcome absent from
+            ``v25_probs``. The default 0.33 suits a three-way market; pass
+            0.5 for a two-way market.
+
+    Returns:
+        Per outcome: { v27_prob, v25_prob, implied_prob, divergence, edge,
+        is_value }, with the numeric fields rounded to 4 decimals.
+    """
+    results = {}
+    for key, v27_p in v27_probs.items():
+        v25_p = v25_probs.get(key, missing_prob)
+        # A None price is handled like a missing one.
+        odds_val = odds.get(key) or 0.0
+
+        implied_p = (1.0 / odds_val) if odds_val > 1.01 else 0.0
+        divergence = v27_p - v25_p
+        edge = v27_p - implied_p if implied_p > 0 else 0.0
+
+        results[key] = {
+            "v27_prob": round(v27_p, 4),
+            "v25_prob": round(v25_p, 4),
+            "implied_prob": round(implied_p, 4),
+            "divergence": round(divergence, 4),
+            "edge": round(edge, 4),
+            # Thresholds are checked on the unrounded values.
+            "is_value": bool(edge > min_edge and divergence > min_divergence),
+        }
+
+    return results