This commit is contained in:
@@ -0,0 +1,413 @@
|
||||
"""
|
||||
Calibration Module for XGBoost Models
|
||||
=====================================
|
||||
Calibrates raw probabilities from XGBoost models using Isotonic Regression.
|
||||
Ensures that a predicted probability of 70% actually corresponds to a 70% win rate.
|
||||
|
||||
Usage:
|
||||
from ai_engine.models.calibration import Calibrator
|
||||
calibrator = Calibrator()
|
||||
calibrated_prob = calibrator.calibrate("ms", raw_prob)
|
||||
|
||||
# Training new calibration models:
|
||||
calibrator.train_calibration(valid_df, market="ms")
|
||||
"""
|
||||
|
||||
import os
|
||||
import pickle
|
||||
import json
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
from sklearn.isotonic import IsotonicRegression
|
||||
from sklearn.calibration import calibration_curve
|
||||
from sklearn.metrics import brier_score_loss
|
||||
|
||||
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
CALIBRATION_DIR = os.path.join(AI_ENGINE_DIR, "models", "calibration")
|
||||
|
||||
os.makedirs(CALIBRATION_DIR, exist_ok=True)
|
||||
|
||||
# Supported markets for calibration
|
||||
SUPPORTED_MARKETS = [
|
||||
"ms", # Match Result (1X2) - multi-class, calibrated per class
|
||||
"ms_home", # Standard Home win probability
|
||||
"ms_home_heavy_fav", # Context: home odds <= 1.40
|
||||
"ms_home_fav", # Context: 1.40 < home odds <= 1.80
|
||||
"ms_home_balanced", # Context: 1.80 < home odds <= 2.50
|
||||
"ms_home_underdog", # Context: home odds > 2.50
|
||||
"ms_draw", # Draw probability
|
||||
"ms_away", # Away win probability
|
||||
"ou15", # Over/Under 1.5
|
||||
"ou25", # Over/Under 2.5
|
||||
"ou35", # Over/Under 3.5
|
||||
"btts", # Both Teams to Score
|
||||
"ht_ft", # Half-Time/Full-Time
|
||||
"dc", # Double Chance
|
||||
"ht", # Half-Time Result
|
||||
]
|
||||
|
||||
|
||||
class CalibrationMetrics:
|
||||
"""Stores calibration quality metrics for a market."""
|
||||
|
||||
def __init__(self):
|
||||
self.brier_score: float = 0.0
|
||||
self.calibration_error: float = 0.0
|
||||
self.sample_count: int = 0
|
||||
self.last_trained: str = ""
|
||||
self.mean_predicted: float = 0.0
|
||||
self.mean_actual: float = 0.0
|
||||
|
||||
def to_dict(self) -> Dict:
|
||||
return {
|
||||
"brier_score": round(self.brier_score, 4),
|
||||
"calibration_error": round(self.calibration_error, 4),
|
||||
"sample_count": self.sample_count,
|
||||
"last_trained": self.last_trained,
|
||||
"mean_predicted": round(self.mean_predicted, 4),
|
||||
"mean_actual": round(self.mean_actual, 4),
|
||||
}
|
||||
|
||||
|
||||
class Calibrator:
|
||||
"""
|
||||
Probability calibration using Isotonic Regression.
|
||||
|
||||
Isotonic Regression is a non-parametric method that fits a piecewise
|
||||
constant function that is monotonically increasing. It's ideal for
|
||||
calibrating probabilities because:
|
||||
|
||||
1. It preserves ranking (if P(A) > P(B) before, P(A) > P(B) after)
|
||||
2. It doesn't assume a specific distribution shape
|
||||
3. It can correct systematic over/under-confidence
|
||||
|
||||
Example:
|
||||
# Before calibration: model predicts 70% but actual win rate is 60%
|
||||
# After calibration: model predicts 70% → calibrated to 60%
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.calibrators: Dict[str, IsotonicRegression] = {}
|
||||
self.metrics: Dict[str, CalibrationMetrics] = {}
|
||||
self.heuristic_fallback: Dict[str, float] = {
|
||||
"ms": 0.90,
|
||||
"ms_home": 0.90,
|
||||
"ms_home_heavy_fav": 0.95,
|
||||
"ms_home_fav": 0.90,
|
||||
"ms_home_balanced": 0.85,
|
||||
"ms_home_underdog": 0.80,
|
||||
"ms_draw": 0.90,
|
||||
"ms_away": 0.90,
|
||||
"ou15": 0.90,
|
||||
"ou25": 0.90,
|
||||
"ou35": 0.90,
|
||||
"btts": 0.90,
|
||||
"ht_ft": 0.85,
|
||||
"dc": 0.93,
|
||||
"ht": 0.85,
|
||||
}
|
||||
self._load_calibrators()
|
||||
|
||||
def _load_calibrators(self):
|
||||
"""Load trained calibrators for each market from disk."""
|
||||
for market in SUPPORTED_MARKETS:
|
||||
model_path = os.path.join(CALIBRATION_DIR, f"{market}_calibrator.pkl")
|
||||
metrics_path = os.path.join(CALIBRATION_DIR, f"{market}_metrics.json")
|
||||
|
||||
if os.path.exists(model_path):
|
||||
try:
|
||||
with open(model_path, "rb") as f:
|
||||
self.calibrators[market] = pickle.load(f)
|
||||
print(f"[Calibrator] Loaded calibration model for {market}")
|
||||
except Exception as e:
|
||||
print(f"[Calibrator] Warning: Failed to load {market}: {e}")
|
||||
|
||||
if os.path.exists(metrics_path):
|
||||
try:
|
||||
with open(metrics_path, "r") as f:
|
||||
data = json.load(f)
|
||||
metrics = CalibrationMetrics()
|
||||
metrics.brier_score = data.get("brier_score", 0.0)
|
||||
metrics.calibration_error = data.get("calibration_error", 0.0)
|
||||
metrics.sample_count = data.get("sample_count", 0)
|
||||
metrics.last_trained = data.get("last_trained", "")
|
||||
metrics.mean_predicted = data.get("mean_predicted", 0.0)
|
||||
metrics.mean_actual = data.get("mean_actual", 0.0)
|
||||
self.metrics[market] = metrics
|
||||
except Exception as e:
|
||||
print(f"[Calibrator] Warning: Failed to load metrics for {market}: {e}")
|
||||
|
||||
def calibrate(self, market_type: str, raw_prob: float, odds_val: Optional[float] = None) -> float:
|
||||
"""
|
||||
Calibrate a raw probability using Isotonic Regression.
|
||||
|
||||
Args:
|
||||
market_type (str): 'ms_home', 'ou25', 'btts', 'ht_ft', etc.
|
||||
raw_prob (float): The raw probability from XGBoost (0.0 - 1.0)
|
||||
odds_val (float, optional): The pre-match odds, used for context-aware bucket mapping
|
||||
|
||||
Returns:
|
||||
float: Calibrated probability (0.0 - 1.0)
|
||||
"""
|
||||
# Normalize market type
|
||||
market_key = market_type.lower().replace("-", "_")
|
||||
|
||||
# Route to bucket if ms_home and odds provided
|
||||
if market_key == "ms_home" and odds_val is not None and odds_val > 1.0:
|
||||
if odds_val <= 1.40:
|
||||
bucket_key = "ms_home_heavy_fav"
|
||||
elif odds_val <= 1.80:
|
||||
bucket_key = "ms_home_fav"
|
||||
elif odds_val <= 2.50:
|
||||
bucket_key = "ms_home_balanced"
|
||||
else:
|
||||
bucket_key = "ms_home_underdog"
|
||||
|
||||
if bucket_key in self.calibrators:
|
||||
market_key = bucket_key
|
||||
|
||||
# If we have a trained Isotonic Regression model, use it
|
||||
if market_key in self.calibrators:
|
||||
try:
|
||||
calibrated = self.calibrators[market_key].predict([raw_prob])[0]
|
||||
# Ensure output is valid probability
|
||||
return float(np.clip(calibrated, 0.01, 0.99))
|
||||
except Exception as e:
|
||||
print(f"[Calibrator] Warning: Isotonic failed for {market_key}: {e}")
|
||||
# Fall through to heuristic
|
||||
|
||||
# Fallback to heuristic calibration
|
||||
return self._heuristic_calibrate(market_key, raw_prob)
|
||||
|
||||
def _heuristic_calibrate(self, market_type: str, raw_prob: float) -> float:
|
||||
"""
|
||||
Heuristic calibration fallback when no trained model exists.
|
||||
|
||||
This applies a conservative shrinkage towards the mean:
|
||||
- Binary markets (OU, BTTS): shrink towards 0.5
|
||||
- Multi-class (MS): shrink towards 0.33
|
||||
- HT/FT: stronger shrinkage due to higher variance
|
||||
"""
|
||||
# Get shrinkage factor for this market
|
||||
shrinkage = self.heuristic_fallback.get(market_type, 0.90)
|
||||
|
||||
if market_type in ["ms", "ms_home", "ms_home_heavy_fav", "ms_home_fav", "ms_home_balanced", "ms_home_underdog", "ms_draw", "ms_away"]:
|
||||
# Pull towards 0.33 (uniform for 3-class)
|
||||
return (raw_prob * shrinkage) + (0.33 * (1.0 - shrinkage))
|
||||
|
||||
elif market_type in ["ou15", "ou25", "ou35", "btts"]:
|
||||
# Pull towards 0.5 (uniform for binary)
|
||||
return (raw_prob * shrinkage) + (0.5 * (1.0 - shrinkage))
|
||||
|
||||
elif market_type in ["ht_ft", "ht"]:
|
||||
# Stronger shrinkage for high-variance markets
|
||||
return raw_prob * shrinkage
|
||||
|
||||
elif market_type == "dc":
|
||||
# Double chance is more reliable
|
||||
return (raw_prob * shrinkage) + (0.66 * (1.0 - shrinkage))
|
||||
|
||||
return raw_prob
|
||||
|
||||
def train_calibration(
|
||||
self,
|
||||
df: pd.DataFrame,
|
||||
market: str,
|
||||
prob_col: str,
|
||||
actual_col: str,
|
||||
min_samples: int = 100,
|
||||
save: bool = True,
|
||||
) -> CalibrationMetrics:
|
||||
"""
|
||||
Train an Isotonic Regression calibration model for a specific market.
|
||||
|
||||
Args:
|
||||
df: DataFrame with predictions and actual outcomes
|
||||
market: Market identifier (e.g., 'ms_home', 'ou25', 'btts')
|
||||
prob_col: Column name for raw probabilities
|
||||
actual_col: Column name for actual outcomes (0 or 1)
|
||||
min_samples: Minimum samples required to train
|
||||
save: Whether to save the model to disk
|
||||
|
||||
Returns:
|
||||
CalibrationMetrics with quality metrics
|
||||
"""
|
||||
# Filter valid data
|
||||
valid_df = df[[prob_col, actual_col]].dropna()
|
||||
n_samples = len(valid_df)
|
||||
|
||||
if n_samples < min_samples:
|
||||
print(f"[Calibrator] Warning: Only {n_samples} samples for {market}, "
|
||||
f"need at least {min_samples}")
|
||||
metrics = CalibrationMetrics()
|
||||
metrics.sample_count = n_samples
|
||||
return metrics
|
||||
|
||||
# Extract arrays
|
||||
raw_probs = valid_df[prob_col].values
|
||||
actuals = valid_df[actual_col].values
|
||||
|
||||
# Train Isotonic Regression
|
||||
iso = IsotonicRegression(out_of_bounds="clip", increasing=True)
|
||||
iso.fit(raw_probs, actuals)
|
||||
|
||||
# Calculate calibrated probabilities
|
||||
calibrated_probs = iso.predict(raw_probs)
|
||||
|
||||
# Calculate metrics
|
||||
metrics = CalibrationMetrics()
|
||||
metrics.sample_count = n_samples
|
||||
metrics.last_trained = datetime.utcnow().isoformat()
|
||||
metrics.brier_score = brier_score_loss(actuals, calibrated_probs)
|
||||
metrics.mean_predicted = np.mean(raw_probs)
|
||||
metrics.mean_actual = np.mean(actuals)
|
||||
|
||||
# Calculate Expected Calibration Error (ECE)
|
||||
metrics.calibration_error = self._calculate_ece(
|
||||
calibrated_probs, actuals, n_bins=10
|
||||
)
|
||||
|
||||
# Store in memory
|
||||
self.calibrators[market] = iso
|
||||
self.metrics[market] = metrics
|
||||
|
||||
# Save to disk
|
||||
if save:
|
||||
self._save_calibration(market, iso, metrics)
|
||||
|
||||
print(f"[Calibrator] Trained {market}: "
|
||||
f"Brier={metrics.brier_score:.4f}, "
|
||||
f"ECE={metrics.calibration_error:.4f}, "
|
||||
f"n={n_samples}")
|
||||
|
||||
return metrics
|
||||
|
||||
def train_all_markets(
|
||||
self,
|
||||
df: pd.DataFrame,
|
||||
market_config: Dict[str, Tuple[str, str]],
|
||||
min_samples: int = 100,
|
||||
) -> Dict[str, CalibrationMetrics]:
|
||||
"""
|
||||
Train calibration models for multiple markets at once.
|
||||
|
||||
Args:
|
||||
df: DataFrame with all predictions and outcomes
|
||||
market_config: Dict mapping market -> (prob_col, actual_col)
|
||||
e.g., {'ou25': ('ou25_over_prob', 'ou25_over_actual')}
|
||||
min_samples: Minimum samples per market
|
||||
|
||||
Returns:
|
||||
Dict of market -> CalibrationMetrics
|
||||
"""
|
||||
results = {}
|
||||
|
||||
for market, (prob_col, actual_col) in market_config.items():
|
||||
print(f"\n[Calibrator] Training {market}...")
|
||||
try:
|
||||
metrics = self.train_calibration(
|
||||
df=df,
|
||||
market=market,
|
||||
prob_col=prob_col,
|
||||
actual_col=actual_col,
|
||||
min_samples=min_samples,
|
||||
save=True,
|
||||
)
|
||||
results[market] = metrics
|
||||
except Exception as e:
|
||||
print(f"[Calibrator] Failed to train {market}: {e}")
|
||||
|
||||
return results
|
||||
|
||||
def _calculate_ece(
|
||||
self,
|
||||
probs: np.ndarray,
|
||||
actuals: np.ndarray,
|
||||
n_bins: int = 10
|
||||
) -> float:
|
||||
"""
|
||||
Calculate Expected Calibration Error (ECE).
|
||||
|
||||
ECE = sum(|bin_accuracy - bin_confidence| * bin_weight)
|
||||
|
||||
Lower is better. Perfect calibration = 0.
|
||||
"""
|
||||
bin_boundaries = np.linspace(0, 1, n_bins + 1)
|
||||
ece = 0.0
|
||||
|
||||
for i in range(n_bins):
|
||||
in_bin = (probs >= bin_boundaries[i]) & (probs < bin_boundaries[i + 1])
|
||||
prop_in_bin = np.mean(in_bin)
|
||||
|
||||
if prop_in_bin > 0:
|
||||
accuracy_in_bin = np.mean(actuals[in_bin])
|
||||
avg_confidence_in_bin = np.mean(probs[in_bin])
|
||||
ece += np.abs(accuracy_in_bin - avg_confidence_in_bin) * prop_in_bin
|
||||
|
||||
return ece
|
||||
|
||||
def _save_calibration(
|
||||
self,
|
||||
market: str,
|
||||
calibrator: IsotonicRegression,
|
||||
metrics: CalibrationMetrics
|
||||
):
|
||||
"""Save calibration model and metrics to disk."""
|
||||
# Save model
|
||||
model_path = os.path.join(CALIBRATION_DIR, f"{market}_calibrator.pkl")
|
||||
with open(model_path, "wb") as f:
|
||||
pickle.dump(calibrator, f)
|
||||
|
||||
# Save metrics
|
||||
metrics_path = os.path.join(CALIBRATION_DIR, f"{market}_metrics.json")
|
||||
with open(metrics_path, "w") as f:
|
||||
json.dump(metrics.to_dict(), f, indent=2)
|
||||
|
||||
print(f"[Calibrator] Saved {market} to {CALIBRATION_DIR}")
|
||||
|
||||
def get_calibration_report(self) -> Dict[str, Any]:
|
||||
"""Generate a summary report of all calibration models."""
|
||||
report = {
|
||||
"trained_markets": list(self.calibrators.keys()),
|
||||
"metrics": {},
|
||||
"heuristic_only": [],
|
||||
}
|
||||
|
||||
for market in SUPPORTED_MARKETS:
|
||||
if market in self.metrics:
|
||||
report["metrics"][market] = self.metrics[market].to_dict()
|
||||
elif market not in self.calibrators:
|
||||
report["heuristic_only"].append(market)
|
||||
|
||||
return report
|
||||
|
||||
def get_calibrated_probabilities(
|
||||
self,
|
||||
market: str,
|
||||
raw_probs: np.ndarray
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Batch calibration for array of probabilities.
|
||||
|
||||
Args:
|
||||
market: Market type
|
||||
raw_probs: Array of raw probabilities
|
||||
|
||||
Returns:
|
||||
Array of calibrated probabilities
|
||||
"""
|
||||
return np.array([self.calibrate(market, p) for p in raw_probs])
|
||||
|
||||
|
||||
# Singleton instance
|
||||
_calibrator_instance: Optional[Calibrator] = None
|
||||
|
||||
|
||||
def get_calibrator() -> Calibrator:
|
||||
"""Get or create the global Calibrator instance."""
|
||||
global _calibrator_instance
|
||||
if _calibrator_instance is None:
|
||||
_calibrator_instance = Calibrator()
|
||||
return _calibrator_instance
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,645 @@
|
||||
"""
|
||||
V25 Ensemble Predictor - NO TARGET LEAKAGE
|
||||
===========================================
|
||||
Multi-model ensemble for match prediction using XGBoost and LightGBM.
|
||||
|
||||
Features:
|
||||
- 73 engineered features (NO target leakage)
|
||||
- Market-specific models (MS, OU25, BTTS)
|
||||
- Weighted ensemble predictions
|
||||
- Value bet detection
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from typing import Dict, List, Optional, Any
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
import xgboost as xgb
|
||||
import lightgbm as lgb
|
||||
|
||||
# CatBoost is optional
|
||||
try:
|
||||
from catboost import CatBoostClassifier
|
||||
CATBOOST_AVAILABLE = True
|
||||
except ImportError:
|
||||
CatBoostClassifier = None
|
||||
CATBOOST_AVAILABLE = False
|
||||
|
||||
# Paths
|
||||
MODELS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'v25')
|
||||
|
||||
|
||||
@dataclass
|
||||
class MarketPrediction:
|
||||
"""Prediction for a single betting market."""
|
||||
market_type: str
|
||||
pick: str
|
||||
probability: float
|
||||
confidence: float
|
||||
odds: float = 0.0
|
||||
is_value_bet: bool = False
|
||||
edge: float = 0.0
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
'market_type': self.market_type,
|
||||
'pick': self.pick,
|
||||
'probability': round(self.probability * 100, 1),
|
||||
'confidence': round(self.confidence, 1),
|
||||
'odds': self.odds,
|
||||
'is_value_bet': self.is_value_bet,
|
||||
'edge': round(self.edge * 100, 1),
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class ValueBet:
|
||||
"""Detected value bet opportunity."""
|
||||
market_type: str
|
||||
pick: str
|
||||
probability: float
|
||||
odds: float
|
||||
edge: float
|
||||
confidence: float
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
'market_type': self.market_type,
|
||||
'pick': self.pick,
|
||||
'probability': round(self.probability * 100, 1),
|
||||
'odds': self.odds,
|
||||
'edge': round(self.edge * 100, 1),
|
||||
'confidence': round(self.confidence, 1),
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class MatchPrediction:
|
||||
"""Complete match prediction with all markets."""
|
||||
match_id: str
|
||||
home_team: str
|
||||
away_team: str
|
||||
|
||||
# MS predictions
|
||||
home_prob: float = 0.0
|
||||
draw_prob: float = 0.0
|
||||
away_prob: float = 0.0
|
||||
ms_pick: str = ''
|
||||
ms_confidence: float = 0.0
|
||||
|
||||
# OU25 predictions
|
||||
over_prob: float = 0.0
|
||||
under_prob: float = 0.0
|
||||
ou25_pick: str = ''
|
||||
ou25_confidence: float = 0.0
|
||||
|
||||
# BTTS predictions
|
||||
btts_yes_prob: float = 0.0
|
||||
btts_no_prob: float = 0.0
|
||||
btts_pick: str = ''
|
||||
btts_confidence: float = 0.0
|
||||
|
||||
# Value bets
|
||||
value_bets: List[ValueBet] = field(default_factory=list)
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
'match_id': self.match_id,
|
||||
'home_team': self.home_team,
|
||||
'away_team': self.away_team,
|
||||
'ms': {
|
||||
'home_prob': round(self.home_prob * 100, 1),
|
||||
'draw_prob': round(self.draw_prob * 100, 1),
|
||||
'away_prob': round(self.away_prob * 100, 1),
|
||||
'pick': self.ms_pick,
|
||||
'confidence': round(self.ms_confidence, 1),
|
||||
},
|
||||
'ou25': {
|
||||
'over_prob': round(self.over_prob * 100, 1),
|
||||
'under_prob': round(self.under_prob * 100, 1),
|
||||
'pick': self.ou25_pick,
|
||||
'confidence': round(self.ou25_confidence, 1),
|
||||
},
|
||||
'btts': {
|
||||
'yes_prob': round(self.btts_yes_prob * 100, 1),
|
||||
'no_prob': round(self.btts_no_prob * 100, 1),
|
||||
'pick': self.btts_pick,
|
||||
'confidence': round(self.btts_confidence, 1),
|
||||
},
|
||||
'value_bets': [vb.to_dict() for vb in self.value_bets],
|
||||
}
|
||||
|
||||
|
||||
class V25Predictor:
|
||||
"""
|
||||
V25 Ensemble Predictor - NO TARGET LEAKAGE
|
||||
|
||||
Uses market-specific XGBoost and LightGBM models.
|
||||
Each market (MS, OU25, BTTS) has its own trained models.
|
||||
"""
|
||||
|
||||
# Feature columns (82 features, NO target leakage)
|
||||
FEATURE_COLS = [
|
||||
# ELO Features (8)
|
||||
'home_overall_elo', 'away_overall_elo', 'elo_diff',
|
||||
'home_home_elo', 'away_away_elo',
|
||||
'home_form_elo', 'away_form_elo', 'form_elo_diff',
|
||||
|
||||
# Form Features (12)
|
||||
'home_goals_avg', 'home_conceded_avg',
|
||||
'away_goals_avg', 'away_conceded_avg',
|
||||
'home_clean_sheet_rate', 'away_clean_sheet_rate',
|
||||
'home_scoring_rate', 'away_scoring_rate',
|
||||
'home_winning_streak', 'away_winning_streak',
|
||||
'home_unbeaten_streak', 'away_unbeaten_streak',
|
||||
|
||||
# H2H Features (6)
|
||||
'h2h_total_matches', 'h2h_home_win_rate', 'h2h_draw_rate',
|
||||
'h2h_avg_goals', 'h2h_btts_rate', 'h2h_over25_rate',
|
||||
|
||||
# Team Stats Features (8)
|
||||
'home_avg_possession', 'away_avg_possession',
|
||||
'home_avg_shots_on_target', 'away_avg_shots_on_target',
|
||||
'home_shot_conversion', 'away_shot_conversion',
|
||||
'home_avg_corners', 'away_avg_corners',
|
||||
|
||||
# Odds Features (24)
|
||||
'odds_ms_h', 'odds_ms_d', 'odds_ms_a',
|
||||
'implied_home', 'implied_draw', 'implied_away',
|
||||
'odds_ht_ms_h', 'odds_ht_ms_d', 'odds_ht_ms_a',
|
||||
'odds_ou05_o', 'odds_ou05_u',
|
||||
'odds_ou15_o', 'odds_ou15_u',
|
||||
'odds_ou25_o', 'odds_ou25_u',
|
||||
'odds_ou35_o', 'odds_ou35_u',
|
||||
'odds_ht_ou05_o', 'odds_ht_ou05_u',
|
||||
'odds_ht_ou15_o', 'odds_ht_ou15_u',
|
||||
'odds_btts_y', 'odds_btts_n',
|
||||
|
||||
# League Features (4)
|
||||
'home_xga', 'away_xga',
|
||||
'league_avg_goals', 'league_zero_goal_rate',
|
||||
|
||||
# Upset Engine (4)
|
||||
'upset_atmosphere', 'upset_motivation', 'upset_fatigue', 'upset_potential',
|
||||
|
||||
# Referee Engine (5)
|
||||
'referee_home_bias', 'referee_avg_goals', 'referee_cards_total',
|
||||
'referee_avg_yellow', 'referee_experience',
|
||||
|
||||
# Momentum Engine (3)
|
||||
'home_momentum_score', 'away_momentum_score', 'momentum_diff',
|
||||
|
||||
# Squad Features (9)
|
||||
'home_squad_quality', 'away_squad_quality', 'squad_diff',
|
||||
'home_key_players', 'away_key_players',
|
||||
'home_missing_impact', 'away_missing_impact',
|
||||
'home_goals_form', 'away_goals_form',
|
||||
]
|
||||
|
||||
# Model weights for ensemble
|
||||
DEFAULT_WEIGHTS = {
|
||||
'xgb': 0.50,
|
||||
'lgb': 0.50,
|
||||
}
|
||||
|
||||
def __init__(self, models_dir: str = None):
|
||||
"""
|
||||
Initialize V25 Predictor.
|
||||
|
||||
Args:
|
||||
models_dir: Directory containing model files. Defaults to v25/ directory.
|
||||
"""
|
||||
self.models_dir = models_dir or MODELS_DIR
|
||||
self.models = {} # market -> {'xgb': model, 'lgb': model}
|
||||
self._loaded = False
|
||||
|
||||
# All trained market models available in V25
|
||||
ALL_MARKETS = [
|
||||
'ms', 'ou25', 'btts', # Core markets
|
||||
'ou15', 'ou35', # Additional OU lines
|
||||
'ht_result', 'ht_ou05', 'ht_ou15', # HT markets
|
||||
'htft', # HT/FT combo
|
||||
'cards_ou45', # Cards market
|
||||
'handicap_ms', # Handicap
|
||||
'odd_even', # Odd/Even goals
|
||||
]
|
||||
|
||||
# Multi-class markets (output > 2 classes)
|
||||
MULTICLASS_MARKETS = {'ms', 'ht_result', 'htft', 'handicap_ms'}
|
||||
|
||||
def load_models(self) -> bool:
|
||||
"""Load all market-specific models from disk."""
|
||||
try:
|
||||
loaded_count = 0
|
||||
|
||||
for market in self.ALL_MARKETS:
|
||||
self.models[market] = {}
|
||||
|
||||
# Load XGBoost (read content in Python to avoid non-ASCII path issues)
|
||||
xgb_path = os.path.join(self.models_dir, f'xgb_v25_{market}.json')
|
||||
if os.path.exists(xgb_path) and os.path.getsize(xgb_path) > 0:
|
||||
with open(xgb_path, 'r', encoding='utf-8') as f:
|
||||
xgb_content = f.read()
|
||||
booster = xgb.Booster()
|
||||
booster.load_model(bytearray(xgb_content, 'utf-8'))
|
||||
self.models[market]['xgb'] = booster
|
||||
loaded_count += 1
|
||||
|
||||
# Load LightGBM (read content in Python to avoid non-ASCII path issues)
|
||||
lgb_path = os.path.join(self.models_dir, f'lgb_v25_{market}.txt')
|
||||
if os.path.exists(lgb_path) and os.path.getsize(lgb_path) > 0:
|
||||
with open(lgb_path, 'r', encoding='utf-8') as f:
|
||||
model_str = f.read()
|
||||
self.models[market]['lgb'] = lgb.Booster(model_str=model_str)
|
||||
loaded_count += 1
|
||||
|
||||
# Remove empty entries
|
||||
if not self.models[market]:
|
||||
del self.models[market]
|
||||
|
||||
print(f"[V25] Loaded {loaded_count} model files across {len(self.models)} markets: {list(self.models.keys())}")
|
||||
self._loaded = loaded_count > 0
|
||||
return self._loaded
|
||||
|
||||
except Exception as e:
|
||||
print(f"[ERROR] Error loading models: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
return False
|
||||
|
||||
def _ensure_loaded(self):
|
||||
"""Ensure models are loaded before prediction."""
|
||||
if not self._loaded:
|
||||
if not self.load_models():
|
||||
raise RuntimeError("Failed to load V25 models")
|
||||
|
||||
def _prepare_features(self, features: Dict[str, float]) -> pd.DataFrame:
|
||||
"""Prepare feature vector for prediction."""
|
||||
X = pd.DataFrame([{col: features.get(col, 0.0) for col in self.FEATURE_COLS}])
|
||||
return X
|
||||
|
||||
def predict_ms(self, features: Dict[str, float]) -> tuple:
|
||||
"""
|
||||
Predict match result (1X2).
|
||||
|
||||
Returns:
|
||||
(home_prob, draw_prob, away_prob)
|
||||
"""
|
||||
self._ensure_loaded()
|
||||
|
||||
X = self._prepare_features(features)
|
||||
probs = []
|
||||
|
||||
# XGBoost
|
||||
if 'xgb' in self.models.get('ms', {}):
|
||||
dmat = xgb.DMatrix(X)
|
||||
xgb_proba = self.models['ms']['xgb'].predict(dmat)
|
||||
if len(xgb_proba.shape) == 1:
|
||||
xgb_proba = np.array([xgb_proba])
|
||||
probs.append(xgb_proba[0] * self.DEFAULT_WEIGHTS['xgb'])
|
||||
|
||||
# LightGBM
|
||||
if 'lgb' in self.models.get('ms', {}):
|
||||
lgb_proba = self.models['ms']['lgb'].predict(X)
|
||||
if len(lgb_proba.shape) == 2:
|
||||
probs.append(lgb_proba[0] * self.DEFAULT_WEIGHTS['lgb'])
|
||||
|
||||
if not probs:
|
||||
return 0.33, 0.33, 0.33
|
||||
|
||||
ensemble_proba = np.sum(probs, axis=0)
|
||||
ensemble_proba = ensemble_proba / ensemble_proba.sum()
|
||||
|
||||
return float(ensemble_proba[0]), float(ensemble_proba[1]), float(ensemble_proba[2])
|
||||
|
||||
def predict_ou25(self, features: Dict[str, float]) -> tuple:
|
||||
"""
|
||||
Predict Over/Under 2.5 goals.
|
||||
|
||||
Returns:
|
||||
(over_prob, under_prob)
|
||||
"""
|
||||
self._ensure_loaded()
|
||||
|
||||
X = self._prepare_features(features)
|
||||
probs = []
|
||||
|
||||
# XGBoost
|
||||
if 'xgb' in self.models.get('ou25', {}):
|
||||
dmat = xgb.DMatrix(X)
|
||||
xgb_proba = self.models['ou25']['xgb'].predict(dmat)
|
||||
if isinstance(xgb_proba, np.ndarray) and len(xgb_proba.shape) == 1:
|
||||
probs.append(xgb_proba[0])
|
||||
|
||||
# LightGBM
|
||||
if 'lgb' in self.models.get('ou25', {}):
|
||||
lgb_proba = self.models['ou25']['lgb'].predict(X)
|
||||
if isinstance(lgb_proba, np.ndarray):
|
||||
probs.append(lgb_proba[0])
|
||||
|
||||
if not probs:
|
||||
return 0.5, 0.5
|
||||
|
||||
# Average probability
|
||||
avg_prob = np.mean(probs)
|
||||
|
||||
return float(avg_prob), float(1 - avg_prob)
|
||||
|
||||
def predict_btts(self, features: Dict[str, float]) -> tuple:
|
||||
"""
|
||||
Predict Both Teams To Score.
|
||||
|
||||
Returns:
|
||||
(yes_prob, no_prob)
|
||||
"""
|
||||
self._ensure_loaded()
|
||||
|
||||
X = self._prepare_features(features)
|
||||
probs = []
|
||||
|
||||
# XGBoost
|
||||
if 'xgb' in self.models.get('btts', {}):
|
||||
dmat = xgb.DMatrix(X)
|
||||
xgb_proba = self.models['btts']['xgb'].predict(dmat)
|
||||
if isinstance(xgb_proba, np.ndarray) and len(xgb_proba.shape) == 1:
|
||||
probs.append(xgb_proba[0])
|
||||
|
||||
# LightGBM
|
||||
if 'lgb' in self.models.get('btts', {}):
|
||||
lgb_proba = self.models['btts']['lgb'].predict(X)
|
||||
if isinstance(lgb_proba, np.ndarray):
|
||||
probs.append(lgb_proba[0])
|
||||
|
||||
if not probs:
|
||||
return 0.5, 0.5
|
||||
|
||||
# Average probability
|
||||
avg_prob = np.mean(probs)
|
||||
|
||||
return float(avg_prob), float(1 - avg_prob)
|
||||
|
||||
def predict_market(self, market: str, features: Dict[str, float]) -> np.ndarray:
|
||||
"""
|
||||
Generic prediction for any loaded market.
|
||||
|
||||
Args:
|
||||
market: Market key (e.g. 'ht_result', 'htft', 'cards_ou45')
|
||||
features: Feature dictionary.
|
||||
|
||||
Returns:
|
||||
numpy array of probabilities.
|
||||
For binary markets: [positive_prob]
|
||||
For multi-class markets: [class0_prob, class1_prob, ...]
|
||||
"""
|
||||
self._ensure_loaded()
|
||||
|
||||
if market not in self.models:
|
||||
return None
|
||||
|
||||
X = self._prepare_features(features)
|
||||
probs = []
|
||||
weights = []
|
||||
is_multiclass = market in self.MULTICLASS_MARKETS
|
||||
|
||||
# XGBoost
|
||||
if 'xgb' in self.models[market]:
|
||||
dmat = xgb.DMatrix(X)
|
||||
xgb_proba = self.models[market]['xgb'].predict(dmat)
|
||||
if isinstance(xgb_proba, np.ndarray):
|
||||
if is_multiclass and len(xgb_proba.shape) == 2:
|
||||
probs.append(xgb_proba[0])
|
||||
elif is_multiclass and len(xgb_proba.shape) == 1:
|
||||
probs.append(xgb_proba)
|
||||
else:
|
||||
probs.append(np.array([xgb_proba[0]]))
|
||||
weights.append(self.DEFAULT_WEIGHTS['xgb'])
|
||||
|
||||
# LightGBM
|
||||
if 'lgb' in self.models[market]:
|
||||
lgb_proba = self.models[market]['lgb'].predict(X)
|
||||
if isinstance(lgb_proba, np.ndarray):
|
||||
if is_multiclass and len(lgb_proba.shape) == 2:
|
||||
probs.append(lgb_proba[0])
|
||||
elif is_multiclass and len(lgb_proba.shape) == 1:
|
||||
probs.append(lgb_proba)
|
||||
else:
|
||||
probs.append(np.array([lgb_proba[0]]))
|
||||
weights.append(self.DEFAULT_WEIGHTS['lgb'])
|
||||
|
||||
if not probs:
|
||||
return None
|
||||
|
||||
# Weighted average
|
||||
if len(probs) == 1:
|
||||
return probs[0]
|
||||
|
||||
total_w = sum(weights[:len(probs)])
|
||||
result = np.zeros_like(probs[0])
|
||||
for p, w in zip(probs, weights):
|
||||
result += p * (w / total_w)
|
||||
|
||||
# Normalize multi-class
|
||||
if is_multiclass and result.sum() > 0:
|
||||
result = result / result.sum()
|
||||
|
||||
return result
|
||||
|
||||
def has_market(self, market: str) -> bool:
|
||||
"""Check if a specific market model is loaded."""
|
||||
return market in self.models
|
||||
|
||||
def predict_match(
|
||||
self,
|
||||
match_id: str,
|
||||
home_team: str,
|
||||
away_team: str,
|
||||
features: Dict[str, float],
|
||||
odds: Optional[Dict[str, float]] = None,
|
||||
) -> MatchPrediction:
|
||||
"""
|
||||
Predict all markets for a match.
|
||||
|
||||
Args:
|
||||
match_id: Match identifier.
|
||||
home_team: Home team name.
|
||||
away_team: Away team name.
|
||||
features: Feature dictionary.
|
||||
odds: Optional odds dictionary for value bet detection.
|
||||
|
||||
Returns:
|
||||
MatchPrediction object.
|
||||
"""
|
||||
# Get predictions for each market
|
||||
home_prob, draw_prob, away_prob = self.predict_ms(features)
|
||||
over_prob, under_prob = self.predict_ou25(features)
|
||||
btts_yes_prob, btts_no_prob = self.predict_btts(features)
|
||||
|
||||
# Determine picks
|
||||
ms_probs = {'1': home_prob, 'X': draw_prob, '2': away_prob}
|
||||
ms_pick = max(ms_probs, key=ms_probs.get)
|
||||
ms_confidence = ms_probs[ms_pick] * 100
|
||||
|
||||
ou25_probs = {'Over': over_prob, 'Under': under_prob}
|
||||
ou25_pick = max(ou25_probs, key=ou25_probs.get)
|
||||
ou25_confidence = ou25_probs[ou25_pick] * 100
|
||||
|
||||
btts_probs = {'Yes': btts_yes_prob, 'No': btts_no_prob}
|
||||
btts_pick = max(btts_probs, key=btts_probs.get)
|
||||
btts_confidence = btts_probs[btts_pick] * 100
|
||||
|
||||
# Create prediction
|
||||
prediction = MatchPrediction(
|
||||
match_id=match_id,
|
||||
home_team=home_team,
|
||||
away_team=away_team,
|
||||
home_prob=home_prob,
|
||||
draw_prob=draw_prob,
|
||||
away_prob=away_prob,
|
||||
ms_pick=ms_pick,
|
||||
ms_confidence=ms_confidence,
|
||||
over_prob=over_prob,
|
||||
under_prob=under_prob,
|
||||
ou25_pick=ou25_pick,
|
||||
ou25_confidence=ou25_confidence,
|
||||
btts_yes_prob=btts_yes_prob,
|
||||
btts_no_prob=btts_no_prob,
|
||||
btts_pick=btts_pick,
|
||||
btts_confidence=btts_confidence,
|
||||
)
|
||||
|
||||
# Detect value bets
|
||||
if odds:
|
||||
prediction.value_bets = self._detect_value_bets(
|
||||
prediction, odds, home_prob, draw_prob, away_prob,
|
||||
over_prob, under_prob, btts_yes_prob, btts_no_prob
|
||||
)
|
||||
|
||||
return prediction
|
||||
|
||||
def _detect_value_bets(
|
||||
self,
|
||||
prediction: MatchPrediction,
|
||||
odds: Dict[str, float],
|
||||
home_prob: float,
|
||||
draw_prob: float,
|
||||
away_prob: float,
|
||||
over_prob: float,
|
||||
under_prob: float,
|
||||
btts_yes_prob: float,
|
||||
btts_no_prob: float,
|
||||
) -> List[ValueBet]:
|
||||
"""Detect value bets based on model vs market odds."""
|
||||
value_bets = []
|
||||
min_edge = 0.05 # 5% minimum edge
|
||||
|
||||
# MS value bets
|
||||
if 'ms_h' in odds and odds['ms_h'] > 0:
|
||||
implied = 1 / odds['ms_h']
|
||||
edge = home_prob - implied
|
||||
if edge > min_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='MS',
|
||||
pick='1',
|
||||
probability=home_prob,
|
||||
odds=odds['ms_h'],
|
||||
edge=edge,
|
||||
confidence=home_prob * 100,
|
||||
))
|
||||
|
||||
if 'ms_d' in odds and odds['ms_d'] > 0:
|
||||
implied = 1 / odds['ms_d']
|
||||
edge = draw_prob - implied
|
||||
if edge > min_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='MS',
|
||||
pick='X',
|
||||
probability=draw_prob,
|
||||
odds=odds['ms_d'],
|
||||
edge=edge,
|
||||
confidence=draw_prob * 100,
|
||||
))
|
||||
|
||||
if 'ms_a' in odds and odds['ms_a'] > 0:
|
||||
implied = 1 / odds['ms_a']
|
||||
edge = away_prob - implied
|
||||
if edge > min_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='MS',
|
||||
pick='2',
|
||||
probability=away_prob,
|
||||
odds=odds['ms_a'],
|
||||
edge=edge,
|
||||
confidence=away_prob * 100,
|
||||
))
|
||||
|
||||
# OU25 value bets
|
||||
if 'ou25_o' in odds and odds['ou25_o'] > 0:
|
||||
implied = 1 / odds['ou25_o']
|
||||
edge = over_prob - implied
|
||||
if edge > min_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='OU25',
|
||||
pick='Over',
|
||||
probability=over_prob,
|
||||
odds=odds['ou25_o'],
|
||||
edge=edge,
|
||||
confidence=over_prob * 100,
|
||||
))
|
||||
|
||||
if 'ou25_u' in odds and odds['ou25_u'] > 0:
|
||||
implied = 1 / odds['ou25_u']
|
||||
edge = under_prob - implied
|
||||
if edge > min_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='OU25',
|
||||
pick='Under',
|
||||
probability=under_prob,
|
||||
odds=odds['ou25_u'],
|
||||
edge=edge,
|
||||
confidence=under_prob * 100,
|
||||
))
|
||||
|
||||
# BTTS value bets
|
||||
if 'btts_y' in odds and odds['btts_y'] > 0:
|
||||
implied = 1 / odds['btts_y']
|
||||
edge = btts_yes_prob - implied
|
||||
if edge > min_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='BTTS',
|
||||
pick='Yes',
|
||||
probability=btts_yes_prob,
|
||||
odds=odds['btts_y'],
|
||||
edge=edge,
|
||||
confidence=btts_yes_prob * 100,
|
||||
))
|
||||
|
||||
if 'btts_n' in odds and odds['btts_n'] > 0:
|
||||
implied = 1 / odds['btts_n']
|
||||
edge = btts_no_prob - implied
|
||||
if edge > min_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='BTTS',
|
||||
pick='No',
|
||||
probability=btts_no_prob,
|
||||
odds=odds['btts_n'],
|
||||
edge=edge,
|
||||
confidence=btts_no_prob * 100,
|
||||
))
|
||||
|
||||
return value_bets
|
||||
|
||||
|
||||
# Singleton instance
|
||||
_v25_predictor: Optional[V25Predictor] = None
|
||||
|
||||
|
||||
def get_v25_predictor() -> V25Predictor:
|
||||
"""Get or create V25 predictor instance."""
|
||||
global _v25_predictor
|
||||
if _v25_predictor is None:
|
||||
_v25_predictor = V25Predictor()
|
||||
_v25_predictor.load_models()
|
||||
return _v25_predictor
|
||||
@@ -0,0 +1,291 @@
|
||||
"""
|
||||
V27 Pro Predictor — Odds-Free Fundamentals + Value Edge Detection
|
||||
|
||||
This module loads V27 ensemble models (XGBoost, LightGBM, CatBoost)
|
||||
and produces market-independent probability estimates.
|
||||
|
||||
The key insight: V27 is trained WITHOUT odds features, so it produces
|
||||
"true" probabilities unbiased by market pricing. The divergence between
|
||||
V25 (odds-aware) and V27 (odds-free) predictions signals market mispricing.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
V27_DIR = Path(__file__).parent / "v27"
|
||||
|
||||
|
||||
class V27Predictor:
|
||||
"""
|
||||
Loads V27 ensemble models and provides predictions using the
|
||||
82-feature odds-free vector.
|
||||
"""
|
||||
|
||||
MARKETS = ["ms", "ou25"]
|
||||
|
||||
def __init__(self):
|
||||
self.models: Dict[str, Dict[str, object]] = {}
|
||||
self.feature_cols: List[str] = []
|
||||
self._loaded = False
|
||||
|
||||
def load_models(self) -> bool:
|
||||
"""Load all V27 ensemble models and feature column spec."""
|
||||
if self._loaded:
|
||||
return True
|
||||
|
||||
# Feature columns
|
||||
cols_path = V27_DIR / "v27_feature_cols.json"
|
||||
if not cols_path.exists():
|
||||
logger.error("[V27] Feature columns file not found: %s", cols_path)
|
||||
return False
|
||||
|
||||
try:
|
||||
with open(cols_path, "r", encoding="utf-8") as f:
|
||||
self.feature_cols = json.load(f)
|
||||
logger.info("[V27] Loaded %d feature columns", len(self.feature_cols))
|
||||
except Exception as e:
|
||||
logger.error("[V27] Failed to load feature columns: %s", e)
|
||||
return False
|
||||
|
||||
# Load models per market
|
||||
model_types = {"xgb": "xgb", "lgb": "lgb", "cb": "cb"}
|
||||
|
||||
for market in self.MARKETS:
|
||||
self.models[market] = {}
|
||||
for short, label in model_types.items():
|
||||
# Try market-specific file first: v27_ms_xgb.pkl
|
||||
path = V27_DIR / f"v27_{market}_{short}.pkl"
|
||||
if not path.exists():
|
||||
# Fallback to generic: v27_xgboost.pkl (for MS only)
|
||||
generic_names = {"xgb": "v27_xgboost.pkl", "lgb": "v27_lightgbm.pkl", "cb": "v27_catboost.pkl"}
|
||||
path = V27_DIR / generic_names.get(short, "")
|
||||
if not path.exists():
|
||||
logger.warning("[V27] Model file not found for %s/%s", market, short)
|
||||
continue
|
||||
|
||||
try:
|
||||
with open(path, "rb") as f:
|
||||
model = pickle.load(f)
|
||||
self.models[market][label] = model
|
||||
logger.info("[V27] ✓ Loaded %s/%s from %s", market, label, path.name)
|
||||
except Exception as e:
|
||||
logger.error("[V27] ✗ Failed to load %s/%s: %s", market, label, e)
|
||||
|
||||
loaded_count = sum(len(v) for v in self.models.values())
|
||||
if loaded_count == 0:
|
||||
logger.error("[V27] No models loaded!")
|
||||
return False
|
||||
|
||||
self._loaded = True
|
||||
logger.info("[V27] Total models loaded: %d across %d markets", loaded_count, len(self.models))
|
||||
return True
|
||||
|
||||
def _build_feature_array(self, features: Dict[str, float]) -> np.ndarray:
|
||||
"""
|
||||
Build ordered feature array from the full feature dict.
|
||||
V27 uses only its 82 features (odds-free subset).
|
||||
"""
|
||||
row = []
|
||||
for col in self.feature_cols:
|
||||
row.append(float(features.get(col, 0.0)))
|
||||
return np.array([row])
|
||||
|
||||
def _predict_with_model(self, model, X: np.ndarray, label: str, expected_classes: int) -> Optional[np.ndarray]:
|
||||
"""
|
||||
Predict probabilities from a model, handling both sklearn wrappers
|
||||
(predict_proba) and raw Booster objects (predict).
|
||||
|
||||
For raw XGBoost Boosters, DMatrix is created WITH feature_names
|
||||
to match the training schema.
|
||||
"""
|
||||
import xgboost as xgb
|
||||
import lightgbm as lgbm
|
||||
import pandas as pd
|
||||
|
||||
# 1. Try sklearn-style predict_proba first
|
||||
if hasattr(model, 'predict_proba'):
|
||||
try:
|
||||
proba = model.predict_proba(X)[0]
|
||||
if len(proba) == expected_classes:
|
||||
return proba
|
||||
logger.warning("[V27] %s predict_proba returned %d classes, expected %d", label, len(proba), expected_classes)
|
||||
except Exception:
|
||||
pass # Fall through to raw predict
|
||||
|
||||
# 2. Raw xgboost.Booster — MUST pass feature_names
|
||||
if isinstance(model, xgb.Booster):
|
||||
try:
|
||||
feature_names = self.feature_cols if self.feature_cols else None
|
||||
dmat = xgb.DMatrix(X, feature_names=feature_names)
|
||||
raw = model.predict(dmat)
|
||||
if isinstance(raw, np.ndarray):
|
||||
if raw.ndim == 2 and raw.shape[1] == expected_classes:
|
||||
return raw[0]
|
||||
elif raw.ndim == 1 and expected_classes == 2:
|
||||
p = float(raw[0])
|
||||
return np.array([1.0 - p, p])
|
||||
elif raw.ndim == 1 and len(raw) == expected_classes:
|
||||
return raw
|
||||
except Exception as e:
|
||||
logger.warning("[V27] %s xgb.Booster predict failed: %s", label, e)
|
||||
return None
|
||||
|
||||
# 3. Raw lightgbm.Booster — pass as DataFrame with column names
|
||||
if isinstance(model, lgbm.Booster):
|
||||
try:
|
||||
if self.feature_cols:
|
||||
X_named = pd.DataFrame(X, columns=self.feature_cols)
|
||||
raw = model.predict(X_named)
|
||||
else:
|
||||
raw = model.predict(X)
|
||||
if isinstance(raw, np.ndarray):
|
||||
if raw.ndim == 2 and raw.shape[1] == expected_classes:
|
||||
return raw[0]
|
||||
elif raw.ndim == 1 and expected_classes == 2:
|
||||
p = float(raw[0])
|
||||
return np.array([1.0 - p, p])
|
||||
elif raw.ndim == 1 and len(raw) == expected_classes:
|
||||
return raw
|
||||
except Exception as e:
|
||||
logger.warning("[V27] %s lgb.Booster predict failed: %s", label, e)
|
||||
return None
|
||||
|
||||
# 4. Generic fallback (CatBoost, etc.)
|
||||
try:
|
||||
if hasattr(model, 'predict'):
|
||||
raw = model.predict(X)
|
||||
if isinstance(raw, np.ndarray):
|
||||
if raw.ndim == 2 and raw.shape[1] == expected_classes:
|
||||
return raw[0]
|
||||
elif raw.ndim == 1 and expected_classes == 2:
|
||||
p = float(raw[0])
|
||||
return np.array([1.0 - p, p])
|
||||
elif raw.ndim == 1 and len(raw) == expected_classes:
|
||||
return raw
|
||||
except Exception as e:
|
||||
logger.warning("[V27] %s generic predict failed: %s", label, e)
|
||||
|
||||
return None
|
||||
|
||||
def predict_ms(self, features: Dict[str, float]) -> Optional[Dict[str, float]]:
|
||||
"""
|
||||
Predict Match Score probabilities (Home/Draw/Away).
|
||||
Returns dict with keys: home, draw, away.
|
||||
"""
|
||||
if not self._loaded or "ms" not in self.models or not self.models["ms"]:
|
||||
return None
|
||||
|
||||
X = self._build_feature_array(features)
|
||||
probs_list = []
|
||||
|
||||
for label, model in self.models["ms"].items():
|
||||
proba = self._predict_with_model(model, X, f"MS/{label}", expected_classes=3)
|
||||
if proba is not None and len(proba) == 3:
|
||||
probs_list.append(proba)
|
||||
|
||||
if not probs_list:
|
||||
return None
|
||||
|
||||
# Ensemble average
|
||||
avg = np.mean(probs_list, axis=0)
|
||||
return {
|
||||
"home": float(avg[0]),
|
||||
"draw": float(avg[1]),
|
||||
"away": float(avg[2]),
|
||||
}
|
||||
|
||||
def predict_ou25(self, features: Dict[str, float]) -> Optional[Dict[str, float]]:
|
||||
"""
|
||||
Predict Over/Under 2.5 probabilities.
|
||||
Returns dict with keys: under, over.
|
||||
"""
|
||||
if not self._loaded or "ou25" not in self.models or not self.models["ou25"]:
|
||||
return None
|
||||
|
||||
X = self._build_feature_array(features)
|
||||
probs_list = []
|
||||
|
||||
for label, model in self.models["ou25"].items():
|
||||
proba = self._predict_with_model(model, X, f"OU25/{label}", expected_classes=2)
|
||||
if proba is not None and len(proba) == 2:
|
||||
probs_list.append(proba)
|
||||
|
||||
if not probs_list:
|
||||
return None
|
||||
|
||||
avg = np.mean(probs_list, axis=0)
|
||||
return {
|
||||
"under": float(avg[0]),
|
||||
"over": float(avg[1]),
|
||||
}
|
||||
|
||||
def predict_all(self, features: Dict[str, float]) -> Dict[str, Optional[Dict[str, float]]]:
|
||||
"""Run predictions for all supported markets."""
|
||||
return {
|
||||
"ms": self.predict_ms(features),
|
||||
"ou25": self.predict_ou25(features),
|
||||
}
|
||||
|
||||
|
||||
def compute_divergence(
|
||||
v25_probs: Dict[str, float],
|
||||
v27_probs: Dict[str, float],
|
||||
) -> Dict[str, float]:
|
||||
"""
|
||||
Compute the divergence signal between V25 (odds-aware) and V27 (odds-free).
|
||||
|
||||
Positive divergence = V27 thinks it's MORE likely than the market → VALUE BET
|
||||
Negative divergence = V27 thinks it's LESS likely than the market → PASS
|
||||
|
||||
Returns per-outcome divergence values.
|
||||
"""
|
||||
divergence = {}
|
||||
for key in v27_probs:
|
||||
v25_val = v25_probs.get(key, 0.33)
|
||||
v27_val = v27_probs.get(key, 0.33)
|
||||
divergence[key] = round(v27_val - v25_val, 4)
|
||||
return divergence
|
||||
|
||||
|
||||
def compute_value_edge(
|
||||
v25_probs: Dict[str, float],
|
||||
v27_probs: Dict[str, float],
|
||||
odds: Dict[str, float],
|
||||
) -> Dict[str, Dict]:
|
||||
"""
|
||||
Detect value bets by combining V25/V27 divergence with odds.
|
||||
|
||||
A value bet exists when:
|
||||
1. V27 (odds-free) probability > implied odds probability (model says it's underpriced)
|
||||
2. V27 and V25 divergence is positive (V27 sees more signal than the market)
|
||||
|
||||
Returns per-outcome: { probability, implied_prob, edge, is_value }
|
||||
"""
|
||||
results = {}
|
||||
for key in v27_probs:
|
||||
v27_p = v27_probs[key]
|
||||
v25_p = v25_probs.get(key, 0.33)
|
||||
odds_val = odds.get(key, 0.0)
|
||||
|
||||
implied_p = (1.0 / odds_val) if odds_val > 1.01 else 0.0
|
||||
divergence = v27_p - v25_p
|
||||
edge = v27_p - implied_p if implied_p > 0 else 0.0
|
||||
|
||||
results[key] = {
|
||||
"v27_prob": round(v27_p, 4),
|
||||
"v25_prob": round(v25_p, 4),
|
||||
"implied_prob": round(implied_p, 4),
|
||||
"divergence": round(divergence, 4),
|
||||
"edge": round(edge, 4),
|
||||
"is_value": edge > 0.05 and divergence > 0.02, # 5% edge + 2% divergence
|
||||
}
|
||||
|
||||
return results
|
||||
Reference in New Issue
Block a user