v28
Deploy Iddaai Backend / build-and-deploy (push) Successful in 3m21s

This commit is contained in:
2026-04-24 23:46:28 +03:00
parent 3875f2a512
commit 9027cc9900
17 changed files with 4315 additions and 122 deletions
+291
View File
@@ -0,0 +1,291 @@
"""
V27 Pro Predictor — Odds-Free Fundamentals + Value Edge Detection
This module loads V27 ensemble models (XGBoost, LightGBM, CatBoost)
and produces market-independent probability estimates.
The key insight: V27 is trained WITHOUT odds features, so it produces
"true" probabilities unbiased by market pricing. The divergence between
V25 (odds-aware) and V27 (odds-free) predictions signals market mispricing.
"""
import json
import logging
import os
import pickle
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import numpy as np
logger = logging.getLogger(__name__)
V27_DIR = Path(__file__).parent / "v27"
class V27Predictor:
"""
Loads V27 ensemble models and provides predictions using the
82-feature odds-free vector.
"""
MARKETS = ["ms", "ou25"]
def __init__(self):
self.models: Dict[str, Dict[str, object]] = {}
self.feature_cols: List[str] = []
self._loaded = False
def load_models(self) -> bool:
"""Load all V27 ensemble models and feature column spec."""
if self._loaded:
return True
# Feature columns
cols_path = V27_DIR / "v27_feature_cols.json"
if not cols_path.exists():
logger.error("[V27] Feature columns file not found: %s", cols_path)
return False
try:
with open(cols_path, "r", encoding="utf-8") as f:
self.feature_cols = json.load(f)
logger.info("[V27] Loaded %d feature columns", len(self.feature_cols))
except Exception as e:
logger.error("[V27] Failed to load feature columns: %s", e)
return False
# Load models per market
model_types = {"xgb": "xgb", "lgb": "lgb", "cb": "cb"}
for market in self.MARKETS:
self.models[market] = {}
for short, label in model_types.items():
# Try market-specific file first: v27_ms_xgb.pkl
path = V27_DIR / f"v27_{market}_{short}.pkl"
if not path.exists():
# Fallback to generic: v27_xgboost.pkl (for MS only)
generic_names = {"xgb": "v27_xgboost.pkl", "lgb": "v27_lightgbm.pkl", "cb": "v27_catboost.pkl"}
path = V27_DIR / generic_names.get(short, "")
if not path.exists():
logger.warning("[V27] Model file not found for %s/%s", market, short)
continue
try:
with open(path, "rb") as f:
model = pickle.load(f)
self.models[market][label] = model
logger.info("[V27] ✓ Loaded %s/%s from %s", market, label, path.name)
except Exception as e:
logger.error("[V27] ✗ Failed to load %s/%s: %s", market, label, e)
loaded_count = sum(len(v) for v in self.models.values())
if loaded_count == 0:
logger.error("[V27] No models loaded!")
return False
self._loaded = True
logger.info("[V27] Total models loaded: %d across %d markets", loaded_count, len(self.models))
return True
def _build_feature_array(self, features: Dict[str, float]) -> np.ndarray:
"""
Build ordered feature array from the full feature dict.
V27 uses only its 82 features (odds-free subset).
"""
row = []
for col in self.feature_cols:
row.append(float(features.get(col, 0.0)))
return np.array([row])
def _predict_with_model(self, model, X: np.ndarray, label: str, expected_classes: int) -> Optional[np.ndarray]:
"""
Predict probabilities from a model, handling both sklearn wrappers
(predict_proba) and raw Booster objects (predict).
For raw XGBoost Boosters, DMatrix is created WITH feature_names
to match the training schema.
"""
import xgboost as xgb
import lightgbm as lgbm
import pandas as pd
# 1. Try sklearn-style predict_proba first
if hasattr(model, 'predict_proba'):
try:
proba = model.predict_proba(X)[0]
if len(proba) == expected_classes:
return proba
logger.warning("[V27] %s predict_proba returned %d classes, expected %d", label, len(proba), expected_classes)
except Exception:
pass # Fall through to raw predict
# 2. Raw xgboost.Booster — MUST pass feature_names
if isinstance(model, xgb.Booster):
try:
feature_names = self.feature_cols if self.feature_cols else None
dmat = xgb.DMatrix(X, feature_names=feature_names)
raw = model.predict(dmat)
if isinstance(raw, np.ndarray):
if raw.ndim == 2 and raw.shape[1] == expected_classes:
return raw[0]
elif raw.ndim == 1 and expected_classes == 2:
p = float(raw[0])
return np.array([1.0 - p, p])
elif raw.ndim == 1 and len(raw) == expected_classes:
return raw
except Exception as e:
logger.warning("[V27] %s xgb.Booster predict failed: %s", label, e)
return None
# 3. Raw lightgbm.Booster — pass as DataFrame with column names
if isinstance(model, lgbm.Booster):
try:
if self.feature_cols:
X_named = pd.DataFrame(X, columns=self.feature_cols)
raw = model.predict(X_named)
else:
raw = model.predict(X)
if isinstance(raw, np.ndarray):
if raw.ndim == 2 and raw.shape[1] == expected_classes:
return raw[0]
elif raw.ndim == 1 and expected_classes == 2:
p = float(raw[0])
return np.array([1.0 - p, p])
elif raw.ndim == 1 and len(raw) == expected_classes:
return raw
except Exception as e:
logger.warning("[V27] %s lgb.Booster predict failed: %s", label, e)
return None
# 4. Generic fallback (CatBoost, etc.)
try:
if hasattr(model, 'predict'):
raw = model.predict(X)
if isinstance(raw, np.ndarray):
if raw.ndim == 2 and raw.shape[1] == expected_classes:
return raw[0]
elif raw.ndim == 1 and expected_classes == 2:
p = float(raw[0])
return np.array([1.0 - p, p])
elif raw.ndim == 1 and len(raw) == expected_classes:
return raw
except Exception as e:
logger.warning("[V27] %s generic predict failed: %s", label, e)
return None
def predict_ms(self, features: Dict[str, float]) -> Optional[Dict[str, float]]:
"""
Predict Match Score probabilities (Home/Draw/Away).
Returns dict with keys: home, draw, away.
"""
if not self._loaded or "ms" not in self.models or not self.models["ms"]:
return None
X = self._build_feature_array(features)
probs_list = []
for label, model in self.models["ms"].items():
proba = self._predict_with_model(model, X, f"MS/{label}", expected_classes=3)
if proba is not None and len(proba) == 3:
probs_list.append(proba)
if not probs_list:
return None
# Ensemble average
avg = np.mean(probs_list, axis=0)
return {
"home": float(avg[0]),
"draw": float(avg[1]),
"away": float(avg[2]),
}
def predict_ou25(self, features: Dict[str, float]) -> Optional[Dict[str, float]]:
"""
Predict Over/Under 2.5 probabilities.
Returns dict with keys: under, over.
"""
if not self._loaded or "ou25" not in self.models or not self.models["ou25"]:
return None
X = self._build_feature_array(features)
probs_list = []
for label, model in self.models["ou25"].items():
proba = self._predict_with_model(model, X, f"OU25/{label}", expected_classes=2)
if proba is not None and len(proba) == 2:
probs_list.append(proba)
if not probs_list:
return None
avg = np.mean(probs_list, axis=0)
return {
"under": float(avg[0]),
"over": float(avg[1]),
}
def predict_all(self, features: Dict[str, float]) -> Dict[str, Optional[Dict[str, float]]]:
"""Run predictions for all supported markets."""
return {
"ms": self.predict_ms(features),
"ou25": self.predict_ou25(features),
}
def compute_divergence(
v25_probs: Dict[str, float],
v27_probs: Dict[str, float],
) -> Dict[str, float]:
"""
Compute the divergence signal between V25 (odds-aware) and V27 (odds-free).
Positive divergence = V27 thinks it's MORE likely than the market → VALUE BET
Negative divergence = V27 thinks it's LESS likely than the market → PASS
Returns per-outcome divergence values.
"""
divergence = {}
for key in v27_probs:
v25_val = v25_probs.get(key, 0.33)
v27_val = v27_probs.get(key, 0.33)
divergence[key] = round(v27_val - v25_val, 4)
return divergence
def compute_value_edge(
v25_probs: Dict[str, float],
v27_probs: Dict[str, float],
odds: Dict[str, float],
) -> Dict[str, Dict]:
"""
Detect value bets by combining V25/V27 divergence with odds.
A value bet exists when:
1. V27 (odds-free) probability > implied odds probability (model says it's underpriced)
2. V27 and V25 divergence is positive (V27 sees more signal than the market)
Returns per-outcome: { probability, implied_prob, edge, is_value }
"""
results = {}
for key in v27_probs:
v27_p = v27_probs[key]
v25_p = v25_probs.get(key, 0.33)
odds_val = odds.get(key, 0.0)
implied_p = (1.0 / odds_val) if odds_val > 1.01 else 0.0
divergence = v27_p - v25_p
edge = v27_p - implied_p if implied_p > 0 else 0.0
results[key] = {
"v27_prob": round(v27_p, 4),
"v25_prob": round(v25_p, 4),
"implied_prob": round(implied_p, 4),
"divergence": round(divergence, 4),
"edge": round(edge, 4),
"is_value": edge > 0.05 and divergence > 0.02, # 5% edge + 2% divergence
}
return results