@@ -46,6 +46,9 @@ SUPPORTED_MARKETS = [
|
||||
"ht_ft", # Half-Time/Full-Time
|
||||
"dc", # Double Chance
|
||||
"ht", # Half-Time Result
|
||||
"ht_home", # Half-Time Home win
|
||||
"ht_draw", # Half-Time Draw
|
||||
"ht_away", # Half-Time Away win
|
||||
]
|
||||
|
||||
|
||||
@@ -111,6 +114,9 @@ class Calibrator:
|
||||
"ht_ft": 0.92,
|
||||
"dc": 0.97,
|
||||
"ht": 0.92,
|
||||
"ht_home": 0.92,
|
||||
"ht_draw": 0.92,
|
||||
"ht_away": 0.92,
|
||||
}
|
||||
self._load_calibrators()
|
||||
|
||||
|
||||
@@ -0,0 +1,191 @@
|
||||
"""
|
||||
League-Specific Model Loader
|
||||
=============================
|
||||
Loads per-league XGBoost models + isotonic calibrators trained by
|
||||
scripts/train_league_models.py and provides a unified prediction interface.
|
||||
|
||||
Falls back to general V25 for any market/league without a dedicated model.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import pickle
|
||||
from functools import lru_cache
|
||||
from typing import Dict, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import xgboost as xgb
|
||||
|
||||
# Directory that contains this module.
_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))

# The parent of the module directory is treated as the engine root.
AI_ENGINE_DIR = os.path.dirname(_MODULE_DIR)

# Per-league artifacts live in one sub-directory per league id under this path.
LEAGUE_MODEL_DIR = os.path.join(AI_ENGINE_DIR, "models", "league_specific")
|
||||
|
||||
# Market metadata, keyed by the market part of the model file name
# (xgb_<key>.json). Each value is (num_class, label_list); the position of a
# label in the list is the class index it is paired with at prediction time.
MARKET_META: Dict[str, Tuple[int, list]] = {
    "ms": (3, ["1", "X", "2"]),
    "ou15": (2, ["Over", "Under"]),
    "ou25": (2, ["Over", "Under"]),
    "ou35": (2, ["Over", "Under"]),
    "btts": (2, ["Yes", "No"]),
    "ht": (3, ["1", "X", "2"]),
    "ht_ou05": (2, ["Over", "Under"]),
    "ht_ou15": (2, ["Over", "Under"]),
    "htft": (
        9,
        ["1/1", "1/X", "1/2", "X/1", "X/X", "X/2", "2/1", "2/X", "2/2"],
    ),
    "oe": (2, ["Odd", "Even"]),
    "cards": (2, ["Over", "Under"]),
    "handicap": (3, ["1", "X", "2"]),
}
|
||||
|
||||
# Signal key map: market file key -> uppercase signal key used in
# _get_v25_signal. Every key is simply upper-cased except "handicap",
# which maps to the shorter "HCAP".
FILE_TO_SIGNAL = {
    "ms": "MS",
    "ou15": "OU15",
    "ou25": "OU25",
    "ou35": "OU35",
    "btts": "BTTS",
    "ht": "HT",
    "ht_ou05": "HT_OU05",
    "ht_ou15": "HT_OU15",
    "htft": "HTFT",
    "oe": "OE",
    "cards": "CARDS",
    "handicap": "HCAP",
}
|
||||
|
||||
|
||||
class LeagueModel:
    """Holds XGBoost models + isotonic calibrators for one league.

    Artifacts are read from ``LEAGUE_MODEL_DIR/<league_id>/``:

    * ``feature_cols.json`` - optional list of feature column names.
    * ``xgb_<market>.json`` - one booster per key of ``MARKET_META``.
    * ``cal_<key>.pkl``     - pickled calibrators. ``<key>`` is the market key
      for two-class markets and ``<market>_<class index>`` otherwise.
    """

    def __init__(self, league_id: str):
        self.league_id = league_id
        self.league_dir = os.path.join(LEAGUE_MODEL_DIR, league_id)
        self.models: Dict[str, xgb.Booster] = {}   # market_key -> booster
        self.calibrators: Dict[str, object] = {}   # cal_key -> calibrator
        self.feature_cols: Optional[list] = None
        self._loaded = False

    def load(self) -> bool:
        """Load every artifact found in the league directory.

        Artifacts are collected in local containers and published on the
        instance only after the whole directory was read successfully, so a
        failure half-way through never leaves a partially populated model
        behind and a reload never keeps boosters whose files have disappeared.

        Returns:
            True when at least one booster or calibrator was loaded. False
            when the directory does not exist, holds no usable artifact, or
            reading any artifact failed (the error is printed, not raised, and
            the instance state is left untouched).
        """
        if not os.path.isdir(self.league_dir):
            return False

        # Keep the current column list when the league ships no file for it.
        feature_cols = self.feature_cols
        models: Dict[str, xgb.Booster] = {}
        calibrators: Dict[str, object] = {}

        try:
            fc_path = os.path.join(self.league_dir, "feature_cols.json")
            if os.path.exists(fc_path):
                # JSON is UTF-8; do not depend on the platform default codec.
                with open(fc_path, encoding="utf-8") as f:
                    feature_cols = json.load(f)

            for mkey in MARKET_META:
                xgb_path = os.path.join(self.league_dir, f"xgb_{mkey}.json")
                # Files of 100 bytes or less are ignored
                # (presumably empty/placeholder exports - TODO confirm).
                if os.path.exists(xgb_path) and os.path.getsize(xgb_path) > 100:
                    booster = xgb.Booster()
                    booster.load_model(xgb_path)
                    models[mkey] = booster

            for fname in os.listdir(self.league_dir):
                if fname.startswith("cal_") and fname.endswith(".pkl"):
                    cal_key = fname[len("cal_"):-len(".pkl")]
                    # SECURITY: pickle.load runs arbitrary code from the file.
                    # Only point LEAGUE_MODEL_DIR at artifacts produced by a
                    # trusted training run.
                    with open(os.path.join(self.league_dir, fname), "rb") as f:
                        calibrators[cal_key] = pickle.load(f)
        except Exception as e:
            # Best-effort loader: callers fall back to another model on False.
            print(f"[LeagueModel] Load failed for {self.league_id}: {e}")
            return False

        self.feature_cols = feature_cols
        self.models = models
        self.calibrators = calibrators
        self._loaded = bool(models or calibrators)
        return self._loaded

    def has_market(self, mkey: str) -> bool:
        """Return True when a booster for market ``mkey`` has been loaded."""
        return mkey in self.models

    def predict_market(
        self,
        mkey: str,
        feature_row: Dict[str, float],
    ) -> Optional[Dict[str, float]]:
        """
        Predict one market using league-specific XGBoost + isotonic calibration.

        Args:
            mkey: Market key (a key of ``MARKET_META``).
            feature_row: Feature name -> value. Columns the model expects but
                the row lacks are filled with 0.0.

        Returns:
            {label: prob} dict, or None if no model is available for ``mkey``
            or the prediction failed (the error is printed, not raised).
        """
        if mkey not in self.models:
            return None

        booster = self.models[mkey]
        num_class, labels = MARKET_META[mkey]

        fc = self.feature_cols
        if fc is None:
            # Fallback to whatever the booster expects (it knows its feature names)
            fc = list(booster.feature_names or [])

        try:
            X = pd.DataFrame([{col: feature_row.get(col, 0.0) for col in fc}])
            raw = booster.predict(xgb.DMatrix(X))

            if num_class > 2:
                probs_arr = raw.reshape(-1, num_class)[0]
                probs = {
                    label: float(probs_arr[i]) for i, label in enumerate(labels)
                }

                # Apply isotonic calibration per class, where one exists.
                for i, label in enumerate(labels):
                    cal_key = f"{mkey}_{i}"
                    if cal_key in self.calibrators:
                        p_cal = float(
                            self.calibrators[cal_key].predict([probs_arr[i]])[0]
                        )
                        probs[label] = max(0.01, min(0.99, p_cal))

                # Per-class calibration breaks the sum-to-one property, so
                # renormalise over every class, calibrated or not.
                total = sum(probs.values())
                if total > 0:
                    probs = {k: v / total for k, v in probs.items()}
            else:
                # Two-class markets: the single output is P(labels[0]).
                p = float(raw[0])
                if mkey in self.calibrators:
                    p = float(self.calibrators[mkey].predict([p])[0])
                p = max(0.01, min(0.99, p))
                probs = {labels[0]: p, labels[1]: 1.0 - p}

            return probs
        except Exception as e:
            print(f"[LeagueModel] predict_market({mkey}) failed for {self.league_id}: {e}")
            return None
|
||||
|
||||
|
||||
class LeagueModelLoader:
    """
    In-memory cache for league-specific models.

    No locking is performed. ``get`` is synchronous, so calls cannot interleave
    while everything runs on one thread (for example a single asyncio event
    loop); add a lock before sharing one instance between threads.
    """

    def __init__(self, max_cached: int = 80):
        # league_id -> loaded model, or None when loading that league failed.
        self._cache: Dict[str, Optional[LeagueModel]] = {}
        self._max_cached = max_cached

    def get(self, league_id: str) -> Optional[LeagueModel]:
        """Return loaded LeagueModel for this league, or None if unavailable."""
        if league_id in self._cache:
            return self._cache[league_id]

        # Evict the earliest-inserted entry once the cache is full (dicts keep
        # insertion order). The emptiness check keeps max_cached <= 0 from
        # raising StopIteration on an empty cache.
        if self._cache and len(self._cache) >= self._max_cached:
            oldest = next(iter(self._cache))
            del self._cache[oldest]

        model = LeagueModel(league_id)
        loaded = model.load()
        # Failures are cached as None as well, so a league without models is
        # not re-read from disk on every call.
        self._cache[league_id] = model if loaded else None
        if loaded:
            n_models = len(model.models)
            n_cals = len(model.calibrators)
            print(f"[LeagueModel] Loaded {league_id}: {n_models} XGB models, {n_cals} calibrators")
        return self._cache[league_id]

    def available_leagues(self) -> list:
        """Return the names of the sub-directories of LEAGUE_MODEL_DIR.

        Returns an empty list when the directory does not exist.
        """
        if not os.path.isdir(LEAGUE_MODEL_DIR):
            return []
        return [
            d for d in os.listdir(LEAGUE_MODEL_DIR)
            if os.path.isdir(os.path.join(LEAGUE_MODEL_DIR, d))
        ]

    def readiness_summary(self) -> dict:
        """Return the model directory, the number of league directories on
        disk, and the number of successfully loaded models in the cache."""
        leagues = self.available_leagues()
        return {
            "league_specific_dir": LEAGUE_MODEL_DIR,
            "available_leagues": len(leagues),
            # None entries are failed loads, not usable models.
            "cached": sum(1 for v in self._cache.values() if v is not None),
        }
|
||||
|
||||
|
||||
# ── Singleton ──────────────────────────────────────────────────────
|
||||
_loader: Optional[LeagueModelLoader] = None
|
||||
|
||||
|
||||
def get_league_model_loader() -> LeagueModelLoader:
    """Return the module-wide LeagueModelLoader, creating it on first use."""
    global _loader
    if _loader is not None:
        return _loader
    _loader = LeagueModelLoader()
    return _loader
|
||||
File diff suppressed because it is too large
Load Diff
@@ -20,6 +20,13 @@ from dataclasses import dataclass, field
|
||||
import xgboost as xgb
|
||||
import lightgbm as lgb
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
try:
|
||||
from config.config_loader import get_config as _get_cfg
|
||||
except ImportError:
|
||||
_get_cfg = None # type: ignore[assignment]
|
||||
|
||||
# CatBoost is optional
|
||||
try:
|
||||
from catboost import CatBoostClassifier
|
||||
@@ -228,7 +235,7 @@ class V25Predictor:
|
||||
print(f"[V25] Using fallback feature columns ({len(V25Predictor._FALLBACK_FEATURE_COLS)} features)")
|
||||
return V25Predictor._FALLBACK_FEATURE_COLS
|
||||
|
||||
# Model weights for ensemble
|
||||
# Model weights for ensemble (overridden from config in __init__)
|
||||
DEFAULT_WEIGHTS = {
|
||||
'xgb': 0.50,
|
||||
'lgb': 0.50,
|
||||
@@ -245,6 +252,16 @@ class V25Predictor:
|
||||
self.models = {} # market -> {'xgb': model, 'lgb': model}
|
||||
self._loaded = False
|
||||
self.FEATURE_COLS = self._load_feature_cols()
|
||||
# Load weights from config (falls back to class default 0.50/0.50)
|
||||
if _get_cfg is not None:
|
||||
try:
|
||||
cfg = _get_cfg()
|
||||
self.DEFAULT_WEIGHTS = {
|
||||
'xgb': float(cfg.get('model_ensemble.xgb_weight', 0.50)),
|
||||
'lgb': float(cfg.get('model_ensemble.lgb_weight', 0.50)),
|
||||
}
|
||||
except Exception:
|
||||
pass # keep class-level defaults
|
||||
|
||||
# All trained market models available in V25
|
||||
ALL_MARKETS = [
|
||||
@@ -275,21 +292,34 @@ class V25Predictor:
|
||||
xgb_content = f.read()
|
||||
booster = xgb.Booster()
|
||||
booster.load_model(bytearray(xgb_content, 'utf-8'))
|
||||
self.models[market]['xgb'] = booster
|
||||
loaded_count += 1
|
||||
|
||||
# Corruption detection: verify model can run a dummy prediction
|
||||
try:
|
||||
_dummy = pd.DataFrame([{col: 0.0 for col in self.FEATURE_COLS}])
|
||||
booster.predict(xgb.DMatrix(_dummy))
|
||||
self.models[market]['xgb'] = booster
|
||||
loaded_count += 1
|
||||
except Exception as _ce:
|
||||
print(f"[V25] ⚠️ XGB model for {market} failed integrity check: {_ce} — skipping")
|
||||
|
||||
# Load LightGBM (read content in Python to avoid non-ASCII path issues)
|
||||
lgb_path = os.path.join(self.models_dir, f'lgb_v25_{market}.txt')
|
||||
if os.path.exists(lgb_path) and os.path.getsize(lgb_path) > 0:
|
||||
with open(lgb_path, 'r', encoding='utf-8') as f:
|
||||
model_str = f.read()
|
||||
self.models[market]['lgb'] = lgb.Booster(model_str=model_str)
|
||||
loaded_count += 1
|
||||
|
||||
lgb_model = lgb.Booster(model_str=model_str)
|
||||
# Corruption detection: verify model can run a dummy prediction
|
||||
try:
|
||||
_dummy = pd.DataFrame([{col: 0.0 for col in self.FEATURE_COLS}])
|
||||
lgb_model.predict(_dummy)
|
||||
self.models[market]['lgb'] = lgb_model
|
||||
loaded_count += 1
|
||||
except Exception as _ce:
|
||||
print(f"[V25] ⚠️ LGB model for {market} failed integrity check: {_ce} — skipping")
|
||||
|
||||
# Remove empty entries
|
||||
if not self.models[market]:
|
||||
del self.models[market]
|
||||
|
||||
|
||||
print(f"[V25] Loaded {loaded_count} model files across {len(self.models)} markets: {list(self.models.keys())}")
|
||||
self._loaded = loaded_count > 0
|
||||
return self._loaded
|
||||
@@ -305,7 +335,27 @@ class V25Predictor:
|
||||
if not self._loaded:
|
||||
if not self.load_models():
|
||||
raise RuntimeError("Failed to load V25 models")
|
||||
|
||||
|
||||
def readiness_summary(self) -> Dict[str, Any]:
    """Return per-market model status for health check endpoint.

    Triggers ``load_models()`` first when nothing has been loaded yet.

    Returns:
        A dict with:
          * ``fully_loaded``    - True when every market in ``ALL_MARKETS``
            has at least one model.
          * ``loaded_markets``  - markets with at least one model.
          * ``missing_markets`` - markets with no model at all.
          * ``markets``         - per-market ``{"xgb", "lgb", "ready"}`` flags.
          * ``weights``         - a copy of the ensemble weights in use.
    """
    if not self._loaded:
        self.load_models()

    market_status = {}
    for market in self.ALL_MARKETS:
        entry = self.models.get(market, {})
        market_status[market] = {
            "xgb": "xgb" in entry,
            "lgb": "lgb" in entry,
            "ready": bool(entry),
        }

    loaded_markets = [k for k, v in market_status.items() if v["ready"]]
    missing_markets = [k for k, v in market_status.items() if not v["ready"]]

    return {
        "fully_loaded": len(loaded_markets) == len(self.ALL_MARKETS),
        "loaded_markets": loaded_markets,
        "missing_markets": missing_markets,
        # Expose the xgb/lgb breakdown instead of discarding it.
        "markets": market_status,
        # Copy: DEFAULT_WEIGHTS may be the class-level dict shared by every
        # instance, and a health-check consumer must not be able to mutate it.
        "weights": dict(self.DEFAULT_WEIGHTS),
    }
|
||||
|
||||
def _prepare_features(self, features: Dict[str, float]) -> pd.DataFrame:
|
||||
"""Prepare feature vector for prediction."""
|
||||
X = pd.DataFrame([{col: features.get(col, 0.0) for col in self.FEATURE_COLS}])
|
||||
@@ -563,13 +613,23 @@ class V25Predictor:
|
||||
) -> List[ValueBet]:
|
||||
"""Detect value bets based on model vs market odds."""
|
||||
value_bets = []
|
||||
min_edge = 0.05 # 5% minimum edge
|
||||
|
||||
# Market-specific minimum edge thresholds
|
||||
# MS: higher variance → require more edge
|
||||
# OU/BTTS: binary markets → tighter edge acceptable
|
||||
EDGE_THRESHOLDS = {
|
||||
'MS': 0.06,
|
||||
'OU25': 0.04,
|
||||
'BTTS': 0.04,
|
||||
}
|
||||
ms_edge = EDGE_THRESHOLDS['MS']
|
||||
ou_edge = EDGE_THRESHOLDS['OU25']
|
||||
btts_edge = EDGE_THRESHOLDS['BTTS']
|
||||
|
||||
# MS value bets
|
||||
if 'ms_h' in odds and odds['ms_h'] > 0:
|
||||
implied = 1 / odds['ms_h']
|
||||
edge = home_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > ms_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='MS',
|
||||
pick='1',
|
||||
@@ -582,7 +642,7 @@ class V25Predictor:
|
||||
if 'ms_d' in odds and odds['ms_d'] > 0:
|
||||
implied = 1 / odds['ms_d']
|
||||
edge = draw_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > ms_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='MS',
|
||||
pick='X',
|
||||
@@ -595,7 +655,7 @@ class V25Predictor:
|
||||
if 'ms_a' in odds and odds['ms_a'] > 0:
|
||||
implied = 1 / odds['ms_a']
|
||||
edge = away_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > ms_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='MS',
|
||||
pick='2',
|
||||
@@ -609,7 +669,7 @@ class V25Predictor:
|
||||
if 'ou25_o' in odds and odds['ou25_o'] > 0:
|
||||
implied = 1 / odds['ou25_o']
|
||||
edge = over_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > ou_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='OU25',
|
||||
pick='Over',
|
||||
@@ -622,7 +682,7 @@ class V25Predictor:
|
||||
if 'ou25_u' in odds and odds['ou25_u'] > 0:
|
||||
implied = 1 / odds['ou25_u']
|
||||
edge = under_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > ou_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='OU25',
|
||||
pick='Under',
|
||||
@@ -636,7 +696,7 @@ class V25Predictor:
|
||||
if 'btts_y' in odds and odds['btts_y'] > 0:
|
||||
implied = 1 / odds['btts_y']
|
||||
edge = btts_yes_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > btts_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='BTTS',
|
||||
pick='Yes',
|
||||
@@ -649,7 +709,7 @@ class V25Predictor:
|
||||
if 'btts_n' in odds and odds['btts_n'] > 0:
|
||||
implied = 1 / odds['btts_n']
|
||||
edge = btts_no_prob - implied
|
||||
if edge > min_edge:
|
||||
if edge > btts_edge:
|
||||
value_bets.append(ValueBet(
|
||||
market_type='BTTS',
|
||||
pick='No',
|
||||
|
||||
Reference in New Issue
Block a user