6 Commits

Author SHA1 Message Date
fahricansecer eab95c4e5c Update feeder.service.ts
Deploy Iddaai Backend / build-and-deploy (push) Successful in 30s
2026-04-25 02:23:38 +03:00
fahricansecer 9027cc9900 v28
Deploy Iddaai Backend / build-and-deploy (push) Successful in 3m21s
2026-04-24 23:46:28 +03:00
fahricansecer 3875f2a512 Create v28-pro-max-architecture.md
Deploy Iddaai Backend / build-and-deploy (push) Successful in 27s
2026-04-24 02:30:26 +03:00
fahricansecer 300dceeb4b Merge branch 'main' of https://gitea.bilgich.com/fahricansecer/iddaai-be
Deploy Iddaai Backend / build-and-deploy (push) Successful in 27s
2026-04-24 02:10:48 +03:00
fahricansecer ad01976fb9 fix: lineup data normalization + tomorrow match sync + player field mapping 2026-04-24 02:09:58 +03:00
fahricansecer 6880eb92f5 Merge pull request 'v26-shadow' (#4) from v26-shadow into main
Deploy Iddaai Backend / build-and-deploy (push) Successful in 27s
Reviewed-on: #4
2026-04-24 01:15:54 +03:00
19 changed files with 4759 additions and 131 deletions
+4 -2
View File
@@ -42,7 +42,9 @@ uploads/
public/uploads/
# Large Datasets and ML Models
ai-engine/models/
models/
ai-engine/models/*
!ai-engine/models/*.py
models/*
!models/*.py
colab_export/
+16 -16
View File
@@ -323,8 +323,8 @@ class OddsBandAnalyzer:
m.home_team_id,
m.away_team_id,
CASE
WHEN m.home_team_id = %(team_id)s THEN os_sel.odd_value
ELSE os_sel2.odd_value
WHEN m.home_team_id = %(team_id)s THEN os_sel.odd_value::numeric
ELSE os_sel2.odd_value::numeric
END AS team_odds
FROM matches m
JOIN odd_categories oc
@@ -344,7 +344,7 @@ class OddsBandAnalyzer:
AND m.score_home IS NOT NULL
AND m.score_away IS NOT NULL
AND m.mst_utc < %(before_ts)s
AND COALESCE(os_sel.odd_value, os_sel2.odd_value)
AND COALESCE(os_sel.odd_value::numeric, os_sel2.odd_value::numeric)
BETWEEN %(band_low)s AND %(band_high)s
ORDER BY m.mst_utc DESC
LIMIT %(max_lookback)s
@@ -432,7 +432,7 @@ class OddsBandAnalyzer:
AND m.score_home IS NOT NULL
AND m.score_away IS NOT NULL
AND m.mst_utc < %(before_ts)s
AND os_h.odd_value BETWEEN %(band_low)s AND %(band_high)s
AND os_h.odd_value::numeric BETWEEN %(band_low)s AND %(band_high)s
ORDER BY m.mst_utc DESC
LIMIT %(max_lookback)s
)
@@ -508,7 +508,7 @@ class OddsBandAnalyzer:
f"İlk Yarı {line_str} Alt/Üst",
f"Ilk Yari {line_str} Alt/Ust",
]
score_expr = "COALESCE(m.score_ht_home, 0) + COALESCE(m.score_ht_away, 0)"
score_expr = "COALESCE(m.ht_score_home, 0) + COALESCE(m.ht_score_away, 0)"
else:
cat_names = [
f"{line_str} Alt/Üst",
@@ -535,7 +535,7 @@ class OddsBandAnalyzer:
AND m.status = 'FT'
AND m.score_home IS NOT NULL
AND m.mst_utc < %(before_ts)s
AND os_over.odd_value BETWEEN %(band_low)s AND %(band_high)s
AND os_over.odd_value::numeric BETWEEN %(band_low)s AND %(band_high)s
ORDER BY m.mst_utc DESC
LIMIT %(max_lookback)s
)
@@ -620,7 +620,7 @@ class OddsBandAnalyzer:
AND m.status = 'FT'
AND m.score_home IS NOT NULL
AND m.mst_utc < %(before_ts)s
AND os_yes.odd_value BETWEEN %(band_low)s AND %(band_high)s
AND os_yes.odd_value::numeric BETWEEN %(band_low)s AND %(band_high)s
ORDER BY m.mst_utc DESC
LIMIT %(max_lookback)s
)
@@ -696,7 +696,7 @@ class OddsBandAnalyzer:
AND m.sport = 'football' AND m.status = 'FT'
AND m.score_home IS NOT NULL
AND m.mst_utc < %(before_ts)s
AND os_sel.odd_value BETWEEN %(bl)s AND %(bh)s
AND os_sel.odd_value::numeric BETWEEN %(bl)s AND %(bh)s
ORDER BY m.mst_utc DESC LIMIT %(ml)s
)
SELECT COUNT(*) AS ss,
@@ -748,7 +748,7 @@ class OddsBandAnalyzer:
try:
cur.execute("""
WITH ht_matches AS (
SELECT m.score_ht_home, m.score_ht_away,
SELECT m.ht_score_home, m.ht_score_away,
m.home_team_id, m.away_team_id
FROM matches m
JOIN odd_categories oc ON oc.match_id = m.id
@@ -761,18 +761,18 @@ class OddsBandAnalyzer:
AND os2.name = '2' AND m.away_team_id = %(tid)s
WHERE (m.home_team_id = %(tid)s OR m.away_team_id = %(tid)s)
AND m.sport = 'football' AND m.status = 'FT'
AND m.score_ht_home IS NOT NULL
AND m.ht_score_home IS NOT NULL
AND m.mst_utc < %(before_ts)s
AND COALESCE(os1.odd_value, os2.odd_value)
AND COALESCE(os1.odd_value::numeric, os2.odd_value::numeric)
BETWEEN %(bl)s AND %(bh)s
ORDER BY m.mst_utc DESC LIMIT %(ml)s
)
SELECT COUNT(*) AS ss,
COALESCE(AVG(CASE
WHEN (home_team_id = %(tid)s AND score_ht_home > score_ht_away)
OR (away_team_id = %(tid)s AND score_ht_away > score_ht_home)
WHEN (home_team_id = %(tid)s AND ht_score_home > ht_score_away)
OR (away_team_id = %(tid)s AND ht_score_away > ht_score_home)
THEN 1.0 ELSE 0.0 END), 0.33) AS win_rate,
COALESCE(AVG(CASE WHEN score_ht_home = score_ht_away
COALESCE(AVG(CASE WHEN ht_score_home = ht_score_away
THEN 1.0 ELSE 0.0 END), 0.40) AS draw_rate
FROM ht_matches
""", {
@@ -824,7 +824,7 @@ class OddsBandAnalyzer:
AND m.sport = 'football' AND m.status = 'FT'
AND m.score_home IS NOT NULL
AND m.mst_utc < %(before_ts)s
AND os_odd.odd_value BETWEEN %(bl)s AND %(bh)s
AND os_odd.odd_value::numeric BETWEEN %(bl)s AND %(bh)s
ORDER BY m.mst_utc DESC LIMIT %(ml)s
)
SELECT COUNT(*) AS ss,
@@ -1185,7 +1185,7 @@ class OddsBandAnalyzer:
'IY/MS'
)
JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
AND os.odd_value BETWEEN %(bl)s AND %(bh)s
AND os.odd_value::numeric BETWEEN %(bl)s AND %(bh)s
WHERE m.sport = 'football'
AND m.status = 'FT'
AND m.score_home IS NOT NULL
+12 -6
View File
@@ -14,10 +14,13 @@ from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from models.basketball_v25 import get_basketball_v25_predictor
try:
from models.basketball_v25 import get_basketball_v25_predictor
HAS_BASKETBALL = True
except ImportError:
HAS_BASKETBALL = False
from services.single_match_orchestrator import get_single_match_orchestrator
from services.v26_shadow_engine import get_v26_shadow_engine
from data.database import dispose_engine
load_dotenv()
@@ -49,9 +52,6 @@ async def lifespan(_: FastAPI):
yield
# Cleanup async DB connections on shutdown
await dispose_engine()
app = FastAPI(
title="Suggest-Bet AI Engine",
@@ -123,9 +123,15 @@ def health_check() -> dict[str, Any]:
try:
orchestrator = get_single_match_orchestrator()
shadow_engine = get_v26_shadow_engine()
if HAS_BASKETBALL:
basketball_predictor = get_basketball_v25_predictor()
basketball_readiness = basketball_predictor.readiness_summary()
ready = bool(basketball_readiness["fully_loaded"])
ready = bool(basketball_readiness.get("fully_loaded", True))
else:
basketball_readiness = {"fully_loaded": False, "error": "Basketball module not found"}
ready = True
return {
"status": "healthy" if ready else "degraded",
"engine": "v28.main",
+413
View File
@@ -0,0 +1,413 @@
"""
Calibration Module for XGBoost Models
=====================================
Calibrates raw probabilities from XGBoost models using Isotonic Regression.
Ensures that a predicted probability of 70% actually corresponds to a 70% win rate.
Usage:
from ai_engine.models.calibration import Calibrator
calibrator = Calibrator()
calibrated_prob = calibrator.calibrate("ms", raw_prob)
# Training new calibration models:
calibrator.train_calibration(valid_df, market="ou25", prob_col="ou25_over_prob", actual_col="ou25_over_actual")
"""
import os
import pickle
import json
import numpy as np
import pandas as pd
from datetime import datetime
from typing import Dict, List, Optional, Tuple, Any
from sklearn.isotonic import IsotonicRegression
from sklearn.calibration import calibration_curve
from sklearn.metrics import brier_score_loss
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
CALIBRATION_DIR = os.path.join(AI_ENGINE_DIR, "models", "calibration")
os.makedirs(CALIBRATION_DIR, exist_ok=True)
# Supported markets for calibration
SUPPORTED_MARKETS = [
"ms", # Match Result (1X2) - multi-class, calibrated per class
"ms_home", # Standard Home win probability
"ms_home_heavy_fav", # Context: home odds <= 1.40
"ms_home_fav", # Context: 1.40 < home odds <= 1.80
"ms_home_balanced", # Context: 1.80 < home odds <= 2.50
"ms_home_underdog", # Context: home odds > 2.50
"ms_draw", # Draw probability
"ms_away", # Away win probability
"ou15", # Over/Under 1.5
"ou25", # Over/Under 2.5
"ou35", # Over/Under 3.5
"btts", # Both Teams to Score
"ht_ft", # Half-Time/Full-Time
"dc", # Double Chance
"ht", # Half-Time Result
]
class CalibrationMetrics:
    """Quality statistics recorded for a single market's calibration model."""

    def __init__(self):
        # All fields start at neutral values; they are filled in after training
        # or when a saved metrics file is read back.
        self.brier_score: float = 0.0
        self.calibration_error: float = 0.0
        self.sample_count: int = 0
        self.last_trained: str = ""
        self.mean_predicted: float = 0.0
        self.mean_actual: float = 0.0

    def to_dict(self) -> Dict:
        """Return the metrics as a plain dict, with float fields rounded to 4 decimals."""

        def four_dp(value):
            return round(value, 4)

        payload = {}
        payload["brier_score"] = four_dp(self.brier_score)
        payload["calibration_error"] = four_dp(self.calibration_error)
        payload["sample_count"] = self.sample_count
        payload["last_trained"] = self.last_trained
        payload["mean_predicted"] = four_dp(self.mean_predicted)
        payload["mean_actual"] = four_dp(self.mean_actual)
        return payload
class Calibrator:
    """
    Probability calibration using Isotonic Regression.

    Isotonic Regression is a non-parametric method that fits a piecewise
    constant function that is monotonically increasing. It's ideal for
    calibrating probabilities because:
    1. It preserves ranking (if P(A) > P(B) before, P(A) >= P(B) after)
    2. It doesn't assume a specific distribution shape
    3. It can correct systematic over/under-confidence

    Markets without a trained model on disk fall back to a fixed per-market
    shrinkage heuristic (see ``_heuristic_calibrate``).

    Example:
        # Before calibration: model predicts 70% but actual win rate is 60%
        # After calibration: model predicts 70% -> calibrated to 60%
    """

    def __init__(self):
        # market key -> fitted IsotonicRegression
        self.calibrators: Dict[str, "IsotonicRegression"] = {}
        # market key -> metrics recorded at training time
        self.metrics: Dict[str, "CalibrationMetrics"] = {}
        # market key -> shrinkage factor used when no trained model exists
        self.heuristic_fallback: Dict[str, float] = {
            "ms": 0.90,
            "ms_home": 0.90,
            "ms_home_heavy_fav": 0.95,
            "ms_home_fav": 0.90,
            "ms_home_balanced": 0.85,
            "ms_home_underdog": 0.80,
            "ms_draw": 0.90,
            "ms_away": 0.90,
            "ou15": 0.90,
            "ou25": 0.90,
            "ou35": 0.90,
            "btts": 0.90,
            "ht_ft": 0.85,
            "dc": 0.93,
            "ht": 0.85,
        }
        self._load_calibrators()

    def _load_calibrators(self):
        """Load trained calibrators and their metrics for each supported market from disk.

        A failure to read one market's files is logged and skipped so the
        remaining markets still load.
        """
        for market in SUPPORTED_MARKETS:
            model_path = os.path.join(CALIBRATION_DIR, f"{market}_calibrator.pkl")
            metrics_path = os.path.join(CALIBRATION_DIR, f"{market}_metrics.json")

            if os.path.exists(model_path):
                try:
                    with open(model_path, "rb") as f:
                        # NOTE(review): pickle.load executes arbitrary code from the
                        # file. Only calibrator files produced by this project's own
                        # training run should ever be placed in CALIBRATION_DIR.
                        self.calibrators[market] = pickle.load(f)
                    print(f"[Calibrator] Loaded calibration model for {market}")
                except Exception as e:
                    print(f"[Calibrator] Warning: Failed to load {market}: {e}")

            if os.path.exists(metrics_path):
                try:
                    with open(metrics_path, "r") as f:
                        data = json.load(f)
                    metrics = CalibrationMetrics()
                    metrics.brier_score = data.get("brier_score", 0.0)
                    metrics.calibration_error = data.get("calibration_error", 0.0)
                    metrics.sample_count = data.get("sample_count", 0)
                    metrics.last_trained = data.get("last_trained", "")
                    metrics.mean_predicted = data.get("mean_predicted", 0.0)
                    metrics.mean_actual = data.get("mean_actual", 0.0)
                    self.metrics[market] = metrics
                except Exception as e:
                    print(f"[Calibrator] Warning: Failed to load metrics for {market}: {e}")

    def calibrate(self, market_type: str, raw_prob: float, odds_val: Optional[float] = None) -> float:
        """
        Calibrate a raw probability using Isotonic Regression.

        Args:
            market_type (str): 'ms_home', 'ou25', 'btts', 'ht_ft', etc.
                Case-insensitive; '-' is treated as '_'.
            raw_prob (float): The raw probability from the model (0.0 - 1.0)
            odds_val (float, optional): The pre-match odds, used for
                context-aware bucket mapping of the 'ms_home' market

        Returns:
            float: Calibrated probability. Output of a trained model is clipped
            to [0.01, 0.99]; the heuristic fallback is returned unclipped.
        """
        # Normalize market type
        market_key = market_type.lower().replace("-", "_")

        # Route to an odds bucket if ms_home and usable odds are provided
        if market_key == "ms_home" and odds_val is not None and odds_val > 1.0:
            if odds_val <= 1.40:
                bucket_key = "ms_home_heavy_fav"
            elif odds_val <= 1.80:
                bucket_key = "ms_home_fav"
            elif odds_val <= 2.50:
                bucket_key = "ms_home_balanced"
            else:
                bucket_key = "ms_home_underdog"
            # Only switch to the bucket when a model was actually trained for it
            if bucket_key in self.calibrators:
                market_key = bucket_key

        # If we have a trained Isotonic Regression model, use it
        if market_key in self.calibrators:
            try:
                calibrated = self.calibrators[market_key].predict([raw_prob])[0]
                # Ensure output is a valid, non-degenerate probability
                return float(np.clip(calibrated, 0.01, 0.99))
            except Exception as e:
                print(f"[Calibrator] Warning: Isotonic failed for {market_key}: {e}")
                # Fall through to heuristic

        # Fallback to heuristic calibration
        return self._heuristic_calibrate(market_key, raw_prob)

    def _heuristic_calibrate(self, market_type: str, raw_prob: float) -> float:
        """
        Heuristic calibration fallback when no trained model exists.

        Applies a per-market shrinkage factor (default 0.90):
        - MS markets: shrink towards 0.33
        - OU / BTTS markets: shrink towards 0.5
        - HT/FT and HT: scale the probability down by the factor
        - DC: shrink towards 0.66
        Any other market key is returned unchanged.
        """
        # Get shrinkage factor for this market
        shrinkage = self.heuristic_fallback.get(market_type, 0.90)

        if market_type in ["ms", "ms_home", "ms_home_heavy_fav", "ms_home_fav", "ms_home_balanced", "ms_home_underdog", "ms_draw", "ms_away"]:
            # Pull towards 0.33 (uniform for 3-class)
            return (raw_prob * shrinkage) + (0.33 * (1.0 - shrinkage))
        elif market_type in ["ou15", "ou25", "ou35", "btts"]:
            # Pull towards 0.5 (uniform for binary)
            return (raw_prob * shrinkage) + (0.5 * (1.0 - shrinkage))
        elif market_type in ["ht_ft", "ht"]:
            # Plain down-scaling for high-variance markets
            return raw_prob * shrinkage
        elif market_type == "dc":
            # Double chance is pulled towards 0.66
            return (raw_prob * shrinkage) + (0.66 * (1.0 - shrinkage))

        return raw_prob

    def train_calibration(
        self,
        df: pd.DataFrame,
        market: str,
        prob_col: str,
        actual_col: str,
        min_samples: int = 100,
        save: bool = True,
    ) -> "CalibrationMetrics":
        """
        Train an Isotonic Regression calibration model for a specific market.

        Args:
            df: DataFrame with predictions and actual outcomes
            market: Market identifier (e.g., 'ms_home', 'ou25', 'btts')
            prob_col: Column name for raw probabilities
            actual_col: Column name for actual outcomes (0 or 1)
            min_samples: Minimum samples required to train
            save: Whether to save the model to disk

        Returns:
            CalibrationMetrics with quality metrics. When fewer than
            ``min_samples`` valid rows exist, nothing is trained and only
            ``sample_count`` is populated.
        """
        # Filter valid data
        valid_df = df[[prob_col, actual_col]].dropna()
        n_samples = len(valid_df)

        if n_samples < min_samples:
            print(f"[Calibrator] Warning: Only {n_samples} samples for {market}, "
                  f"need at least {min_samples}")
            metrics = CalibrationMetrics()
            metrics.sample_count = n_samples
            return metrics

        # Extract arrays
        raw_probs = valid_df[prob_col].values
        actuals = valid_df[actual_col].values

        # Train Isotonic Regression
        iso = IsotonicRegression(out_of_bounds="clip", increasing=True)
        iso.fit(raw_probs, actuals)

        # Calibrated probabilities on the training data, used for the metrics below
        calibrated_probs = iso.predict(raw_probs)

        # Calculate metrics; coerce to plain floats so they stay JSON-friendly
        metrics = CalibrationMetrics()
        metrics.sample_count = n_samples
        metrics.last_trained = datetime.utcnow().isoformat()
        metrics.brier_score = float(brier_score_loss(actuals, calibrated_probs))
        metrics.mean_predicted = float(np.mean(raw_probs))
        metrics.mean_actual = float(np.mean(actuals))

        # Calculate Expected Calibration Error (ECE)
        metrics.calibration_error = self._calculate_ece(
            calibrated_probs, actuals, n_bins=10
        )

        # Store in memory
        self.calibrators[market] = iso
        self.metrics[market] = metrics

        # Save to disk
        if save:
            self._save_calibration(market, iso, metrics)

        print(f"[Calibrator] Trained {market}: "
              f"Brier={metrics.brier_score:.4f}, "
              f"ECE={metrics.calibration_error:.4f}, "
              f"n={n_samples}")

        return metrics

    def train_all_markets(
        self,
        df: pd.DataFrame,
        market_config: Dict[str, Tuple[str, str]],
        min_samples: int = 100,
    ) -> Dict[str, "CalibrationMetrics"]:
        """
        Train calibration models for multiple markets at once.

        Args:
            df: DataFrame with all predictions and outcomes
            market_config: Dict mapping market -> (prob_col, actual_col)
                e.g., {'ou25': ('ou25_over_prob', 'ou25_over_actual')}
            min_samples: Minimum samples per market

        Returns:
            Dict of market -> CalibrationMetrics. Markets whose training raised
            an exception are logged and omitted from the result.
        """
        results = {}

        for market, (prob_col, actual_col) in market_config.items():
            print(f"\n[Calibrator] Training {market}...")
            try:
                metrics = self.train_calibration(
                    df=df,
                    market=market,
                    prob_col=prob_col,
                    actual_col=actual_col,
                    min_samples=min_samples,
                    save=True,
                )
                results[market] = metrics
            except Exception as e:
                print(f"[Calibrator] Failed to train {market}: {e}")

        return results

    def _calculate_ece(
        self,
        probs: np.ndarray,
        actuals: np.ndarray,
        n_bins: int = 10
    ) -> float:
        """
        Calculate Expected Calibration Error (ECE).

        ECE = sum(|bin_accuracy - bin_confidence| * bin_weight)
        Lower is better. Perfect calibration = 0.

        Bins are half-open ``[lo, hi)`` except the last one, which is closed
        ``[lo, 1.0]`` so that probabilities equal to exactly 1.0 are counted.
        """
        probs = np.asarray(probs, dtype=float)
        actuals = np.asarray(actuals, dtype=float)
        bin_boundaries = np.linspace(0, 1, n_bins + 1)
        ece = 0.0

        for i in range(n_bins):
            lower = bin_boundaries[i]
            upper = bin_boundaries[i + 1]
            if i == n_bins - 1:
                # Close the final bin on the right; otherwise p == 1.0 lands in
                # no bin at all and is silently excluded from the error.
                in_bin = (probs >= lower) & (probs <= upper)
            else:
                in_bin = (probs >= lower) & (probs < upper)
            prop_in_bin = np.mean(in_bin)

            if prop_in_bin > 0:
                accuracy_in_bin = np.mean(actuals[in_bin])
                avg_confidence_in_bin = np.mean(probs[in_bin])
                ece += np.abs(accuracy_in_bin - avg_confidence_in_bin) * prop_in_bin

        return float(ece)

    def _save_calibration(
        self,
        market: str,
        calibrator: "IsotonicRegression",
        metrics: "CalibrationMetrics"
    ):
        """Save calibration model (pickle) and metrics (JSON) to CALIBRATION_DIR."""
        # Save model
        model_path = os.path.join(CALIBRATION_DIR, f"{market}_calibrator.pkl")
        with open(model_path, "wb") as f:
            pickle.dump(calibrator, f)

        # Save metrics
        metrics_path = os.path.join(CALIBRATION_DIR, f"{market}_metrics.json")
        with open(metrics_path, "w") as f:
            json.dump(metrics.to_dict(), f, indent=2)

        print(f"[Calibrator] Saved {market} to {CALIBRATION_DIR}")

    def get_calibration_report(self) -> Dict[str, Any]:
        """Generate a summary report of all calibration models.

        Returns a dict with the keys of all loaded calibrators, the metrics of
        every supported market that has them, and the supported markets that
        have neither metrics nor a calibrator ("heuristic_only").
        """
        report = {
            "trained_markets": list(self.calibrators.keys()),
            "metrics": {},
            "heuristic_only": [],
        }

        for market in SUPPORTED_MARKETS:
            if market in self.metrics:
                report["metrics"][market] = self.metrics[market].to_dict()
            elif market not in self.calibrators:
                report["heuristic_only"].append(market)

        return report

    def get_calibrated_probabilities(
        self,
        market: str,
        raw_probs: np.ndarray
    ) -> np.ndarray:
        """
        Batch calibration for array of probabilities.

        Args:
            market: Market type
            raw_probs: Array of raw probabilities

        Returns:
            Array of calibrated probabilities, one per input element
        """
        return np.array([self.calibrate(market, p) for p in raw_probs])
# Singleton instance
_calibrator_instance: Optional[Calibrator] = None
def get_calibrator() -> Calibrator:
    """Return the module-wide Calibrator, constructing it on the first call."""
    global _calibrator_instance
    if _calibrator_instance is not None:
        return _calibrator_instance
    _calibrator_instance = Calibrator()
    return _calibrator_instance
File diff suppressed because it is too large Load Diff
+645
View File
@@ -0,0 +1,645 @@
"""
V25 Ensemble Predictor - NO TARGET LEAKAGE
===========================================
Multi-model ensemble for match prediction using XGBoost and LightGBM.
Features:
- 82 engineered features (NO target leakage)
- Market-specific models (MS, OU25, BTTS)
- Weighted ensemble predictions
- Value bet detection
"""
import os
import json
import numpy as np
import pandas as pd
from typing import Dict, List, Optional, Any
from dataclasses import dataclass, field
import xgboost as xgb
import lightgbm as lgb
# CatBoost is optional
try:
from catboost import CatBoostClassifier
CATBOOST_AVAILABLE = True
except ImportError:
CatBoostClassifier = None
CATBOOST_AVAILABLE = False
# Paths
MODELS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'v25')
@dataclass
class MarketPrediction:
    """A single market's pick with its probability, confidence, odds and edge."""
    market_type: str
    pick: str
    probability: float
    confidence: float
    odds: float = 0.0
    is_value_bet: bool = False
    edge: float = 0.0

    def to_dict(self) -> dict:
        """Serialize to a dict; probability and edge are scaled to percent (1 decimal)."""
        probability_pct = round(self.probability * 100, 1)
        edge_pct = round(self.edge * 100, 1)
        confidence_1dp = round(self.confidence, 1)
        return dict(
            market_type=self.market_type,
            pick=self.pick,
            probability=probability_pct,
            confidence=confidence_1dp,
            odds=self.odds,
            is_value_bet=self.is_value_bet,
            edge=edge_pct,
        )
@dataclass
class ValueBet:
    """A pick whose model probability exceeds the odds-implied probability."""
    market_type: str
    pick: str
    probability: float
    odds: float
    edge: float
    confidence: float

    def to_dict(self) -> dict:
        """Serialize to a dict; probability and edge are scaled to percent (1 decimal)."""
        probability_pct = round(self.probability * 100, 1)
        edge_pct = round(self.edge * 100, 1)
        confidence_1dp = round(self.confidence, 1)
        return dict(
            market_type=self.market_type,
            pick=self.pick,
            probability=probability_pct,
            odds=self.odds,
            edge=edge_pct,
            confidence=confidence_1dp,
        )
@dataclass
class MatchPrediction:
    """All market predictions for one match, plus any detected value bets."""
    match_id: str
    home_team: str
    away_team: str

    # Match result (MS)
    home_prob: float = 0.0
    draw_prob: float = 0.0
    away_prob: float = 0.0
    ms_pick: str = ''
    ms_confidence: float = 0.0

    # Over/Under 2.5 (OU25)
    over_prob: float = 0.0
    under_prob: float = 0.0
    ou25_pick: str = ''
    ou25_confidence: float = 0.0

    # Both teams to score (BTTS)
    btts_yes_prob: float = 0.0
    btts_no_prob: float = 0.0
    btts_pick: str = ''
    btts_confidence: float = 0.0

    # Value bets attached after prediction
    value_bets: List[ValueBet] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialize to a nested dict; probabilities become percentages with 1 decimal."""

        def as_percent(prob):
            return round(prob * 100, 1)

        ms_section = {
            'home_prob': as_percent(self.home_prob),
            'draw_prob': as_percent(self.draw_prob),
            'away_prob': as_percent(self.away_prob),
            'pick': self.ms_pick,
            'confidence': round(self.ms_confidence, 1),
        }
        ou25_section = {
            'over_prob': as_percent(self.over_prob),
            'under_prob': as_percent(self.under_prob),
            'pick': self.ou25_pick,
            'confidence': round(self.ou25_confidence, 1),
        }
        btts_section = {
            'yes_prob': as_percent(self.btts_yes_prob),
            'no_prob': as_percent(self.btts_no_prob),
            'pick': self.btts_pick,
            'confidence': round(self.btts_confidence, 1),
        }
        serialized_bets = []
        for bet in self.value_bets:
            serialized_bets.append(bet.to_dict())

        return {
            'match_id': self.match_id,
            'home_team': self.home_team,
            'away_team': self.away_team,
            'ms': ms_section,
            'ou25': ou25_section,
            'btts': btts_section,
            'value_bets': serialized_bets,
        }
class V25Predictor:
"""
V25 Ensemble Predictor - NO TARGET LEAKAGE
Uses market-specific XGBoost and LightGBM models.
Each market (MS, OU25, BTTS) has its own trained models.
"""
# Feature columns (82 features, NO target leakage)
FEATURE_COLS = [
# ELO Features (8)
'home_overall_elo', 'away_overall_elo', 'elo_diff',
'home_home_elo', 'away_away_elo',
'home_form_elo', 'away_form_elo', 'form_elo_diff',
# Form Features (12)
'home_goals_avg', 'home_conceded_avg',
'away_goals_avg', 'away_conceded_avg',
'home_clean_sheet_rate', 'away_clean_sheet_rate',
'home_scoring_rate', 'away_scoring_rate',
'home_winning_streak', 'away_winning_streak',
'home_unbeaten_streak', 'away_unbeaten_streak',
# H2H Features (6)
'h2h_total_matches', 'h2h_home_win_rate', 'h2h_draw_rate',
'h2h_avg_goals', 'h2h_btts_rate', 'h2h_over25_rate',
# Team Stats Features (8)
'home_avg_possession', 'away_avg_possession',
'home_avg_shots_on_target', 'away_avg_shots_on_target',
'home_shot_conversion', 'away_shot_conversion',
'home_avg_corners', 'away_avg_corners',
# Odds Features (23)
'odds_ms_h', 'odds_ms_d', 'odds_ms_a',
'implied_home', 'implied_draw', 'implied_away',
'odds_ht_ms_h', 'odds_ht_ms_d', 'odds_ht_ms_a',
'odds_ou05_o', 'odds_ou05_u',
'odds_ou15_o', 'odds_ou15_u',
'odds_ou25_o', 'odds_ou25_u',
'odds_ou35_o', 'odds_ou35_u',
'odds_ht_ou05_o', 'odds_ht_ou05_u',
'odds_ht_ou15_o', 'odds_ht_ou15_u',
'odds_btts_y', 'odds_btts_n',
# League Features (4)
'home_xga', 'away_xga',
'league_avg_goals', 'league_zero_goal_rate',
# Upset Engine (4)
'upset_atmosphere', 'upset_motivation', 'upset_fatigue', 'upset_potential',
# Referee Engine (5)
'referee_home_bias', 'referee_avg_goals', 'referee_cards_total',
'referee_avg_yellow', 'referee_experience',
# Momentum Engine (3)
'home_momentum_score', 'away_momentum_score', 'momentum_diff',
# Squad Features (9)
'home_squad_quality', 'away_squad_quality', 'squad_diff',
'home_key_players', 'away_key_players',
'home_missing_impact', 'away_missing_impact',
'home_goals_form', 'away_goals_form',
]
# Model weights for ensemble
DEFAULT_WEIGHTS = {
'xgb': 0.50,
'lgb': 0.50,
}
def __init__(self, models_dir: str = None):
    """
    Set up an empty predictor; no model files are read here.

    Args:
        models_dir: Directory containing model files. Falls back to the
            module-level MODELS_DIR when omitted or empty.
    """
    self.models_dir = models_dir if models_dir else MODELS_DIR
    # market -> {'xgb': model, 'lgb': model}
    self.models = {}
    self._loaded = False
# All trained market models available in V25
ALL_MARKETS = [
'ms', 'ou25', 'btts', # Core markets
'ou15', 'ou35', # Additional OU lines
'ht_result', 'ht_ou05', 'ht_ou15', # HT markets
'htft', # HT/FT combo
'cards_ou45', # Cards market
'handicap_ms', # Handicap
'odd_even', # Odd/Even goals
]
# Multi-class markets (output > 2 classes)
MULTICLASS_MARKETS = {'ms', 'ht_result', 'htft', 'handicap_ms'}
def load_models(self) -> bool:
    """
    Read every available XGBoost / LightGBM model file for ALL_MARKETS.

    Missing or zero-byte files are skipped. Returns True when at least one
    model file was loaded; any exception is printed and reported as False.
    """
    try:
        files_loaded = 0

        for market in self.ALL_MARKETS:
            bundle = {}
            self.models[market] = bundle

            # XGBoost: the file content is read in Python and handed over as a
            # buffer (avoids non-ASCII path issues).
            xgb_file = os.path.join(self.models_dir, f'xgb_v25_{market}.json')
            if os.path.exists(xgb_file) and os.path.getsize(xgb_file) > 0:
                with open(xgb_file, 'r', encoding='utf-8') as handle:
                    raw_json = handle.read()
                xgb_booster = xgb.Booster()
                xgb_booster.load_model(bytearray(raw_json, 'utf-8'))
                bundle['xgb'] = xgb_booster
                files_loaded += 1

            # LightGBM: same approach, the model is built from its text dump.
            lgb_file = os.path.join(self.models_dir, f'lgb_v25_{market}.txt')
            if os.path.exists(lgb_file) and os.path.getsize(lgb_file) > 0:
                with open(lgb_file, 'r', encoding='utf-8') as handle:
                    model_text = handle.read()
                bundle['lgb'] = lgb.Booster(model_str=model_text)
                files_loaded += 1

            # Drop markets for which nothing was found
            if not bundle:
                del self.models[market]

        print(f"[V25] Loaded {files_loaded} model files across {len(self.models)} markets: {list(self.models.keys())}")
        self._loaded = files_loaded > 0
        return self._loaded
    except Exception as e:
        print(f"[ERROR] Error loading models: {e}")
        import traceback
        traceback.print_exc()
        return False
def _ensure_loaded(self):
"""Ensure models are loaded before prediction."""
if not self._loaded:
if not self.load_models():
raise RuntimeError("Failed to load V25 models")
def _prepare_features(self, features: Dict[str, float]) -> pd.DataFrame:
"""Prepare feature vector for prediction."""
X = pd.DataFrame([{col: features.get(col, 0.0) for col in self.FEATURE_COLS}])
return X
def predict_ms(self, features: Dict[str, float]) -> tuple:
    """
    Predict match result (1X2).

    Each available model's class probabilities are scaled by its
    DEFAULT_WEIGHTS entry, summed, and renormalized. With no usable model
    output the method returns 0.33 for every outcome.

    Returns:
        (home_prob, draw_prob, away_prob)
    """
    self._ensure_loaded()
    X = self._prepare_features(features)
    ms_models = self.models.get('ms', {})

    weighted = []

    # XGBoost: a 1-D output is treated as a single row of class probabilities
    if 'xgb' in ms_models:
        xgb_out = ms_models['xgb'].predict(xgb.DMatrix(X))
        if len(xgb_out.shape) == 1:
            xgb_out = np.array([xgb_out])
        weighted.append(xgb_out[0] * self.DEFAULT_WEIGHTS['xgb'])

    # LightGBM: only a 2-D (rows x classes) output is used
    if 'lgb' in ms_models:
        lgb_out = ms_models['lgb'].predict(X)
        if len(lgb_out.shape) == 2:
            weighted.append(lgb_out[0] * self.DEFAULT_WEIGHTS['lgb'])

    if len(weighted) == 0:
        return 0.33, 0.33, 0.33

    combined = np.sum(weighted, axis=0)
    combined = combined / combined.sum()

    home, draw, away = float(combined[0]), float(combined[1]), float(combined[2])
    return home, draw, away
def predict_ou25(self, features: Dict[str, float]) -> tuple:
    """
    Predict Over/Under 2.5 goals.

    The "over" probability is the plain mean of the available model outputs;
    "under" is its complement. With no usable output both are 0.5.

    Returns:
        (over_prob, under_prob)
    """
    self._ensure_loaded()
    X = self._prepare_features(features)
    ou_models = self.models.get('ou25', {})

    over_estimates = []

    # XGBoost: only a 1-D ndarray output is used
    if 'xgb' in ou_models:
        xgb_out = ou_models['xgb'].predict(xgb.DMatrix(X))
        if isinstance(xgb_out, np.ndarray) and len(xgb_out.shape) == 1:
            over_estimates.append(xgb_out[0])

    # LightGBM: first element of any ndarray output
    if 'lgb' in ou_models:
        lgb_out = ou_models['lgb'].predict(X)
        if isinstance(lgb_out, np.ndarray):
            over_estimates.append(lgb_out[0])

    if not over_estimates:
        return 0.5, 0.5

    over = np.mean(over_estimates)
    return float(over), float(1 - over)
def predict_btts(self, features: Dict[str, float]) -> tuple:
    """
    Predict Both Teams To Score.

    The "yes" probability is the plain mean of the available model outputs;
    "no" is its complement. With no usable output both are 0.5.

    Returns:
        (yes_prob, no_prob)
    """
    self._ensure_loaded()
    X = self._prepare_features(features)
    btts_models = self.models.get('btts', {})

    yes_estimates = []

    # XGBoost: only a 1-D ndarray output is used
    if 'xgb' in btts_models:
        xgb_out = btts_models['xgb'].predict(xgb.DMatrix(X))
        if isinstance(xgb_out, np.ndarray) and len(xgb_out.shape) == 1:
            yes_estimates.append(xgb_out[0])

    # LightGBM: first element of any ndarray output
    if 'lgb' in btts_models:
        lgb_out = btts_models['lgb'].predict(X)
        if isinstance(lgb_out, np.ndarray):
            yes_estimates.append(lgb_out[0])

    if not yes_estimates:
        return 0.5, 0.5

    yes = np.mean(yes_estimates)
    return float(yes), float(1 - yes)
def predict_market(self, market: str, features: Dict[str, float]) -> Optional[np.ndarray]:
    """
    Generic prediction for any loaded market.

    Args:
        market: Market key (e.g. 'ht_result', 'htft', 'cards_ou45')
        features: Feature dictionary.

    Returns:
        numpy array of probabilities, or None when the market has no loaded
        model or no model produced a usable output.
        For binary markets: [positive_prob]
        For multi-class markets: [class0_prob, class1_prob, ...]
        When both models contribute, the result is their weighted average
        (DEFAULT_WEIGHTS), renormalized for multi-class markets. A single
        model's output is returned as-is.
    """
    self._ensure_loaded()

    if market not in self.models:
        return None
    market_models = self.models[market]

    X = self._prepare_features(features)
    is_multiclass = market in self.MULTICLASS_MARKETS

    probs = []
    weights = []

    # Same extraction rules for both backends; only the input container differs.
    for backend in ('xgb', 'lgb'):
        if backend not in market_models:
            continue
        model_input = xgb.DMatrix(X) if backend == 'xgb' else X
        raw = market_models[backend].predict(model_input)
        if not isinstance(raw, np.ndarray):
            continue

        if is_multiclass and len(raw.shape) == 2:
            # (rows, classes): take the single row
            probs.append(raw[0])
        elif is_multiclass and len(raw.shape) == 1:
            # already a flat class-probability vector
            probs.append(raw)
        else:
            # binary market: keep only the positive-class probability
            probs.append(np.array([raw[0]]))
        # Recorded together with the vector so weights always align with probs
        weights.append(self.DEFAULT_WEIGHTS[backend])

    if not probs:
        return None

    # A lone model is returned untouched
    if len(probs) == 1:
        return probs[0]

    # Weighted average
    total_w = sum(weights)
    result = np.zeros_like(probs[0])
    for p, w in zip(probs, weights):
        result += p * (w / total_w)

    # Normalize multi-class
    if is_multiclass and result.sum() > 0:
        result = result / result.sum()

    return result
def has_market(self, market: str) -> bool:
    """Report whether a model entry exists for the given market key."""
    is_present = market in self.models
    return is_present
def predict_match(
    self,
    match_id: str,
    home_team: str,
    away_team: str,
    features: Dict[str, float],
    odds: Optional[Dict[str, float]] = None,
) -> MatchPrediction:
    """
    Run the MS, OU25 and BTTS predictions for one match and bundle them.

    Args:
        match_id: Match identifier.
        home_team: Home team name.
        away_team: Away team name.
        features: Feature dictionary.
        odds: Optional odds dictionary; when given (and non-empty), value
            bets are detected and attached to the result.

    Returns:
        MatchPrediction object.
    """

    def top_pick(candidates):
        # Highest-probability label and its probability expressed in percent
        label = max(candidates, key=candidates.get)
        return label, candidates[label] * 100

    # Raw probabilities per market
    home_prob, draw_prob, away_prob = self.predict_ms(features)
    over_prob, under_prob = self.predict_ou25(features)
    btts_yes_prob, btts_no_prob = self.predict_btts(features)

    # Picks and their confidence
    ms_pick, ms_confidence = top_pick({'1': home_prob, 'X': draw_prob, '2': away_prob})
    ou25_pick, ou25_confidence = top_pick({'Over': over_prob, 'Under': under_prob})
    btts_pick, btts_confidence = top_pick({'Yes': btts_yes_prob, 'No': btts_no_prob})

    prediction = MatchPrediction(
        match_id=match_id,
        home_team=home_team,
        away_team=away_team,
        home_prob=home_prob,
        draw_prob=draw_prob,
        away_prob=away_prob,
        ms_pick=ms_pick,
        ms_confidence=ms_confidence,
        over_prob=over_prob,
        under_prob=under_prob,
        ou25_pick=ou25_pick,
        ou25_confidence=ou25_confidence,
        btts_yes_prob=btts_yes_prob,
        btts_no_prob=btts_no_prob,
        btts_pick=btts_pick,
        btts_confidence=btts_confidence,
    )

    # Value bets are only computed when odds were supplied
    if not odds:
        return prediction

    prediction.value_bets = self._detect_value_bets(
        prediction, odds, home_prob, draw_prob, away_prob,
        over_prob, under_prob, btts_yes_prob, btts_no_prob
    )
    return prediction
def _detect_value_bets(
    self,
    prediction: MatchPrediction,
    odds: Dict[str, float],
    home_prob: float,
    draw_prob: float,
    away_prob: float,
    over_prob: float,
    under_prob: float,
    btts_yes_prob: float,
    btts_no_prob: float,
) -> List[ValueBet]:
    """Detect value bets by comparing model probabilities with market odds.

    For every outcome whose decimal odds are present and positive, the
    implied probability is ``1 / odds``. An outcome is a value bet when the
    model probability exceeds the implied probability by more than 5
    percentage points.

    Args:
        prediction: The prediction being evaluated (not read here; kept
            for interface compatibility).
        odds: Decimal odds keyed by ``ms_h``, ``ms_d``, ``ms_a``,
            ``ou25_o``, ``ou25_u``, ``btts_y``, ``btts_n``. Missing,
            ``None`` or non-positive entries are skipped.
        home_prob, draw_prob, away_prob: MS model probabilities.
        over_prob, under_prob: OU25 model probabilities.
        btts_yes_prob, btts_no_prob: BTTS model probabilities.

    Returns:
        ValueBet objects in fixed market order (MS, OU25, BTTS).
    """
    min_edge = 0.05  # 5% minimum edge

    # (odds key, market type, pick label, model probability)
    candidates = (
        ('ms_h', 'MS', '1', home_prob),
        ('ms_d', 'MS', 'X', draw_prob),
        ('ms_a', 'MS', '2', away_prob),
        ('ou25_o', 'OU25', 'Over', over_prob),
        ('ou25_u', 'OU25', 'Under', under_prob),
        ('btts_y', 'BTTS', 'Yes', btts_yes_prob),
        ('btts_n', 'BTTS', 'No', btts_no_prob),
    )

    value_bets = []
    for odds_key, market_type, pick, probability in candidates:
        price = odds.get(odds_key)
        # A feed may carry the key with a null price; treat it as "no odds"
        # instead of failing on the numeric comparison.
        if price is None or not price > 0:
            continue
        edge = probability - 1 / price
        if edge > min_edge:
            value_bets.append(ValueBet(
                market_type=market_type,
                pick=pick,
                probability=probability,
                odds=price,
                edge=edge,
                confidence=probability * 100,
            ))
    return value_bets
# Singleton instance, created lazily by get_v25_predictor()
_v25_predictor: Optional[V25Predictor] = None


def get_v25_predictor() -> V25Predictor:
    """Get or create the shared V25 predictor instance.

    The instance is published to the module-level singleton only after
    ``load_models()`` has returned. If loading raises, nothing is cached and
    the next call retries, instead of handing out a predictor whose models
    were never loaded.

    Returns:
        The shared V25Predictor.
    """
    global _v25_predictor
    if _v25_predictor is None:
        predictor = V25Predictor()
        predictor.load_models()
        _v25_predictor = predictor
    return _v25_predictor
+291
View File
@@ -0,0 +1,291 @@
"""
V27 Pro Predictor — Odds-Free Fundamentals + Value Edge Detection
This module loads V27 ensemble models (XGBoost, LightGBM, CatBoost)
and produces market-independent probability estimates.
The key insight: V27 is trained WITHOUT odds features, so it produces
"true" probabilities unbiased by market pricing. The divergence between
V25 (odds-aware) and V27 (odds-free) predictions signals market mispricing.
"""
import json
import logging
import os
import pickle
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import numpy as np
logger = logging.getLogger(__name__)
V27_DIR = Path(__file__).parent / "v27"


class V27Predictor:
    """
    Loads the V27 ensemble models and averages their probability outputs.

    Features are read from a dict in the order given by
    ``v27_feature_cols.json`` (described upstream as the 82-feature
    odds-free vector). Supported markets are listed in ``MARKETS``.
    """

    MARKETS = ["ms", "ou25"]

    # Short model-family tags used in file names and as keys of self.models[market].
    _MODEL_TAGS = ("xgb", "lgb", "cb")

    # Legacy file names without a market in the name. They are only valid
    # for the MS market, so they must never be used as a fallback elsewhere.
    _GENERIC_MS_FILES = {
        "xgb": "v27_xgboost.pkl",
        "lgb": "v27_lightgbm.pkl",
        "cb": "v27_catboost.pkl",
    }

    def __init__(self):
        # market -> {model tag -> loaded model object}
        self.models: Dict[str, Dict[str, object]] = {}
        # Ordered feature names expected by the models
        self.feature_cols: List[str] = []
        self._loaded = False

    def load_models(self) -> bool:
        """Load the feature column spec and every available model file.

        Returns:
            True when at least one model was loaded (or loading already
            succeeded earlier); False when the column spec is missing or
            unreadable, or when no model could be loaded.
        """
        if self._loaded:
            return True

        # Feature columns
        cols_path = V27_DIR / "v27_feature_cols.json"
        if not cols_path.exists():
            logger.error("[V27] Feature columns file not found: %s", cols_path)
            return False
        try:
            with open(cols_path, "r", encoding="utf-8") as f:
                self.feature_cols = json.load(f)
            logger.info("[V27] Loaded %d feature columns", len(self.feature_cols))
        except Exception as e:
            logger.error("[V27] Failed to load feature columns: %s", e)
            return False

        # Load models per market
        for market in self.MARKETS:
            self.models[market] = {}
            for tag in self._MODEL_TAGS:
                path = self._resolve_model_path(market, tag)
                if path is None:
                    logger.warning("[V27] Model file not found for %s/%s", market, tag)
                    continue
                try:
                    # NOTE(security): pickle.load can execute arbitrary code.
                    # Only artifacts from the trusted local model directory
                    # may be placed in V27_DIR.
                    with open(path, "rb") as f:
                        model = pickle.load(f)
                    self.models[market][tag] = model
                    logger.info("[V27] ✓ Loaded %s/%s from %s", market, tag, path.name)
                except Exception as e:
                    logger.error("[V27] ✗ Failed to load %s/%s: %s", market, tag, e)

        loaded_count = sum(len(v) for v in self.models.values())
        if loaded_count == 0:
            logger.error("[V27] No models loaded!")
            return False

        self._loaded = True
        logger.info("[V27] Total models loaded: %d across %d markets", loaded_count, len(self.models))
        return True

    def _resolve_model_path(self, market: str, tag: str) -> Optional[Path]:
        """Return the model file for ``market``/``tag``, or None if absent.

        The market-specific file (``v27_<market>_<tag>.pkl``) is preferred.
        The generic legacy file is accepted for the MS market only, so a
        3-class MS model is never loaded into a 2-class market slot.
        """
        specific = V27_DIR / f"v27_{market}_{tag}.pkl"
        if specific.exists():
            return specific
        if market == "ms":
            generic = V27_DIR / self._GENERIC_MS_FILES[tag]
            if generic.exists():
                return generic
        return None

    def _build_feature_array(self, features: Dict[str, float]) -> np.ndarray:
        """
        Build a single-row feature array ordered by ``self.feature_cols``.

        Columns that are missing from ``features`` or mapped to None are
        filled with 0.0. Keys of ``features`` not listed in
        ``self.feature_cols`` are ignored.
        """
        row = []
        for col in self.feature_cols:
            value = features.get(col)
            row.append(0.0 if value is None else float(value))
        return np.array([row])

    @staticmethod
    def _coerce_raw_output(raw, expected_classes: int, label: str) -> Optional[np.ndarray]:
        """Turn the raw output of a booster ``predict`` call into a probability row.

        Accepted shapes: 2-D with ``expected_classes`` columns (first row is
        used); 1-D for a binary market, read as the positive-class
        probability; 1-D with exactly ``expected_classes`` entries.
        Anything else yields None. A binary value outside [0, 1] (a raw
        margin or a class label, for example) is rejected rather than being
        turned into an invalid distribution.
        """
        if not isinstance(raw, np.ndarray):
            return None
        if raw.ndim == 2 and raw.shape[1] == expected_classes:
            return raw[0]
        if raw.ndim == 1 and expected_classes == 2:
            p = float(raw[0])
            if not 0.0 <= p <= 1.0:
                logger.warning("[V27] %s returned %r, which is not a probability", label, p)
                return None
            return np.array([1.0 - p, p])
        if raw.ndim == 1 and len(raw) == expected_classes:
            return raw
        return None

    def _predict_with_model(self, model, X: np.ndarray, label: str, expected_classes: int) -> Optional[np.ndarray]:
        """
        Predict class probabilities for one row with a single model.

        Handles sklearn-style wrappers (``predict_proba``) and raw
        xgboost/lightgbm Booster objects (``predict``). Raw XGBoost Boosters
        get a DMatrix built WITH feature names; raw LightGBM Boosters get a
        DataFrame with named columns.

        Args:
            model: Loaded model object.
            X: Feature array with a single row.
            label: Human-readable tag used in log messages.
            expected_classes: Number of outcome classes of the market.

        Returns:
            Probability array of length ``expected_classes``, or None when
            the model cannot produce one.
        """
        # Optional backends: a missing library must not break models that
        # do not need it.
        try:
            import xgboost as xgb
        except ImportError:
            xgb = None
        try:
            import lightgbm as lgbm
        except ImportError:
            lgbm = None

        # 1. sklearn-style predict_proba
        has_proba = hasattr(model, 'predict_proba')
        if has_proba:
            try:
                proba = model.predict_proba(X)[0]
                if len(proba) == expected_classes:
                    return proba
                logger.warning("[V27] %s predict_proba returned %d classes, expected %d", label, len(proba), expected_classes)
            except Exception as e:
                logger.warning("[V27] %s predict_proba failed: %s", label, e)

        # 2. Raw xgboost.Booster — MUST pass feature_names
        if xgb is not None and isinstance(model, xgb.Booster):
            try:
                feature_names = self.feature_cols if self.feature_cols else None
                dmat = xgb.DMatrix(X, feature_names=feature_names)
                return self._coerce_raw_output(model.predict(dmat), expected_classes, label)
            except Exception as e:
                logger.warning("[V27] %s xgb.Booster predict failed: %s", label, e)
                return None

        # 3. Raw lightgbm.Booster — pass as DataFrame with column names
        if lgbm is not None and isinstance(model, lgbm.Booster):
            try:
                if self.feature_cols:
                    import pandas as pd
                    raw = model.predict(pd.DataFrame(X, columns=self.feature_cols))
                else:
                    raw = model.predict(X)
                return self._coerce_raw_output(raw, expected_classes, label)
            except Exception as e:
                logger.warning("[V27] %s lgb.Booster predict failed: %s", label, e)
                return None

        # 4. Generic fallback for objects that only expose predict().
        # A model that has predict_proba follows the sklearn convention, where
        # predict() returns class labels; reading those as probabilities
        # would produce nonsense, so such models stop here.
        if has_proba:
            return None
        predict = getattr(model, 'predict', None)
        if predict is None:
            return None
        try:
            return self._coerce_raw_output(predict(X), expected_classes, label)
        except Exception as e:
            logger.warning("[V27] %s generic predict failed: %s", label, e)
            return None

    def _ensemble_average(self, market: str, features: Dict[str, float], expected_classes: int) -> Optional[np.ndarray]:
        """Average the probability rows of every usable model of a market.

        Returns None when models are not loaded, the market has no models,
        or no model produced a row of the expected length.
        """
        market_models = self.models.get(market)
        if not self._loaded or not market_models:
            return None

        X = self._build_feature_array(features)
        tag = market.upper()
        rows = []
        for label, model in market_models.items():
            proba = self._predict_with_model(model, X, f"{tag}/{label}", expected_classes=expected_classes)
            if proba is not None and len(proba) == expected_classes:
                rows.append(proba)
        if not rows:
            return None
        return np.mean(rows, axis=0)

    def predict_ms(self, features: Dict[str, float]) -> Optional[Dict[str, float]]:
        """
        Predict Match Score probabilities (Home/Draw/Away).

        Returns:
            Dict with keys ``home``, ``draw``, ``away`` (class indices 0, 1,
            2 of the averaged output), or None when no prediction is
            available.
        """
        avg = self._ensemble_average("ms", features, expected_classes=3)
        if avg is None:
            return None
        return {
            "home": float(avg[0]),
            "draw": float(avg[1]),
            "away": float(avg[2]),
        }

    def predict_ou25(self, features: Dict[str, float]) -> Optional[Dict[str, float]]:
        """
        Predict Over/Under 2.5 probabilities.

        Returns:
            Dict with keys ``under`` and ``over`` (class indices 0 and 1 of
            the averaged output), or None when no prediction is available.
        """
        avg = self._ensemble_average("ou25", features, expected_classes=2)
        if avg is None:
            return None
        return {
            "under": float(avg[0]),
            "over": float(avg[1]),
        }

    def predict_all(self, features: Dict[str, float]) -> Dict[str, Optional[Dict[str, float]]]:
        """Run predictions for all supported markets, keyed by market name."""
        return {
            "ms": self.predict_ms(features),
            "ou25": self.predict_ou25(features),
        }
def compute_divergence(
    v25_probs: Dict[str, float],
    v27_probs: Dict[str, float],
    missing_prob: float = 0.33,
) -> Dict[str, float]:
    """
    Compute the divergence signal between V25 (odds-aware) and V27 (odds-free).

    Positive divergence = V27 rates the outcome MORE likely than V25 does.
    Negative divergence = V27 rates the outcome LESS likely than V25 does.

    Args:
        v25_probs: V25 probabilities keyed by outcome.
        v27_probs: V27 probabilities keyed by outcome; its keys define the
            outcomes in the result.
        missing_prob: Probability assumed for an outcome that V25 does not
            provide (absent key or None). Defaults to 0.33, the value that
            was previously hard-coded; pass 0.5 for two-way markets.

    Returns:
        ``v27 - v25`` per outcome, rounded to 4 decimals.
    """
    divergence = {}
    for outcome, v27_val in v27_probs.items():
        v25_val = v25_probs.get(outcome)
        if v25_val is None:
            v25_val = missing_prob
        divergence[outcome] = round(v27_val - v25_val, 4)
    return divergence
def compute_value_edge(
    v25_probs: Dict[str, float],
    v27_probs: Dict[str, float],
    odds: Dict[str, float],
    min_edge: float = 0.05,
    min_divergence: float = 0.02,
    missing_prob: float = 0.33,
) -> Dict[str, Dict]:
    """
    Detect value bets by combining V25/V27 divergence with odds.

    A value bet exists when both hold:
    1. The V27 (odds-free) probability exceeds the odds-implied probability
       by more than ``min_edge``.
    2. The V27 probability exceeds the V25 probability by more than
       ``min_divergence``.

    Args:
        v25_probs: V25 probabilities keyed by outcome.
        v27_probs: V27 probabilities keyed by outcome; its keys define the
            outcomes in the result.
        odds: Decimal odds keyed by the same outcome names. Missing, None
            or <= 1.01 odds give an implied probability and edge of 0.0.
        min_edge: Minimum edge over the implied probability (default 5%).
        min_divergence: Minimum V27-over-V25 divergence (default 2%).
        missing_prob: Probability assumed when V25 lacks an outcome.

    Returns:
        Per outcome: ``v27_prob``, ``v25_prob``, ``implied_prob``,
        ``divergence``, ``edge`` (all rounded to 4 decimals) and
        ``is_value`` (evaluated on the unrounded values).
    """
    results = {}
    for key, v27_p in v27_probs.items():
        v25_p = v25_probs.get(key)
        if v25_p is None:
            v25_p = missing_prob

        # A null price in the feed is treated the same as a missing one.
        odds_val = odds.get(key) or 0.0
        implied_p = (1.0 / odds_val) if odds_val > 1.01 else 0.0

        divergence = v27_p - v25_p
        edge = v27_p - implied_p if implied_p > 0 else 0.0
        results[key] = {
            "v27_prob": round(v27_p, 4),
            "v25_prob": round(v25_p, 4),
            "implied_prob": round(implied_p, 4),
            "divergence": round(divergence, 4),
            "edge": round(edge, 4),
            "is_value": edge > min_edge and divergence > min_divergence,
        }
    return results
+497
View File
@@ -0,0 +1,497 @@
"""
Deterministic betting judge for prediction packages.
The model layer estimates event probabilities. BettingBrain decides whether
those probabilities are trustworthy enough to risk money.
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional, Tuple
class BettingBrain:
    """Deterministic judge that decides which candidate picks may be staked.

    ``judge`` collects the candidate rows of a prediction package, scores
    each one with ``_judge_row`` against evidence gates (odds floor,
    confidence, V25/V27 agreement, triple-value confirmation, historical
    sample size, risk level) and rewrites the package's pick fields
    according to the resulting BET / WATCH / REJECT actions.
    """

    MIN_ODDS = 1.30
    MIN_BET_SCORE = 72.0
    MIN_WATCH_SCORE = 62.0
    MIN_BAND_SAMPLE = 8
    HARD_DIVERGENCE = 0.22
    SOFT_DIVERGENCE = 0.14
    EXTREME_MODEL_PROB = 0.85
    EXTREME_GAP = 0.30
    # Score from which an approved pick is flagged guaranteed / grade "A".
    GUARANTEED_SCORE = 82.0
    # Maximum number of reason strings kept on a row.
    MAX_REASONS = 6
    # Additive score adjustment per market; unknown markets get -3.0.
    MARKET_PRIORS = {
        "DC": 4.0,
        "OU15": 3.0,
        "OU25": 2.0,
        "BTTS": 0.0,
        "MS": -2.0,
        "OU35": -2.0,
        "HT": -6.0,
        "HTFT": -12.0,
        "CARDS": -5.0,
        "OE": -8.0,
    }

    def judge(self, package: Dict[str, Any]) -> Dict[str, Any]:
        """Judge every candidate pick of ``package`` and return a guarded copy.

        Args:
            package: Prediction package. It is returned unchanged (same
                object) when it has no dict under ``v27_engine``.

        Returns:
            A shallow copy of ``package`` with ``main_pick``, ``value_pick``,
            ``supporting_picks``, ``bet_summary``, ``bet_advice``,
            ``betting_brain``, ``upper_brain`` and ``analysis_details``
            replaced. The caller's nested dicts are not modified.
        """
        v27_engine = package.get("v27_engine")
        if not isinstance(v27_engine, dict):
            return package

        guarded = dict(package)
        rows = self._collect_rows(guarded)
        if not rows:
            return guarded

        judged_rows: Dict[str, Dict[str, Any]] = {}
        decisions: List[Dict[str, Any]] = []
        for row in rows:
            key = self._row_key(row)
            judged = self._judge_row(dict(row), guarded)
            judged_rows[key] = judged
            decisions.append(judged["betting_brain"])

        approved = [
            row for row in judged_rows.values()
            if row.get("betting_brain", {}).get("action") == "BET"
        ]
        watchlist = [
            row for row in judged_rows.values()
            if row.get("betting_brain", {}).get("action") == "WATCH"
        ]
        approved.sort(key=self._candidate_sort_key, reverse=True)
        watchlist.sort(key=self._candidate_sort_key, reverse=True)

        original_main = guarded.get("main_pick") or {}
        main_pick = None
        decision = "NO_BET"
        decision_reason = "No candidate passed the betting brain evidence gates."

        if approved:
            main_pick = dict(approved[0])
            main_pick["is_guaranteed"] = bool(
                main_pick.get("betting_brain", {}).get("score", 0.0) >= self.GUARANTEED_SCORE
            )
            main_pick["pick_reason"] = "betting_brain_approved"
            decision = "BET"
            decision_reason = main_pick.get("betting_brain", {}).get("summary", "Evidence is aligned.")
        elif watchlist:
            main_pick = dict(watchlist[0])
            self._force_no_bet(main_pick, "betting_brain_watchlist")
            decision = "WATCHLIST"
            decision_reason = main_pick.get("betting_brain", {}).get("summary", "Interesting but not clean enough.")
        elif original_main:
            # Nothing survived: keep the original main pick visible, but unplayable.
            main_pick = dict(judged_rows.get(self._row_key(original_main), original_main))
            self._force_no_bet(main_pick, "betting_brain_no_safe_pick")

        main_key = self._row_key(main_pick) if main_pick else ""
        supporting = [
            dict(row)
            for row in judged_rows.values()
            if self._row_key(row) != main_key
        ]
        supporting.sort(key=self._candidate_sort_key, reverse=True)

        bet_summary = [
            self._summary_item(row)
            for row in sorted(judged_rows.values(), key=self._candidate_sort_key, reverse=True)
        ]

        guarded["main_pick"] = main_pick
        guarded["value_pick"] = self._pick_value_candidate(judged_rows, main_key)
        guarded["supporting_picks"] = supporting[:6]
        guarded["bet_summary"] = bet_summary

        playable = decision == "BET" and bool(main_pick and main_pick.get("playable"))
        advice = dict(guarded.get("bet_advice") or {})
        advice["playable"] = playable
        advice["suggested_stake_units"] = float(main_pick.get("stake_units", 0.0)) if playable else 0.0
        advice["reason"] = "betting_brain_approved" if playable else "betting_brain_no_bet"
        advice["decision"] = decision
        advice["confidence_band"] = self._decision_band(main_pick)
        guarded["bet_advice"] = advice

        rejected = [d for d in decisions if d.get("action") == "REJECT"]
        guarded["betting_brain"] = {
            "version": "judge-v1",
            "decision": decision,
            "reason": decision_reason,
            "main_pick_key": main_key or None,
            "approved_count": len(approved),
            "watchlist_count": len(watchlist),
            "rejected_count": len(rejected),
            "top_candidates": self._top_decisions(decisions),
            "rules": {
                "min_bet_score": self.MIN_BET_SCORE,
                "min_watch_score": self.MIN_WATCH_SCORE,
                "min_band_sample": self.MIN_BAND_SAMPLE,
                "hard_divergence": self.HARD_DIVERGENCE,
                "soft_divergence": self.SOFT_DIVERGENCE,
                "extreme_model_probability": self.EXTREME_MODEL_PROB,
                "extreme_model_market_gap": self.EXTREME_GAP,
            },
        }
        guarded["upper_brain"] = guarded["betting_brain"]

        # Copy before writing: ``guarded`` is only a shallow copy, so writing
        # into the existing dict would modify the caller's package. A None
        # value is treated as "no details yet".
        details = dict(guarded.get("analysis_details") or {})
        details["betting_brain_applied"] = True
        details["betting_brain_decision"] = decision
        guarded["analysis_details"] = details
        return guarded

    def _judge_row(self, row: Dict[str, Any], package: Dict[str, Any]) -> Dict[str, Any]:
        """Score one candidate row and attach the verdict under ``betting_brain``.

        The row is modified in place and returned. Any veto forces REJECT
        regardless of score; otherwise the score (clamped to 0-100) is
        compared with MIN_WATCH_SCORE and MIN_BET_SCORE.
        """
        market = str(row.get("market") or "")
        pick = str(row.get("pick") or "")
        model_prob = self._market_probability(row, package)
        odds = self._safe_float(row.get("odds"), 0.0) or 0.0
        implied = (1.0 / odds) if odds > 1.0 else 0.0
        model_gap = (model_prob - implied) if model_prob is not None and implied > 0 else None
        calibrated_conf = self._safe_float(row.get("calibrated_confidence", row.get("confidence")), 0.0) or 0.0
        play_score = self._safe_float(row.get("play_score"), 0.0) or 0.0
        ev_edge = self._safe_float(row.get("ev_edge", row.get("edge")), 0.0) or 0.0

        v27_prob = self._v27_probability(market, pick, package.get("v27_engine") or {})
        divergence = abs(model_prob - v27_prob) if model_prob is not None and v27_prob is not None else None

        triple_key = self._triple_key(market, pick)
        triple = self._triple_value(package, triple_key)
        band_sample = int(self._safe_float((triple or {}).get("band_sample"), 0.0) or 0.0)
        triple_is_value = bool((triple or {}).get("is_value"))
        consensus = str((package.get("v27_engine") or {}).get("consensus") or "").upper()

        positives: List[str] = []
        issues: List[str] = []
        vetoes: List[str] = []
        score = 0.0

        # Base model verdict
        if row.get("playable"):
            score += 18.0
            positives.append("base_model_playable")
        else:
            score -= 18.0
            issues.append("base_model_not_playable")

        # Bounded contributions of confidence, edge, play score and market prior
        score += max(0.0, min(20.0, calibrated_conf * 0.22))
        score += max(-8.0, min(16.0, ev_edge * 45.0))
        score += max(0.0, min(14.0, play_score * 0.12))
        score += self.MARKET_PRIORS.get(market, -3.0)

        data_quality = package.get("data_quality") or {}
        quality_score = self._safe_float(data_quality.get("score"), 0.6) or 0.6
        score += max(-8.0, min(6.0, (quality_score - 0.55) * 16.0))

        risk = str((package.get("risk") or {}).get("level") or "MEDIUM").upper()
        score += {"LOW": 5.0, "MEDIUM": 0.0, "HIGH": -12.0, "EXTREME": -22.0}.get(risk, -4.0)

        # Hard floors
        if odds < self.MIN_ODDS:
            vetoes.append("odds_below_minimum")
        if calibrated_conf < 38.0:
            vetoes.append("calibrated_confidence_too_low")
        if play_score < 50.0:
            vetoes.append("play_score_too_low")

        # Agreement between the row's probability and the V27 probability
        if divergence is not None:
            if divergence >= self.HARD_DIVERGENCE:
                score -= 42.0
                vetoes.append("v25_v27_hard_disagreement")
            elif divergence >= self.SOFT_DIVERGENCE:
                score -= 18.0
                issues.append("v25_v27_soft_disagreement")
            else:
                score += 11.0
                positives.append("v25_v27_aligned")

        # Triple-value confirmation and historical sample size
        if isinstance(triple, dict):
            if triple_is_value:
                score += 18.0
                positives.append("triple_value_confirmed")
            elif market in {"DC", "MS", "OU25", "BTTS"}:
                score -= 18.0
                issues.append("triple_value_not_confirmed")

            if band_sample >= 25:
                score += 8.0
                positives.append("strong_historical_sample")
            elif band_sample >= self.MIN_BAND_SAMPLE:
                score += 3.0
                positives.append("usable_historical_sample")
            else:
                score -= 16.0
                issues.append("historical_sample_too_low")
                if market == "DC":
                    vetoes.append("dc_without_historical_sample")
        elif market in {"MS", "DC", "OU25"}:
            score -= 10.0
            issues.append("missing_triple_value_evidence")

        if consensus == "DISAGREE" and market in {"MS", "DC"}:
            score -= 12.0
            issues.append("engine_consensus_disagree")

        if (
            model_prob is not None
            and model_gap is not None
            and model_prob >= self.EXTREME_MODEL_PROB
            and model_gap >= self.EXTREME_GAP
            and not triple_is_value
        ):
            score -= 24.0
            vetoes.append("extreme_probability_without_evidence")

        if market in {"HT", "HTFT", "OE"} and score < 86.0:
            vetoes.append("volatile_market_requires_exceptional_evidence")

        score = max(0.0, min(100.0, score))
        action = "BET"
        if vetoes:
            action = "REJECT"
        elif score < self.MIN_WATCH_SCORE:
            action = "REJECT"
        elif score < self.MIN_BET_SCORE:
            action = "WATCH"

        row["betting_brain"] = {
            "action": action,
            "score": round(score, 1),
            "summary": self._summary(action, market, pick, positives, issues, vetoes),
            "positives": positives[:5],
            "issues": issues[:6],
            "vetoes": vetoes[:6],
            "model_prob": round(model_prob, 4) if model_prob is not None else None,
            "implied_prob": round(implied, 4),
            "model_market_gap": round(model_gap, 4) if model_gap is not None else None,
            "v27_prob": round(v27_prob, 4) if v27_prob is not None else None,
            "divergence": round(divergence, 4) if divergence is not None else None,
            "triple_key": triple_key,
            "triple_value": triple,
        }

        if action != "BET":
            self._force_no_bet(row, f"betting_brain_{action.lower()}")
        else:
            row["is_guaranteed"] = bool(score >= self.GUARANTEED_SCORE)
            row["pick_reason"] = "betting_brain_approved"
            row["stake_units"] = self._brain_stake(row, score)
            row["bet_grade"] = "A" if score >= self.GUARANTEED_SCORE else "B"
            row["playable"] = True

        self._append_reason(row, f"betting_brain_{action.lower()}_{round(score)}")
        return row

    def _collect_rows(self, package: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Gather unique candidate rows, keyed by ``market:pick``.

        ``main_pick`` and ``value_pick`` are taken first; entries of
        ``supporting_picks`` and ``bet_summary`` with the same key are merged
        into them via ``_merge_row``.
        """
        rows: Dict[str, Dict[str, Any]] = {}
        for source in ("main_pick", "value_pick"):
            item = package.get(source)
            if isinstance(item, dict) and item.get("market"):
                rows[self._row_key(item)] = dict(item)

        for source in ("supporting_picks", "bet_summary"):
            for item in package.get(source) or []:
                if isinstance(item, dict) and item.get("market"):
                    key = self._row_key(item)
                    rows[key] = self._merge_row(rows.get(key), item)
        return list(rows.values())

    @staticmethod
    def _merge_row(existing: Optional[Dict[str, Any]], incoming: Dict[str, Any]) -> Dict[str, Any]:
        """Merge two rows of the same pick; non-None values of ``existing`` win.

        Reason lists are concatenated (existing first) and de-duplicated in
        order.
        """
        if existing is None:
            return dict(incoming)
        merged = dict(incoming)
        merged.update({k: v for k, v in existing.items() if v is not None})
        for key in ("decision_reasons", "reasons"):
            reasons = list(existing.get(key) or []) + list(incoming.get(key) or [])
            if reasons:
                merged[key] = list(dict.fromkeys(reasons))
        return merged

    def _pick_value_candidate(self, rows: Dict[str, Dict[str, Any]], main_key: str) -> Optional[Dict[str, Any]]:
        """Best BET/WATCH row other than the main pick with odds >= 1.60, or None."""
        candidates = [
            row for key, row in rows.items()
            if key != main_key
            and row.get("betting_brain", {}).get("action") in {"BET", "WATCH"}
            and (self._safe_float(row.get("odds"), 0.0) or 0.0) >= 1.60
        ]
        candidates.sort(key=self._candidate_sort_key, reverse=True)
        return dict(candidates[0]) if candidates else None

    def _summary_item(self, row: Dict[str, Any]) -> Dict[str, Any]:
        """Project a judged row onto the fields of a ``bet_summary`` entry."""
        reasons = list(row.get("decision_reasons") or row.get("reasons") or [])
        return {
            "market": row.get("market"),
            "pick": row.get("pick"),
            "raw_confidence": row.get("raw_confidence", row.get("confidence")),
            "calibrated_confidence": row.get("calibrated_confidence", row.get("confidence")),
            "bet_grade": row.get("bet_grade", "PASS"),
            "playable": bool(row.get("playable")),
            "stake_units": float(row.get("stake_units", 0.0) or 0.0),
            "play_score": row.get("play_score", 0.0),
            "ev_edge": row.get("ev_edge", row.get("edge", 0.0)),
            "implied_prob": row.get("implied_prob", 0.0),
            "odds_reliability": row.get("odds_reliability", 0.35),
            "odds": row.get("odds", 0.0),
            "reasons": reasons[:6],
            "betting_brain": row.get("betting_brain"),
        }

    @staticmethod
    def _candidate_sort_key(row: Dict[str, Any]) -> Tuple[float, float, float]:
        """Sort key: action rank (BET > WATCH > REJECT), brain score, play score."""
        brain = row.get("betting_brain") or {}
        action_boost = {"BET": 2.0, "WATCH": 1.0, "REJECT": 0.0}.get(str(brain.get("action")), 0.0)
        return (
            action_boost,
            float(brain.get("score", 0.0) or 0.0),
            float(row.get("play_score", 0.0) or 0.0),
        )

    @staticmethod
    def _row_key(row: Optional[Dict[str, Any]]) -> str:
        """Identity of a row as ``market:pick``; empty string for non-dicts."""
        if not isinstance(row, dict):
            return ""
        return f"{row.get('market')}:{row.get('pick')}"

    def _force_no_bet(self, row: Dict[str, Any], reason: str) -> None:
        """Mark ``row`` as unplayable in place and record ``reason``."""
        row["playable"] = False
        row["stake_units"] = 0.0
        row["bet_grade"] = "PASS"
        row["is_guaranteed"] = False
        row["pick_reason"] = reason
        if row.get("signal_tier") == "CORE":
            row["signal_tier"] = "PASS"
        self._append_reason(row, reason)

    @staticmethod
    def _append_reason(row: Dict[str, Any], reason: str) -> None:
        """Record ``reason`` on the row, keeping at most MAX_REASONS entries.

        The list lives under ``decision_reasons`` when that key exists,
        otherwise under ``reasons``. When the cap is exceeded the oldest
        other entries are dropped, so the reason passed in is always kept.
        """
        key = "decision_reasons" if "decision_reasons" in row else "reasons"
        reasons = list(row.get(key) or [])
        if reason not in reasons:
            reasons.append(reason)
        cap = BettingBrain.MAX_REASONS
        if len(reasons) > cap:
            others = [item for item in reasons if item != reason]
            reasons = others[-(cap - 1):] + [reason]
        row[key] = reasons

    def _brain_stake(self, row: Dict[str, Any], score: float) -> float:
        """Stake units for an approved row, capped by score tier.

        Caps: 0.8 below score 78, 1.2 below GUARANTEED_SCORE, 2.0 from
        there. An existing positive stake is kept up to the cap; otherwise
        the cap is used. Returns 0.0 when odds are <= 1.0.
        """
        existing = self._safe_float(row.get("stake_units"), 0.0) or 0.0
        odds = self._safe_float(row.get("odds"), 0.0) or 0.0
        if odds <= 1.0:
            return 0.0
        cap = 2.0 if score >= self.GUARANTEED_SCORE else 1.2
        if score < 78.0:
            cap = 0.8
        # NOTE(review): rounding to one decimal turns the 0.25 floor into 0.2.
        return round(max(0.25, min(existing if existing > 0 else cap, cap)), 1)

    @staticmethod
    def _decision_band(main_pick: Optional[Dict[str, Any]]) -> str:
        """Confidence band (HIGH / MEDIUM / LOW) from the main pick's brain score."""
        if not main_pick:
            return "LOW"
        score = float((main_pick.get("betting_brain") or {}).get("score", 0.0) or 0.0)
        if score >= BettingBrain.GUARANTEED_SCORE:
            return "HIGH"
        if score >= BettingBrain.MIN_BET_SCORE:
            return "MEDIUM"
        return "LOW"

    @staticmethod
    def _top_decisions(decisions: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """The five highest-scoring decisions, reduced to their headline fields."""
        ordered = sorted(decisions, key=lambda d: float(d.get("score", 0.0) or 0.0), reverse=True)
        return [
            {
                "action": item.get("action"),
                "score": item.get("score"),
                "summary": item.get("summary"),
                "vetoes": item.get("vetoes", []),
                "issues": item.get("issues", []),
            }
            for item in ordered[:5]
        ]

    @staticmethod
    def _summary(action: str, market: str, pick: str, positives: List[str], issues: List[str], vetoes: List[str]) -> str:
        """One-sentence explanation of the verdict."""
        if action == "BET":
            return f"{market} {pick} approved: evidence is aligned enough for a controlled stake."
        if action == "WATCH":
            return f"{market} {pick} is interesting but not clean enough for stake."
        if vetoes:
            return f"{market} {pick} rejected: {', '.join(vetoes[:3])}."
        if issues:
            return f"{market} {pick} rejected: {', '.join(issues[:3])}."
        return f"{market} {pick} rejected by evidence score."

    def _market_probability(self, row: Dict[str, Any], package: Dict[str, Any]) -> Optional[float]:
        """Probability of the row's pick: ``row['probability']`` or the market board entry."""
        direct = self._safe_float(row.get("probability"))
        if direct is not None:
            return direct
        board = package.get("market_board") or {}
        payload = board.get(str(row.get("market") or "")) if isinstance(board, dict) else None
        probs = payload.get("probs") if isinstance(payload, dict) else None
        if not isinstance(probs, dict):
            return None
        key = self._prob_key(str(row.get("market") or ""), str(row.get("pick") or ""))
        return self._safe_float(probs.get(key)) if key else None

    def _v27_probability(self, market: str, pick: str, v27_engine: Dict[str, Any]) -> Optional[float]:
        """V27 probability for a pick, or None when V27 cannot provide it.

        Only MS, DC (sum of the two covered MS outcomes) and OU25 are
        supported. None is returned whenever a required V27 value is
        missing, so a missing prediction is never read as a 0.0 probability.
        """
        predictions = v27_engine.get("predictions") or {}
        ms = predictions.get("ms") or {}
        ou25 = predictions.get("ou25") or {}
        if market == "MS":
            return self._safe_float(ms.get({"1": "home", "X": "draw", "2": "away"}.get(pick, "")))
        if market == "DC":
            covered = {"1X": ("home", "draw"), "X2": ("draw", "away"), "12": ("home", "away")}.get(pick)
            if covered is None:
                return None
            first = self._safe_float(ms.get(covered[0]))
            second = self._safe_float(ms.get(covered[1]))
            if first is None or second is None:
                return None
            return first + second
        if market == "OU25":
            key = self._prob_key(market, pick)
            return self._safe_float(ou25.get(key)) if key else None
        return None

    def _triple_value(self, package: Dict[str, Any], key: Optional[str]) -> Optional[Dict[str, Any]]:
        """Entry of ``v27_engine.triple_value`` for ``key``, if it is a dict."""
        if not key:
            return None
        value = ((package.get("v27_engine") or {}).get("triple_value") or {}).get(key)
        return value if isinstance(value, dict) else None

    def _triple_key(self, market: str, pick: str) -> Optional[str]:
        """Key under which triple-value evidence for this pick is looked up.

        Returns None for picks that have no key in this mapping (for
        example MS draw, and the under/no/even sides).
        """
        prob_key = self._prob_key(market, pick)
        if market == "MS":
            return {"1": "home", "2": "away"}.get(pick)
        if market == "DC" and pick.upper() in {"1X", "X2", "12"}:
            return f"dc_{pick.lower()}"
        if market in {"OU15", "OU25", "OU35"} and prob_key == "over":
            return f"{market.lower()}_over"
        if market == "BTTS" and prob_key == "yes":
            return "btts_yes"
        if market == "HT":
            return {"1": "ht_home", "2": "ht_away"}.get(pick)
        if market in {"HT_OU05", "HT_OU15"} and prob_key == "over":
            return f"{market.lower()}_over"
        if market == "OE" and prob_key == "odd":
            return "oe_odd"
        if market == "CARDS" and prob_key == "over":
            return "cards_over"
        if market == "HTFT" and "/" in pick:
            return f"htft_{pick.replace('/', '').lower()}"
        return None

    @staticmethod
    def _prob_key(market: str, pick: str) -> Optional[str]:
        """Normalise a pick label to the key used in probability dicts.

        Matches both English and Turkish labels by substring on the
        case-folded pick (over/üst, under/alt, yes/var, no/yok, odd/tek,
        even/çift).
        """
        norm = str(pick or "").strip().casefold()
        if market in {"MS", "HT", "HCAP"}:
            return pick if pick in {"1", "X", "2"} else None
        if market == "DC":
            return pick.upper() if pick.upper() in {"1X", "X2", "12"} else None
        if market in {"OU15", "OU25", "OU35", "HT_OU05", "HT_OU15", "CARDS"}:
            if "over" in norm or "ust" in norm or "üst" in norm:
                return "over"
            if "under" in norm or "alt" in norm:
                return "under"
        if market == "BTTS":
            if "yes" in norm or "var" in norm:
                return "yes"
            if "no" in norm or "yok" in norm:
                return "no"
        if market == "OE":
            if "odd" in norm or "tek" in norm:
                return "odd"
            if "even" in norm or "cift" in norm or "çift" in norm:
                return "even"
        if market == "HTFT" and "/" in pick:
            return pick
        return None

    @staticmethod
    def _safe_float(value: Any, default: Optional[float] = None) -> Optional[float]:
        """Convert ``value`` to a finite float, or return ``default``.

        NaN and infinities count as unusable: a NaN would otherwise compare
        False against every threshold and pass gates such as the
        minimum-odds veto.
        """
        try:
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            return default
        if number != number or number in (float("inf"), float("-inf")):
            return default
        return number
+696 -46
View File
@@ -30,12 +30,18 @@ from models.v20_ensemble import FullMatchPrediction
from models.v25_ensemble import V25Predictor, get_v25_predictor
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
from features.odds_band_analyzer import OddsBandAnalyzer
from models.basketball_v25 import (
try:
from models.basketball_v25 import (
BasketballMatchPrediction,
get_basketball_v25_predictor,
)
)
except ImportError:
BasketballMatchPrediction = Any
def get_basketball_v25_predictor():
raise ImportError("Basketball predictor is not available")
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
from services.feature_enrichment import FeatureEnrichmentService
from services.betting_brain import BettingBrain
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
from utils.top_leagues import load_top_league_ids
from utils.league_reliability import load_league_reliability
@@ -69,6 +75,7 @@ class MatchData:
substate: Optional[str] = None
current_score_home: Optional[int] = None
current_score_away: Optional[int] = None
lineup_confidence: float = 0.0
class SingleMatchOrchestrator:
@@ -144,7 +151,7 @@ class SingleMatchOrchestrator:
self.v26_shadow_engine: Optional[V26ShadowEngine] = None
self.basketball_predictor: Optional[Any] = None
self.dsn = get_clean_dsn()
self.engine_mode = str(os.getenv("AI_ENGINE_MODE", "v25")).strip().lower()
self.engine_mode = str(os.getenv("AI_ENGINE_MODE", "v28-pro-max")).strip().lower()
self.top_league_ids = load_top_league_ids()
self.league_reliability = load_league_reliability()
self.enrichment = FeatureEnrichmentService()
@@ -527,12 +534,18 @@ class SingleMatchOrchestrator:
}
def _get_squad_features(self, data: MatchData) -> Dict[str, float]:
"""Non-fatal squad analysis. Returns zero-defaults on failure."""
"""Non-fatal squad analysis. Returns neutral-average defaults on failure.
Design note (V32-fix): Previous 0.0 defaults caused the model to treat
missing lineups as 'both teams have zero quality', producing overly
conservative predictions (e.g. static 1.5 Under). Neutral averages let
the model fall back on stronger signals (odds, ELO, form, H2H).
"""
defaults = {
'home_squad_quality': 0.0, 'away_squad_quality': 0.0, 'squad_diff': 0.0,
'home_key_players': 0.0, 'away_key_players': 0.0,
'home_squad_quality': 0.50, 'away_squad_quality': 0.50, 'squad_diff': 0.0,
'home_key_players': 3.0, 'away_key_players': 3.0,
'home_missing_impact': 0.0, 'away_missing_impact': 0.0,
'home_goals_form': 0.0, 'away_goals_form': 0.0,
'home_goals_form': 1.3, 'away_goals_form': 1.3,
}
try:
engine = get_player_predictor()
@@ -559,27 +572,186 @@ class SingleMatchOrchestrator:
print(f"⚠️ Squad features failed: {e}")
return defaults
# ── V25 internal key → _build_v25_prediction key mapping ──
# Left side: lowercase market identifiers; right side: the uppercase market
# names they are translated to. Note that some names are shortened rather
# than merely upper-cased (ht_result → HT, cards_ou45 → CARDS,
# handicap_ms → HCAP, odd_even → OE).
# NOTE(review): the lowercase keys are presumably the names used inside the
# V25 predictor — confirm against V25Predictor.
_V25_KEY_MAP = {
    "ms": "MS",
    "ou15": "OU15",
    "ou25": "OU25",
    "ou35": "OU35",
    "btts": "BTTS",
    "ht_result": "HT",
    "ht_ou05": "HT_OU05",
    "ht_ou15": "HT_OU15",
    "htft": "HTFT",
    "cards_ou45": "CARDS",
    "handicap_ms": "HCAP",
    "odd_even": "OE",
}
def _get_v25_signal(
self,
data: MatchData,
features: Optional[Dict[str, float]] = None,
) -> Dict[str, Any]:
"""
Get V25 ensemble predictions for all available markets.
Returns a dict keyed by UPPERCASE market name (MS, OU25, BTTS, etc.)
each with a 'probs' sub-dict that _prob_map can consume.
CRITICAL: Keys MUST be uppercase to match _build_v25_prediction lookups.
"""
v25 = self._get_v25_predictor()
feature_row = features or self._build_v25_features(data)
return v25.predict_market_bundle(
features=feature_row,
odds=self._sanitize_v25_odds(data.odds_data or {}),
)
signal: Dict[str, Any] = {}
def _temperature_scale(probs_dict: Dict[str, float], temperature: float = 2.5) -> Dict[str, float]:
"""
Apply temperature scaling to soften overconfident model outputs.
LightGBM often produces extreme probabilities (e.g., 0.999 / 0.001).
Temperature scaling converts to log-odds, divides by T, then re-normalizes.
T=1.0 no change, T>1 softer probabilities.
Standard approach for post-hoc model calibration (Guo et al., 2017).
"""
import math
eps = 1e-7 # numerical stability
n = len(probs_dict)
# Determine appropriate temperature based on market type
# Binary markets (2-class) tend to be more overconfident in LGB
if n <= 2:
T = max(temperature, 2.0)
elif n == 3:
T = max(temperature * 0.8, 1.5) # 3-way slightly less aggressive
else:
T = max(temperature * 0.6, 1.3) # 9-way (HTFT) already spread
# Convert to log-odds and apply temperature
labels = list(probs_dict.keys())
log_odds = []
for label in labels:
p = max(eps, min(1.0 - eps, float(probs_dict[label])))
log_odds.append(math.log(p) / T)
# Softmax re-normalization
max_lo = max(log_odds)
exp_vals = [math.exp(lo - max_lo) for lo in log_odds]
total = sum(exp_vals)
scaled = {}
for i, label in enumerate(labels):
scaled[label] = exp_vals[i] / total
return scaled
def _enrich_signal_entry(probs_dict: Dict[str, float]) -> Dict[str, Any]:
"""Add pick, probability, confidence to a signal entry from its probs.
Applies temperature scaling to convert overconfident LightGBM outputs
into realistic, calibrated probabilities.
"""
# Apply temperature scaling to soften extreme probabilities
scaled_probs = _temperature_scale(probs_dict, temperature=2.5)
best_label = max(scaled_probs, key=scaled_probs.get)
best_prob = float(scaled_probs[best_label])
return {
"probs": scaled_probs,
"raw_probs": probs_dict, # keep originals for debugging
"pick": best_label,
"probability": best_prob,
"confidence": round(best_prob * 100.0, 1),
}
# Core markets using dedicated methods
h, d, a = v25.predict_ms(feature_row)
signal["MS"] = _enrich_signal_entry({"1": h, "X": d, "2": a})
print(f" [V25-SIGNAL] MS → H={h:.4f} D={d:.4f} A={a:.4f}")
over25, under25 = v25.predict_ou25(feature_row)
signal["OU25"] = _enrich_signal_entry({"Over": over25, "Under": under25})
print(f" [V25-SIGNAL] OU25 → O={over25:.4f} U={under25:.4f}")
btts_y, btts_n = v25.predict_btts(feature_row)
signal["BTTS"] = _enrich_signal_entry({"Yes": btts_y, "No": btts_n})
print(f" [V25-SIGNAL] BTTS → Y={btts_y:.4f} N={btts_n:.4f}")
# Additional markets via generic predict_market
for model_key, label_map in [
("ou15", {"Over": 0, "Under": None}),
("ou35", {"Over": 0, "Under": None}),
("ht_result", {"1": 0, "X": 1, "2": 2}),
("ht_ou05", {"Over": 0, "Under": None}),
("ht_ou15", {"Over": 0, "Under": None}),
("htft", None),
("cards_ou45", {"Over": 0, "Under": None}),
("handicap_ms", {"1": 0, "X": 1, "2": 2}),
("odd_even", {"Odd": 0, "Even": None}),
]:
out_key = self._V25_KEY_MAP.get(model_key, model_key.upper())
if not v25.has_market(model_key):
continue
raw = v25.predict_market(model_key, feature_row)
if raw is None:
continue
if label_map is None:
# HTFT — 9 combinations
htft_labels = ["1/1", "1/X", "1/2", "X/1", "X/X", "X/2", "2/1", "2/X", "2/2"]
probs_dict = {}
for i, label in enumerate(htft_labels):
probs_dict[label] = float(raw[i]) if i < len(raw) else 0.0
signal[out_key] = _enrich_signal_entry(probs_dict)
elif len(label_map) == 2:
# Binary market
labels = list(label_map.keys())
p = float(raw[0]) if len(raw) >= 1 else None
if p is None:
print(f" [V25-SIGNAL] {out_key} → EMPTY raw output, skipped")
continue
signal[out_key] = _enrich_signal_entry({labels[0]: p, labels[1]: 1.0 - p})
elif len(label_map) == 3:
# 3-class market
labels = list(label_map.keys())
probs_dict = {}
for i, label in enumerate(labels):
if i >= len(raw):
print(f" [V25-SIGNAL] {out_key} → insufficient probabilities in raw output")
break
probs_dict[label] = float(raw[i])
else:
signal[out_key] = _enrich_signal_entry(probs_dict)
if out_key in signal:
print(f" [V25-SIGNAL] {out_key}{signal[out_key]['probs']}")
print(f" [V25-SIGNAL] Total markets with real predictions: {len(signal)}")
if not signal:
raise RuntimeError("V25 model produced ZERO market predictions — cannot continue")
return signal
@staticmethod
def _prob_map(signal: Optional[Dict[str, Any]], market: str, defaults: Dict[str, float]) -> Dict[str, float]:
"""Extract normalised probabilities from signal.
If the signal contains real model output for this market, use it.
If the market is missing from the signal, log a warning and return
the defaults as a LAST RESORT (so the pipeline doesn't crash).
The defaults are ONLY used for non-core / secondary markets that
may not have a trained model yet (e.g. CARDS, HCAP, OE).
"""
market_payload = signal.get(market, {}) if isinstance(signal, dict) else {}
probs = market_payload.get("probs", {}) if isinstance(market_payload, dict) else {}
if not isinstance(probs, dict) or not probs:
print(f" ⚠️ [PROB_MAP] Market '{market}' NOT found in V25 signal — model output missing")
return dict(defaults)
out = {key: float(probs.get(key, value)) for key, value in defaults.items()}
total = sum(out.values())
if total <= 0:
print(f" ⚠️ [PROB_MAP] Market '{market}' has zero total probability")
return dict(defaults)
return {key: value / total for key, value in out.items()}
@@ -730,7 +902,8 @@ class SingleMatchOrchestrator:
prediction.cards_confidence,
prediction.handicap_confidence,
)
lineup_penalty = 12.0 if data.lineup_source == "none" else 7.0 if data.lineup_source == "probable_xi" else 0.0
lineup_conf = max(0.0, min(1.0, float(getattr(data, "lineup_confidence", 0.0) or 0.0)))
lineup_penalty = 12.0 if data.lineup_source == "none" else max(1.5, (1.0 - lineup_conf) * 8.0) if data.lineup_source == "probable_xi" else 0.0
referee_penalty = 6.0 if not data.referee_name else 0.0
parity_penalty = 8.0 if abs(ms_edge) < 0.08 else 0.0
prediction.risk_score = round(min(100.0, max(10.0, 100.0 - max_market_conf + lineup_penalty + referee_penalty + parity_penalty)), 1)
@@ -747,6 +920,8 @@ class SingleMatchOrchestrator:
prediction.risk_warnings = []
if data.lineup_source == "probable_xi":
prediction.risk_warnings.append("lineup_probable_not_confirmed")
if lineup_conf < 0.65:
prediction.risk_warnings.append("lineup_projection_low_confidence")
if data.lineup_source == "none":
prediction.risk_warnings.append("lineup_unavailable")
if not data.referee_name:
@@ -1142,7 +1317,9 @@ class SingleMatchOrchestrator:
if band_val.get("is_value"):
boost = min(8.0, boost + 3.0) # Triple confirmation extra boost
prediction.ms_confidence = min(95.0, prediction.ms_confidence + boost)
base_package["prediction"]["ms_confidence"] = prediction.ms_confidence
market_board = base_package.get("market_board")
if isinstance(market_board, dict) and isinstance(market_board.get("MS"), dict):
market_board["MS"]["confidence"] = round(float(prediction.ms_confidence), 1)
base_package["v27_engine"]["consensus"] = "AGREE"
else:
base_package["v27_engine"]["consensus"] = "DISAGREE"
@@ -1157,8 +1334,10 @@ class SingleMatchOrchestrator:
base_package.setdefault("analysis_details", {})
base_package["analysis_details"]["v27_loaded"] = False
mode = str(getattr(self, "engine_mode", "v25") or "v25").lower()
if mode not in {"v25", "v26", "dual"}:
base_package = self._apply_upper_brain_guards(base_package)
mode = str(getattr(self, "engine_mode", "v28-pro-max") or "v28-pro-max").lower()
if mode not in {"v25", "v26", "dual", "v28", "v28-pro-max"}:
mode = "v25"
quality = base_package.get("data_quality", self._compute_data_quality(data))
@@ -1185,6 +1364,304 @@ class SingleMatchOrchestrator:
return merged
return base_package
def _apply_upper_brain_guards(self, package: Dict[str, Any]) -> Dict[str, Any]:
    """Run the upper-brain guard stage over a prediction package.

    The package is passed to ``BettingBrain().judge`` and whatever that
    call returns is handed back unchanged.

    Args:
        package: The prediction package built so far.

    Returns:
        The result of ``BettingBrain().judge(package)``.

    NOTE(review): this method used to carry an inline guard implementation
    (per-pick veto/downgrade marking, main-pick reselection, bet_advice
    rewrite) *after* the unconditional return above it, so that code could
    never execute. It has been dropped here; restore it from version
    control if the inline guards are meant to run instead of BettingBrain.
    """
    return BettingBrain().judge(package)
def _upper_brain_assessment(
self,
item: Dict[str, Any],
package: Dict[str, Any],
) -> Dict[str, Any]:
market = str(item.get("market") or "")
pick = str(item.get("pick") or "")
if not market or not pick:
return {"applies": False}
v27_engine = package.get("v27_engine") or {}
triple_value = v27_engine.get("triple_value") or {}
model_prob = self._upper_brain_market_probability(item, package)
v27_prob = self._upper_brain_v27_probability(market, pick, v27_engine)
triple_key = self._upper_brain_triple_key(market, pick)
triple = triple_value.get(triple_key) if triple_key else None
veto = False
downgrade = False
reasons: List[str] = []
divergence = None
if model_prob is not None and v27_prob is not None:
divergence = abs(float(model_prob) - float(v27_prob))
if divergence >= 0.18:
veto = True
reasons.append("upper_brain_v25_v27_divergence")
elif divergence >= 0.12:
downgrade = True
reasons.append("upper_brain_v25_v27_warning")
if isinstance(triple, dict):
band_sample = int(float(triple.get("band_sample", 0) or 0))
is_value = bool(triple.get("is_value"))
if market == "DC":
if band_sample < 8:
veto = True
reasons.append("upper_brain_band_sample_too_low")
elif not is_value:
veto = True
reasons.append("upper_brain_triple_value_rejected")
elif market in {"MS", "OU25"} and band_sample > 0 and band_sample < 8:
downgrade = True
reasons.append("upper_brain_band_sample_thin")
elif market in {"OU15", "HT_OU05"} and band_sample < 8:
downgrade = True
reasons.append("upper_brain_band_sample_thin")
consensus = str(v27_engine.get("consensus") or "").upper()
if consensus == "DISAGREE" and market in {"MS", "DC"} and not veto:
downgrade = True
reasons.append("upper_brain_consensus_disagree")
applies = bool(reasons or triple is not None or v27_prob is not None)
return {
"applies": applies,
"veto": veto,
"downgrade": downgrade,
"reason_codes": reasons,
"model_prob": round(float(model_prob), 4) if model_prob is not None else None,
"v27_prob": round(float(v27_prob), 4) if v27_prob is not None else None,
"divergence": round(float(divergence), 4) if divergence is not None else None,
"triple_key": triple_key,
"triple_value": triple,
}
def _upper_brain_market_probability(
self,
item: Dict[str, Any],
package: Dict[str, Any],
) -> Optional[float]:
raw_prob = item.get("probability")
if raw_prob is not None:
try:
return float(raw_prob)
except (TypeError, ValueError):
pass
market = str(item.get("market") or "")
pick = str(item.get("pick") or "")
board = package.get("market_board") or {}
payload = board.get(market) if isinstance(board, dict) else None
probs = payload.get("probs") if isinstance(payload, dict) else None
if not isinstance(probs, dict):
return None
prob_key = self._upper_brain_prob_key(market, pick)
if prob_key is None:
return None
try:
return float(probs.get(prob_key))
except (TypeError, ValueError):
return None
def _upper_brain_v27_probability(
self,
market: str,
pick: str,
v27_engine: Dict[str, Any],
) -> Optional[float]:
predictions = v27_engine.get("predictions") or {}
ms = predictions.get("ms") or {}
ou25 = predictions.get("ou25") or {}
if market == "MS":
return self._safe_float(ms.get({"1": "home", "X": "draw", "2": "away"}.get(pick, "")))
if market == "DC":
if pick == "1X":
return self._safe_float(ms.get("home"), 0.0) + self._safe_float(ms.get("draw"), 0.0)
if pick == "X2":
return self._safe_float(ms.get("draw"), 0.0) + self._safe_float(ms.get("away"), 0.0)
if pick == "12":
return self._safe_float(ms.get("home"), 0.0) + self._safe_float(ms.get("away"), 0.0)
if market == "OU25":
prob_key = self._upper_brain_prob_key(market, pick)
return self._safe_float(ou25.get(prob_key)) if prob_key else None
return None
@staticmethod
def _upper_brain_prob_key(market: str, pick: str) -> Optional[str]:
pick_norm = str(pick or "").strip().casefold()
if market in {"MS", "HT", "HCAP"}:
return pick if pick in {"1", "X", "2"} else None
if market == "DC":
return pick.upper() if pick.upper() in {"1X", "X2", "12"} else None
if market in {"OU15", "OU25", "OU35", "HT_OU05", "HT_OU15", "CARDS"}:
if "over" in pick_norm or "st" in pick_norm:
return "over"
if "under" in pick_norm or "alt" in pick_norm:
return "under"
if market == "BTTS":
if "yes" in pick_norm or "var" in pick_norm:
return "yes"
if "no" in pick_norm or "yok" in pick_norm:
return "no"
if market == "OE":
if "odd" in pick_norm or "tek" in pick_norm:
return "odd"
if "even" in pick_norm or "ift" in pick_norm:
return "even"
if market == "HTFT" and "/" in pick:
return pick
return None
def _upper_brain_triple_key(self, market: str, pick: str) -> Optional[str]:
prob_key = self._upper_brain_prob_key(market, pick)
if market == "MS":
return {"1": "home", "2": "away"}.get(pick)
if market == "DC":
return f"dc_{pick.lower()}" if pick.upper() in {"1X", "X2", "12"} else None
if market in {"OU15", "OU25", "OU35"} and prob_key == "over":
return f"{market.lower()}_over"
if market == "BTTS" and prob_key == "yes":
return "btts_yes"
if market == "HT":
return {"1": "ht_home", "2": "ht_away"}.get(pick)
if market in {"HT_OU05", "HT_OU15"} and prob_key == "over":
return f"{market.lower()}_over"
if market == "OE" and prob_key == "odd":
return "oe_odd"
if market == "CARDS" and prob_key == "over":
return "cards_over"
if market == "HTFT" and "/" in pick:
return f"htft_{pick.replace('/', '').lower()}"
return None
@staticmethod
def _safe_float(value: Any, default: Optional[float] = None) -> Optional[float]:
try:
return float(value)
except (TypeError, ValueError):
return default
def analyze_match_htms(self, match_id: str) -> Optional[Dict[str, Any]]:
"""
HT/MS focused response for upset-hunting workflows.
@@ -2104,7 +2581,7 @@ class SingleMatchOrchestrator:
return None
odds_data = self._extract_odds(cur, row)
home_lineup, away_lineup, lineup_source = self._extract_lineups(cur, row)
home_lineup, away_lineup, lineup_source, lineup_confidence = self._extract_lineups(cur, row)
sidelined = self._parse_json_dict(row.get("sidelined"))
match_date_ms = int(row.get("match_date_ms") or 0)
league_id = str(row.get("league_id")) if row.get("league_id") else None
@@ -2159,6 +2636,7 @@ class SingleMatchOrchestrator:
status=str(row.get("status") or ""),
state=row.get("state"),
substate=row.get("substate"),
lineup_confidence=lineup_confidence,
current_score_home=(
int(row.get("score_home"))
if row.get("score_home") is not None
@@ -2291,13 +2769,26 @@ class SingleMatchOrchestrator:
self,
cur: RealDictCursor,
row: Dict[str, Any],
) -> Tuple[Optional[List[str]], Optional[List[str]], str]:
) -> Tuple[Optional[List[str]], Optional[List[str]], str, float]:
live_lineups = row.get("lineups")
home, away = self._parse_lineups_json(live_lineups)
status_upper = str(row.get("status") or "").upper()
state_upper = str(row.get("state") or "").upper()
substate_upper = str(row.get("substate") or "").upper()
can_trust_feed_lineups = (
status_upper in {"LIVE", "1H", "2H", "HT", "FT", "FINISHED"}
or state_upper in {"LIVE", "FIRSTHALF", "SECONDHALF", "POSTGAME", "POST_GAME"}
or substate_upper in {"LIVE", "FIRSTHALF", "SECONDHALF"}
)
home, away = self._parse_lineups_json(live_lineups) if can_trust_feed_lineups else (None, None)
if (home and len(home) >= 9) and (away and len(away) >= 9):
return home, away, "confirmed_live"
return home, away, "confirmed_live", 1.0
# fallback 1: current match participation table
home_id = str(row["home_team_id"])
away_id = str(row["away_team_id"])
# fallback 1: current match participation table.
# Trust this only for live/finished matches; pre-match rows can be stale feed snapshots.
if can_trust_feed_lineups:
cur.execute(
"""
SELECT team_id, player_id
@@ -2307,8 +2798,6 @@ class SingleMatchOrchestrator:
""",
(row["match_id"],),
)
home_id = str(row["home_team_id"])
away_id = str(row["away_team_id"])
rows = cur.fetchall()
if rows:
home_players = [str(r["player_id"]) for r in rows if str(r["team_id"]) == home_id]
@@ -2318,21 +2807,40 @@ class SingleMatchOrchestrator:
if not away and away_players:
away = away_players
if (home and len(home) >= 9) and (away and len(away) >= 9):
return home, away, "confirmed_participation"
return home, away, "confirmed_participation", 0.98
# fallback 2: probable XI from historical starts before match date
before_date_ms = int(row.get("match_date_ms") or 0)
sidelined = self._parse_json_dict(row.get("sidelined")) or {}
home_excluded = self._sidelined_player_ids(sidelined.get("homeTeam"))
away_excluded = self._sidelined_player_ids(sidelined.get("awayTeam"))
used_probable = False
if not home:
home = self._build_probable_xi(cur, home_id, before_date_ms)
home_conf = 0.0
away_conf = 0.0
if not home or len(home) < 9:
home, home_conf = self._build_probable_xi(
cur,
home_id,
before_date_ms,
excluded_player_ids=home_excluded,
)
used_probable = used_probable or bool(home)
if not away:
away = self._build_probable_xi(cur, away_id, before_date_ms)
if not away or len(away) < 9:
away, away_conf = self._build_probable_xi(
cur,
away_id,
before_date_ms,
excluded_player_ids=away_excluded,
)
used_probable = used_probable or bool(away)
if used_probable:
return home, away, "probable_xi"
return home, away, "none"
inferred_conf = min(
home_conf if home else 0.0,
away_conf if away else 0.0,
)
return home, away, "probable_xi", inferred_conf
return home, away, "none", 0.0
def _calculate_team_form(
self,
@@ -2445,35 +2953,172 @@ class SingleMatchOrchestrator:
cur: RealDictCursor,
team_id: str,
before_date_ms: int,
max_days: int = 30,
) -> Optional[List[str]]:
match_limit: int = 5,
lookback_days: int = 370,
max_staleness_days: int = 120,
excluded_player_ids: Optional[Set[str]] = None,
) -> Tuple[Optional[List[str]], float]:
if not team_id:
return None
return None, 0.0
min_date_ms = max(0, before_date_ms - (lookback_days * 24 * 60 * 60 * 1000))
min_date_ms = max(0, before_date_ms - (max_days * 24 * 60 * 60 * 1000))
cur.execute(
"""
SELECT
mpp.player_id,
COUNT(*) AS starts,
MAX(m.mst_utc) AS last_start_ms
m.id AS match_id,
m.mst_utc,
m.home_team_id,
m.away_team_id
FROM match_player_participation mpp
JOIN matches m ON m.id = mpp.match_id
WHERE mpp.team_id = %s
AND mpp.is_starting = true
AND m.status = 'FT'
AND m.mst_utc < %s
AND m.mst_utc >= %s
GROUP BY mpp.player_id
ORDER BY starts DESC, last_start_ms DESC
LIMIT 11
AND NOT EXISTS (
SELECT 1
FROM match_player_participation later_mpp
JOIN matches later_m ON later_m.id = later_mpp.match_id
WHERE later_mpp.player_id = mpp.player_id
AND later_mpp.team_id <> %s
AND later_m.mst_utc > m.mst_utc
AND later_m.mst_utc < %s
AND (
later_m.status = 'FT'
OR later_m.state = 'postGame'
OR (later_m.score_home IS NOT NULL AND later_m.score_away IS NOT NULL)
)
)
AND m.id IN (
SELECT m2.id
FROM matches m2
JOIN match_player_participation recent_mpp
ON recent_mpp.match_id = m2.id
AND recent_mpp.team_id = %s
AND recent_mpp.is_starting = true
WHERE (m2.home_team_id = %s OR m2.away_team_id = %s)
AND (
m2.status = 'FT'
OR m2.state = 'postGame'
OR (m2.score_home IS NOT NULL AND m2.score_away IS NOT NULL)
)
AND m2.mst_utc < %s
AND m2.mst_utc >= %s
GROUP BY m2.id
HAVING COUNT(recent_mpp.*) >= 9
ORDER BY MAX(m2.mst_utc) DESC
LIMIT %s
)
ORDER BY m.mst_utc DESC
""",
(team_id, before_date_ms, min_date_ms),
(
team_id,
team_id,
before_date_ms,
team_id,
team_id,
team_id,
before_date_ms,
min_date_ms,
match_limit,
),
)
rows = cur.fetchall()
if not rows:
return None
return [str(r["player_id"]) for r in rows]
return None, 0.0
latest_mst = max(int(row.get("mst_utc") or 0) for row in rows)
age_days = (before_date_ms - latest_mst) / (24 * 60 * 60 * 1000)
stale_projection = age_days > max_staleness_days
excluded = {str(pid) for pid in (excluded_player_ids or set()) if pid}
match_order: Dict[str, int] = {}
for row in rows:
match_id = str(row["match_id"])
if match_id not in match_order:
match_order[match_id] = len(match_order)
player_scores: Dict[str, Dict[str, float]] = {}
for row in rows:
player_id = str(row["player_id"])
if player_id in excluded:
continue
idx = match_order.get(str(row["match_id"]), match_limit)
recency_weight = max(1.0, float(match_limit - idx))
score = recency_weight
if idx == 0:
score += 3.0
elif idx == 1:
score += 1.5
stats = player_scores.setdefault(
player_id,
{
"score": 0.0,
"starts": 0.0,
"last_seen_rank": float(idx),
},
)
stats["score"] += score
stats["starts"] += 1.0
stats["last_seen_rank"] = min(stats["last_seen_rank"], float(idx))
if not player_scores:
return None, 0.0
ranked = sorted(
player_scores.items(),
key=lambda item: (
item[1]["score"],
item[1]["starts"],
-item[1]["last_seen_rank"],
),
reverse=True,
)
lineup = [player_id for player_id, _ in ranked[:11]]
coverage = min(1.0, len(lineup) / 11.0)
available_matches = max(1, len(match_order))
history_score = min(1.0, available_matches / float(match_limit))
core_stability = 0.0
if ranked:
stable_core = sum(1 for _, stats in ranked[:11] if stats["starts"] >= 2.0)
core_stability = stable_core / 11.0
staleness_factor = max(
0.35,
min(1.0, float(max_staleness_days) / max(age_days, 1.0)),
)
confidence = (
(coverage * 0.45) + (history_score * 0.25) + (core_stability * 0.30)
) * staleness_factor
if excluded:
confidence *= 0.92
confidence_cap = 0.58 if stale_projection else 0.88
return lineup or None, round(max(0.0, min(confidence_cap, confidence)), 3)
@staticmethod
def _sidelined_player_ids(team_data: Any) -> Set[str]:
if not isinstance(team_data, dict):
return set()
players = team_data.get("players")
if not isinstance(players, list):
return set()
ids: Set[str] = set()
for player in players:
if not isinstance(player, dict):
continue
player_id = (
player.get("playerId")
or player.get("player_id")
or player.get("id")
or player.get("personId")
)
if player_id:
ids.add(str(player_id))
return ids
def _parse_odds_json(self, odds_json: Any) -> Dict[str, float]:
odds_json = self._parse_json_dict(odds_json)
@@ -4267,7 +4912,8 @@ class SingleMatchOrchestrator:
lineup_sensitive = market in ("MS", "BTTS", "HT", "HTFT")
lineup_penalty = 5.0 if lineup_missing and lineup_sensitive else 0.0
if data.lineup_source == "probable_xi" and lineup_sensitive:
lineup_penalty += 4.0
lineup_conf = max(0.0, min(1.0, float(getattr(data, "lineup_confidence", 0.0) or 0.0)))
lineup_penalty += max(1.0, (1.0 - lineup_conf) * 5.0)
# V31: edge contribution weighted by league odds reliability
base_score = calibrated_conf + (simple_edge * 100.0 * edge_multiplier)
@@ -4438,8 +5084,11 @@ class SingleMatchOrchestrator:
away_n = len(data.away_lineup or [])
lineup_score = min(home_n, away_n) / 11.0 if min(home_n, away_n) > 0 else 0.0
if data.lineup_source == "probable_xi":
lineup_score *= 0.55
lineup_conf = max(0.0, min(1.0, float(getattr(data, "lineup_confidence", 0.0) or 0.0)))
lineup_score *= max(0.45, min(0.88, lineup_conf))
flags.append("lineup_probable_not_confirmed")
if lineup_conf < 0.65:
flags.append("lineup_projection_low_confidence")
elif data.lineup_source == "none":
flags.append("lineup_unavailable")
if lineup_score < 0.7:
@@ -4464,6 +5113,7 @@ class SingleMatchOrchestrator:
"home_lineup_count": home_n,
"away_lineup_count": away_n,
"lineup_source": data.lineup_source,
"lineup_confidence": round(float(getattr(data, "lineup_confidence", 0.0) or 0.0), 3),
"flags": flags,
}
+370
View File
@@ -0,0 +1,370 @@
# V28-Pro-Max Model Architecture Documentation
> **Model Version:** `v28-pro-max`
> **Engine File:** `ai-engine/services/single_match_orchestrator.py` (4656 satır)
> **Son Güncelleme:** 2026-04-24
---
## 1. Genel Bakış
V28-Pro-Max, üç bağımsız tahmin katmanını (V25, V27, V28) tek bir orchestrator içinde birleştiren **üçlü hibrit AI tahmin motorudur**. Her maç için 13+ bahis pazarını analiz eder, olasılık hesaplar, risk değerlendirir ve "Value Bet" tespiti yapar.
```
┌─────────────────────────────────────────────────────┐
│ SingleMatchOrchestrator │
│ │
│ ┌──────────┐ ┌──────────┐ ┌────────────────┐ │
│ │ V25 │ │ V27 │ │ V28 │ │
│ │ Ensemble │ │ Dual-Eng │ │ Odds-Band │ │
│ │ (XGB+LGB)│ │ Divergnce│ │ Historical │ │
│ └────┬─────┘ └────┬─────┘ └───────┬────────┘ │
│ │ │ │ │
│ └──────────────┼────────────────┘ │
│ ▼ │
│ FullMatchPrediction │
│ │ │
│ ┌───────────┼───────────┐ │
│ ▼ ▼ ▼ │
│ Market Rows Risk Calc Triple Value │
│ │ │ │ │
│ └───────────┼───────────┘ │
│ ▼ │
│ _build_prediction_package() │
│ → JSON Response (v28-pro-max) │
└─────────────────────────────────────────────────────┘
```
---
## 2. Katman Detayları
### 2.1 V25 — Ensemble ML Katmanı
**Dosya:** `ai-engine/models/v25_ensemble.py`
- **Algoritmalar:** XGBoost + LightGBM ensemble
- **Girdi:** Pre-match feature vektörü (form, elo, odds, kadro, hakem vb.)
- **Çıktı:** Tüm pazarlar için olasılık dağılımları + confidence skorları
- **Özellik:** Odds-aware (bahis oranlarını feature olarak kullanır)
- **Target leakage koruması:** Maç sonucu bilgisi asla feature olarak kullanılmaz
```python
# V25 çağrılma noktası (orchestrator L310-315)
v25_signal = v25_predictor.predict(features)
# Çıktı: {MS: {home: 0.45, draw: 0.28, away: 0.27}, OU25: {...}, BTTS: {...}, ...}
```
### 2.2 V27 — Dual-Engine Divergence Katmanı
**Dosya:** `ai-engine/models/v27_predictor.py`
- **Amaç:** Odds-FREE temel olasılıkları hesaplar (sadece form/elo/kadro)
- **Mekanizma:** V25 (odds-aware) vs V27 (odds-free) karşılaştırması
- **Divergence Tespiti:** İki motor arasındaki fark → bahisçinin fiyatlandırma hatasını tespit eder
- **Çıktı:** `ms_divergence`, `ou25_divergence`, `is_value` sinyalleri
```python
# Divergence hesaplama (orchestrator L830-863)
ms_divergence = {
"home": v25_home_prob - v27_home_prob, # Pozitif = V25 bahisçiyle hemfikir
"away": v25_away_prob - v27_away_prob, # Negatif = Model bahisçiden farklı düşünüyor
}
ms_value = {
"home": {"is_value": v27_home > implied_home and abs(div) > 0.05},
"away": {"is_value": v27_away > implied_away and abs(div) > 0.05},
}
```
### 2.3 V28 — Odds-Band Historical Performance Katmanı
**Dosya:** `ai-engine/features/odds_band_analyzer.py`
- **Amaç:** "Bu oran bandında tarihsel olarak ne oldu?" sorusunu yanıtlar
- **Mekanizma:** Maçın mevcut oranını bir banda yerleştirir (ör: MS Home 1.70-1.90), ardından veritabanındaki aynı banddaki geçmiş maçları sorgular
- **Sorgu:** PostgreSQL üzerinden takım-spesifik tarihsel performans
```python
# OddsBandAnalyzer.compute_all() çıktısı — 18 pazar için band metrikleri:
{
"home_band_ms_win_rate": 0.62, # Ev sahibi bu oran bandında %62 kazanmış
"home_band_ms_sample": 34, # 34 maçlık örneklem
"band_ou25_over_rate": 0.58, # Bu banddaki maçların %58'i 2.5 üst
"band_btts_yes_rate": 0.51, # KG Var oranı
"band_htft_11_rate": 0.28, # İY/MS 1/1 oranı
"band_cards_referee_avg": 4.2, # Hakem kart ortalaması
# ... toplam 60+ feature
}
```
---
## 3. Analiz Edilen Bahis Pazarları (13+)
| # | Pazar | Kod | Olasılık Alanları | Odds Anahtarları |
|---|-------|-----|-------------------|------------------|
| 1 | Maç Sonucu | `MS` | home/draw/away | ms_h, ms_d, ms_a |
| 2 | Çifte Şans | `DC` | 1X/X2/12 | dc_1x, dc_x2, dc_12 |
| 3 | Üst/Alt 1.5 | `OU15` | over/under | ou15_o, ou15_u |
| 4 | Üst/Alt 2.5 | `OU25` | over/under | ou25_o, ou25_u |
| 5 | Üst/Alt 3.5 | `OU35` | over/under | ou35_o, ou35_u |
| 6 | Karşılıklı Gol | `BTTS` | yes/no | btts_y, btts_n |
| 7 | İlk Yarı Sonucu | `HT` | 1/X/2 | ht_h, ht_d, ht_a |
| 8 | İY/MS (9 kombo) | `HTFT` | 1/1, 1/X, 1/2, X/1, X/X, X/2, 2/1, 2/X, 2/2 | htft_11..htft_22 |
| 9 | Tek/Çift | `OE` | odd/even | oe_odd, oe_even |
| 10 | İY Üst/Alt 0.5 | `HT_OU05` | over/under | ht_ou05_o, ht_ou05_u |
| 11 | İY Üst/Alt 1.5 | `HT_OU15` | over/under | ht_ou15_o, ht_ou15_u |
| 12 | Kartlar | `CARDS` | over/under | cards_o, cards_u |
| 13 | Handikap | `HCAP` | 1/X/2 | hcap_h, hcap_d, hcap_a |
---
## 4. Triple Value Detection (V28 Ana Yeniliği)
V28'in en kritik özelliği: **3 bağımsız kaynağı çapraz kontrol ederek "gerçek değer" tespiti yapması.**
```
Triple Value = V27 Divergence + V28 Band Rate + Odds Implied Probability
Koşullar (hepsi sağlanmalı):
1. V27 olasılığı > bahisçi implied olasılığı (v27_confirms)
2. Band tarihsel oranı > implied olasılık (band_confirms)
3. Kombine edge > %5 (edge > 0.05)
4. Band örneklem >= 8 maç (band_sample >= 8)
→ Tüm koşullar sağlanırsa: is_value = True
```
**Örnek:**
```
Galatasaray vs Beşiktaş — MS Home (1.85 oran)
├── Implied Prob: 1/1.85 = 0.54 (%54)
├── V27 (odds-free): 0.61 (%61) → ✅ V27 confirms (0.61 > 0.54)
├── V28 Band Rate: 0.62 (%62, 34 maç) → ✅ Band confirms (0.62 > 0.54)
├── Combined Prob: (0.61 + 0.62) / 2 = 0.615
├── Edge: 0.615 - 0.54 = 0.075 (%7.5) → ✅ Edge > 5%
└── is_value = TRUE → "Bu bahis değerli!"
```
---
## 5. Market Row Dekorasyon Pipeline'ı
Her pazar aşağıdaki pipeline'dan geçer:
```
_build_market_rows() → Ham market row'ları oluştur (13 pazar)
_apply_market_consistency() → Pazarlar arası tutarlılık kontrolü
_decorate_market_row() → Her row'a playability, grading, staking ekle
Sort by (playable, play_score) → En iyi pick'ler başa gelir
```
### 5.1 Decorate Market Row — Quant Hybrid Sistemi
Her market row şu metriklerle dekore edilir:
| Metrik | Formül | Açıklama |
|--------|--------|----------|
| `calibrated_confidence` | `raw_conf × market_calibration` | Kalibre edilmiş güven |
| `ev_edge` | `(prob × odds) - 1.0` | Expected Value edge |
| `simple_edge` | `prob - (1/odds)` | Basit olasılık farkı |
| `play_score` | `cal_conf + (edge × 100 × edge_mult) - penalties` | Oynanabilirlik skoru |
| `stake_units` | Quarter-Kelly Criterion | Önerilen bahis miktarı |
| `bet_grade` | A/B/C/PASS | EV edge bazlı not |
### 5.2 Playability Gates (Güvenlik Kapıları)
Bir market row'un "playable" olması için tüm kapılardan geçmesi gerekir:
1. **Confidence Gate:** `calibrated_conf >= min_conf` (pazar bazlı eşik)
2. **Odds Gate:** Odds-required pazarlarda `odds > 1.01`
3. **Risk-Quality Gate:** HIGH/EXTREME risk + LOW kalite → BLOK
4. **Negative Edge Gate:** `simple_edge < neg_threshold` → BLOK
5. **EV Edge Gate:** `ev_edge < min_edge` → BLOK
6. **Play Score Gate:** `play_score < min_play_score` → BLOK
### 5.3 Kelly Criterion Staking
```python
# Quarter-Kelly (¼ Kelly, 10-unit bankroll)
# b = odds - 1 (net kazanç oranı), p = kazanma olasılığı, q = 1 - p
f* = ((b × p) - q) / b # Full Kelly
stake = f* × 0.25 × 10 # Quarter Kelly × bankroll
stake = min(stake, 3.0) # Cap: max 3 unit
stake = max(stake, 0.25) # Floor: min 0.25 unit
```
---
## 6. Guaranteed Pick Logic (V32 Calibration-Aware)
Ana pick seçimi 4 öncelik sırasıyla yapılır:
```
Priority 1: HIGH_ACCURACY markets (DC, OU15, HT_OU05)
+ Odds >= 1.30 + Confidence >= 44%
→ is_guaranteed = True, reason = "high_accuracy_market"
Priority 2: Any playable + Odds >= 1.30 + Conf >= 44%
→ is_guaranteed = True, reason = "confidence_threshold_met"
Priority 3: Any playable + Odds >= 1.30
→ is_guaranteed = False, reason = "odds_only_fallback"
Priority 4: Best non-playable (last resort)
→ is_guaranteed = False, reason = "last_resort"
```
**Value Pick:** `main_pick`'ten farklı, odds >= 1.60, confidence >= %40 olan en iyi alternatif.
**Aggressive Pick:** HT/FT reversal senaryoları (1/2, 2/1, X/1, X/2) arasından en yüksek olasılıklı.
---
## 7. Risk Assessment Sistemi
```python
risk_score = 100 - max_market_conf + lineup_penalty + referee_penalty + parity_penalty
# Penalty'ler:
lineup_penalty = 12.0 (kadro yok) | 7.0 (probable_xi) | 0.0 (confirmed)
referee_penalty = 6.0 (hakem yok) | 0.0
parity_penalty = 8.0 (|ms_edge| < 0.08) | 0.0
# Risk seviyeleri:
EXTREME: score >= 78
HIGH: score >= 62
MEDIUM: score >= 40
LOW: score < 40
```
### Surprise Risk Tespiti
- `is_surprise_risk = True` → Risk HIGH/EXTREME VEYA draw_prob >= %30
- `surprise_type`: `balanced_match_risk` veya `draw_pressure`
---
## 8. xG ve Skor Tahmini
```python
base_home_xg = (home_goals_avg + away_xga) / 2
base_away_xg = (away_goals_avg + home_xga) / 2
# MS edge ve BTTS etkisiyle düzeltme:
home_xg = base_home_xg + (ms_edge × 0.55) + (btts_prob - 0.5) × 0.18
away_xg = base_away_xg - (ms_edge × 0.55) + (btts_prob - 0.5) × 0.18
# Lig ortalamasıyla ölçekleme:
total_target = league_avg_goals × 0.55 + team_avgs × 0.45 + ou25_signal × 1.15
scale = total_target / (home_xg + away_xg)
final_home_xg = home_xg × scale
final_away_xg = away_xg × scale
# Skor tahmini:
FT = "{round(home_xg)}-{round(away_xg)}" # skor metni, ör. "2-1" (çıkarma işlemi değil)
HT = "{round(home_xg × 0.45)}-{round(away_xg × 0.45)}" # ilk yarı skoru, aynı biçimde
Top5 = Poisson dağılımı ile en olası 5 skor
```
---
## 9. Data Quality Skoru
```python
quality_score = odds_score × 0.35 + lineup_score × 0.35 + ref_score × 0.15 + form_score × 0.15
# Etiketleme:
HIGH: score >= 0.75
MEDIUM: score >= 0.45
LOW: score < 0.45
```
---
## 10. Çıktı JSON Kontratı
```json
{
"model_version": "v28-pro-max",
"match_info": { "match_id", "home_team", "away_team", "league", ... },
"data_quality": { "label", "score", "lineup_source", "flags" },
"risk": { "level", "score", "is_surprise_risk", "warnings" },
"engine_breakdown": { "team", "player", "odds", "referee" },
"main_pick": { "market", "pick", "confidence", "odds", "ev_edge", "bet_grade", "is_guaranteed" },
"value_pick": { ... },
"aggressive_pick": { "market": "HT/FT", "pick": "1/2", ... },
"bet_advice": { "playable", "suggested_stake_units", "reason" },
"bet_summary": [ { "market", "pick", "calibrated_confidence", "bet_grade", "ev_edge", ... } ],
"supporting_picks": [ ... ],
"score_prediction": { "ft", "ht", "xg_home", "xg_away", "xg_total" },
"scenario_top5": [ "1-0", "2-1", ... ],
"market_board": { "MS": {...}, "DC": {...}, "OU25": {...}, ... },
"v25_signal": { "available", "markets", "value_bets" },
"reasoning_factors": [ ... ]
}
```
---
## 11. League-Specific Odds Reliability (V31)
Bazı liglerin bahis oranları daha güvenilirdir. Bu bilgi `_decorate_market_row` içinde edge ağırlıklandırmasında kullanılır:
```python
odds_rel = league_reliability.get(league_id, 0.35) # 0.0 - 1.0
edge_multiplier = 0.60 + (odds_rel × 0.60) # 0.60 - 1.20
# Güvenilir lig → edge daha fazla ağırlık alır
# Güvenilsiz lig → model confidence'a daha çok güvenilir
```
---
## 12. Dosya Haritası
```
ai-engine/
├── services/
│ └── single_match_orchestrator.py ← Ana orchestrator (4656 satır)
├── models/
│ ├── v25_ensemble.py ← XGBoost + LightGBM ensemble
│ └── v27_predictor.py ← Odds-free fundamental predictor
├── features/
│ └── odds_band_analyzer.py ← V28 tarihsel band analizi
└── main.py ← FastAPI endpoint (/predict)
```
---
## 13. Akış Özeti
```
HTTP POST /predict {match_id}
SingleMatchOrchestrator.analyze_match(match_id)
├── _load_match_data() → DB'den maç + odds + kadro + form
├── V25: v25_predictor.predict(features)
│ → 13 pazar olasılık + confidence
├── V27: v27_predictor.predict(features)
│ → Odds-free MS/OU25 olasılıkları
│ → Divergence hesaplama
├── V28: odds_band_analyzer.compute_all()
│ → 18 pazar için tarihsel band metrikleri
├── Triple Value Detection
│ → V27 + V28 + Implied çapraz kontrol
├── _enrich_prediction() → xG, risk, skor tahmini
├── _build_market_rows() → 13+ ham market row
├── _apply_market_consistency()
├── _decorate_market_row() → EV, Kelly, grading
├── Guaranteed Pick Selection → main_pick, value_pick, aggressive_pick
└── _build_prediction_package() → Final JSON kontratı
```
+1 -1
View File
@@ -22,7 +22,7 @@
"ai:backtest": "python ai-engine/scripts/backtest_v2_runtime.py",
"ai:train:vqwen": "python ai-engine/scripts/train_vqwen_v3.py",
"feeder:historical": "ts-node -r tsconfig-paths/register src/scripts/run-feeder.ts",
"feeder:previous-day": "ts-node -r tsconfig-paths/register src/scripts/run-feeder.ts",
"feeder:previous-day": "ts-node -r tsconfig-paths/register src/scripts/run-feeder-previous-day.ts",
"feeder:fill-gaps": "ts-node -r tsconfig-paths/register src/scripts/run-feeder-filtered.ts",
"feeder:basketball": "ts-node -r tsconfig-paths/register src/scripts/run-feeder-basketball.ts",
"feeder:live": "ts-node -r tsconfig-paths/register src/scripts/run-live-feeder.ts",
@@ -856,19 +856,46 @@ export class FeederPersistenceService {
const matches = await this.prisma.match.findMany({
where: {
id: { in: matchIds },
AND: [
{ oddCategories: { some: {} } },
{
oddCategories: { some: {} },
OR: [
{ footballTeamStats: { some: {} } },
{ basketballTeamStats: { some: {} } },
],
{
sport: "football",
footballTeamStats: { some: {} },
playerParticipations: { some: { isStarting: true } },
},
{
sport: "basketball",
basketballTeamStats: { some: {} },
basketballPlayerStats: { some: {} },
},
],
},
select: { id: true },
select: { id: true, sport: true },
});
return matches.map((m) => m.id);
const footballIds = matches
.filter((m) => m.sport === "football")
.map((m) => m.id);
const completeFootballIds = new Set<string>();
if (footballIds.length > 0) {
const starterCounts = await this.prisma.matchPlayerParticipation.groupBy({
by: ["matchId"],
where: {
matchId: { in: footballIds },
isStarting: true,
},
_count: { _all: true },
});
for (const row of starterCounts) {
if (row._count._all >= 18) completeFootballIds.add(row.matchId);
}
}
return matches
.filter((m) => m.sport !== "football" || completeFootballIds.has(m.id))
.map((m) => m.id);
}
async hasOdds(matchId: string): Promise<boolean> {
+19 -10
View File
@@ -168,7 +168,7 @@ export class FeederService {
// writing to live_matches. Historical scan should only fill matches table.
endDate.setDate(endDate.getDate() - 2);
const stateKey = `historical_scan_state_${sports.join("_")}${targetLeagueIds.length > 0 ? "_filtered" : ""}_desc`;
const stateKey = `historical_full_data_v2_state_${sports.join("_")}${targetLeagueIds.length > 0 ? "_filtered" : ""}_desc`;
let currentDate: Date | null = null;
// Resume from saved state
@@ -310,9 +310,20 @@ export class FeederService {
const { startTs: targetDateStartTs, endTs: targetDateEndTs } =
this.getDayBoundsForTimeZone(dateString, this.DAILY_SYNC_TIME_ZONE);
// DEBUG: Log sample mstUtc values vs target bounds to diagnose filtering
if (allMatches.length > 0) {
const sample = allMatches.slice(0, 3);
this.logger.warn(
`[${sport}] [${dateString}] DEBUG: bounds=[${targetDateStartTs}, ${targetDateEndTs}] ` +
`(${new Date(targetDateStartTs * 1000).toISOString()} - ${new Date(targetDateEndTs * 1000).toISOString()}) | ` +
`sampleMstUtc=[${sample.map((m) => `${m.mstUtc} (asSec=${new Date(m.mstUtc * 1000).toISOString()}, asMs=${new Date(m.mstUtc).toISOString()})`).join(', ')}]`,
);
}
const dateFilteredMatches = allMatches.filter((m) => {
const matchTs = m.mstUtc;
return matchTs >= targetDateStartTs && matchTs <= targetDateEndTs;
// mstUtc is in milliseconds from API, bounds are in seconds
const matchTsSec = Math.floor(m.mstUtc / 1000);
return matchTsSec >= targetDateStartTs && matchTsSec <= targetDateEndTs;
});
const apiReturnedCount = allMatches.length;
@@ -753,10 +764,7 @@ export class FeederService {
}
// Starting Formation & Substitutes (Always for lineups or all)
// V20 OPTIMIZATION: Disabled to speed up feeder and reduce 502 errors.
// We only use Team Stats for V20 model.
/*
if (scope === 'all' || scope === 'lineups') {
if (scope === "all" || scope === "lineups") {
// Starting Formation
try {
const formationData =
@@ -780,7 +788,7 @@ export class FeederService {
);
}
} catch (e: any) {
if (e.message?.includes('502')) hasCriticalError = true;
if (e.message?.includes("502")) hasCriticalError = true;
this.logger.warn(`[${matchId}] Formation failed: ${e.message}`);
}
@@ -807,11 +815,10 @@ export class FeederService {
);
}
} catch (e: any) {
if (e.message?.includes('502')) hasCriticalError = true;
if (e.message?.includes("502")) hasCriticalError = true;
this.logger.warn(`[${matchId}] Subs failed: ${e.message}`);
}
}
*/
// Game Stats & Officials
if (scope === "all") {
@@ -935,6 +942,8 @@ export class FeederService {
const missingParts: string[] = [];
if (scope === "all" && completedMatch) {
if (sport === "football" && !stats) missingParts.push("Stats");
if (sport === "football" && participationData.length < 18)
missingParts.push("Lineups");
if (sport === "basketball" && !basketballTeamStats)
missingParts.push("BoxScore");
if (oddsArray.length === 0) missingParts.push("Odds");
+303 -1
View File
@@ -586,7 +586,44 @@ export class MatchesService {
date: new Date(Number(liveMatch.mstUtc)),
// Fill missing relations with empty arrays
teamStats: [],
playerParticipations: [],
playerParticipations: (() => {
const parsed: Array<{ teamId: string; isStarting: boolean; shirtNumber: string | number | null; position: string | null; player: { id: string; name: string } }> = [];
const canTrustFeedLineups = displayStatus === "LIVE" || displayStatus === "Finished";
if (!canTrustFeedLineups) {
return parsed;
}
if (liveMatch.lineups && typeof liveMatch.lineups === 'object') {
const lu = liveMatch.lineups as Record<string, any>;
const addPlayers = (teamLu: any, teamId: string | null) => {
if (!teamLu || !teamId) return;
if (teamLu.xi && Array.isArray(teamLu.xi)) {
teamLu.xi.forEach((p: any) => {
parsed.push({
teamId,
isStarting: true,
shirtNumber: p.shirtNumber || p.number,
position: p.position || p.pos,
player: { id: p.personId || p.id || p.playerId || 'unknown', name: p.matchName || p.name || p.playerName || 'Bilinmiyor' }
});
});
}
if (teamLu.subs && Array.isArray(teamLu.subs)) {
teamLu.subs.forEach((p: any) => {
parsed.push({
teamId,
isStarting: false,
shirtNumber: p.shirtNumber || p.number,
position: p.position || p.pos,
player: { id: p.personId || p.id || p.playerId || 'unknown', name: p.matchName || p.name || p.playerName || 'Bilinmiyor' }
});
});
}
};
addPlayers(lu.home, liveMatch.homeTeamId);
addPlayers(lu.away, liveMatch.awayTeamId);
}
return parsed;
})(),
playerEvents: [],
oddCategories: [], // Will handle odds parsing below
officials: [],
@@ -597,6 +634,64 @@ export class MatchesService {
if (!match) return null;
const detailDisplayStatus = getDisplayMatchStatus({
state: match.state,
status: match.status,
substate: match.substate,
scoreHome: match.scoreHome,
scoreAway: match.scoreAway,
});
const canTrustStoredLineups = this.canTrustStoredLineups(detailDisplayStatus);
if (Array.isArray(match.playerParticipations)) {
if (!canTrustStoredLineups) {
match.playerParticipations = [];
}
const hasHomeLineup = match.playerParticipations.some(
(p: any) => p.teamId === match.homeTeamId && p.isStarting,
);
const hasAwayLineup = match.playerParticipations.some(
(p: any) => p.teamId === match.awayTeamId && p.isStarting,
);
if (!hasHomeLineup || !hasAwayLineup) {
const sidelined =
match.sidelined && typeof match.sidelined === "object"
? (match.sidelined as Record<string, any>)
: {};
const matchDateMs = Number(match.mstUtc || Date.now());
const probableLineups: any[] = [];
if (!hasHomeLineup && match.homeTeamId) {
probableLineups.push(
...(await this.buildProbableLineupForTeam({
teamId: match.homeTeamId,
beforeDateMs: matchDateMs,
sidelinedTeamData: sidelined.homeTeam,
})),
);
}
if (!hasAwayLineup && match.awayTeamId) {
probableLineups.push(
...(await this.buildProbableLineupForTeam({
teamId: match.awayTeamId,
beforeDateMs: matchDateMs,
sidelinedTeamData: sidelined.awayTeam,
})),
);
}
if (probableLineups.length > 0) {
match.playerParticipations = canTrustStoredLineups
? [...match.playerParticipations, ...probableLineups]
: probableLineups;
match.lineupSource = "probable_xi";
}
}
}
// Structure odds
const odds: Record<
string,
@@ -699,4 +794,211 @@ export class MatchesService {
return team?.id || null;
}
/**
 * Projects a "probable XI" for a team from the starters of its most recent
 * completed matches played before `beforeDateMs`.
 *
 * @param params.teamId Team whose lineup is projected.
 * @param params.beforeDateMs Upper time bound; a falsy value falls back to `Date.now()`.
 *   NOTE(review): compared directly against `matches.mst_utc`, so it is presumably
 *   epoch milliseconds — confirm against the schema.
 * @param params.sidelinedTeamData Optional payload whose player ids are excluded
 *   (parsed by `extractSidelinedPlayerIds`).
 * @param params.matchLimit Maximum number of recent matches sampled (default 5).
 * @param params.lookbackDays How far back matches are considered (default 370).
 * @param params.maxStalenessDays Age of the newest sampled match beyond which the
 *   projection is flagged stale (default 120).
 * @returns Up to 11 participation-shaped objects marked `isProbable: true` with
 *   projection metadata, or an empty array when no qualifying history exists.
 */
private async buildProbableLineupForTeam(params: {
teamId: string;
beforeDateMs: number;
sidelinedTeamData?: any;
matchLimit?: number;
lookbackDays?: number;
maxStalenessDays?: number;
}) {
const matchLimit = params.matchLimit ?? 5;
const lookbackDays = params.lookbackDays ?? 370;
const maxStalenessDays = params.maxStalenessDays ?? 120;
const beforeDateMs = params.beforeDateMs || Date.now();
// Lower bound of the history window, clamped so it never goes negative.
const minDateMs = Math.max(
0,
beforeDateMs - lookbackDays * 24 * 60 * 60 * 1000,
);
const excluded = this.extractSidelinedPlayerIds(params.sidelinedTeamData);
// The query returns one row per (starter, match) for this team, newest match first.
// - The IN sub-select picks up to `matchLimit` completed matches of the team inside
//   the window that have at least 9 starters recorded for it.
// - The NOT EXISTS clause drops players who, after that match and before
//   `beforeDateMs`, have a participation row for a different team in a completed match.
// A match counts as completed when status is 'FT', state is 'postGame', or both
// scores are non-null.
const rows = await this.prisma.$queryRaw<any[]>`
SELECT
mpp.player_id AS "playerId",
p.name AS "playerName",
mpp.position AS "position",
mpp.shirt_number AS "shirtNumber",
m.id AS "matchId",
m.mst_utc AS "mstUtc"
FROM match_player_participation mpp
JOIN matches m ON m.id = mpp.match_id
JOIN players p ON p.id = mpp.player_id
WHERE mpp.team_id = ${params.teamId}
AND mpp.is_starting = true
AND NOT EXISTS (
SELECT 1
FROM match_player_participation later_mpp
JOIN matches later_m ON later_m.id = later_mpp.match_id
WHERE later_mpp.player_id = mpp.player_id
AND later_mpp.team_id <> ${params.teamId}
AND later_m.mst_utc > m.mst_utc
AND later_m.mst_utc < ${BigInt(beforeDateMs)}
AND (
later_m.status = 'FT'
OR later_m.state = 'postGame'
OR (later_m.score_home IS NOT NULL AND later_m.score_away IS NOT NULL)
)
)
AND m.id IN (
SELECT m2.id
FROM matches m2
JOIN match_player_participation recent_mpp
ON recent_mpp.match_id = m2.id
AND recent_mpp.team_id = ${params.teamId}
AND recent_mpp.is_starting = true
WHERE (m2.home_team_id = ${params.teamId} OR m2.away_team_id = ${params.teamId})
AND (
m2.status = 'FT'
OR m2.state = 'postGame'
OR (m2.score_home IS NOT NULL AND m2.score_away IS NOT NULL)
)
AND m2.mst_utc < ${BigInt(beforeDateMs)}
AND m2.mst_utc >= ${BigInt(minDateMs)}
GROUP BY m2.id
HAVING COUNT(recent_mpp.*) >= 9
ORDER BY MAX(m2.mst_utc) DESC
LIMIT ${matchLimit}
)
ORDER BY m.mst_utc DESC
`;
if (!rows.length) return [];
// Age of the newest sampled match relative to the target date, in days.
const latestMst = Math.max(
...rows.map((row) => Number(row.mstUtc || 0)),
);
const ageDays =
latestMst > 0
? (beforeDateMs - latestMst) / (24 * 60 * 60 * 1000)
: Number.POSITIVE_INFINITY;
const staleProjection = ageDays > maxStalenessDays;
// Rank matches by first appearance in the newest-first rows: rank 0 is the most recent.
const matchOrder = new Map<string, number>();
for (const row of rows) {
const matchId = String(row.matchId);
if (!matchOrder.has(matchId)) {
matchOrder.set(matchId, matchOrder.size);
}
}
// Per-player accumulator of recency-weighted score and start count.
const playerMap = new Map<
string,
{
playerId: string;
playerName: string;
position: string | null;
shirtNumber: number | null;
score: number;
starts: number;
lastSeenRank: number;
}
>();
for (const row of rows) {
const playerId = String(row.playerId);
// Sidelined players never enter the projection.
if (excluded.has(playerId)) continue;
const rank = matchOrder.get(String(row.matchId)) ?? matchLimit;
// Newer matches weigh more (minimum weight 1); the two most recent matches
// get an extra bonus of 3 and 1.5 respectively.
const recencyWeight = Math.max(1, matchLimit - rank);
const score =
recencyWeight + (rank === 0 ? 3 : rank === 1 ? 1.5 : 0);
const existing = playerMap.get(playerId);
if (!existing) {
playerMap.set(playerId, {
playerId,
playerName: row.playerName || "Bilinmiyor",
position: row.position ?? null,
shirtNumber:
row.shirtNumber === null || row.shirtNumber === undefined
? null
: Number(row.shirtNumber),
score,
starts: 1,
lastSeenRank: rank,
});
} else {
existing.score += score;
existing.starts += 1;
existing.lastSeenRank = Math.min(existing.lastSeenRank, rank);
// Keep the value from the most recent match; only fill gaps from older rows.
existing.position = existing.position || row.position || null;
existing.shirtNumber =
existing.shirtNumber ??
(row.shirtNumber === null || row.shirtNumber === undefined
? null
: Number(row.shirtNumber));
}
}
// Order by score, then number of starts, then most recent appearance; keep the top 11.
const ranked = [...playerMap.values()]
.sort((a, b) => {
if (b.score !== a.score) return b.score - a.score;
if (b.starts !== a.starts) return b.starts - a.starts;
return a.lastSeenRank - b.lastSeenRank;
})
.slice(0, 11);
// Confidence inputs: how full the XI is, how many matches were sampled, and
// how many selected players started at least twice.
const coverage = Math.min(1, ranked.length / 11);
const historyScore = Math.min(1, matchOrder.size / matchLimit);
const stableCore = ranked.filter((p) => p.starts >= 2).length / 11;
// Shrinks towards 0.35 as the newest sampled match gets older than maxStalenessDays.
const stalenessFactor = Math.max(
0.35,
Math.min(1, maxStalenessDays / Math.max(ageDays, 1)),
);
// Weighted blend scaled by staleness, capped at 0.58 for stale projections and 0.88 otherwise.
const confidence = Math.max(
0,
Math.min(
staleProjection ? 0.58 : 0.88,
(coverage * 0.45 + historyScore * 0.25 + stableCore * 0.3) *
stalenessFactor,
),
);
// Every projected player carries the same team-level projection metadata.
return ranked.map((p) => ({
teamId: params.teamId,
isStarting: true,
shirtNumber: p.shirtNumber,
position: p.position,
isProbable: true,
lineupSource: "probable_xi",
projectionConfidence: Number(confidence.toFixed(3)),
projectionAgeDays: Number(ageDays.toFixed(1)),
projectionStale: staleProjection,
projectionMatchLimit: matchLimit,
projectionLookbackDays: lookbackDays,
projectionMaxStalenessDays: maxStalenessDays,
player: {
id: p.playerId,
name: p.playerName,
},
}));
}
/**
 * Gathers the ids of the players listed in a sidelined-team payload.
 *
 * Returns an empty set when `teamData` is missing, is not an object, or has
 * no `players` array. For each entry the first non-nullish value among
 * `playerId`, `player_id`, `id` and `personId` is stringified; entries that
 * yield an empty string are dropped.
 */
private extractSidelinedPlayerIds(teamData: any): Set<string> {
  const ids = new Set<string>();
  if (!teamData || typeof teamData !== "object") return ids;
  if (!Array.isArray(teamData.players)) return ids;

  for (const entry of teamData.players) {
    const rawId =
      entry?.playerId ?? entry?.player_id ?? entry?.id ?? entry?.personId ?? "";
    const id = String(rawId);
    if (id) {
      ids.add(id);
    }
  }
  return ids;
}
/**
 * Tells whether a display status is one of the states for which stored
 * lineups are accepted: "live", "finished" or "ft" (case-insensitive).
 * A missing status is treated as an empty string and therefore rejected.
 */
private canTrustStoredLineups(displayStatus?: string): boolean {
  const status = String(displayStatus || "").toLowerCase();
  return ["live", "finished", "ft"].includes(status);
}
}
@@ -96,11 +96,10 @@ export class PredictionsController {
async getPrediction(
@Param("matchId") matchId: string,
): Promise<MatchPredictionDto> {
// Check cache first - DISABLED per user request to always fetch from scratch
// const cached = await this.predictionsService.getCachedPrediction(matchId);
// if (cached) {
// return cached;
// }
const cached = await this.predictionsService.getCachedPrediction(matchId);
if (cached) {
return cached;
}
// Get from AI Engine
const prediction = await this.predictionsService.getPredictionById(matchId);
+74 -5
View File
@@ -223,11 +223,13 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy {
`/v20plus/analyze/${matchId}`,
{ simulate: true, is_simulation: true, pre_match_only: true },
);
await this.recordPredictionRun(matchId, response.data);
return this.enrichPredictionResponse(
response.data as MatchPredictionDto,
const prediction = this.enrichPredictionResponse(
response.data,
matchContext,
);
await this.recordPredictionRun(matchId, response.data);
await this.cachePrediction(matchId, prediction);
return prediction;
} catch (e: unknown) {
const requestError =
e instanceof AiEngineRequestError
@@ -235,6 +237,20 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy {
: new AiEngineRequestError("AI Engine request failed");
const status = requestError.status;
const detail = requestError.detail || requestError.message;
if (
status === HttpStatus.SERVICE_UNAVAILABLE &&
this.hasCooldown(detail)
) {
const storedPrediction = await this.getStoredPrediction(matchId);
if (storedPrediction) {
this.logger.warn(
`AI Engine cooldown for ${matchId}; returning stored prediction`,
);
return this.enrichPredictionResponse(storedPrediction, matchContext);
}
}
this.logger.error(
`Direct AI Engine call failed for ${matchId}: status=${status}, detail=${JSON.stringify(detail)}`,
);
@@ -674,6 +690,11 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy {
odds: this.normalizeDisplayOdds(odds, impliedProb),
implied_prob: impliedProb,
ev_edge: evEdge,
playable: Boolean(record.playable) && interval.threshold_met,
stake_units:
Boolean(record.playable) && interval.threshold_met
? this.asNumber(record.stake_units)
: 0,
reasons: Array.isArray(record.reasons)
? record.reasons.map((reason) => this.translateReason(String(reason)))
: [],
@@ -919,15 +940,39 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy {
return 0;
}
const normalizedPick = pickName.toUpperCase();
const normalizedPick = this.normalizePickKey(pickName);
for (const [key, value] of Object.entries(probabilities)) {
if (key.toUpperCase() === normalizedPick) {
if (this.normalizePickKey(key) === normalizedPick) {
return this.asNumber(value);
}
}
return 0;
}
/**
 * Maps a pick label or probability key to a canonical upper-case token so
 * Turkish and English spellings compare equal (e.g. "Üst" and "over" both
 * become "OVER").
 *
 * The value is trimmed, upper-cased and NFC-normalised, then looked up in
 * the alias table. Labels without an alias are returned in their
 * normalised form.
 *
 * @param value Raw pick label or probability-map key.
 * @returns The canonical token, or the normalised input when no alias exists.
 */
private normalizePickKey(value: string): string {
  // NFC folds "I" + combining dot (U+0307) back into the precomposed "İ",
  // which is what a lower-cased "İ" turns into when upper-cased again.
  const normalized = value.trim().toUpperCase().normalize("NFC");
  const aliases: Record<string, string> = {
    ÜST: "OVER",
    UST: "OVER",
    OVER: "OVER",
    ALT: "UNDER",
    UNDER: "UNDER",
    "KG VAR": "YES",
    VAR: "YES",
    YES: "YES",
    "KG YOK": "NO",
    YOK: "NO",
    NO: "NO",
    TEK: "ODD",
    ODD: "ODD",
    ÇİFT: "EVEN",
    // toUpperCase() is locale-insensitive: "Çift"/"çift" becomes "ÇIFT"
    // (dotless I), so that spelling needs its own entry.
    ÇIFT: "EVEN",
    CIFT: "EVEN",
    EVEN: "EVEN",
  };
  return aliases[normalized] ?? normalized;
}
private impliedProbabilityFromOdds(odds: number): number {
if (odds <= 1) {
return 0;
@@ -1132,6 +1177,30 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy {
return prediction.predictionJson as unknown as MatchPredictionDto;
}
/**
 * Loads the prediction row for `matchId` and returns its `predictionJson`
 * payload as a `MatchPredictionDto`, or `null` when no row exists.
 */
private async getStoredPrediction(
  matchId: string,
): Promise<MatchPredictionDto | null> {
  const stored = await this.prisma.prediction.findUnique({
    where: { matchId },
  });
  if (!stored) {
    return null;
  }
  return stored.predictionJson as unknown as MatchPredictionDto;
}
/**
 * Detects whether an error detail mentions `cooldownRemainingMs`.
 *
 * A string detail matches when it contains that text; a non-null object
 * detail matches when it has that property. Everything else does not match.
 */
private hasCooldown(detail: unknown): boolean {
  switch (typeof detail) {
    case "string":
      return detail.includes("cooldownRemainingMs");
    case "object":
      return detail !== null && "cooldownRemainingMs" in detail;
    default:
      return false;
  }
}
private async ensureSmartCouponDataReady(matchIds: string[]): Promise<void> {
const uniqueMatchIds = [...new Set(matchIds.filter((id) => !!id))];
if (uniqueMatchIds.length === 0) {
+39
View File
@@ -0,0 +1,39 @@
/**
* Run Previous-Day Completed Match Sync
* Usage: npm run feeder:previous-day
*/
import { NestFactory } from "@nestjs/core";
import { FeederService } from "../modules/feeder/feeder.service";
import { Logger } from "@nestjs/common";
/**
 * Boots a standalone Nest application context in "historical" feeder mode,
 * runs the previous-day completed-match scan once, closes the context and
 * exits the process.
 *
 * Exit code is 0 on success and 1 when the scan throws.
 */
async function bootstrap() {
  process.env.FEEDER_MODE = "historical";

  const logger = new Logger("FeederPreviousDayScript");

  logger.log("🚀 Starting previous-day completed match sync...");

  // Load AppModule after FEEDER_MODE is set so cron imports can be disabled.
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const { AppModule } = require("../app.module");

  const app = await NestFactory.createApplicationContext(AppModule, {
    logger: ["log", "error", "warn"],
  });

  // Record the outcome and exit only after the context is closed: calling
  // process.exit() inside the catch block would end the process before the
  // finally block could run app.close().
  let exitCode = 0;

  try {
    const feederService = app.get(FeederService);
    await feederService.runPreviousDayCompletedMatchesScan();
    logger.log("✅ Previous-day completed match sync completed successfully!");
  } catch (error: unknown) {
    const failure = error instanceof Error ? error : new Error(String(error));
    logger.error(`❌ Feeder failed: ${failure.message}`);
    logger.error(failure.stack);
    exitCode = 1;
  } finally {
    await app.close();
  }

  process.exit(exitCode);
}
void bootstrap();
+10 -4
View File
@@ -1,5 +1,5 @@
/**
* Run Previous-Day Completed Match Sync
* Run Full Historical Feeder
* Usage: npm run feeder:historical
*/
@@ -12,7 +12,7 @@ async function bootstrap() {
const logger = new Logger("FeederScript");
logger.log("🚀 Starting previous-day completed match sync...");
logger.log("🚀 Starting full historical feeder...");
// Load AppModule after FEEDER_MODE is set so cron imports can be disabled.
// eslint-disable-next-line @typescript-eslint/no-require-imports
@@ -23,8 +23,14 @@ async function bootstrap() {
try {
const feederService = app.get(FeederService);
await feederService.runPreviousDayCompletedMatchesScan();
logger.log("✅ Previous-day completed match sync completed successfully!");
const startDate = process.env.FEEDER_START_DATE || "2023-06-01";
const sports = (process.env.FEEDER_SPORTS || "football,basketball")
.split(",")
.map((sport) => sport.trim())
.filter(Boolean) as Array<"football" | "basketball">;
await feederService.runHistoricalScan(sports, startDate);
logger.log("✅ Full historical feeder completed successfully!");
} catch (error: any) {
logger.error(`❌ Feeder failed: ${error.message}`);
logger.error(error.stack);
+27 -6
View File
@@ -1,4 +1,4 @@
import { Injectable, Logger } from "@nestjs/common";
import { Injectable, Logger } from "@nestjs/common";
import { Cron } from "@nestjs/schedule";
import { HttpService } from "@nestjs/axios";
import { PrismaService } from "../database/prisma.service";
@@ -182,7 +182,9 @@ export class DataFetcherTask {
this.logger.log("syncLiveMatches START");
const today = getDateStringInTimeZone(new Date(), this.timeZone);
const tomorrow = getShiftedDateStringInTimeZone(1, this.timeZone);
await this.syncMatchList(today);
await this.syncMatchList(tomorrow);
await this.updateLiveScores();
await this.fetchOddsForMatches();
await this.fillMissingLineups();
@@ -432,7 +434,10 @@ export class DataFetcherTask {
for (const match of toUpdate) {
try {
const formation = await this.scraper.fetchStartingFormation(match.id);
const [formation, substitutions] = await Promise.all([
this.scraper.fetchStartingFormation(match.id),
this.scraper.fetchSubstitutions(match.id),
]);
const sidelined = match.matchSlug
? await this.scraper.fetchSidelinedPlayers(
match.id,
@@ -440,11 +445,26 @@ export class DataFetcherTask {
)
: null;
// Normalize to same home.xi/away.xi format used by processMatchOdds
let normalizedLineups: Record<string, unknown> | null = null;
if (formation || substitutions) {
normalizedLineups = {
home: {
xi: formation?.stats?.home || [],
subs: substitutions?.stats?.home || [],
},
away: {
xi: formation?.stats?.away || [],
subs: substitutions?.stats?.away || [],
},
};
}
await this.prisma.liveMatch.update({
where: { id: match.id },
data: {
lineups: formation
? JSON.parse(JSON.stringify(formation))
lineups: normalizedLineups
? JSON.parse(JSON.stringify(normalizedLineups))
: Prisma.JsonNull,
sidelined: sidelined
? JSON.parse(JSON.stringify(sidelined))
@@ -810,8 +830,8 @@ export class DataFetcherTask {
const matchTime = Number(match.mstUtc);
const diffHours = (matchTime - now) / (1000 * 60 * 60);
// Fetch if between -3 hours (started) and +4 hours (upcoming)
if (diffHours < 4 && diffHours > -3) {
// Fetch if between -3 hours (started) and +24 hours (upcoming)
if (diffHours < 24 && diffHours > -3) {
// Lineups
try {
const [startingFormation, substitutions] = await Promise.all([
@@ -1269,3 +1289,4 @@ export class DataFetcherTask {
return new Promise((resolve) => setTimeout(resolve, ms));
}
}