Compare commits
6 Commits
v26-shadow
...
eab95c4e5c
| Author | SHA1 | Date | |
|---|---|---|---|
| eab95c4e5c | |||
| 9027cc9900 | |||
| 3875f2a512 | |||
| 300dceeb4b | |||
| ad01976fb9 | |||
| 6880eb92f5 |
+4
-2
@@ -42,7 +42,9 @@ uploads/
|
||||
public/uploads/
|
||||
|
||||
# Large Datasets and ML Models
|
||||
ai-engine/models/
|
||||
models/
|
||||
ai-engine/models/*
|
||||
!ai-engine/models/*.py
|
||||
models/*
|
||||
!models/*.py
|
||||
colab_export/
|
||||
|
||||
|
||||
@@ -323,8 +323,8 @@ class OddsBandAnalyzer:
|
||||
m.home_team_id,
|
||||
m.away_team_id,
|
||||
CASE
|
||||
WHEN m.home_team_id = %(team_id)s THEN os_sel.odd_value
|
||||
ELSE os_sel2.odd_value
|
||||
WHEN m.home_team_id = %(team_id)s THEN os_sel.odd_value::numeric
|
||||
ELSE os_sel2.odd_value::numeric
|
||||
END AS team_odds
|
||||
FROM matches m
|
||||
JOIN odd_categories oc
|
||||
@@ -344,7 +344,7 @@ class OddsBandAnalyzer:
|
||||
AND m.score_home IS NOT NULL
|
||||
AND m.score_away IS NOT NULL
|
||||
AND m.mst_utc < %(before_ts)s
|
||||
AND COALESCE(os_sel.odd_value, os_sel2.odd_value)
|
||||
AND COALESCE(os_sel.odd_value::numeric, os_sel2.odd_value::numeric)
|
||||
BETWEEN %(band_low)s AND %(band_high)s
|
||||
ORDER BY m.mst_utc DESC
|
||||
LIMIT %(max_lookback)s
|
||||
@@ -432,7 +432,7 @@ class OddsBandAnalyzer:
|
||||
AND m.score_home IS NOT NULL
|
||||
AND m.score_away IS NOT NULL
|
||||
AND m.mst_utc < %(before_ts)s
|
||||
AND os_h.odd_value BETWEEN %(band_low)s AND %(band_high)s
|
||||
AND os_h.odd_value::numeric BETWEEN %(band_low)s AND %(band_high)s
|
||||
ORDER BY m.mst_utc DESC
|
||||
LIMIT %(max_lookback)s
|
||||
)
|
||||
@@ -508,7 +508,7 @@ class OddsBandAnalyzer:
|
||||
f"İlk Yarı {line_str} Alt/Üst",
|
||||
f"Ilk Yari {line_str} Alt/Ust",
|
||||
]
|
||||
score_expr = "COALESCE(m.score_ht_home, 0) + COALESCE(m.score_ht_away, 0)"
|
||||
score_expr = "COALESCE(m.ht_score_home, 0) + COALESCE(m.ht_score_away, 0)"
|
||||
else:
|
||||
cat_names = [
|
||||
f"{line_str} Alt/Üst",
|
||||
@@ -535,7 +535,7 @@ class OddsBandAnalyzer:
|
||||
AND m.status = 'FT'
|
||||
AND m.score_home IS NOT NULL
|
||||
AND m.mst_utc < %(before_ts)s
|
||||
AND os_over.odd_value BETWEEN %(band_low)s AND %(band_high)s
|
||||
AND os_over.odd_value::numeric BETWEEN %(band_low)s AND %(band_high)s
|
||||
ORDER BY m.mst_utc DESC
|
||||
LIMIT %(max_lookback)s
|
||||
)
|
||||
@@ -620,7 +620,7 @@ class OddsBandAnalyzer:
|
||||
AND m.status = 'FT'
|
||||
AND m.score_home IS NOT NULL
|
||||
AND m.mst_utc < %(before_ts)s
|
||||
AND os_yes.odd_value BETWEEN %(band_low)s AND %(band_high)s
|
||||
AND os_yes.odd_value::numeric BETWEEN %(band_low)s AND %(band_high)s
|
||||
ORDER BY m.mst_utc DESC
|
||||
LIMIT %(max_lookback)s
|
||||
)
|
||||
@@ -696,7 +696,7 @@ class OddsBandAnalyzer:
|
||||
AND m.sport = 'football' AND m.status = 'FT'
|
||||
AND m.score_home IS NOT NULL
|
||||
AND m.mst_utc < %(before_ts)s
|
||||
AND os_sel.odd_value BETWEEN %(bl)s AND %(bh)s
|
||||
AND os_sel.odd_value::numeric BETWEEN %(bl)s AND %(bh)s
|
||||
ORDER BY m.mst_utc DESC LIMIT %(ml)s
|
||||
)
|
||||
SELECT COUNT(*) AS ss,
|
||||
@@ -748,7 +748,7 @@ class OddsBandAnalyzer:
|
||||
try:
|
||||
cur.execute("""
|
||||
WITH ht_matches AS (
|
||||
SELECT m.score_ht_home, m.score_ht_away,
|
||||
SELECT m.ht_score_home, m.ht_score_away,
|
||||
m.home_team_id, m.away_team_id
|
||||
FROM matches m
|
||||
JOIN odd_categories oc ON oc.match_id = m.id
|
||||
@@ -761,18 +761,18 @@ class OddsBandAnalyzer:
|
||||
AND os2.name = '2' AND m.away_team_id = %(tid)s
|
||||
WHERE (m.home_team_id = %(tid)s OR m.away_team_id = %(tid)s)
|
||||
AND m.sport = 'football' AND m.status = 'FT'
|
||||
AND m.score_ht_home IS NOT NULL
|
||||
AND m.ht_score_home IS NOT NULL
|
||||
AND m.mst_utc < %(before_ts)s
|
||||
AND COALESCE(os1.odd_value, os2.odd_value)
|
||||
AND COALESCE(os1.odd_value::numeric, os2.odd_value::numeric)
|
||||
BETWEEN %(bl)s AND %(bh)s
|
||||
ORDER BY m.mst_utc DESC LIMIT %(ml)s
|
||||
)
|
||||
SELECT COUNT(*) AS ss,
|
||||
COALESCE(AVG(CASE
|
||||
WHEN (home_team_id = %(tid)s AND score_ht_home > score_ht_away)
|
||||
OR (away_team_id = %(tid)s AND score_ht_away > score_ht_home)
|
||||
WHEN (home_team_id = %(tid)s AND ht_score_home > ht_score_away)
|
||||
OR (away_team_id = %(tid)s AND ht_score_away > ht_score_home)
|
||||
THEN 1.0 ELSE 0.0 END), 0.33) AS win_rate,
|
||||
COALESCE(AVG(CASE WHEN score_ht_home = score_ht_away
|
||||
COALESCE(AVG(CASE WHEN ht_score_home = ht_score_away
|
||||
THEN 1.0 ELSE 0.0 END), 0.40) AS draw_rate
|
||||
FROM ht_matches
|
||||
""", {
|
||||
@@ -824,7 +824,7 @@ class OddsBandAnalyzer:
|
||||
AND m.sport = 'football' AND m.status = 'FT'
|
||||
AND m.score_home IS NOT NULL
|
||||
AND m.mst_utc < %(before_ts)s
|
||||
AND os_odd.odd_value BETWEEN %(bl)s AND %(bh)s
|
||||
AND os_odd.odd_value::numeric BETWEEN %(bl)s AND %(bh)s
|
||||
ORDER BY m.mst_utc DESC LIMIT %(ml)s
|
||||
)
|
||||
SELECT COUNT(*) AS ss,
|
||||
@@ -1185,7 +1185,7 @@ class OddsBandAnalyzer:
|
||||
'IY/MS'
|
||||
)
|
||||
JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
|
||||
AND os.odd_value BETWEEN %(bl)s AND %(bh)s
|
||||
AND os.odd_value::numeric BETWEEN %(bl)s AND %(bh)s
|
||||
WHERE m.sport = 'football'
|
||||
AND m.status = 'FT'
|
||||
AND m.score_home IS NOT NULL
|
||||
|
||||
+11
-5
@@ -14,10 +14,13 @@ from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
try:
|
||||
from models.basketball_v25 import get_basketball_v25_predictor
|
||||
HAS_BASKETBALL = True
|
||||
except ImportError:
|
||||
HAS_BASKETBALL = False
|
||||
from services.single_match_orchestrator import get_single_match_orchestrator
|
||||
from services.v26_shadow_engine import get_v26_shadow_engine
|
||||
from data.database import dispose_engine
|
||||
|
||||
load_dotenv()
|
||||
|
||||
@@ -49,9 +52,6 @@ async def lifespan(_: FastAPI):
|
||||
|
||||
yield
|
||||
|
||||
# Cleanup async DB connections on shutdown
|
||||
await dispose_engine()
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
title="Suggest-Bet AI Engine",
|
||||
@@ -123,9 +123,15 @@ def health_check() -> dict[str, Any]:
|
||||
try:
|
||||
orchestrator = get_single_match_orchestrator()
|
||||
shadow_engine = get_v26_shadow_engine()
|
||||
|
||||
if HAS_BASKETBALL:
|
||||
basketball_predictor = get_basketball_v25_predictor()
|
||||
basketball_readiness = basketball_predictor.readiness_summary()
|
||||
ready = bool(basketball_readiness["fully_loaded"])
|
||||
ready = bool(basketball_readiness.get("fully_loaded", True))
|
||||
else:
|
||||
basketball_readiness = {"fully_loaded": False, "error": "Basketball module not found"}
|
||||
ready = True
|
||||
|
||||
return {
|
||||
"status": "healthy" if ready else "degraded",
|
||||
"engine": "v28.main",
|
||||
|
||||
@@ -0,0 +1,413 @@
|
||||
"""
|
||||
Calibration Module for XGBoost Models
|
||||
=====================================
|
||||
Calibrates raw probabilities from XGBoost models using Isotonic Regression.
|
||||
Ensures that a predicted probability of 70% actually corresponds to a 70% win rate.
|
||||
|
||||
Usage:
|
||||
from ai_engine.models.calibration import Calibrator
|
||||
calibrator = Calibrator()
|
||||
calibrated_prob = calibrator.calibrate("ms", raw_prob)
|
||||
|
||||
# Training new calibration models:
|
||||
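calibrator.train_calibration(valid_df, market="ms_home", prob_col="raw_prob", actual_col="actual")  # prob_col/actual_col are required; column names shown are placeholders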
|
||||
"""
|
||||
|
||||
import os
|
||||
import pickle
|
||||
import json
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
from sklearn.isotonic import IsotonicRegression
|
||||
from sklearn.calibration import calibration_curve
|
||||
from sklearn.metrics import brier_score_loss
|
||||
|
||||
AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
CALIBRATION_DIR = os.path.join(AI_ENGINE_DIR, "models", "calibration")
|
||||
|
||||
os.makedirs(CALIBRATION_DIR, exist_ok=True)
|
||||
|
||||
# Supported markets for calibration
|
||||
SUPPORTED_MARKETS = [
|
||||
"ms", # Match Result (1X2) - multi-class, calibrated per class
|
||||
"ms_home", # Standard Home win probability
|
||||
"ms_home_heavy_fav", # Context: home odds <= 1.40
|
||||
"ms_home_fav", # Context: 1.40 < home odds <= 1.80
|
||||
"ms_home_balanced", # Context: 1.80 < home odds <= 2.50
|
||||
"ms_home_underdog", # Context: home odds > 2.50
|
||||
"ms_draw", # Draw probability
|
||||
"ms_away", # Away win probability
|
||||
"ou15", # Over/Under 1.5
|
||||
"ou25", # Over/Under 2.5
|
||||
"ou35", # Over/Under 3.5
|
||||
"btts", # Both Teams to Score
|
||||
"ht_ft", # Half-Time/Full-Time
|
||||
"dc", # Double Chance
|
||||
"ht", # Half-Time Result
|
||||
]
|
||||
|
||||
|
||||
class CalibrationMetrics:
    """Stores calibration quality metrics for a market.

    All fields start at neutral defaults (zeros / empty string) and are
    filled in by whoever trains or loads a calibration model.
    """

    def __init__(self):
        self.brier_score: float = 0.0
        self.calibration_error: float = 0.0
        self.sample_count: int = 0
        self.last_trained: str = ""
        self.mean_predicted: float = 0.0
        self.mean_actual: float = 0.0

    def to_dict(self) -> Dict:
        """Return the metrics as a plain, JSON-serialisable dict.

        Float metrics are coerced to built-in ``float`` before rounding.
        The fields are often assigned NumPy scalars (e.g. the result of
        ``np.mean`` over a float32 column); ``round()`` on such a value
        keeps the NumPy type, and ``json.dump`` refuses ``np.float32``.

        Returns:
            Dict with the six metric fields; float metrics rounded to 4 places.
        """
        return {
            "brier_score": round(float(self.brier_score), 4),
            "calibration_error": round(float(self.calibration_error), 4),
            "sample_count": self.sample_count,
            "last_trained": self.last_trained,
            "mean_predicted": round(float(self.mean_predicted), 4),
            "mean_actual": round(float(self.mean_actual), 4),
        }
|
||||
|
||||
|
||||
class Calibrator:
|
||||
"""
|
||||
Probability calibration using Isotonic Regression.
|
||||
|
||||
Isotonic Regression is a non-parametric method that fits a piecewise
|
||||
constant function that is monotonically increasing. It's ideal for
|
||||
calibrating probabilities because:
|
||||
|
||||
1. It preserves ranking (if P(A) > P(B) before, P(A) > P(B) after)
|
||||
2. It doesn't assume a specific distribution shape
|
||||
3. It can correct systematic over/under-confidence
|
||||
|
||||
Example:
|
||||
# Before calibration: model predicts 70% but actual win rate is 60%
|
||||
# After calibration: model predicts 70% → calibrated to 60%
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.calibrators: Dict[str, IsotonicRegression] = {}
|
||||
self.metrics: Dict[str, CalibrationMetrics] = {}
|
||||
self.heuristic_fallback: Dict[str, float] = {
|
||||
"ms": 0.90,
|
||||
"ms_home": 0.90,
|
||||
"ms_home_heavy_fav": 0.95,
|
||||
"ms_home_fav": 0.90,
|
||||
"ms_home_balanced": 0.85,
|
||||
"ms_home_underdog": 0.80,
|
||||
"ms_draw": 0.90,
|
||||
"ms_away": 0.90,
|
||||
"ou15": 0.90,
|
||||
"ou25": 0.90,
|
||||
"ou35": 0.90,
|
||||
"btts": 0.90,
|
||||
"ht_ft": 0.85,
|
||||
"dc": 0.93,
|
||||
"ht": 0.85,
|
||||
}
|
||||
self._load_calibrators()
|
||||
|
||||
def _load_calibrators(self):
    """Load saved calibration models and metrics for every supported market.

    For each market, ``<market>_calibrator.pkl`` and ``<market>_metrics.json``
    are looked up in CALIBRATION_DIR. Missing files are skipped; a file that
    fails to load is reported with a warning and does not stop the loop.
    """
    # (attribute name, default used when the key is absent from the JSON)
    metric_fields = (
        ("brier_score", 0.0),
        ("calibration_error", 0.0),
        ("sample_count", 0),
        ("last_trained", ""),
        ("mean_predicted", 0.0),
        ("mean_actual", 0.0),
    )

    for market in SUPPORTED_MARKETS:
        model_path = os.path.join(CALIBRATION_DIR, f"{market}_calibrator.pkl")
        metrics_path = os.path.join(CALIBRATION_DIR, f"{market}_metrics.json")

        if os.path.exists(model_path):
            try:
                with open(model_path, "rb") as f:
                    # NOTE(review): pickle.load can execute arbitrary code;
                    # only safe while CALIBRATION_DIR holds trusted files.
                    self.calibrators[market] = pickle.load(f)
                print(f"[Calibrator] Loaded calibration model for {market}")
            except Exception as e:
                print(f"[Calibrator] Warning: Failed to load {market}: {e}")

        if os.path.exists(metrics_path):
            try:
                with open(metrics_path, "r") as f:
                    data = json.load(f)
                metrics = CalibrationMetrics()
                for attr_name, default in metric_fields:
                    setattr(metrics, attr_name, data.get(attr_name, default))
                self.metrics[market] = metrics
            except Exception as e:
                print(f"[Calibrator] Warning: Failed to load metrics for {market}: {e}")
|
||||
|
||||
def calibrate(self, market_type: str, raw_prob: float, odds_val: Optional[float] = None) -> float:
    """
    Calibrate one raw probability for the given market.

    The market name is lower-cased and hyphens become underscores. For
    "ms_home" with odds above 1.0, an odds-band specific model is preferred
    when one has been loaded. A trained model's output is clipped to
    [0.01, 0.99]; if no model exists, or its prediction raises, the
    heuristic shrinkage is used instead.

    Args:
        market_type (str): 'ms_home', 'ou25', 'btts', 'ht_ft', etc.
        raw_prob (float): The raw probability to calibrate
        odds_val (float, optional): Pre-match odds used to pick an odds band

    Returns:
        float: Calibrated probability
    """
    market_key = market_type.lower().replace("-", "_")

    if market_key == "ms_home" and odds_val is not None and odds_val > 1.0:
        # Inclusive upper bound of each odds band, checked in ascending order.
        odds_bands = (
            (1.40, "ms_home_heavy_fav"),
            (1.80, "ms_home_fav"),
            (2.50, "ms_home_balanced"),
        )
        bucket_key = next(
            (name for upper, name in odds_bands if odds_val <= upper),
            "ms_home_underdog",
        )
        # Only switch to the band when a model for it is actually available.
        if bucket_key in self.calibrators:
            market_key = bucket_key

    if market_key in self.calibrators:
        try:
            prediction = self.calibrators[market_key].predict([raw_prob])[0]
            return float(np.clip(prediction, 0.01, 0.99))
        except Exception as e:
            print(f"[Calibrator] Warning: Isotonic failed for {market_key}: {e}")
            # continue to the heuristic below

    return self._heuristic_calibrate(market_key, raw_prob)
|
||||
|
||||
def _heuristic_calibrate(self, market_type: str, raw_prob: float) -> float:
|
||||
"""
|
||||
Heuristic calibration fallback when no trained model exists.
|
||||
|
||||
This applies a conservative shrinkage towards the mean:
|
||||
- Binary markets (OU, BTTS): shrink towards 0.5
|
||||
- Multi-class (MS): shrink towards 0.33
|
||||
- HT/FT: stronger shrinkage due to higher variance
|
||||
"""
|
||||
# Get shrinkage factor for this market
|
||||
shrinkage = self.heuristic_fallback.get(market_type, 0.90)
|
||||
|
||||
if market_type in ["ms", "ms_home", "ms_home_heavy_fav", "ms_home_fav", "ms_home_balanced", "ms_home_underdog", "ms_draw", "ms_away"]:
|
||||
# Pull towards 0.33 (uniform for 3-class)
|
||||
return (raw_prob * shrinkage) + (0.33 * (1.0 - shrinkage))
|
||||
|
||||
elif market_type in ["ou15", "ou25", "ou35", "btts"]:
|
||||
# Pull towards 0.5 (uniform for binary)
|
||||
return (raw_prob * shrinkage) + (0.5 * (1.0 - shrinkage))
|
||||
|
||||
elif market_type in ["ht_ft", "ht"]:
|
||||
# Stronger shrinkage for high-variance markets
|
||||
return raw_prob * shrinkage
|
||||
|
||||
elif market_type == "dc":
|
||||
# Double chance is more reliable
|
||||
return (raw_prob * shrinkage) + (0.66 * (1.0 - shrinkage))
|
||||
|
||||
return raw_prob
|
||||
|
||||
def train_calibration(
    self,
    df: pd.DataFrame,
    market: str,
    prob_col: str,
    actual_col: str,
    min_samples: int = 100,
    save: bool = True,
) -> CalibrationMetrics:
    """
    Fit an isotonic calibration curve for one market and record its metrics.

    Rows where ``prob_col`` or ``actual_col`` is missing are dropped first.
    When fewer than ``min_samples`` rows remain, nothing is fitted or stored
    and the returned metrics carry only the sample count.

    Note: Brier score and ECE are computed on the same rows the curve was
    fitted on.

    Args:
        df: DataFrame with predictions and actual outcomes
        market: Market identifier (e.g., 'ms_home', 'ou25', 'btts')
        prob_col: Column name for raw probabilities
        actual_col: Column name for actual outcomes (0 or 1)
        min_samples: Minimum samples required to train
        save: Whether to persist the model and metrics to disk

    Returns:
        CalibrationMetrics describing the fitted calibrator
    """
    usable_rows = df[[prob_col, actual_col]].dropna()
    n_samples = len(usable_rows)

    metrics = CalibrationMetrics()
    metrics.sample_count = n_samples

    # Too little data: report the count and leave existing state untouched.
    if n_samples < min_samples:
        print(f"[Calibrator] Warning: Only {n_samples} samples for {market}, "
              f"need at least {min_samples}")
        return metrics

    raw_probs = usable_rows[prob_col].values
    actuals = usable_rows[actual_col].values

    iso = IsotonicRegression(out_of_bounds="clip", increasing=True)
    iso.fit(raw_probs, actuals)
    calibrated_probs = iso.predict(raw_probs)

    metrics.last_trained = datetime.utcnow().isoformat()
    metrics.brier_score = brier_score_loss(actuals, calibrated_probs)
    metrics.mean_predicted = np.mean(raw_probs)
    metrics.mean_actual = np.mean(actuals)
    metrics.calibration_error = self._calculate_ece(
        calibrated_probs, actuals, n_bins=10
    )

    # Register in memory first so the calibrator is usable even if saving fails later.
    self.calibrators[market] = iso
    self.metrics[market] = metrics

    if save:
        self._save_calibration(market, iso, metrics)

    print(f"[Calibrator] Trained {market}: "
          f"Brier={metrics.brier_score:.4f}, "
          f"ECE={metrics.calibration_error:.4f}, "
          f"n={n_samples}")

    return metrics
|
||||
|
||||
def train_all_markets(
    self,
    df: pd.DataFrame,
    market_config: Dict[str, Tuple[str, str]],
    min_samples: int = 100,
) -> Dict[str, CalibrationMetrics]:
    """
    Train and save calibration models for several markets in one pass.

    A market whose training raises is reported and left out of the result;
    the remaining markets are still processed.

    Args:
        df: DataFrame with all predictions and outcomes
        market_config: Dict mapping market -> (prob_col, actual_col)
            e.g., {'ou25': ('ou25_over_prob', 'ou25_over_actual')}
        min_samples: Minimum samples per market

    Returns:
        Dict of market -> CalibrationMetrics for markets that did not raise
    """
    trained = {}

    for market, columns in market_config.items():
        prob_col, actual_col = columns
        print(f"\n[Calibrator] Training {market}...")
        try:
            trained[market] = self.train_calibration(
                df=df,
                market=market,
                prob_col=prob_col,
                actual_col=actual_col,
                min_samples=min_samples,
                save=True,
            )
        except Exception as e:
            print(f"[Calibrator] Failed to train {market}: {e}")

    return trained
|
||||
|
||||
def _calculate_ece(
|
||||
self,
|
||||
probs: np.ndarray,
|
||||
actuals: np.ndarray,
|
||||
n_bins: int = 10
|
||||
) -> float:
|
||||
"""
|
||||
Calculate Expected Calibration Error (ECE).
|
||||
|
||||
ECE = sum(|bin_accuracy - bin_confidence| * bin_weight)
|
||||
|
||||
Lower is better. Perfect calibration = 0.
|
||||
"""
|
||||
bin_boundaries = np.linspace(0, 1, n_bins + 1)
|
||||
ece = 0.0
|
||||
|
||||
for i in range(n_bins):
|
||||
in_bin = (probs >= bin_boundaries[i]) & (probs < bin_boundaries[i + 1])
|
||||
prop_in_bin = np.mean(in_bin)
|
||||
|
||||
if prop_in_bin > 0:
|
||||
accuracy_in_bin = np.mean(actuals[in_bin])
|
||||
avg_confidence_in_bin = np.mean(probs[in_bin])
|
||||
ece += np.abs(accuracy_in_bin - avg_confidence_in_bin) * prop_in_bin
|
||||
|
||||
return ece
|
||||
|
||||
def _save_calibration(
    self,
    market: str,
    calibrator: IsotonicRegression,
    metrics: CalibrationMetrics
):
    """Write the market's calibrator (pickle) and metrics (JSON) into CALIBRATION_DIR.

    Files are named ``<market>_calibrator.pkl`` and ``<market>_metrics.json``.
    """
    targets = {
        "model": os.path.join(CALIBRATION_DIR, f"{market}_calibrator.pkl"),
        "metrics": os.path.join(CALIBRATION_DIR, f"{market}_metrics.json"),
    }

    with open(targets["model"], "wb") as model_file:
        pickle.dump(calibrator, model_file)

    with open(targets["metrics"], "w") as metrics_file:
        json.dump(metrics.to_dict(), metrics_file, indent=2)

    print(f"[Calibrator] Saved {market} to {CALIBRATION_DIR}")
|
||||
|
||||
def get_calibration_report(self) -> Dict[str, Any]:
    """Summarise which markets have trained models, their metrics, and which
    supported markets have neither metrics nor a model (heuristic only).
    """
    metrics_by_market = {
        market: self.metrics[market].to_dict()
        for market in SUPPORTED_MARKETS
        if market in self.metrics
    }
    # A market with a model but no metrics appears in neither list.
    without_model = [
        market
        for market in SUPPORTED_MARKETS
        if market not in self.metrics and market not in self.calibrators
    ]

    return {
        "trained_markets": list(self.calibrators.keys()),
        "metrics": metrics_by_market,
        "heuristic_only": without_model,
    }
|
||||
|
||||
def get_calibrated_probabilities(
    self,
    market: str,
    raw_probs: np.ndarray
) -> np.ndarray:
    """
    Calibrate a sequence of probabilities one by one via ``calibrate``.

    Args:
        market: Market type
        raw_probs: Array of raw probabilities

    Returns:
        Array of calibrated probabilities, in the same order as the input
    """
    calibrated = []
    for raw in raw_probs:
        calibrated.append(self.calibrate(market, raw))
    return np.array(calibrated)
|
||||
|
||||
|
||||
# Singleton instance
|
||||
_calibrator_instance: Optional[Calibrator] = None
|
||||
|
||||
|
||||
def get_calibrator() -> Calibrator:
    """Return the module-wide Calibrator, constructing it on first use."""
    global _calibrator_instance
    if _calibrator_instance is not None:
        return _calibrator_instance
    _calibrator_instance = Calibrator()
    return _calibrator_instance
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,645 @@
|
||||
"""
|
||||
V25 Ensemble Predictor - NO TARGET LEAKAGE
|
||||
===========================================
|
||||
Multi-model ensemble for match prediction using XGBoost and LightGBM.
|
||||
|
||||
Features:
|
||||
- 82 engineered features (NO target leakage)
|
||||
- Market-specific models (MS, OU25, BTTS)
|
||||
- Weighted ensemble predictions
|
||||
- Value bet detection
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from typing import Dict, List, Optional, Any
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
import xgboost as xgb
|
||||
import lightgbm as lgb
|
||||
|
||||
# CatBoost is optional
|
||||
try:
|
||||
from catboost import CatBoostClassifier
|
||||
CATBOOST_AVAILABLE = True
|
||||
except ImportError:
|
||||
CatBoostClassifier = None
|
||||
CATBOOST_AVAILABLE = False
|
||||
|
||||
# Paths
|
||||
MODELS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'v25')
|
||||
|
||||
|
||||
@dataclass
class MarketPrediction:
    """One pick for a single betting market, with optional odds/value data."""
    market_type: str
    pick: str
    probability: float
    confidence: float
    odds: float = 0.0
    is_value_bet: bool = False
    edge: float = 0.0

    def to_dict(self) -> dict:
        """Serialise to a plain dict.

        ``probability`` and ``edge`` are multiplied by 100 and rounded to one
        decimal; ``confidence`` is only rounded to one decimal.
        """
        probability_pct = round(self.probability * 100, 1)
        edge_pct = round(self.edge * 100, 1)
        payload = {}
        payload['market_type'] = self.market_type
        payload['pick'] = self.pick
        payload['probability'] = probability_pct
        payload['confidence'] = round(self.confidence, 1)
        payload['odds'] = self.odds
        payload['is_value_bet'] = self.is_value_bet
        payload['edge'] = edge_pct
        return payload
|
||||
|
||||
|
||||
@dataclass
class ValueBet:
    """A pick flagged as a value opportunity, with its odds and edge."""
    market_type: str
    pick: str
    probability: float
    odds: float
    edge: float
    confidence: float

    def to_dict(self) -> dict:
        """Serialise to a plain dict.

        ``probability`` and ``edge`` are multiplied by 100 and rounded to one
        decimal; ``confidence`` is only rounded to one decimal.
        """
        probability_pct = round(self.probability * 100, 1)
        edge_pct = round(self.edge * 100, 1)
        confidence_rounded = round(self.confidence, 1)
        return dict([
            ('market_type', self.market_type),
            ('pick', self.pick),
            ('probability', probability_pct),
            ('odds', self.odds),
            ('edge', edge_pct),
            ('confidence', confidence_rounded),
        ])
|
||||
|
||||
|
||||
@dataclass
class MatchPrediction:
    """Prediction bundle for one match: MS, OU25 and BTTS plus value bets."""
    match_id: str
    home_team: str
    away_team: str

    # Match result (MS)
    home_prob: float = 0.0
    draw_prob: float = 0.0
    away_prob: float = 0.0
    ms_pick: str = ''
    ms_confidence: float = 0.0

    # Over/Under 2.5
    over_prob: float = 0.0
    under_prob: float = 0.0
    ou25_pick: str = ''
    ou25_confidence: float = 0.0

    # Both teams to score
    btts_yes_prob: float = 0.0
    btts_no_prob: float = 0.0
    btts_pick: str = ''
    btts_confidence: float = 0.0

    # Detected value bets
    value_bets: List[ValueBet] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialise to a nested dict.

        Probabilities are multiplied by 100 and rounded to one decimal;
        confidences are only rounded to one decimal.
        """
        def pct(fraction: float) -> float:
            return round(fraction * 100, 1)

        ms_section = {
            'home_prob': pct(self.home_prob),
            'draw_prob': pct(self.draw_prob),
            'away_prob': pct(self.away_prob),
            'pick': self.ms_pick,
            'confidence': round(self.ms_confidence, 1),
        }
        ou25_section = {
            'over_prob': pct(self.over_prob),
            'under_prob': pct(self.under_prob),
            'pick': self.ou25_pick,
            'confidence': round(self.ou25_confidence, 1),
        }
        btts_section = {
            'yes_prob': pct(self.btts_yes_prob),
            'no_prob': pct(self.btts_no_prob),
            'pick': self.btts_pick,
            'confidence': round(self.btts_confidence, 1),
        }
        serialised_bets = []
        for bet in self.value_bets:
            serialised_bets.append(bet.to_dict())

        return {
            'match_id': self.match_id,
            'home_team': self.home_team,
            'away_team': self.away_team,
            'ms': ms_section,
            'ou25': ou25_section,
            'btts': btts_section,
            'value_bets': serialised_bets,
        }
|
||||
|
||||
|
||||
class V25Predictor:
|
||||
"""
|
||||
V25 Ensemble Predictor - NO TARGET LEAKAGE
|
||||
|
||||
Uses market-specific XGBoost and LightGBM models.
|
||||
Each market (MS, OU25, BTTS) has its own trained models.
|
||||
"""
|
||||
|
||||
# Feature columns (82 features, NO target leakage)
|
||||
FEATURE_COLS = [
|
||||
# ELO Features (8)
|
||||
'home_overall_elo', 'away_overall_elo', 'elo_diff',
|
||||
'home_home_elo', 'away_away_elo',
|
||||
'home_form_elo', 'away_form_elo', 'form_elo_diff',
|
||||
|
||||
# Form Features (12)
|
||||
'home_goals_avg', 'home_conceded_avg',
|
||||
'away_goals_avg', 'away_conceded_avg',
|
||||
'home_clean_sheet_rate', 'away_clean_sheet_rate',
|
||||
'home_scoring_rate', 'away_scoring_rate',
|
||||
'home_winning_streak', 'away_winning_streak',
|
||||
'home_unbeaten_streak', 'away_unbeaten_streak',
|
||||
|
||||
# H2H Features (6)
|
||||
'h2h_total_matches', 'h2h_home_win_rate', 'h2h_draw_rate',
|
||||
'h2h_avg_goals', 'h2h_btts_rate', 'h2h_over25_rate',
|
||||
|
||||
# Team Stats Features (8)
|
||||
'home_avg_possession', 'away_avg_possession',
|
||||
'home_avg_shots_on_target', 'away_avg_shots_on_target',
|
||||
'home_shot_conversion', 'away_shot_conversion',
|
||||
'home_avg_corners', 'away_avg_corners',
|
||||
|
||||
# Odds Features (23)
|
||||
'odds_ms_h', 'odds_ms_d', 'odds_ms_a',
|
||||
'implied_home', 'implied_draw', 'implied_away',
|
||||
'odds_ht_ms_h', 'odds_ht_ms_d', 'odds_ht_ms_a',
|
||||
'odds_ou05_o', 'odds_ou05_u',
|
||||
'odds_ou15_o', 'odds_ou15_u',
|
||||
'odds_ou25_o', 'odds_ou25_u',
|
||||
'odds_ou35_o', 'odds_ou35_u',
|
||||
'odds_ht_ou05_o', 'odds_ht_ou05_u',
|
||||
'odds_ht_ou15_o', 'odds_ht_ou15_u',
|
||||
'odds_btts_y', 'odds_btts_n',
|
||||
|
||||
# League Features (4)
|
||||
'home_xga', 'away_xga',
|
||||
'league_avg_goals', 'league_zero_goal_rate',
|
||||
|
||||
# Upset Engine (4)
|
||||
'upset_atmosphere', 'upset_motivation', 'upset_fatigue', 'upset_potential',
|
||||
|
||||
# Referee Engine (5)
|
||||
'referee_home_bias', 'referee_avg_goals', 'referee_cards_total',
|
||||
'referee_avg_yellow', 'referee_experience',
|
||||
|
||||
# Momentum Engine (3)
|
||||
'home_momentum_score', 'away_momentum_score', 'momentum_diff',
|
||||
|
||||
# Squad Features (9)
|
||||
'home_squad_quality', 'away_squad_quality', 'squad_diff',
|
||||
'home_key_players', 'away_key_players',
|
||||
'home_missing_impact', 'away_missing_impact',
|
||||
'home_goals_form', 'away_goals_form',
|
||||
]
|
||||
|
||||
# Model weights for ensemble
|
||||
DEFAULT_WEIGHTS = {
|
||||
'xgb': 0.50,
|
||||
'lgb': 0.50,
|
||||
}
|
||||
|
||||
def __init__(self, models_dir: str = None):
|
||||
"""
|
||||
Initialize V25 Predictor.
|
||||
|
||||
Args:
|
||||
models_dir: Directory containing model files. Defaults to v25/ directory.
|
||||
"""
|
||||
self.models_dir = models_dir or MODELS_DIR
|
||||
self.models = {} # market -> {'xgb': model, 'lgb': model}
|
||||
self._loaded = False
|
||||
|
||||
# All trained market models available in V25
|
||||
ALL_MARKETS = [
|
||||
'ms', 'ou25', 'btts', # Core markets
|
||||
'ou15', 'ou35', # Additional OU lines
|
||||
'ht_result', 'ht_ou05', 'ht_ou15', # HT markets
|
||||
'htft', # HT/FT combo
|
||||
'cards_ou45', # Cards market
|
||||
'handicap_ms', # Handicap
|
||||
'odd_even', # Odd/Even goals
|
||||
]
|
||||
|
||||
# Multi-class markets (output > 2 classes)
|
||||
MULTICLASS_MARKETS = {'ms', 'ht_result', 'htft', 'handicap_ms'}
|
||||
|
||||
def load_models(self) -> bool:
    """Load the XGBoost / LightGBM model files for every market in ALL_MARKETS.

    Expected file names inside ``self.models_dir`` are ``xgb_v25_<market>.json``
    and ``lgb_v25_<market>.txt``. Missing or empty files are skipped, and a
    market with no usable file is removed from ``self.models``.

    Returns:
        True when at least one model file was loaded; False when none were
        loaded or any error occurred while loading.
    """
    def has_content(path: str) -> bool:
        return os.path.exists(path) and os.path.getsize(path) > 0

    try:
        files_loaded = 0

        for market in self.ALL_MARKETS:
            market_models = {}
            self.models[market] = market_models

            # File contents are read in Python and handed to the library
            # (avoids non-ASCII path issues).
            xgb_path = os.path.join(self.models_dir, f'xgb_v25_{market}.json')
            if has_content(xgb_path):
                with open(xgb_path, 'r', encoding='utf-8') as f:
                    xgb_content = f.read()
                booster = xgb.Booster()
                booster.load_model(bytearray(xgb_content, 'utf-8'))
                market_models['xgb'] = booster
                files_loaded += 1

            lgb_path = os.path.join(self.models_dir, f'lgb_v25_{market}.txt')
            if has_content(lgb_path):
                with open(lgb_path, 'r', encoding='utf-8') as f:
                    model_str = f.read()
                market_models['lgb'] = lgb.Booster(model_str=model_str)
                files_loaded += 1

            # Drop markets for which nothing could be loaded.
            if not market_models:
                del self.models[market]

        print(f"[V25] Loaded {files_loaded} model files across {len(self.models)} markets: {list(self.models.keys())}")
        self._loaded = files_loaded > 0
        return self._loaded

    except Exception as e:
        print(f"[ERROR] Error loading models: {e}")
        import traceback
        traceback.print_exc()
        return False
|
||||
|
||||
def _ensure_loaded(self):
|
||||
"""Ensure models are loaded before prediction."""
|
||||
if not self._loaded:
|
||||
if not self.load_models():
|
||||
raise RuntimeError("Failed to load V25 models")
|
||||
|
||||
def _prepare_features(self, features: Dict[str, float]) -> pd.DataFrame:
|
||||
"""Prepare feature vector for prediction."""
|
||||
X = pd.DataFrame([{col: features.get(col, 0.0) for col in self.FEATURE_COLS}])
|
||||
return X
|
||||
|
||||
def predict_ms(self, features: Dict[str, float]) -> tuple:
    """
    Predict match result (1X2) from the 'ms' models.

    Each available model's class probabilities are multiplied by its entry
    in DEFAULT_WEIGHTS, summed, and renormalised to sum to 1. A LightGBM
    result is only used when it is two-dimensional. When no model
    contributes, (0.33, 0.33, 0.33) is returned.

    Returns:
        (home_prob, draw_prob, away_prob)
    """
    self._ensure_loaded()

    X = self._prepare_features(features)
    ms_models = self.models.get('ms', {})
    weighted = []

    if 'xgb' in ms_models:
        xgb_proba = ms_models['xgb'].predict(xgb.DMatrix(X))
        # A flat result is treated as the single row of class probabilities.
        if xgb_proba.ndim == 1:
            xgb_proba = np.array([xgb_proba])
        weighted.append(xgb_proba[0] * self.DEFAULT_WEIGHTS['xgb'])

    if 'lgb' in ms_models:
        lgb_proba = ms_models['lgb'].predict(X)
        if lgb_proba.ndim == 2:
            weighted.append(lgb_proba[0] * self.DEFAULT_WEIGHTS['lgb'])

    if len(weighted) == 0:
        return 0.33, 0.33, 0.33

    combined = np.sum(weighted, axis=0)
    combined = combined / combined.sum()

    home_prob, draw_prob, away_prob = (float(combined[i]) for i in range(3))
    return home_prob, draw_prob, away_prob
|
||||
|
||||
def predict_ou25(self, features: Dict[str, float]) -> tuple:
    """
    Predict Over/Under 2.5 goals.

    The first value emitted by each available model is collected and the
    plain (unweighted) mean is used as the "over" probability.

    Returns:
        (over_prob, under_prob); (0.5, 0.5) when no model produced a value.
    """
    self._ensure_loaded()

    X = self._prepare_features(features)
    market_models = self.models.get('ou25', {})
    votes = []

    # XGBoost — only a 1-D ndarray output is accepted.
    if 'xgb' in market_models:
        xgb_out = market_models['xgb'].predict(xgb.DMatrix(X))
        if isinstance(xgb_out, np.ndarray) and xgb_out.ndim == 1:
            votes.append(xgb_out[0])

    # LightGBM — any ndarray output is accepted.
    if 'lgb' in market_models:
        lgb_out = market_models['lgb'].predict(X)
        if isinstance(lgb_out, np.ndarray):
            votes.append(lgb_out[0])

    if len(votes) == 0:
        return 0.5, 0.5

    over = np.mean(votes)
    return float(over), float(1 - over)
|
||||
|
||||
def predict_btts(self, features: Dict[str, float]) -> tuple:
    """
    Predict Both Teams To Score.

    The first value emitted by each available model is collected and the
    plain (unweighted) mean is used as the "yes" probability.

    Returns:
        (yes_prob, no_prob); (0.5, 0.5) when no model produced a value.
    """
    self._ensure_loaded()

    X = self._prepare_features(features)
    market_models = self.models.get('btts', {})
    votes = []

    # XGBoost — only a 1-D ndarray output is accepted.
    if 'xgb' in market_models:
        xgb_out = market_models['xgb'].predict(xgb.DMatrix(X))
        if isinstance(xgb_out, np.ndarray) and xgb_out.ndim == 1:
            votes.append(xgb_out[0])

    # LightGBM — any ndarray output is accepted.
    if 'lgb' in market_models:
        lgb_out = market_models['lgb'].predict(X)
        if isinstance(lgb_out, np.ndarray):
            votes.append(lgb_out[0])

    if len(votes) == 0:
        return 0.5, 0.5

    yes = np.mean(votes)
    return float(yes), float(1 - yes)
|
||||
|
||||
def predict_market(self, market: str, features: Dict[str, float]) -> np.ndarray:
    """
    Generic prediction for any loaded market.

    Args:
        market: Market key (e.g. 'ht_result', 'htft', 'cards_ou45').
        features: Feature dictionary.

    Returns:
        numpy array of probabilities, or None when the market has no loaded
        models or no model returned an ndarray.
        For binary markets: [positive_prob]
        For multi-class markets: [class0_prob, class1_prob, ...]

    When exactly one model answers, its vector is returned untouched. With
    two, the vectors are blended using ``DEFAULT_WEIGHTS`` (rescaled to sum
    to 1) and multi-class results are renormalised.
    """
    self._ensure_loaded()

    if market not in self.models:
        return None

    X = self._prepare_features(features)
    market_models = self.models[market]
    is_multiclass = market in self.MULTICLASS_MARKETS

    def as_vector(raw):
        # Multi-class: first row of a 2-D output, or a 1-D output as-is.
        # Everything else is reduced to a one-element [positive_prob] array.
        if is_multiclass and raw.ndim == 2:
            return raw[0]
        if is_multiclass and raw.ndim == 1:
            return raw
        return np.array([raw[0]])

    # Evaluated lazily and in this order: XGBoost first, then LightGBM.
    runners = (
        ('xgb', lambda model: model.predict(xgb.DMatrix(X))),
        ('lgb', lambda model: model.predict(X)),
    )

    vectors = []
    vector_weights = []
    for model_key, run in runners:
        if model_key not in market_models:
            continue
        raw_output = run(market_models[model_key])
        if isinstance(raw_output, np.ndarray):
            vectors.append(as_vector(raw_output))
            vector_weights.append(self.DEFAULT_WEIGHTS[model_key])

    if not vectors:
        return None
    if len(vectors) == 1:
        return vectors[0]

    # Weighted average, accumulated in the dtype of the first vector.
    weight_total = sum(vector_weights[:len(vectors)])
    blended = np.zeros_like(vectors[0])
    for vector, weight in zip(vectors, vector_weights):
        blended += vector * (weight / weight_total)

    # Normalize multi-class
    if is_multiclass and blended.sum() > 0:
        blended = blended / blended.sum()

    return blended
|
||||
|
||||
def has_market(self, market: str) -> bool:
    """Report whether ``market`` is a key of the loaded-model registry."""
    registry = self.models
    return market in registry
|
||||
|
||||
def predict_match(
    self,
    match_id: str,
    home_team: str,
    away_team: str,
    features: Dict[str, float],
    odds: Optional[Dict[str, float]] = None,
) -> MatchPrediction:
    """
    Predict all markets for a match.

    Runs the 1X2, Over/Under 2.5 and BTTS predictors, selects the most
    probable outcome per market (confidence = its probability x 100) and,
    when odds are supplied, attaches the detected value bets.

    Args:
        match_id: Match identifier.
        home_team: Home team name.
        away_team: Away team name.
        features: Feature dictionary.
        odds: Optional odds dictionary for value bet detection.

    Returns:
        MatchPrediction object.
    """
    def top_outcome(outcomes):
        # Label with the highest probability plus its confidence in percent.
        label = max(outcomes, key=outcomes.get)
        return label, outcomes[label] * 100

    # Model probabilities per market
    home_prob, draw_prob, away_prob = self.predict_ms(features)
    over_prob, under_prob = self.predict_ou25(features)
    btts_yes_prob, btts_no_prob = self.predict_btts(features)

    # Picks
    ms_pick, ms_confidence = top_outcome({'1': home_prob, 'X': draw_prob, '2': away_prob})
    ou25_pick, ou25_confidence = top_outcome({'Over': over_prob, 'Under': under_prob})
    btts_pick, btts_confidence = top_outcome({'Yes': btts_yes_prob, 'No': btts_no_prob})

    prediction = MatchPrediction(
        match_id=match_id,
        home_team=home_team,
        away_team=away_team,
        home_prob=home_prob,
        draw_prob=draw_prob,
        away_prob=away_prob,
        ms_pick=ms_pick,
        ms_confidence=ms_confidence,
        over_prob=over_prob,
        under_prob=under_prob,
        ou25_pick=ou25_pick,
        ou25_confidence=ou25_confidence,
        btts_yes_prob=btts_yes_prob,
        btts_no_prob=btts_no_prob,
        btts_pick=btts_pick,
        btts_confidence=btts_confidence,
    )

    # Value bets are only evaluated when a non-empty odds mapping is given.
    if odds:
        prediction.value_bets = self._detect_value_bets(
            prediction, odds, home_prob, draw_prob, away_prob,
            over_prob, under_prob, btts_yes_prob, btts_no_prob
        )

    return prediction
|
||||
|
||||
def _detect_value_bets(
    self,
    prediction: MatchPrediction,
    odds: Dict[str, float],
    home_prob: float,
    draw_prob: float,
    away_prob: float,
    over_prob: float,
    under_prob: float,
    btts_yes_prob: float,
    btts_no_prob: float,
) -> List[ValueBet]:
    """
    Detect value bets based on model vs market odds.

    For every outcome whose odds key is present with a positive price, the
    implied probability is ``1 / price`` and the edge is the model
    probability minus that value. Outcomes whose edge exceeds 5% are
    returned, in the order MS (1, X, 2), OU25 (Over, Under), BTTS (Yes, No).
    ``prediction`` is accepted for interface compatibility and is not read.
    """
    min_edge = 0.05  # 5% minimum edge

    # (odds key, market type, pick label, model probability)
    outcome_table = (
        ('ms_h', 'MS', '1', home_prob),
        ('ms_d', 'MS', 'X', draw_prob),
        ('ms_a', 'MS', '2', away_prob),
        ('ou25_o', 'OU25', 'Over', over_prob),
        ('ou25_u', 'OU25', 'Under', under_prob),
        ('btts_y', 'BTTS', 'Yes', btts_yes_prob),
        ('btts_n', 'BTTS', 'No', btts_no_prob),
    )

    value_bets = []
    for odds_key, market_type, pick, probability in outcome_table:
        if odds_key in odds and odds[odds_key] > 0:
            implied = 1 / odds[odds_key]
            edge = probability - implied
            if edge > min_edge:
                value_bets.append(ValueBet(
                    market_type=market_type,
                    pick=pick,
                    probability=probability,
                    odds=odds[odds_key],
                    edge=edge,
                    confidence=probability * 100,
                ))

    return value_bets
|
||||
|
||||
|
||||
# Singleton instance
|
||||
_v25_predictor: Optional[V25Predictor] = None
|
||||
|
||||
|
||||
def get_v25_predictor() -> V25Predictor:
    """
    Return the module-wide V25 predictor.

    On first use an instance is created, stored in ``_v25_predictor`` and
    asked to load its models; later calls return that same instance. The
    result of ``load_models()`` is not checked here.
    """
    global _v25_predictor
    if _v25_predictor is not None:
        return _v25_predictor

    # Publish the instance before loading, so it stays cached either way.
    _v25_predictor = V25Predictor()
    _v25_predictor.load_models()
    return _v25_predictor
|
||||
@@ -0,0 +1,291 @@
|
||||
"""
|
||||
V27 Pro Predictor — Odds-Free Fundamentals + Value Edge Detection
|
||||
|
||||
This module loads V27 ensemble models (XGBoost, LightGBM, CatBoost)
|
||||
and produces market-independent probability estimates.
|
||||
|
||||
The key insight: V27 is trained WITHOUT odds features, so it produces
|
||||
"true" probabilities unbiased by market pricing. The divergence between
|
||||
V25 (odds-aware) and V27 (odds-free) predictions signals market mispricing.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
V27_DIR = Path(__file__).parent / "v27"
|
||||
|
||||
|
||||
class V27Predictor:
    """
    Loads V27 ensemble models and provides predictions using the
    odds-free feature vector (documented upstream as 82 features) whose
    column order is read from ``v27_feature_cols.json``.

    Predictions are plain averages over every model of a market that
    returns a valid probability vector. ``load_models()`` must succeed
    before any ``predict_*`` call returns something other than ``None``.
    """

    MARKETS = ["ms", "ou25"]

    # Legacy file names that predate the ``v27_<market>_<type>.pkl`` scheme.
    # The loader's original comment marks them as match-result (MS) models,
    # so they are only ever used as a fallback for the "ms" market.
    _GENERIC_MODEL_FILES = {
        "xgb": "v27_xgboost.pkl",
        "lgb": "v27_lightgbm.pkl",
        "cb": "v27_catboost.pkl",
    }

    # Slack allowed when checking that a raw output lies inside [0, 1].
    _PROB_TOLERANCE = 1e-6

    def __init__(self):
        self.models: Dict[str, Dict[str, object]] = {}
        self.feature_cols: List[str] = []
        self._loaded = False

    def load_models(self) -> bool:
        """
        Load the feature column spec and all V27 ensemble models.

        Returns:
            True when at least one model file was loaded (or the predictor
            was already loaded); False when the feature column file is
            missing or unreadable, or when no model could be loaded.
        """
        if self._loaded:
            return True

        # Feature columns
        cols_path = V27_DIR / "v27_feature_cols.json"
        if not cols_path.exists():
            logger.error("[V27] Feature columns file not found: %s", cols_path)
            return False

        try:
            with open(cols_path, "r", encoding="utf-8") as f:
                self.feature_cols = json.load(f)
            logger.info("[V27] Loaded %d feature columns", len(self.feature_cols))
        except Exception as e:
            logger.error("[V27] Failed to load feature columns: %s", e)
            return False

        # Load models per market
        for market in self.MARKETS:
            self.models[market] = {}
            for label in self._GENERIC_MODEL_FILES:
                path = self._resolve_model_path(market, label)
                if path is None:
                    logger.warning("[V27] Model file not found for %s/%s", market, label)
                    continue

                try:
                    # NOTE(security): pickle.load can execute arbitrary code;
                    # the files under V27_DIR must come from a trusted source.
                    with open(path, "rb") as f:
                        model = pickle.load(f)
                    self.models[market][label] = model
                    logger.info("[V27] ✓ Loaded %s/%s from %s", market, label, path.name)
                except Exception as e:
                    logger.error("[V27] ✗ Failed to load %s/%s: %s", market, label, e)

        loaded_count = sum(len(v) for v in self.models.values())
        if loaded_count == 0:
            logger.error("[V27] No models loaded!")
            return False

        self._loaded = True
        logger.info("[V27] Total models loaded: %d across %d markets", loaded_count, len(self.models))
        return True

    def _resolve_model_path(self, market: str, short: str) -> Optional[Path]:
        """
        Locate the pickle for ``market``/``short``.

        The market-specific file (e.g. ``v27_ms_xgb.pkl``) wins. The legacy
        generic file is considered for the "ms" market only: loading it for
        another market would put a match-result model into that slot.
        """
        specific = V27_DIR / f"v27_{market}_{short}.pkl"
        if specific.exists():
            return specific

        if market == "ms":
            legacy = V27_DIR / self._GENERIC_MODEL_FILES[short]
            if legacy.exists():
                return legacy

        return None

    def _build_feature_array(self, features: Dict[str, float]) -> np.ndarray:
        """
        Build the ordered 1 x N feature array from the full feature dict.

        Only the columns listed in ``self.feature_cols`` are used; columns
        missing from ``features`` are filled with 0.0.
        """
        row = [float(features.get(col, 0.0)) for col in self.feature_cols]
        return np.array([row])

    def _coerce_proba(self, raw, expected_classes: int) -> Optional[np.ndarray]:
        """
        Turn a raw ``predict`` output into a probability vector.

        Accepted layouts: a 2-D array with ``expected_classes`` columns
        (first row is used), a 1-D array holding the positive-class
        probability of a binary market, or a 1-D array that already has
        ``expected_classes`` entries. Returns None for any other layout and
        for vectors containing non-finite values or values outside [0, 1] —
        the latter is what a class *label* (e.g. 2) looks like.
        """
        if not isinstance(raw, np.ndarray):
            return None

        if raw.ndim == 2 and raw.shape[1] == expected_classes:
            vector = raw[0]
        elif raw.ndim == 1 and expected_classes == 2:
            p = float(raw[0])
            vector = np.array([1.0 - p, p])
        elif raw.ndim == 1 and len(raw) == expected_classes:
            vector = raw
        else:
            return None

        tol = self._PROB_TOLERANCE
        if not np.all(np.isfinite(vector)):
            return None
        if np.any(vector < -tol) or np.any(vector > 1.0 + tol):
            return None
        return vector

    def _predict_with_model(self, model, X: np.ndarray, label: str, expected_classes: int) -> Optional[np.ndarray]:
        """
        Predict probabilities from a model, handling both sklearn wrappers
        (predict_proba) and raw Booster objects (predict).

        For raw XGBoost Boosters, DMatrix is created WITH feature_names
        to match the training schema. xgboost / lightgbm are imported
        optionally: when one is not installed, the corresponding Booster
        check is simply skipped.

        Returns:
            A probability vector of length ``expected_classes`` or None.
        """
        # 1. Try sklearn-style predict_proba first
        if hasattr(model, 'predict_proba'):
            try:
                proba = model.predict_proba(X)[0]
            except Exception as e:
                # Best effort: fall through to the raw predict paths below.
                logger.warning("[V27] %s predict_proba failed, trying raw predict: %s", label, e)
            else:
                if len(proba) == expected_classes:
                    return proba
                logger.warning("[V27] %s predict_proba returned %d classes, expected %d", label, len(proba), expected_classes)
                # A model trained for a different class count cannot serve
                # this market; its predict() would only return class labels.
                return None

        try:
            import xgboost as xgb
        except ImportError:
            xgb = None
        try:
            import lightgbm as lgbm
        except ImportError:
            lgbm = None

        # 2. Raw xgboost.Booster — MUST pass feature_names
        if xgb is not None and isinstance(model, xgb.Booster):
            try:
                dmat = xgb.DMatrix(X, feature_names=self.feature_cols or None)
                return self._coerce_proba(model.predict(dmat), expected_classes)
            except Exception as e:
                logger.warning("[V27] %s xgb.Booster predict failed: %s", label, e)
            return None

        # 3. Raw lightgbm.Booster — pass as DataFrame with column names
        if lgbm is not None and isinstance(model, lgbm.Booster):
            try:
                if self.feature_cols:
                    import pandas as pd
                    raw = model.predict(pd.DataFrame(X, columns=self.feature_cols))
                else:
                    raw = model.predict(X)
                return self._coerce_proba(raw, expected_classes)
            except Exception as e:
                logger.warning("[V27] %s lgb.Booster predict failed: %s", label, e)
            return None

        # 4. Generic fallback (CatBoost, etc.)
        try:
            if hasattr(model, 'predict'):
                return self._coerce_proba(model.predict(X), expected_classes)
        except Exception as e:
            logger.warning("[V27] %s generic predict failed: %s", label, e)

        return None

    def _ensemble_average(self, market: str, features: Dict[str, float], expected_classes: int) -> Optional[np.ndarray]:
        """
        Average the valid probability vectors of every model of ``market``.

        Returns None when the predictor is not loaded, the market has no
        models, or no model produced a vector of the expected length.
        """
        members = self.models.get(market)
        if not self._loaded or not members:
            return None

        X = self._build_feature_array(features)
        tag = market.upper()
        outputs = []
        for label, model in members.items():
            proba = self._predict_with_model(model, X, f"{tag}/{label}", expected_classes=expected_classes)
            if proba is not None and len(proba) == expected_classes:
                outputs.append(proba)

        if not outputs:
            return None
        return np.mean(outputs, axis=0)

    def predict_ms(self, features: Dict[str, float]) -> Optional[Dict[str, float]]:
        """
        Predict match-result (MS) probabilities (Home/Draw/Away).
        Returns dict with keys: home, draw, away — or None when no model
        is available.
        """
        avg = self._ensemble_average("ms", features, 3)
        if avg is None:
            return None
        return {
            "home": float(avg[0]),
            "draw": float(avg[1]),
            "away": float(avg[2]),
        }

    def predict_ou25(self, features: Dict[str, float]) -> Optional[Dict[str, float]]:
        """
        Predict Over/Under 2.5 probabilities.
        Returns dict with keys: under, over — or None when no model is
        available.
        """
        avg = self._ensemble_average("ou25", features, 2)
        if avg is None:
            return None
        return {
            "under": float(avg[0]),
            "over": float(avg[1]),
        }

    def predict_all(self, features: Dict[str, float]) -> Dict[str, Optional[Dict[str, float]]]:
        """Run predictions for all supported markets."""
        return {
            "ms": self.predict_ms(features),
            "ou25": self.predict_ou25(features),
        }
|
||||
|
||||
|
||||
def compute_divergence(
    v25_probs: Dict[str, float],
    v27_probs: Dict[str, float],
) -> Dict[str, float]:
    """
    Compute the divergence signal between V25 (odds-aware) and V27 (odds-free).

    Positive divergence = V27 thinks it's MORE likely than the market → VALUE BET
    Negative divergence = V27 thinks it's LESS likely than the market → PASS

    One entry is produced per key of ``v27_probs``: the V27 value minus the
    V25 value, rounded to 4 decimals. A key missing from ``v25_probs`` is
    treated as 0.33.
    """
    return {
        outcome: round(v27_probs.get(outcome, 0.33) - v25_probs.get(outcome, 0.33), 4)
        for outcome in v27_probs
    }
|
||||
|
||||
|
||||
def compute_value_edge(
    v25_probs: Dict[str, float],
    v27_probs: Dict[str, float],
    odds: Dict[str, float],
) -> Dict[str, Dict]:
    """
    Detect value bets by combining V25/V27 divergence with odds.

    For each key of ``v27_probs``:
      * the implied probability is ``1 / odds`` when the odds exceed 1.01,
        otherwise 0.0 (missing odds count as 0.0);
      * the edge is the V27 probability minus the implied probability, or
        0.0 when there is no implied probability;
      * the divergence is the V27 probability minus the V25 probability
        (0.33 when V25 has no value for the key).

    An outcome is flagged as value when edge > 5% and divergence > 2%.

    Returns per-outcome:
        { v27_prob, v25_prob, implied_prob, divergence, edge, is_value }
        with the numeric fields rounded to 4 decimals.
    """
    report = {}
    for outcome, model_p in v27_probs.items():
        market_aware_p = v25_probs.get(outcome, 0.33)
        price = odds.get(outcome, 0.0)

        if price > 1.01:
            implied_p = 1.0 / price
        else:
            implied_p = 0.0

        gap = model_p - market_aware_p
        if implied_p > 0:
            edge = model_p - implied_p
        else:
            edge = 0.0

        report[outcome] = {
            "v27_prob": round(model_p, 4),
            "v25_prob": round(market_aware_p, 4),
            "implied_prob": round(implied_p, 4),
            "divergence": round(gap, 4),
            "edge": round(edge, 4),
            "is_value": edge > 0.05 and gap > 0.02,  # 5% edge + 2% divergence
        }

    return report
|
||||
@@ -0,0 +1,497 @@
|
||||
"""
|
||||
Deterministic betting judge for prediction packages.
|
||||
|
||||
The model layer estimates event probabilities. BettingBrain decides whether
|
||||
those probabilities are trustworthy enough to risk money.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
|
||||
class BettingBrain:
|
||||
# --- Hard gates -----------------------------------------------------------
MIN_ODDS = 1.30          # odds below this add an "odds_below_minimum" veto
MIN_BAND_SAMPLE = 8      # smallest triple-value band sample counted as usable

# --- Score thresholds (0-100 scale) ----------------------------------------
MIN_BET_SCORE = 72.0     # at or above: BET
MIN_WATCH_SCORE = 62.0   # at or above (but below MIN_BET_SCORE): WATCH

# --- Model disagreement / sanity limits ------------------------------------
HARD_DIVERGENCE = 0.22   # |model - V27| at or above this is a veto
SOFT_DIVERGENCE = 0.14   # |model - V27| at or above this is a score penalty
EXTREME_MODEL_PROB = 0.85  # model probability regarded as extreme ...
EXTREME_GAP = 0.30         # ... when it also beats the implied probability by this much

# Per-market score adjustment; markets not listed receive -3.0 in _judge_row.
MARKET_PRIORS = {
    "DC": 4.0,
    "OU15": 3.0,
    "OU25": 2.0,
    "BTTS": 0.0,
    "MS": -2.0,
    "OU35": -2.0,
    "HT": -6.0,
    "HTFT": -12.0,
    "CARDS": -5.0,
    "OE": -8.0,
}
|
||||
|
||||
def judge(self, package: Dict[str, Any]) -> Dict[str, Any]:
    """
    Re-evaluate every candidate pick of a prediction package and decide
    between BET, WATCHLIST and NO_BET.

    Args:
        package: Prediction package. It is returned unchanged (same object)
            when it has no dict under ``"v27_engine"``.

    Returns:
        A shallow copy of ``package`` with ``main_pick``, ``value_pick``,
        ``supporting_picks``, ``bet_summary``, ``bet_advice``,
        ``betting_brain`` / ``upper_brain`` and ``analysis_details``
        replaced by the judged versions. When no candidate rows can be
        collected, the plain shallow copy is returned.

    The input package and its nested ``bet_advice`` / ``analysis_details``
    dicts are not modified.
    """
    v27_engine = package.get("v27_engine")
    if not isinstance(v27_engine, dict):
        return package

    guarded = dict(package)
    rows = self._collect_rows(guarded)
    if not rows:
        return guarded

    # Judge every candidate once, keyed by "market:pick".
    judged_rows: Dict[str, Dict[str, Any]] = {}
    decisions: List[Dict[str, Any]] = []
    for row in rows:
        key = self._row_key(row)
        judged = self._judge_row(dict(row), guarded)
        judged_rows[key] = judged
        decisions.append(judged["betting_brain"])

    approved = [
        row for row in judged_rows.values()
        if row.get("betting_brain", {}).get("action") == "BET"
    ]
    watchlist = [
        row for row in judged_rows.values()
        if row.get("betting_brain", {}).get("action") == "WATCH"
    ]
    approved.sort(key=self._candidate_sort_key, reverse=True)
    watchlist.sort(key=self._candidate_sort_key, reverse=True)

    original_main = guarded.get("main_pick") or {}
    main_pick = None
    decision = "NO_BET"
    decision_reason = "No candidate passed the betting brain evidence gates."

    # Main pick priority: best approved row, else best watchlist row
    # (forced to no-bet), else the original main pick (forced to no-bet).
    if approved:
        main_pick = dict(approved[0])
        main_pick["is_guaranteed"] = bool(main_pick.get("betting_brain", {}).get("score", 0.0) >= 82.0)
        main_pick["pick_reason"] = "betting_brain_approved"
        decision = "BET"
        decision_reason = main_pick.get("betting_brain", {}).get("summary", "Evidence is aligned.")
    elif watchlist:
        main_pick = dict(watchlist[0])
        self._force_no_bet(main_pick, "betting_brain_watchlist")
        decision = "WATCHLIST"
        decision_reason = main_pick.get("betting_brain", {}).get("summary", "Interesting but not clean enough.")
    elif original_main:
        main_pick = dict(judged_rows.get(self._row_key(original_main), original_main))
        self._force_no_bet(main_pick, "betting_brain_no_safe_pick")

    main_key = self._row_key(main_pick) if main_pick else ""
    supporting = [
        dict(row)
        for row in judged_rows.values()
        if self._row_key(row) != main_key
    ]
    supporting.sort(key=self._candidate_sort_key, reverse=True)

    bet_summary = [
        self._summary_item(row)
        for row in sorted(judged_rows.values(), key=self._candidate_sort_key, reverse=True)
    ]

    guarded["main_pick"] = main_pick
    guarded["value_pick"] = self._pick_value_candidate(judged_rows, main_key)
    guarded["supporting_picks"] = supporting[:6]
    guarded["bet_summary"] = bet_summary

    # Stake is only suggested for an approved, still-playable main pick.
    playable = decision == "BET" and bool(main_pick and main_pick.get("playable"))
    advice = dict(guarded.get("bet_advice") or {})
    advice["playable"] = playable
    advice["suggested_stake_units"] = float(main_pick.get("stake_units", 0.0)) if playable else 0.0
    advice["reason"] = "betting_brain_approved" if playable else "betting_brain_no_bet"
    advice["decision"] = decision
    advice["confidence_band"] = self._decision_band(main_pick)
    guarded["bet_advice"] = advice

    rejected = [d for d in decisions if d.get("action") == "REJECT"]
    guarded["betting_brain"] = {
        "version": "judge-v1",
        "decision": decision,
        "reason": decision_reason,
        "main_pick_key": main_key or None,
        "approved_count": len(approved),
        "watchlist_count": len(watchlist),
        "rejected_count": len(rejected),
        "top_candidates": self._top_decisions(decisions),
        "rules": {
            "min_bet_score": self.MIN_BET_SCORE,
            "min_watch_score": self.MIN_WATCH_SCORE,
            "min_band_sample": self.MIN_BAND_SAMPLE,
            "hard_divergence": self.HARD_DIVERGENCE,
            "soft_divergence": self.SOFT_DIVERGENCE,
            "extreme_model_probability": self.EXTREME_MODEL_PROB,
            "extreme_model_market_gap": self.EXTREME_GAP,
        },
    }
    guarded["upper_brain"] = guarded["betting_brain"]

    # Copy before annotating: ``guarded`` is only a shallow copy, so writing
    # into the existing dict would leak into the caller's package, and an
    # explicit ``None`` value would not support item assignment at all.
    details = dict(guarded.get("analysis_details") or {})
    details["betting_brain_applied"] = True
    details["betting_brain_decision"] = decision
    guarded["analysis_details"] = details
    return guarded
|
||||
|
||||
def _judge_row(self, row: Dict[str, Any], package: Dict[str, Any]) -> Dict[str, Any]:
|
||||
market = str(row.get("market") or "")
|
||||
pick = str(row.get("pick") or "")
|
||||
model_prob = self._market_probability(row, package)
|
||||
odds = self._safe_float(row.get("odds"), 0.0) or 0.0
|
||||
implied = (1.0 / odds) if odds > 1.0 else 0.0
|
||||
model_gap = (model_prob - implied) if model_prob is not None and implied > 0 else None
|
||||
calibrated_conf = self._safe_float(row.get("calibrated_confidence", row.get("confidence")), 0.0) or 0.0
|
||||
play_score = self._safe_float(row.get("play_score"), 0.0) or 0.0
|
||||
ev_edge = self._safe_float(row.get("ev_edge", row.get("edge")), 0.0) or 0.0
|
||||
v27_prob = self._v27_probability(market, pick, package.get("v27_engine") or {})
|
||||
divergence = abs(model_prob - v27_prob) if model_prob is not None and v27_prob is not None else None
|
||||
triple_key = self._triple_key(market, pick)
|
||||
triple = self._triple_value(package, triple_key)
|
||||
band_sample = int(self._safe_float((triple or {}).get("band_sample"), 0.0) or 0.0)
|
||||
triple_is_value = bool((triple or {}).get("is_value"))
|
||||
consensus = str((package.get("v27_engine") or {}).get("consensus") or "").upper()
|
||||
|
||||
positives: List[str] = []
|
||||
issues: List[str] = []
|
||||
vetoes: List[str] = []
|
||||
score = 0.0
|
||||
|
||||
if row.get("playable"):
|
||||
score += 18.0
|
||||
positives.append("base_model_playable")
|
||||
else:
|
||||
score -= 18.0
|
||||
issues.append("base_model_not_playable")
|
||||
|
||||
score += max(0.0, min(20.0, calibrated_conf * 0.22))
|
||||
score += max(-8.0, min(16.0, ev_edge * 45.0))
|
||||
score += max(0.0, min(14.0, play_score * 0.12))
|
||||
score += self.MARKET_PRIORS.get(market, -3.0)
|
||||
|
||||
data_quality = package.get("data_quality") or {}
|
||||
quality_score = self._safe_float(data_quality.get("score"), 0.6) or 0.6
|
||||
score += max(-8.0, min(6.0, (quality_score - 0.55) * 16.0))
|
||||
risk = str((package.get("risk") or {}).get("level") or "MEDIUM").upper()
|
||||
score += {"LOW": 5.0, "MEDIUM": 0.0, "HIGH": -12.0, "EXTREME": -22.0}.get(risk, -4.0)
|
||||
|
||||
if odds < self.MIN_ODDS:
|
||||
vetoes.append("odds_below_minimum")
|
||||
if calibrated_conf < 38.0:
|
||||
vetoes.append("calibrated_confidence_too_low")
|
||||
if play_score < 50.0:
|
||||
vetoes.append("play_score_too_low")
|
||||
|
||||
if divergence is not None:
|
||||
if divergence >= self.HARD_DIVERGENCE:
|
||||
score -= 42.0
|
||||
vetoes.append("v25_v27_hard_disagreement")
|
||||
elif divergence >= self.SOFT_DIVERGENCE:
|
||||
score -= 18.0
|
||||
issues.append("v25_v27_soft_disagreement")
|
||||
else:
|
||||
score += 11.0
|
||||
positives.append("v25_v27_aligned")
|
||||
|
||||
if isinstance(triple, dict):
|
||||
if triple_is_value:
|
||||
score += 18.0
|
||||
positives.append("triple_value_confirmed")
|
||||
elif market in {"DC", "MS", "OU25", "BTTS"}:
|
||||
score -= 18.0
|
||||
issues.append("triple_value_not_confirmed")
|
||||
|
||||
if band_sample >= 25:
|
||||
score += 8.0
|
||||
positives.append("strong_historical_sample")
|
||||
elif band_sample >= self.MIN_BAND_SAMPLE:
|
||||
score += 3.0
|
||||
positives.append("usable_historical_sample")
|
||||
else:
|
||||
score -= 16.0
|
||||
issues.append("historical_sample_too_low")
|
||||
if market == "DC":
|
||||
vetoes.append("dc_without_historical_sample")
|
||||
elif market in {"MS", "DC", "OU25"}:
|
||||
score -= 10.0
|
||||
issues.append("missing_triple_value_evidence")
|
||||
|
||||
if consensus == "DISAGREE" and market in {"MS", "DC"}:
|
||||
score -= 12.0
|
||||
issues.append("engine_consensus_disagree")
|
||||
|
||||
if (
|
||||
model_prob is not None
|
||||
and model_gap is not None
|
||||
and model_prob >= self.EXTREME_MODEL_PROB
|
||||
and model_gap >= self.EXTREME_GAP
|
||||
and not triple_is_value
|
||||
):
|
||||
score -= 24.0
|
||||
vetoes.append("extreme_probability_without_evidence")
|
||||
|
||||
if market in {"HT", "HTFT", "OE"} and score < 86.0:
|
||||
vetoes.append("volatile_market_requires_exceptional_evidence")
|
||||
|
||||
score = max(0.0, min(100.0, score))
|
||||
action = "BET"
|
||||
if vetoes:
|
||||
action = "REJECT"
|
||||
elif score < self.MIN_WATCH_SCORE:
|
||||
action = "REJECT"
|
||||
elif score < self.MIN_BET_SCORE:
|
||||
action = "WATCH"
|
||||
|
||||
row["betting_brain"] = {
|
||||
"action": action,
|
||||
"score": round(score, 1),
|
||||
"summary": self._summary(action, market, pick, positives, issues, vetoes),
|
||||
"positives": positives[:5],
|
||||
"issues": issues[:6],
|
||||
"vetoes": vetoes[:6],
|
||||
"model_prob": round(model_prob, 4) if model_prob is not None else None,
|
||||
"implied_prob": round(implied, 4),
|
||||
"model_market_gap": round(model_gap, 4) if model_gap is not None else None,
|
||||
"v27_prob": round(v27_prob, 4) if v27_prob is not None else None,
|
||||
"divergence": round(divergence, 4) if divergence is not None else None,
|
||||
"triple_key": triple_key,
|
||||
"triple_value": triple,
|
||||
}
|
||||
|
||||
if action != "BET":
|
||||
self._force_no_bet(row, f"betting_brain_{action.lower()}")
|
||||
else:
|
||||
row["is_guaranteed"] = bool(score >= 82.0)
|
||||
row["pick_reason"] = "betting_brain_approved"
|
||||
row["stake_units"] = self._brain_stake(row, score)
|
||||
row["bet_grade"] = "A" if score >= 82.0 else "B"
|
||||
row["playable"] = True
|
||||
|
||||
self._append_reason(row, f"betting_brain_{action.lower()}_{round(score)}")
|
||||
return row
|
||||
|
||||
def _collect_rows(self, package: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
rows: Dict[str, Dict[str, Any]] = {}
|
||||
for source in ("main_pick", "value_pick"):
|
||||
item = package.get(source)
|
||||
if isinstance(item, dict) and item.get("market"):
|
||||
rows[self._row_key(item)] = dict(item)
|
||||
|
||||
for source in ("supporting_picks", "bet_summary"):
|
||||
for item in package.get(source) or []:
|
||||
if isinstance(item, dict) and item.get("market"):
|
||||
key = self._row_key(item)
|
||||
rows[key] = self._merge_row(rows.get(key), item)
|
||||
return list(rows.values())
|
||||
|
||||
@staticmethod
|
||||
def _merge_row(existing: Optional[Dict[str, Any]], incoming: Dict[str, Any]) -> Dict[str, Any]:
|
||||
if existing is None:
|
||||
return dict(incoming)
|
||||
merged = dict(incoming)
|
||||
merged.update({k: v for k, v in existing.items() if v is not None})
|
||||
for key in ("decision_reasons", "reasons"):
|
||||
reasons = list(existing.get(key) or []) + list(incoming.get(key) or [])
|
||||
if reasons:
|
||||
merged[key] = list(dict.fromkeys(reasons))
|
||||
return merged
|
||||
|
||||
def _pick_value_candidate(self, rows: Dict[str, Dict[str, Any]], main_key: str) -> Optional[Dict[str, Any]]:
|
||||
candidates = [
|
||||
row for key, row in rows.items()
|
||||
if key != main_key
|
||||
and row.get("betting_brain", {}).get("action") in {"BET", "WATCH"}
|
||||
and (self._safe_float(row.get("odds"), 0.0) or 0.0) >= 1.60
|
||||
]
|
||||
candidates.sort(key=self._candidate_sort_key, reverse=True)
|
||||
return dict(candidates[0]) if candidates else None
|
||||
|
||||
def _summary_item(self, row: Dict[str, Any]) -> Dict[str, Any]:
|
||||
reasons = list(row.get("decision_reasons") or row.get("reasons") or [])
|
||||
return {
|
||||
"market": row.get("market"),
|
||||
"pick": row.get("pick"),
|
||||
"raw_confidence": row.get("raw_confidence", row.get("confidence")),
|
||||
"calibrated_confidence": row.get("calibrated_confidence", row.get("confidence")),
|
||||
"bet_grade": row.get("bet_grade", "PASS"),
|
||||
"playable": bool(row.get("playable")),
|
||||
"stake_units": float(row.get("stake_units", 0.0) or 0.0),
|
||||
"play_score": row.get("play_score", 0.0),
|
||||
"ev_edge": row.get("ev_edge", row.get("edge", 0.0)),
|
||||
"implied_prob": row.get("implied_prob", 0.0),
|
||||
"odds_reliability": row.get("odds_reliability", 0.35),
|
||||
"odds": row.get("odds", 0.0),
|
||||
"reasons": reasons[:6],
|
||||
"betting_brain": row.get("betting_brain"),
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _candidate_sort_key(row: Dict[str, Any]) -> Tuple[float, float, float]:
|
||||
brain = row.get("betting_brain") or {}
|
||||
action_boost = {"BET": 2.0, "WATCH": 1.0, "REJECT": 0.0}.get(str(brain.get("action")), 0.0)
|
||||
return (
|
||||
action_boost,
|
||||
float(brain.get("score", 0.0) or 0.0),
|
||||
float(row.get("play_score", 0.0) or 0.0),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _row_key(row: Optional[Dict[str, Any]]) -> str:
|
||||
if not isinstance(row, dict):
|
||||
return ""
|
||||
return f"{row.get('market')}:{row.get('pick')}"
|
||||
|
||||
def _force_no_bet(self, row: Dict[str, Any], reason: str) -> None:
|
||||
row["playable"] = False
|
||||
row["stake_units"] = 0.0
|
||||
row["bet_grade"] = "PASS"
|
||||
row["is_guaranteed"] = False
|
||||
row["pick_reason"] = reason
|
||||
if row.get("signal_tier") == "CORE":
|
||||
row["signal_tier"] = "PASS"
|
||||
self._append_reason(row, reason)
|
||||
|
||||
@staticmethod
|
||||
def _append_reason(row: Dict[str, Any], reason: str) -> None:
|
||||
key = "decision_reasons" if "decision_reasons" in row else "reasons"
|
||||
reasons = list(row.get(key) or [])
|
||||
if reason not in reasons:
|
||||
reasons.append(reason)
|
||||
row[key] = reasons[:6]
|
||||
|
||||
def _brain_stake(self, row: Dict[str, Any], score: float) -> float:
|
||||
existing = self._safe_float(row.get("stake_units"), 0.0) or 0.0
|
||||
odds = self._safe_float(row.get("odds"), 0.0) or 0.0
|
||||
if odds <= 1.0:
|
||||
return 0.0
|
||||
cap = 2.0 if score >= 82.0 else 1.2
|
||||
if score < 78.0:
|
||||
cap = 0.8
|
||||
return round(max(0.25, min(existing if existing > 0 else cap, cap)), 1)
|
||||
|
||||
@staticmethod
|
||||
def _decision_band(main_pick: Optional[Dict[str, Any]]) -> str:
|
||||
if not main_pick:
|
||||
return "LOW"
|
||||
score = float((main_pick.get("betting_brain") or {}).get("score", 0.0) or 0.0)
|
||||
if score >= 82.0:
|
||||
return "HIGH"
|
||||
if score >= 72.0:
|
||||
return "MEDIUM"
|
||||
return "LOW"
|
||||
|
||||
@staticmethod
|
||||
def _top_decisions(decisions: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
ordered = sorted(decisions, key=lambda d: float(d.get("score", 0.0) or 0.0), reverse=True)
|
||||
return [
|
||||
{
|
||||
"action": item.get("action"),
|
||||
"score": item.get("score"),
|
||||
"summary": item.get("summary"),
|
||||
"vetoes": item.get("vetoes", []),
|
||||
"issues": item.get("issues", []),
|
||||
}
|
||||
for item in ordered[:5]
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _summary(action: str, market: str, pick: str, positives: List[str], issues: List[str], vetoes: List[str]) -> str:
|
||||
if action == "BET":
|
||||
return f"{market} {pick} approved: evidence is aligned enough for a controlled stake."
|
||||
if action == "WATCH":
|
||||
return f"{market} {pick} is interesting but not clean enough for stake."
|
||||
if vetoes:
|
||||
return f"{market} {pick} rejected: {', '.join(vetoes[:3])}."
|
||||
if issues:
|
||||
return f"{market} {pick} rejected: {', '.join(issues[:3])}."
|
||||
return f"{market} {pick} rejected by evidence score."
|
||||
|
||||
def _market_probability(self, row: Dict[str, Any], package: Dict[str, Any]) -> Optional[float]:
|
||||
direct = self._safe_float(row.get("probability"))
|
||||
if direct is not None:
|
||||
return direct
|
||||
board = package.get("market_board") or {}
|
||||
payload = board.get(str(row.get("market") or "")) if isinstance(board, dict) else None
|
||||
probs = payload.get("probs") if isinstance(payload, dict) else None
|
||||
if not isinstance(probs, dict):
|
||||
return None
|
||||
key = self._prob_key(str(row.get("market") or ""), str(row.get("pick") or ""))
|
||||
return self._safe_float(probs.get(key)) if key else None
|
||||
|
||||
def _v27_probability(self, market: str, pick: str, v27_engine: Dict[str, Any]) -> Optional[float]:
|
||||
predictions = v27_engine.get("predictions") or {}
|
||||
ms = predictions.get("ms") or {}
|
||||
ou25 = predictions.get("ou25") or {}
|
||||
if market == "MS":
|
||||
return self._safe_float(ms.get({"1": "home", "X": "draw", "2": "away"}.get(pick, "")))
|
||||
if market == "DC":
|
||||
home = self._safe_float(ms.get("home"), 0.0) or 0.0
|
||||
draw = self._safe_float(ms.get("draw"), 0.0) or 0.0
|
||||
away = self._safe_float(ms.get("away"), 0.0) or 0.0
|
||||
return {"1X": home + draw, "X2": draw + away, "12": home + away}.get(pick)
|
||||
if market == "OU25":
|
||||
key = self._prob_key(market, pick)
|
||||
return self._safe_float(ou25.get(key)) if key else None
|
||||
return None
|
||||
|
||||
def _triple_value(self, package: Dict[str, Any], key: Optional[str]) -> Optional[Dict[str, Any]]:
|
||||
if not key:
|
||||
return None
|
||||
value = ((package.get("v27_engine") or {}).get("triple_value") or {}).get(key)
|
||||
return value if isinstance(value, dict) else None
|
||||
|
||||
def _triple_key(self, market: str, pick: str) -> Optional[str]:
|
||||
prob_key = self._prob_key(market, pick)
|
||||
if market == "MS":
|
||||
return {"1": "home", "2": "away"}.get(pick)
|
||||
if market == "DC" and pick.upper() in {"1X", "X2", "12"}:
|
||||
return f"dc_{pick.lower()}"
|
||||
if market in {"OU15", "OU25", "OU35"} and prob_key == "over":
|
||||
return f"{market.lower()}_over"
|
||||
if market == "BTTS" and prob_key == "yes":
|
||||
return "btts_yes"
|
||||
if market == "HT":
|
||||
return {"1": "ht_home", "2": "ht_away"}.get(pick)
|
||||
if market in {"HT_OU05", "HT_OU15"} and prob_key == "over":
|
||||
return f"{market.lower()}_over"
|
||||
if market == "OE" and prob_key == "odd":
|
||||
return "oe_odd"
|
||||
if market == "CARDS" and prob_key == "over":
|
||||
return "cards_over"
|
||||
if market == "HTFT" and "/" in pick:
|
||||
return f"htft_{pick.replace('/', '').lower()}"
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _prob_key(market: str, pick: str) -> Optional[str]:
|
||||
norm = str(pick or "").strip().casefold()
|
||||
if market in {"MS", "HT", "HCAP"}:
|
||||
return pick if pick in {"1", "X", "2"} else None
|
||||
if market == "DC":
|
||||
return pick.upper() if pick.upper() in {"1X", "X2", "12"} else None
|
||||
if market in {"OU15", "OU25", "OU35", "HT_OU05", "HT_OU15", "CARDS"}:
|
||||
if "over" in norm or "ust" in norm or "üst" in norm:
|
||||
return "over"
|
||||
if "under" in norm or "alt" in norm:
|
||||
return "under"
|
||||
if market == "BTTS":
|
||||
if "yes" in norm or "var" in norm:
|
||||
return "yes"
|
||||
if "no" in norm or "yok" in norm:
|
||||
return "no"
|
||||
if market == "OE":
|
||||
if "odd" in norm or "tek" in norm:
|
||||
return "odd"
|
||||
if "even" in norm or "cift" in norm or "çift" in norm:
|
||||
return "even"
|
||||
if market == "HTFT" and "/" in pick:
|
||||
return pick
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _safe_float(value: Any, default: Optional[float] = None) -> Optional[float]:
|
||||
try:
|
||||
return float(value)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
@@ -30,12 +30,18 @@ from models.v20_ensemble import FullMatchPrediction
|
||||
from models.v25_ensemble import V25Predictor, get_v25_predictor
|
||||
from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge
|
||||
from features.odds_band_analyzer import OddsBandAnalyzer
|
||||
try:
|
||||
from models.basketball_v25 import (
|
||||
BasketballMatchPrediction,
|
||||
get_basketball_v25_predictor,
|
||||
)
|
||||
except ImportError:
|
||||
BasketballMatchPrediction = Any
|
||||
def get_basketball_v25_predictor():
|
||||
raise ImportError("Basketball predictor is not available")
|
||||
from core.engines.player_predictor import PlayerPrediction, get_player_predictor
|
||||
from services.feature_enrichment import FeatureEnrichmentService
|
||||
from services.betting_brain import BettingBrain
|
||||
from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
|
||||
from utils.top_leagues import load_top_league_ids
|
||||
from utils.league_reliability import load_league_reliability
|
||||
@@ -69,6 +75,7 @@ class MatchData:
|
||||
substate: Optional[str] = None
|
||||
current_score_home: Optional[int] = None
|
||||
current_score_away: Optional[int] = None
|
||||
lineup_confidence: float = 0.0
|
||||
|
||||
|
||||
class SingleMatchOrchestrator:
|
||||
@@ -144,7 +151,7 @@ class SingleMatchOrchestrator:
|
||||
self.v26_shadow_engine: Optional[V26ShadowEngine] = None
|
||||
self.basketball_predictor: Optional[Any] = None
|
||||
self.dsn = get_clean_dsn()
|
||||
self.engine_mode = str(os.getenv("AI_ENGINE_MODE", "v25")).strip().lower()
|
||||
self.engine_mode = str(os.getenv("AI_ENGINE_MODE", "v28-pro-max")).strip().lower()
|
||||
self.top_league_ids = load_top_league_ids()
|
||||
self.league_reliability = load_league_reliability()
|
||||
self.enrichment = FeatureEnrichmentService()
|
||||
@@ -527,12 +534,18 @@ class SingleMatchOrchestrator:
|
||||
}
|
||||
|
||||
def _get_squad_features(self, data: MatchData) -> Dict[str, float]:
|
||||
"""Non-fatal squad analysis. Returns zero-defaults on failure."""
|
||||
"""Non-fatal squad analysis. Returns neutral-average defaults on failure.
|
||||
|
||||
Design note (V32-fix): Previous 0.0 defaults caused the model to treat
|
||||
missing lineups as 'both teams have zero quality', producing overly
|
||||
conservative predictions (e.g. static 1.5 Under). Neutral averages let
|
||||
the model fall back on stronger signals (odds, ELO, form, H2H).
|
||||
"""
|
||||
defaults = {
|
||||
'home_squad_quality': 0.0, 'away_squad_quality': 0.0, 'squad_diff': 0.0,
|
||||
'home_key_players': 0.0, 'away_key_players': 0.0,
|
||||
'home_squad_quality': 0.50, 'away_squad_quality': 0.50, 'squad_diff': 0.0,
|
||||
'home_key_players': 3.0, 'away_key_players': 3.0,
|
||||
'home_missing_impact': 0.0, 'away_missing_impact': 0.0,
|
||||
'home_goals_form': 0.0, 'away_goals_form': 0.0,
|
||||
'home_goals_form': 1.3, 'away_goals_form': 1.3,
|
||||
}
|
||||
try:
|
||||
engine = get_player_predictor()
|
||||
@@ -559,27 +572,186 @@ class SingleMatchOrchestrator:
|
||||
print(f"⚠️ Squad features failed: {e}")
|
||||
return defaults
|
||||
|
||||
# ── V25 internal key → _build_v25_prediction key mapping ──
|
||||
_V25_KEY_MAP = {
|
||||
"ms": "MS",
|
||||
"ou15": "OU15",
|
||||
"ou25": "OU25",
|
||||
"ou35": "OU35",
|
||||
"btts": "BTTS",
|
||||
"ht_result": "HT",
|
||||
"ht_ou05": "HT_OU05",
|
||||
"ht_ou15": "HT_OU15",
|
||||
"htft": "HTFT",
|
||||
"cards_ou45": "CARDS",
|
||||
"handicap_ms": "HCAP",
|
||||
"odd_even": "OE",
|
||||
}
|
||||
|
||||
def _get_v25_signal(
|
||||
self,
|
||||
data: MatchData,
|
||||
features: Optional[Dict[str, float]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Get V25 ensemble predictions for all available markets.
|
||||
Returns a dict keyed by UPPERCASE market name (MS, OU25, BTTS, etc.)
|
||||
each with a 'probs' sub-dict that _prob_map can consume.
|
||||
|
||||
CRITICAL: Keys MUST be uppercase to match _build_v25_prediction lookups.
|
||||
"""
|
||||
v25 = self._get_v25_predictor()
|
||||
feature_row = features or self._build_v25_features(data)
|
||||
return v25.predict_market_bundle(
|
||||
features=feature_row,
|
||||
odds=self._sanitize_v25_odds(data.odds_data or {}),
|
||||
)
|
||||
|
||||
signal: Dict[str, Any] = {}
|
||||
|
||||
def _temperature_scale(probs_dict: Dict[str, float], temperature: float = 2.5) -> Dict[str, float]:
|
||||
"""
|
||||
Apply temperature scaling to soften overconfident model outputs.
|
||||
|
||||
LightGBM often produces extreme probabilities (e.g., 0.999 / 0.001).
|
||||
Temperature scaling converts to log-odds, divides by T, then re-normalizes.
|
||||
T=1.0 → no change, T>1 → softer probabilities.
|
||||
|
||||
Standard approach for post-hoc model calibration (Guo et al., 2017).
|
||||
"""
|
||||
import math
|
||||
eps = 1e-7 # numerical stability
|
||||
n = len(probs_dict)
|
||||
|
||||
# Determine appropriate temperature based on market type
|
||||
# Binary markets (2-class) tend to be more overconfident in LGB
|
||||
if n <= 2:
|
||||
T = max(temperature, 2.0)
|
||||
elif n == 3:
|
||||
T = max(temperature * 0.8, 1.5) # 3-way slightly less aggressive
|
||||
else:
|
||||
T = max(temperature * 0.6, 1.3) # 9-way (HTFT) already spread
|
||||
|
||||
# Convert to log-odds and apply temperature
|
||||
labels = list(probs_dict.keys())
|
||||
log_odds = []
|
||||
for label in labels:
|
||||
p = max(eps, min(1.0 - eps, float(probs_dict[label])))
|
||||
log_odds.append(math.log(p) / T)
|
||||
|
||||
# Softmax re-normalization
|
||||
max_lo = max(log_odds)
|
||||
exp_vals = [math.exp(lo - max_lo) for lo in log_odds]
|
||||
total = sum(exp_vals)
|
||||
|
||||
scaled = {}
|
||||
for i, label in enumerate(labels):
|
||||
scaled[label] = exp_vals[i] / total
|
||||
|
||||
return scaled
|
||||
|
||||
def _enrich_signal_entry(probs_dict: Dict[str, float]) -> Dict[str, Any]:
    """Turn raw per-label probabilities into a full signal entry.

    The probabilities are first softened with ``_temperature_scale``
    (temperature 2.5). The entry carries the scaled distribution, the
    untouched input under ``raw_probs``, the most likely label, its scaled
    probability and that probability as a percentage rounded to one decimal.
    """
    calibrated = _temperature_scale(probs_dict, temperature=2.5)
    # The first label with the highest scaled probability wins.
    top_label, top_value = max(calibrated.items(), key=lambda pair: pair[1])
    top_prob = float(top_value)
    return {
        "probs": calibrated,
        "raw_probs": probs_dict,  # originals kept for debugging
        "pick": top_label,
        "probability": top_prob,
        "confidence": round(top_prob * 100.0, 1),
    }
|
||||
|
||||
# Core markets using dedicated methods
|
||||
h, d, a = v25.predict_ms(feature_row)
|
||||
signal["MS"] = _enrich_signal_entry({"1": h, "X": d, "2": a})
|
||||
print(f" [V25-SIGNAL] MS → H={h:.4f} D={d:.4f} A={a:.4f}")
|
||||
|
||||
over25, under25 = v25.predict_ou25(feature_row)
|
||||
signal["OU25"] = _enrich_signal_entry({"Over": over25, "Under": under25})
|
||||
print(f" [V25-SIGNAL] OU25 → O={over25:.4f} U={under25:.4f}")
|
||||
|
||||
btts_y, btts_n = v25.predict_btts(feature_row)
|
||||
signal["BTTS"] = _enrich_signal_entry({"Yes": btts_y, "No": btts_n})
|
||||
print(f" [V25-SIGNAL] BTTS → Y={btts_y:.4f} N={btts_n:.4f}")
|
||||
|
||||
# Additional markets via generic predict_market
|
||||
for model_key, label_map in [
|
||||
("ou15", {"Over": 0, "Under": None}),
|
||||
("ou35", {"Over": 0, "Under": None}),
|
||||
("ht_result", {"1": 0, "X": 1, "2": 2}),
|
||||
("ht_ou05", {"Over": 0, "Under": None}),
|
||||
("ht_ou15", {"Over": 0, "Under": None}),
|
||||
("htft", None),
|
||||
("cards_ou45", {"Over": 0, "Under": None}),
|
||||
("handicap_ms", {"1": 0, "X": 1, "2": 2}),
|
||||
("odd_even", {"Odd": 0, "Even": None}),
|
||||
]:
|
||||
out_key = self._V25_KEY_MAP.get(model_key, model_key.upper())
|
||||
if not v25.has_market(model_key):
|
||||
continue
|
||||
raw = v25.predict_market(model_key, feature_row)
|
||||
if raw is None:
|
||||
continue
|
||||
|
||||
if label_map is None:
|
||||
# HTFT — 9 combinations
|
||||
htft_labels = ["1/1", "1/X", "1/2", "X/1", "X/X", "X/2", "2/1", "2/X", "2/2"]
|
||||
probs_dict = {}
|
||||
for i, label in enumerate(htft_labels):
|
||||
probs_dict[label] = float(raw[i]) if i < len(raw) else 0.0
|
||||
signal[out_key] = _enrich_signal_entry(probs_dict)
|
||||
elif len(label_map) == 2:
|
||||
# Binary market
|
||||
labels = list(label_map.keys())
|
||||
p = float(raw[0]) if len(raw) >= 1 else None
|
||||
if p is None:
|
||||
print(f" [V25-SIGNAL] {out_key} → EMPTY raw output, skipped")
|
||||
continue
|
||||
signal[out_key] = _enrich_signal_entry({labels[0]: p, labels[1]: 1.0 - p})
|
||||
elif len(label_map) == 3:
|
||||
# 3-class market
|
||||
labels = list(label_map.keys())
|
||||
probs_dict = {}
|
||||
for i, label in enumerate(labels):
|
||||
if i >= len(raw):
|
||||
print(f" [V25-SIGNAL] {out_key} → insufficient probabilities in raw output")
|
||||
break
|
||||
probs_dict[label] = float(raw[i])
|
||||
else:
|
||||
signal[out_key] = _enrich_signal_entry(probs_dict)
|
||||
|
||||
if out_key in signal:
|
||||
print(f" [V25-SIGNAL] {out_key} → {signal[out_key]['probs']}")
|
||||
|
||||
print(f" [V25-SIGNAL] Total markets with real predictions: {len(signal)}")
|
||||
if not signal:
|
||||
raise RuntimeError("V25 model produced ZERO market predictions — cannot continue")
|
||||
|
||||
return signal
|
||||
|
||||
@staticmethod
|
||||
def _prob_map(signal: Optional[Dict[str, Any]], market: str, defaults: Dict[str, float]) -> Dict[str, float]:
|
||||
"""Extract normalised probabilities from signal.
|
||||
|
||||
If the signal contains real model output for this market, use it.
|
||||
If the market is missing from the signal, log a warning and return
|
||||
the defaults as a LAST RESORT (so the pipeline doesn't crash).
|
||||
The defaults are ONLY used for non-core / secondary markets that
|
||||
may not have a trained model yet (e.g. CARDS, HCAP, OE).
|
||||
"""
|
||||
market_payload = signal.get(market, {}) if isinstance(signal, dict) else {}
|
||||
probs = market_payload.get("probs", {}) if isinstance(market_payload, dict) else {}
|
||||
if not isinstance(probs, dict) or not probs:
|
||||
print(f" ⚠️ [PROB_MAP] Market '{market}' NOT found in V25 signal — model output missing")
|
||||
return dict(defaults)
|
||||
out = {key: float(probs.get(key, value)) for key, value in defaults.items()}
|
||||
total = sum(out.values())
|
||||
if total <= 0:
|
||||
print(f" ⚠️ [PROB_MAP] Market '{market}' has zero total probability")
|
||||
return dict(defaults)
|
||||
return {key: value / total for key, value in out.items()}
|
||||
|
||||
@@ -730,7 +902,8 @@ class SingleMatchOrchestrator:
|
||||
prediction.cards_confidence,
|
||||
prediction.handicap_confidence,
|
||||
)
|
||||
lineup_penalty = 12.0 if data.lineup_source == "none" else 7.0 if data.lineup_source == "probable_xi" else 0.0
|
||||
lineup_conf = max(0.0, min(1.0, float(getattr(data, "lineup_confidence", 0.0) or 0.0)))
|
||||
lineup_penalty = 12.0 if data.lineup_source == "none" else max(1.5, (1.0 - lineup_conf) * 8.0) if data.lineup_source == "probable_xi" else 0.0
|
||||
referee_penalty = 6.0 if not data.referee_name else 0.0
|
||||
parity_penalty = 8.0 if abs(ms_edge) < 0.08 else 0.0
|
||||
prediction.risk_score = round(min(100.0, max(10.0, 100.0 - max_market_conf + lineup_penalty + referee_penalty + parity_penalty)), 1)
|
||||
@@ -747,6 +920,8 @@ class SingleMatchOrchestrator:
|
||||
prediction.risk_warnings = []
|
||||
if data.lineup_source == "probable_xi":
|
||||
prediction.risk_warnings.append("lineup_probable_not_confirmed")
|
||||
if lineup_conf < 0.65:
|
||||
prediction.risk_warnings.append("lineup_projection_low_confidence")
|
||||
if data.lineup_source == "none":
|
||||
prediction.risk_warnings.append("lineup_unavailable")
|
||||
if not data.referee_name:
|
||||
@@ -1142,7 +1317,9 @@ class SingleMatchOrchestrator:
|
||||
if band_val.get("is_value"):
|
||||
boost = min(8.0, boost + 3.0) # Triple confirmation extra boost
|
||||
prediction.ms_confidence = min(95.0, prediction.ms_confidence + boost)
|
||||
base_package["prediction"]["ms_confidence"] = prediction.ms_confidence
|
||||
market_board = base_package.get("market_board")
|
||||
if isinstance(market_board, dict) and isinstance(market_board.get("MS"), dict):
|
||||
market_board["MS"]["confidence"] = round(float(prediction.ms_confidence), 1)
|
||||
base_package["v27_engine"]["consensus"] = "AGREE"
|
||||
else:
|
||||
base_package["v27_engine"]["consensus"] = "DISAGREE"
|
||||
@@ -1157,8 +1334,10 @@ class SingleMatchOrchestrator:
|
||||
base_package.setdefault("analysis_details", {})
|
||||
base_package["analysis_details"]["v27_loaded"] = False
|
||||
|
||||
mode = str(getattr(self, "engine_mode", "v25") or "v25").lower()
|
||||
if mode not in {"v25", "v26", "dual"}:
|
||||
base_package = self._apply_upper_brain_guards(base_package)
|
||||
|
||||
mode = str(getattr(self, "engine_mode", "v28-pro-max") or "v28-pro-max").lower()
|
||||
if mode not in {"v25", "v26", "dual", "v28", "v28-pro-max"}:
|
||||
mode = "v25"
|
||||
|
||||
quality = base_package.get("data_quality", self._compute_data_quality(data))
|
||||
@@ -1185,6 +1364,304 @@ class SingleMatchOrchestrator:
|
||||
return merged
|
||||
return base_package
|
||||
|
||||
def _apply_upper_brain_guards(self, package: Dict[str, Any]) -> Dict[str, Any]:
    """Hand the assembled prediction package to ``BettingBrain`` for the final verdict.

    Args:
        package: The prediction package built so far.

    Returns:
        Whatever ``BettingBrain().judge(package)`` returns.

    Note:
        This method previously carried a long inline guard implementation
        (V25/V27 divergence vetoes, band-sample checks, main-pick
        re-selection) *after* this unconditional ``return``. That code could
        never run and has been removed; behaviour is unchanged.
    """
    return BettingBrain().judge(package)
|
||||
|
||||
def _upper_brain_assessment(
|
||||
self,
|
||||
item: Dict[str, Any],
|
||||
package: Dict[str, Any],
|
||||
) -> Dict[str, Any]:
|
||||
market = str(item.get("market") or "")
|
||||
pick = str(item.get("pick") or "")
|
||||
if not market or not pick:
|
||||
return {"applies": False}
|
||||
|
||||
v27_engine = package.get("v27_engine") or {}
|
||||
triple_value = v27_engine.get("triple_value") or {}
|
||||
model_prob = self._upper_brain_market_probability(item, package)
|
||||
v27_prob = self._upper_brain_v27_probability(market, pick, v27_engine)
|
||||
triple_key = self._upper_brain_triple_key(market, pick)
|
||||
triple = triple_value.get(triple_key) if triple_key else None
|
||||
|
||||
veto = False
|
||||
downgrade = False
|
||||
reasons: List[str] = []
|
||||
divergence = None
|
||||
|
||||
if model_prob is not None and v27_prob is not None:
|
||||
divergence = abs(float(model_prob) - float(v27_prob))
|
||||
if divergence >= 0.18:
|
||||
veto = True
|
||||
reasons.append("upper_brain_v25_v27_divergence")
|
||||
elif divergence >= 0.12:
|
||||
downgrade = True
|
||||
reasons.append("upper_brain_v25_v27_warning")
|
||||
|
||||
if isinstance(triple, dict):
|
||||
band_sample = int(float(triple.get("band_sample", 0) or 0))
|
||||
is_value = bool(triple.get("is_value"))
|
||||
if market == "DC":
|
||||
if band_sample < 8:
|
||||
veto = True
|
||||
reasons.append("upper_brain_band_sample_too_low")
|
||||
elif not is_value:
|
||||
veto = True
|
||||
reasons.append("upper_brain_triple_value_rejected")
|
||||
elif market in {"MS", "OU25"} and band_sample > 0 and band_sample < 8:
|
||||
downgrade = True
|
||||
reasons.append("upper_brain_band_sample_thin")
|
||||
elif market in {"OU15", "HT_OU05"} and band_sample < 8:
|
||||
downgrade = True
|
||||
reasons.append("upper_brain_band_sample_thin")
|
||||
|
||||
consensus = str(v27_engine.get("consensus") or "").upper()
|
||||
if consensus == "DISAGREE" and market in {"MS", "DC"} and not veto:
|
||||
downgrade = True
|
||||
reasons.append("upper_brain_consensus_disagree")
|
||||
|
||||
applies = bool(reasons or triple is not None or v27_prob is not None)
|
||||
return {
|
||||
"applies": applies,
|
||||
"veto": veto,
|
||||
"downgrade": downgrade,
|
||||
"reason_codes": reasons,
|
||||
"model_prob": round(float(model_prob), 4) if model_prob is not None else None,
|
||||
"v27_prob": round(float(v27_prob), 4) if v27_prob is not None else None,
|
||||
"divergence": round(float(divergence), 4) if divergence is not None else None,
|
||||
"triple_key": triple_key,
|
||||
"triple_value": triple,
|
||||
}
|
||||
|
||||
def _upper_brain_market_probability(
|
||||
self,
|
||||
item: Dict[str, Any],
|
||||
package: Dict[str, Any],
|
||||
) -> Optional[float]:
|
||||
raw_prob = item.get("probability")
|
||||
if raw_prob is not None:
|
||||
try:
|
||||
return float(raw_prob)
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
|
||||
market = str(item.get("market") or "")
|
||||
pick = str(item.get("pick") or "")
|
||||
board = package.get("market_board") or {}
|
||||
payload = board.get(market) if isinstance(board, dict) else None
|
||||
probs = payload.get("probs") if isinstance(payload, dict) else None
|
||||
if not isinstance(probs, dict):
|
||||
return None
|
||||
|
||||
prob_key = self._upper_brain_prob_key(market, pick)
|
||||
if prob_key is None:
|
||||
return None
|
||||
try:
|
||||
return float(probs.get(prob_key))
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
def _upper_brain_v27_probability(
|
||||
self,
|
||||
market: str,
|
||||
pick: str,
|
||||
v27_engine: Dict[str, Any],
|
||||
) -> Optional[float]:
|
||||
predictions = v27_engine.get("predictions") or {}
|
||||
ms = predictions.get("ms") or {}
|
||||
ou25 = predictions.get("ou25") or {}
|
||||
|
||||
if market == "MS":
|
||||
return self._safe_float(ms.get({"1": "home", "X": "draw", "2": "away"}.get(pick, "")))
|
||||
if market == "DC":
|
||||
if pick == "1X":
|
||||
return self._safe_float(ms.get("home"), 0.0) + self._safe_float(ms.get("draw"), 0.0)
|
||||
if pick == "X2":
|
||||
return self._safe_float(ms.get("draw"), 0.0) + self._safe_float(ms.get("away"), 0.0)
|
||||
if pick == "12":
|
||||
return self._safe_float(ms.get("home"), 0.0) + self._safe_float(ms.get("away"), 0.0)
|
||||
if market == "OU25":
|
||||
prob_key = self._upper_brain_prob_key(market, pick)
|
||||
return self._safe_float(ou25.get(prob_key)) if prob_key else None
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _upper_brain_prob_key(market: str, pick: str) -> Optional[str]:
|
||||
pick_norm = str(pick or "").strip().casefold()
|
||||
if market in {"MS", "HT", "HCAP"}:
|
||||
return pick if pick in {"1", "X", "2"} else None
|
||||
if market == "DC":
|
||||
return pick.upper() if pick.upper() in {"1X", "X2", "12"} else None
|
||||
if market in {"OU15", "OU25", "OU35", "HT_OU05", "HT_OU15", "CARDS"}:
|
||||
if "over" in pick_norm or "st" in pick_norm:
|
||||
return "over"
|
||||
if "under" in pick_norm or "alt" in pick_norm:
|
||||
return "under"
|
||||
if market == "BTTS":
|
||||
if "yes" in pick_norm or "var" in pick_norm:
|
||||
return "yes"
|
||||
if "no" in pick_norm or "yok" in pick_norm:
|
||||
return "no"
|
||||
if market == "OE":
|
||||
if "odd" in pick_norm or "tek" in pick_norm:
|
||||
return "odd"
|
||||
if "even" in pick_norm or "ift" in pick_norm:
|
||||
return "even"
|
||||
if market == "HTFT" and "/" in pick:
|
||||
return pick
|
||||
return None
|
||||
|
||||
def _upper_brain_triple_key(self, market: str, pick: str) -> Optional[str]:
|
||||
prob_key = self._upper_brain_prob_key(market, pick)
|
||||
if market == "MS":
|
||||
return {"1": "home", "2": "away"}.get(pick)
|
||||
if market == "DC":
|
||||
return f"dc_{pick.lower()}" if pick.upper() in {"1X", "X2", "12"} else None
|
||||
if market in {"OU15", "OU25", "OU35"} and prob_key == "over":
|
||||
return f"{market.lower()}_over"
|
||||
if market == "BTTS" and prob_key == "yes":
|
||||
return "btts_yes"
|
||||
if market == "HT":
|
||||
return {"1": "ht_home", "2": "ht_away"}.get(pick)
|
||||
if market in {"HT_OU05", "HT_OU15"} and prob_key == "over":
|
||||
return f"{market.lower()}_over"
|
||||
if market == "OE" and prob_key == "odd":
|
||||
return "oe_odd"
|
||||
if market == "CARDS" and prob_key == "over":
|
||||
return "cards_over"
|
||||
if market == "HTFT" and "/" in pick:
|
||||
return f"htft_{pick.replace('/', '').lower()}"
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _safe_float(value: Any, default: Optional[float] = None) -> Optional[float]:
|
||||
try:
|
||||
return float(value)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
def analyze_match_htms(self, match_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
HT/MS focused response for upset-hunting workflows.
|
||||
@@ -2104,7 +2581,7 @@ class SingleMatchOrchestrator:
|
||||
return None
|
||||
|
||||
odds_data = self._extract_odds(cur, row)
|
||||
home_lineup, away_lineup, lineup_source = self._extract_lineups(cur, row)
|
||||
home_lineup, away_lineup, lineup_source, lineup_confidence = self._extract_lineups(cur, row)
|
||||
sidelined = self._parse_json_dict(row.get("sidelined"))
|
||||
match_date_ms = int(row.get("match_date_ms") or 0)
|
||||
league_id = str(row.get("league_id")) if row.get("league_id") else None
|
||||
@@ -2159,6 +2636,7 @@ class SingleMatchOrchestrator:
|
||||
status=str(row.get("status") or ""),
|
||||
state=row.get("state"),
|
||||
substate=row.get("substate"),
|
||||
lineup_confidence=lineup_confidence,
|
||||
current_score_home=(
|
||||
int(row.get("score_home"))
|
||||
if row.get("score_home") is not None
|
||||
@@ -2291,13 +2769,26 @@ class SingleMatchOrchestrator:
|
||||
self,
|
||||
cur: RealDictCursor,
|
||||
row: Dict[str, Any],
|
||||
) -> Tuple[Optional[List[str]], Optional[List[str]], str]:
|
||||
) -> Tuple[Optional[List[str]], Optional[List[str]], str, float]:
|
||||
live_lineups = row.get("lineups")
|
||||
home, away = self._parse_lineups_json(live_lineups)
|
||||
status_upper = str(row.get("status") or "").upper()
|
||||
state_upper = str(row.get("state") or "").upper()
|
||||
substate_upper = str(row.get("substate") or "").upper()
|
||||
can_trust_feed_lineups = (
|
||||
status_upper in {"LIVE", "1H", "2H", "HT", "FT", "FINISHED"}
|
||||
or state_upper in {"LIVE", "FIRSTHALF", "SECONDHALF", "POSTGAME", "POST_GAME"}
|
||||
or substate_upper in {"LIVE", "FIRSTHALF", "SECONDHALF"}
|
||||
)
|
||||
home, away = self._parse_lineups_json(live_lineups) if can_trust_feed_lineups else (None, None)
|
||||
if (home and len(home) >= 9) and (away and len(away) >= 9):
|
||||
return home, away, "confirmed_live"
|
||||
return home, away, "confirmed_live", 1.0
|
||||
|
||||
# fallback 1: current match participation table
|
||||
home_id = str(row["home_team_id"])
|
||||
away_id = str(row["away_team_id"])
|
||||
|
||||
# fallback 1: current match participation table.
|
||||
# Trust this only for live/finished matches; pre-match rows can be stale feed snapshots.
|
||||
if can_trust_feed_lineups:
|
||||
cur.execute(
|
||||
"""
|
||||
SELECT team_id, player_id
|
||||
@@ -2307,8 +2798,6 @@ class SingleMatchOrchestrator:
|
||||
""",
|
||||
(row["match_id"],),
|
||||
)
|
||||
home_id = str(row["home_team_id"])
|
||||
away_id = str(row["away_team_id"])
|
||||
rows = cur.fetchall()
|
||||
if rows:
|
||||
home_players = [str(r["player_id"]) for r in rows if str(r["team_id"]) == home_id]
|
||||
@@ -2318,21 +2807,40 @@ class SingleMatchOrchestrator:
|
||||
if not away and away_players:
|
||||
away = away_players
|
||||
if (home and len(home) >= 9) and (away and len(away) >= 9):
|
||||
return home, away, "confirmed_participation"
|
||||
return home, away, "confirmed_participation", 0.98
|
||||
|
||||
# fallback 2: probable XI from historical starts before match date
|
||||
before_date_ms = int(row.get("match_date_ms") or 0)
|
||||
sidelined = self._parse_json_dict(row.get("sidelined")) or {}
|
||||
home_excluded = self._sidelined_player_ids(sidelined.get("homeTeam"))
|
||||
away_excluded = self._sidelined_player_ids(sidelined.get("awayTeam"))
|
||||
used_probable = False
|
||||
if not home:
|
||||
home = self._build_probable_xi(cur, home_id, before_date_ms)
|
||||
home_conf = 0.0
|
||||
away_conf = 0.0
|
||||
if not home or len(home) < 9:
|
||||
home, home_conf = self._build_probable_xi(
|
||||
cur,
|
||||
home_id,
|
||||
before_date_ms,
|
||||
excluded_player_ids=home_excluded,
|
||||
)
|
||||
used_probable = used_probable or bool(home)
|
||||
if not away:
|
||||
away = self._build_probable_xi(cur, away_id, before_date_ms)
|
||||
if not away or len(away) < 9:
|
||||
away, away_conf = self._build_probable_xi(
|
||||
cur,
|
||||
away_id,
|
||||
before_date_ms,
|
||||
excluded_player_ids=away_excluded,
|
||||
)
|
||||
used_probable = used_probable or bool(away)
|
||||
|
||||
if used_probable:
|
||||
return home, away, "probable_xi"
|
||||
return home, away, "none"
|
||||
inferred_conf = min(
|
||||
home_conf if home else 0.0,
|
||||
away_conf if away else 0.0,
|
||||
)
|
||||
return home, away, "probable_xi", inferred_conf
|
||||
return home, away, "none", 0.0
|
||||
|
||||
def _calculate_team_form(
|
||||
self,
|
||||
@@ -2445,35 +2953,172 @@ class SingleMatchOrchestrator:
|
||||
cur: RealDictCursor,
|
||||
team_id: str,
|
||||
before_date_ms: int,
|
||||
max_days: int = 30,
|
||||
) -> Optional[List[str]]:
|
||||
match_limit: int = 5,
|
||||
lookback_days: int = 370,
|
||||
max_staleness_days: int = 120,
|
||||
excluded_player_ids: Optional[Set[str]] = None,
|
||||
) -> Tuple[Optional[List[str]], float]:
|
||||
if not team_id:
|
||||
return None
|
||||
return None, 0.0
|
||||
min_date_ms = max(0, before_date_ms - (lookback_days * 24 * 60 * 60 * 1000))
|
||||
|
||||
min_date_ms = max(0, before_date_ms - (max_days * 24 * 60 * 60 * 1000))
|
||||
cur.execute(
|
||||
"""
|
||||
SELECT
|
||||
mpp.player_id,
|
||||
COUNT(*) AS starts,
|
||||
MAX(m.mst_utc) AS last_start_ms
|
||||
m.id AS match_id,
|
||||
m.mst_utc,
|
||||
m.home_team_id,
|
||||
m.away_team_id
|
||||
FROM match_player_participation mpp
|
||||
JOIN matches m ON m.id = mpp.match_id
|
||||
WHERE mpp.team_id = %s
|
||||
AND mpp.is_starting = true
|
||||
AND m.status = 'FT'
|
||||
AND m.mst_utc < %s
|
||||
AND m.mst_utc >= %s
|
||||
GROUP BY mpp.player_id
|
||||
ORDER BY starts DESC, last_start_ms DESC
|
||||
LIMIT 11
|
||||
AND NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM match_player_participation later_mpp
|
||||
JOIN matches later_m ON later_m.id = later_mpp.match_id
|
||||
WHERE later_mpp.player_id = mpp.player_id
|
||||
AND later_mpp.team_id <> %s
|
||||
AND later_m.mst_utc > m.mst_utc
|
||||
AND later_m.mst_utc < %s
|
||||
AND (
|
||||
later_m.status = 'FT'
|
||||
OR later_m.state = 'postGame'
|
||||
OR (later_m.score_home IS NOT NULL AND later_m.score_away IS NOT NULL)
|
||||
)
|
||||
)
|
||||
AND m.id IN (
|
||||
SELECT m2.id
|
||||
FROM matches m2
|
||||
JOIN match_player_participation recent_mpp
|
||||
ON recent_mpp.match_id = m2.id
|
||||
AND recent_mpp.team_id = %s
|
||||
AND recent_mpp.is_starting = true
|
||||
WHERE (m2.home_team_id = %s OR m2.away_team_id = %s)
|
||||
AND (
|
||||
m2.status = 'FT'
|
||||
OR m2.state = 'postGame'
|
||||
OR (m2.score_home IS NOT NULL AND m2.score_away IS NOT NULL)
|
||||
)
|
||||
AND m2.mst_utc < %s
|
||||
AND m2.mst_utc >= %s
|
||||
GROUP BY m2.id
|
||||
HAVING COUNT(recent_mpp.*) >= 9
|
||||
ORDER BY MAX(m2.mst_utc) DESC
|
||||
LIMIT %s
|
||||
)
|
||||
ORDER BY m.mst_utc DESC
|
||||
""",
|
||||
(team_id, before_date_ms, min_date_ms),
|
||||
(
|
||||
team_id,
|
||||
team_id,
|
||||
before_date_ms,
|
||||
team_id,
|
||||
team_id,
|
||||
team_id,
|
||||
before_date_ms,
|
||||
min_date_ms,
|
||||
match_limit,
|
||||
),
|
||||
)
|
||||
rows = cur.fetchall()
|
||||
if not rows:
|
||||
return None
|
||||
return [str(r["player_id"]) for r in rows]
|
||||
return None, 0.0
|
||||
|
||||
latest_mst = max(int(row.get("mst_utc") or 0) for row in rows)
|
||||
age_days = (before_date_ms - latest_mst) / (24 * 60 * 60 * 1000)
|
||||
stale_projection = age_days > max_staleness_days
|
||||
|
||||
excluded = {str(pid) for pid in (excluded_player_ids or set()) if pid}
|
||||
match_order: Dict[str, int] = {}
|
||||
for row in rows:
|
||||
match_id = str(row["match_id"])
|
||||
if match_id not in match_order:
|
||||
match_order[match_id] = len(match_order)
|
||||
|
||||
player_scores: Dict[str, Dict[str, float]] = {}
|
||||
for row in rows:
|
||||
player_id = str(row["player_id"])
|
||||
if player_id in excluded:
|
||||
continue
|
||||
|
||||
idx = match_order.get(str(row["match_id"]), match_limit)
|
||||
recency_weight = max(1.0, float(match_limit - idx))
|
||||
score = recency_weight
|
||||
if idx == 0:
|
||||
score += 3.0
|
||||
elif idx == 1:
|
||||
score += 1.5
|
||||
|
||||
stats = player_scores.setdefault(
|
||||
player_id,
|
||||
{
|
||||
"score": 0.0,
|
||||
"starts": 0.0,
|
||||
"last_seen_rank": float(idx),
|
||||
},
|
||||
)
|
||||
stats["score"] += score
|
||||
stats["starts"] += 1.0
|
||||
stats["last_seen_rank"] = min(stats["last_seen_rank"], float(idx))
|
||||
|
||||
if not player_scores:
|
||||
return None, 0.0
|
||||
|
||||
ranked = sorted(
|
||||
player_scores.items(),
|
||||
key=lambda item: (
|
||||
item[1]["score"],
|
||||
item[1]["starts"],
|
||||
-item[1]["last_seen_rank"],
|
||||
),
|
||||
reverse=True,
|
||||
)
|
||||
lineup = [player_id for player_id, _ in ranked[:11]]
|
||||
|
||||
coverage = min(1.0, len(lineup) / 11.0)
|
||||
available_matches = max(1, len(match_order))
|
||||
history_score = min(1.0, available_matches / float(match_limit))
|
||||
core_stability = 0.0
|
||||
if ranked:
|
||||
stable_core = sum(1 for _, stats in ranked[:11] if stats["starts"] >= 2.0)
|
||||
core_stability = stable_core / 11.0
|
||||
|
||||
staleness_factor = max(
|
||||
0.35,
|
||||
min(1.0, float(max_staleness_days) / max(age_days, 1.0)),
|
||||
)
|
||||
confidence = (
|
||||
(coverage * 0.45) + (history_score * 0.25) + (core_stability * 0.30)
|
||||
) * staleness_factor
|
||||
if excluded:
|
||||
confidence *= 0.92
|
||||
|
||||
confidence_cap = 0.58 if stale_projection else 0.88
|
||||
return lineup or None, round(max(0.0, min(confidence_cap, confidence)), 3)
|
||||
|
||||
@staticmethod
|
||||
def _sidelined_player_ids(team_data: Any) -> Set[str]:
|
||||
if not isinstance(team_data, dict):
|
||||
return set()
|
||||
players = team_data.get("players")
|
||||
if not isinstance(players, list):
|
||||
return set()
|
||||
|
||||
ids: Set[str] = set()
|
||||
for player in players:
|
||||
if not isinstance(player, dict):
|
||||
continue
|
||||
player_id = (
|
||||
player.get("playerId")
|
||||
or player.get("player_id")
|
||||
or player.get("id")
|
||||
or player.get("personId")
|
||||
)
|
||||
if player_id:
|
||||
ids.add(str(player_id))
|
||||
return ids
|
||||
|
||||
def _parse_odds_json(self, odds_json: Any) -> Dict[str, float]:
|
||||
odds_json = self._parse_json_dict(odds_json)
|
||||
@@ -4267,7 +4912,8 @@ class SingleMatchOrchestrator:
|
||||
lineup_sensitive = market in ("MS", "BTTS", "HT", "HTFT")
|
||||
lineup_penalty = 5.0 if lineup_missing and lineup_sensitive else 0.0
|
||||
if data.lineup_source == "probable_xi" and lineup_sensitive:
|
||||
lineup_penalty += 4.0
|
||||
lineup_conf = max(0.0, min(1.0, float(getattr(data, "lineup_confidence", 0.0) or 0.0)))
|
||||
lineup_penalty += max(1.0, (1.0 - lineup_conf) * 5.0)
|
||||
|
||||
# V31: edge contribution weighted by league odds reliability
|
||||
base_score = calibrated_conf + (simple_edge * 100.0 * edge_multiplier)
|
||||
@@ -4438,8 +5084,11 @@ class SingleMatchOrchestrator:
|
||||
away_n = len(data.away_lineup or [])
|
||||
lineup_score = min(home_n, away_n) / 11.0 if min(home_n, away_n) > 0 else 0.0
|
||||
if data.lineup_source == "probable_xi":
|
||||
lineup_score *= 0.55
|
||||
lineup_conf = max(0.0, min(1.0, float(getattr(data, "lineup_confidence", 0.0) or 0.0)))
|
||||
lineup_score *= max(0.45, min(0.88, lineup_conf))
|
||||
flags.append("lineup_probable_not_confirmed")
|
||||
if lineup_conf < 0.65:
|
||||
flags.append("lineup_projection_low_confidence")
|
||||
elif data.lineup_source == "none":
|
||||
flags.append("lineup_unavailable")
|
||||
if lineup_score < 0.7:
|
||||
@@ -4464,6 +5113,7 @@ class SingleMatchOrchestrator:
|
||||
"home_lineup_count": home_n,
|
||||
"away_lineup_count": away_n,
|
||||
"lineup_source": data.lineup_source,
|
||||
"lineup_confidence": round(float(getattr(data, "lineup_confidence", 0.0) or 0.0), 3),
|
||||
"flags": flags,
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,370 @@
|
||||
# V28-Pro-Max Model Architecture Documentation
|
||||
|
||||
> **Model Version:** `v28-pro-max`
|
||||
> **Engine File:** `ai-engine/services/single_match_orchestrator.py` (4656 satır)
|
||||
> **Son Güncelleme:** 2026-04-24
|
||||
|
||||
---
|
||||
|
||||
## 1. Genel Bakış
|
||||
|
||||
V28-Pro-Max, üç bağımsız tahmin katmanını (V25, V27, V28) tek bir orchestrator içinde birleştiren **üçlü hibrit AI tahmin motorudur**. Her maç için 13+ bahis pazarını analiz eder, olasılık hesaplar, risk değerlendirir ve "Value Bet" tespiti yapar.
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────┐
|
||||
│ SingleMatchOrchestrator │
|
||||
│ │
|
||||
│ ┌──────────┐ ┌──────────┐ ┌────────────────┐ │
|
||||
│ │ V25 │ │ V27 │ │ V28 │ │
|
||||
│ │ Ensemble │ │ Dual-Eng │ │ Odds-Band │ │
|
||||
│ │ (XGB+LGB)│ │ Divergnce│ │ Historical │ │
|
||||
│ └────┬─────┘ └────┬─────┘ └───────┬────────┘ │
|
||||
│ │ │ │ │
|
||||
│ └──────────────┼────────────────┘ │
|
||||
│ ▼ │
|
||||
│ FullMatchPrediction │
|
||||
│ │ │
|
||||
│ ┌───────────┼───────────┐ │
|
||||
│ ▼ ▼ ▼ │
|
||||
│ Market Rows Risk Calc Triple Value │
|
||||
│ │ │ │ │
|
||||
│ └───────────┼───────────┘ │
|
||||
│ ▼ │
|
||||
│ _build_prediction_package() │
|
||||
│ → JSON Response (v28-pro-max) │
|
||||
└─────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 2. Katman Detayları
|
||||
|
||||
### 2.1 V25 — Ensemble ML Katmanı
|
||||
**Dosya:** `ai-engine/models/v25_ensemble.py`
|
||||
|
||||
- **Algoritmalar:** XGBoost + LightGBM ensemble
|
||||
- **Girdi:** Pre-match feature vektörü (form, elo, odds, kadro, hakem vb.)
|
||||
- **Çıktı:** Tüm pazarlar için olasılık dağılımları + confidence skorları
|
||||
- **Özellik:** Odds-aware (bahis oranlarını feature olarak kullanır)
|
||||
- **Target leakage koruması:** Maç sonucu bilgisi asla feature olarak kullanılmaz
|
||||
|
||||
```python
|
||||
# V25 çağrılma noktası (orchestrator L310-315)
|
||||
v25_signal = v25_predictor.predict(features)
|
||||
# Çıktı: {MS: {home: 0.45, draw: 0.28, away: 0.27}, OU25: {...}, BTTS: {...}, ...}
|
||||
```
|
||||
|
||||
### 2.2 V27 — Dual-Engine Divergence Katmanı
|
||||
**Dosya:** `ai-engine/models/v27_predictor.py`
|
||||
|
||||
- **Amaç:** Odds-FREE temel olasılıkları hesaplar (sadece form/elo/kadro)
|
||||
- **Mekanizma:** V25 (odds-aware) vs V27 (odds-free) karşılaştırması
|
||||
- **Divergence Tespiti:** İki motor arasındaki fark → bahisçinin fiyatlandırma hatasını tespit eder
|
||||
- **Çıktı:** `ms_divergence`, `ou25_divergence`, `is_value` sinyalleri
|
||||
|
||||
```python
|
||||
# Divergence hesaplama (orchestrator L830-863)
|
||||
ms_divergence = {
|
||||
"home": v25_home_prob - v27_home_prob, # Pozitif = V25 bahisçiyle hemfikir
|
||||
"away": v25_away_prob - v27_away_prob, # Negatif = Model bahisçiden farklı düşünüyor
|
||||
}
|
||||
ms_value = {
|
||||
"home": {"is_value": v27_home > implied_home and abs(div) > 0.05},
|
||||
"away": {"is_value": v27_away > implied_away and abs(div) > 0.05},
|
||||
}
|
||||
```
|
||||
|
||||
### 2.3 V28 — Odds-Band Historical Performance Katmanı
|
||||
**Dosya:** `ai-engine/features/odds_band_analyzer.py`
|
||||
|
||||
- **Amaç:** "Bu oran bandında tarihsel olarak ne oldu?" sorusunu yanıtlar
|
||||
- **Mekanizma:** Maçın mevcut oranını bir banda yerleştirir (ör: MS Home 1.70-1.90), ardından veritabanındaki aynı banddaki geçmiş maçları sorgular
|
||||
- **Sorgu:** PostgreSQL üzerinden takım-spesifik tarihsel performans
|
||||
|
||||
```python
|
||||
# OddsBandAnalyzer.compute_all() çıktısı — 18 pazar için band metrikleri:
|
||||
{
|
||||
"home_band_ms_win_rate": 0.62, # Ev sahibi bu oran bandında %62 kazanmış
|
||||
"home_band_ms_sample": 34, # 34 maçlık örneklem
|
||||
"band_ou25_over_rate": 0.58, # Bu banddaki maçların %58'i 2.5 üst
|
||||
"band_btts_yes_rate": 0.51, # KG Var oranı
|
||||
"band_htft_11_rate": 0.28, # İY/MS 1/1 oranı
|
||||
"band_cards_referee_avg": 4.2, # Hakem kart ortalaması
|
||||
# ... toplam 60+ feature
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Analiz Edilen Bahis Pazarları (13+)
|
||||
|
||||
| # | Pazar | Kod | Olasılık Alanları | Odds Anahtarları |
|
||||
|---|-------|-----|-------------------|------------------|
|
||||
| 1 | Maç Sonucu | `MS` | home/draw/away | ms_h, ms_d, ms_a |
|
||||
| 2 | Çifte Şans | `DC` | 1X/X2/12 | dc_1x, dc_x2, dc_12 |
|
||||
| 3 | Üst/Alt 1.5 | `OU15` | over/under | ou15_o, ou15_u |
|
||||
| 4 | Üst/Alt 2.5 | `OU25` | over/under | ou25_o, ou25_u |
|
||||
| 5 | Üst/Alt 3.5 | `OU35` | over/under | ou35_o, ou35_u |
|
||||
| 6 | Karşılıklı Gol | `BTTS` | yes/no | btts_y, btts_n |
|
||||
| 7 | İlk Yarı Sonucu | `HT` | 1/X/2 | ht_h, ht_d, ht_a |
|
||||
| 8 | İY/MS (9 kombo) | `HTFT` | 1/1, 1/X, 1/2, X/1, X/X, X/2, 2/1, 2/X, 2/2 | htft_11..htft_22 |
|
||||
| 9 | Tek/Çift | `OE` | odd/even | oe_odd, oe_even |
|
||||
| 10 | İY Üst/Alt 0.5 | `HT_OU05` | over/under | ht_ou05_o, ht_ou05_u |
|
||||
| 11 | İY Üst/Alt 1.5 | `HT_OU15` | over/under | ht_ou15_o, ht_ou15_u |
|
||||
| 12 | Kartlar | `CARDS` | over/under | cards_o, cards_u |
|
||||
| 13 | Handikap | `HCAP` | 1/X/2 | hcap_h, hcap_d, hcap_a |
|
||||
|
||||
---
|
||||
|
||||
## 4. Triple Value Detection (V28 Ana Yeniliği)
|
||||
|
||||
V28'in en kritik özelliği: **3 bağımsız kaynağı çapraz kontrol ederek "gerçek değer" tespiti yapması.**
|
||||
|
||||
```
|
||||
Triple Value = V27 Divergence + V28 Band Rate + Odds Implied Probability
|
||||
|
||||
Koşullar (hepsi sağlanmalı):
|
||||
1. V27 olasılığı > bahisçi implied olasılığı (v27_confirms)
|
||||
2. Band tarihsel oranı > implied olasılık (band_confirms)
|
||||
3. Kombine edge > %5 (edge > 0.05)
|
||||
4. Band örneklem >= 8 maç (band_sample >= 8)
|
||||
|
||||
→ Tüm koşullar sağlanırsa: is_value = True
|
||||
```
|
||||
|
||||
**Örnek:**
|
||||
```
|
||||
Galatasaray vs Beşiktaş — MS Home (1.85 oran)
|
||||
├── Implied Prob: 1/1.85 = 0.54 (%54)
|
||||
├── V27 (odds-free): 0.61 (%61) → ✅ V27 confirms (0.61 > 0.54)
|
||||
├── V28 Band Rate: 0.62 (%62, 34 maç) → ✅ Band confirms (0.62 > 0.54)
|
||||
├── Combined Prob: (0.61 + 0.62) / 2 = 0.615
|
||||
├── Edge: 0.615 - 0.54 = 0.075 (%7.5) → ✅ Edge > 5%
|
||||
└── is_value = TRUE → "Bu bahis değerli!"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Market Row Dekorasyon Pipeline'ı
|
||||
|
||||
Her pazar aşağıdaki pipeline'dan geçer:
|
||||
|
||||
```
|
||||
_build_market_rows() → Ham market row'ları oluştur (13 pazar)
|
||||
↓
|
||||
_apply_market_consistency() → Pazarlar arası tutarlılık kontrolü
|
||||
↓
|
||||
_decorate_market_row() → Her row'a playability, grading, staking ekle
|
||||
↓
|
||||
Sort by (playable, play_score) → En iyi pick'ler başa gelir
|
||||
```
|
||||
|
||||
### 5.1 Decorate Market Row — Quant Hybrid Sistemi
|
||||
|
||||
Her market row şu metriklerle dekore edilir:
|
||||
|
||||
| Metrik | Formül | Açıklama |
|
||||
|--------|--------|----------|
|
||||
| `calibrated_confidence` | `raw_conf × market_calibration` | Kalibre edilmiş güven |
|
||||
| `ev_edge` | `(prob × odds) - 1.0` | Expected Value edge |
|
||||
| `simple_edge` | `prob - (1/odds)` | Basit olasılık farkı |
|
||||
| `play_score` | `cal_conf + (edge × 100 × edge_mult) - penalties` | Oynanabilirlik skoru |
|
||||
| `stake_units` | Quarter-Kelly Criterion | Önerilen bahis miktarı |
|
||||
| `bet_grade` | A/B/C/PASS | EV edge bazlı not |
|
||||
|
||||
### 5.2 Playability Gates (Güvenlik Kapıları)
|
||||
|
||||
Bir market row'un "playable" olması için tüm kapılardan geçmesi gerekir:
|
||||
|
||||
1. **Confidence Gate:** `calibrated_conf >= min_conf` (pazar bazlı eşik)
|
||||
2. **Odds Gate:** Odds-required pazarlarda `odds > 1.01`
|
||||
3. **Risk-Quality Gate:** HIGH/EXTREME risk + LOW kalite → BLOK
|
||||
4. **Negative Edge Gate:** `simple_edge < neg_threshold` → BLOK
|
||||
5. **EV Edge Gate:** `ev_edge < min_edge` → BLOK
|
||||
6. **Play Score Gate:** `play_score < min_play_score` → BLOK
|
||||
|
||||
### 5.3 Kelly Criterion Staking
|
||||
|
||||
```python
|
||||
# Quarter-Kelly (¼ Kelly, 10-unit bankroll)
|
||||
f* = ((b × p) - q) / b # Full Kelly
|
||||
stake = f* × 0.25 × 10 # Quarter Kelly × bankroll
|
||||
stake = min(stake, 3.0) # Cap: max 3 unit
|
||||
stake = max(stake, 0.25) # Floor: min 0.25 unit
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. Guaranteed Pick Logic (V32 Calibration-Aware)
|
||||
|
||||
Ana pick seçimi 4 öncelik sırasıyla yapılır:
|
||||
|
||||
```
|
||||
Priority 1: HIGH_ACCURACY markets (DC, OU15, HT_OU05)
|
||||
+ Odds >= 1.30 + Confidence >= 44%
|
||||
→ is_guaranteed = True, reason = "high_accuracy_market"
|
||||
|
||||
Priority 2: Any playable + Odds >= 1.30 + Conf >= 44%
|
||||
→ is_guaranteed = True, reason = "confidence_threshold_met"
|
||||
|
||||
Priority 3: Any playable + Odds >= 1.30
|
||||
→ is_guaranteed = False, reason = "odds_only_fallback"
|
||||
|
||||
Priority 4: Best non-playable (last resort)
|
||||
→ is_guaranteed = False, reason = "last_resort"
|
||||
```
|
||||
|
||||
**Value Pick:** `main_pick`'ten farklı, odds >= 1.60, confidence >= %40 olan en iyi alternatif.
|
||||
|
||||
**Aggressive Pick:** HT/FT reversal senaryoları (1/2, 2/1, X/1, X/2) arasından en yüksek olasılıklı olan seçilir.
|
||||
|
||||
---
|
||||
|
||||
## 7. Risk Assessment Sistemi
|
||||
|
||||
```python
|
||||
risk_score = 100 - max_market_conf + lineup_penalty + referee_penalty + parity_penalty
|
||||
|
||||
# Penalty'ler:
|
||||
lineup_penalty = 12.0 (kadro yok) | 7.0 (probable_xi) | 0.0 (confirmed)
|
||||
referee_penalty = 6.0 (hakem yok) | 0.0
|
||||
parity_penalty = 8.0 (|ms_edge| < 0.08) | 0.0
|
||||
|
||||
# Risk seviyeleri:
|
||||
EXTREME: score >= 78
|
||||
HIGH: score >= 62
|
||||
MEDIUM: score >= 40
|
||||
LOW: score < 40
|
||||
```
|
||||
|
||||
### Surprise Risk Tespiti
|
||||
- `is_surprise_risk = True` → Risk HIGH/EXTREME VEYA draw_prob >= %30
|
||||
- `surprise_type`: `balanced_match_risk` veya `draw_pressure`
|
||||
|
||||
---
|
||||
|
||||
## 8. xG ve Skor Tahmini
|
||||
|
||||
```python
|
||||
base_home_xg = (home_goals_avg + away_xga) / 2
|
||||
base_away_xg = (away_goals_avg + home_xga) / 2
|
||||
|
||||
# MS edge ve BTTS etkisiyle düzeltme:
|
||||
home_xg = base_home_xg + (ms_edge × 0.55) + (btts_prob - 0.5) × 0.18
|
||||
away_xg = base_away_xg - (ms_edge × 0.55) + (btts_prob - 0.5) × 0.18
|
||||
|
||||
# Lig ortalamasıyla ölçekleme:
|
||||
total_target = league_avg_goals × 0.55 + team_avgs × 0.45 + ou25_signal × 1.15
|
||||
scale = total_target / (home_xg + away_xg)
|
||||
final_home_xg = home_xg × scale
|
||||
final_away_xg = away_xg × scale
|
||||
|
||||
# Skor tahmini:
|
||||
FT = round(home_xg) - round(away_xg)
|
||||
HT = round(home_xg × 0.45) - round(away_xg × 0.45)
|
||||
Top5 = Poisson dağılımı ile en olası 5 skor
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 9. Data Quality Skoru
|
||||
|
||||
```python
|
||||
quality_score = odds_score × 0.35 + lineup_score × 0.35 + ref_score × 0.15 + form_score × 0.15
|
||||
|
||||
# Etiketleme:
|
||||
HIGH: score >= 0.75
|
||||
MEDIUM: score >= 0.45
|
||||
LOW: score < 0.45
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 10. Çıktı JSON Kontratı
|
||||
|
||||
```json
|
||||
{
|
||||
"model_version": "v28-pro-max",
|
||||
"match_info": { "match_id", "home_team", "away_team", "league", ... },
|
||||
"data_quality": { "label", "score", "lineup_source", "flags" },
|
||||
"risk": { "level", "score", "is_surprise_risk", "warnings" },
|
||||
"engine_breakdown": { "team", "player", "odds", "referee" },
|
||||
"main_pick": { "market", "pick", "confidence", "odds", "ev_edge", "bet_grade", "is_guaranteed" },
|
||||
"value_pick": { ... },
|
||||
"aggressive_pick": { "market": "HT/FT", "pick": "1/2", ... },
|
||||
"bet_advice": { "playable", "suggested_stake_units", "reason" },
|
||||
"bet_summary": [ { "market", "pick", "calibrated_confidence", "bet_grade", "ev_edge", ... } ],
|
||||
"supporting_picks": [ ... ],
|
||||
"score_prediction": { "ft", "ht", "xg_home", "xg_away", "xg_total" },
|
||||
"scenario_top5": [ "1-0", "2-1", ... ],
|
||||
"market_board": { "MS": {...}, "DC": {...}, "OU25": {...}, ... },
|
||||
"v25_signal": { "available", "markets", "value_bets" },
|
||||
"reasoning_factors": [ ... ]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 11. League-Specific Odds Reliability (V31)
|
||||
|
||||
Bazı liglerin bahis oranları daha güvenilirdir. Bu bilgi `_decorate_market_row` içinde edge ağırlıklandırmasında kullanılır:
|
||||
|
||||
```python
|
||||
odds_rel = league_reliability.get(league_id, 0.35) # 0.0 - 1.0
|
||||
edge_multiplier = 0.60 + (odds_rel × 0.60) # 0.60 - 1.20
|
||||
|
||||
# Güvenilir lig → edge daha fazla ağırlık alır
|
||||
# Güvenilsiz lig → model confidence'a daha çok güvenilir
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 12. Dosya Haritası
|
||||
|
||||
```
|
||||
ai-engine/
|
||||
├── services/
|
||||
│ └── single_match_orchestrator.py ← Ana orchestrator (4656 satır)
|
||||
├── models/
|
||||
│ ├── v25_ensemble.py ← XGBoost + LightGBM ensemble
|
||||
│ └── v27_predictor.py ← Odds-free fundamental predictor
|
||||
├── features/
|
||||
│ └── odds_band_analyzer.py ← V28 tarihsel band analizi
|
||||
└── main.py ← FastAPI endpoint (/predict)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 13. Akış Özeti
|
||||
|
||||
```
|
||||
HTTP POST /predict {match_id}
|
||||
│
|
||||
▼
|
||||
SingleMatchOrchestrator.analyze_match(match_id)
|
||||
│
|
||||
├── _load_match_data() → DB'den maç + odds + kadro + form
|
||||
│
|
||||
├── V25: v25_predictor.predict(features)
|
||||
│ → 13 pazar olasılık + confidence
|
||||
│
|
||||
├── V27: v27_predictor.predict(features)
|
||||
│ → Odds-free MS/OU25 olasılıkları
|
||||
│ → Divergence hesaplama
|
||||
│
|
||||
├── V28: odds_band_analyzer.compute_all()
|
||||
│ → 18 pazar için tarihsel band metrikleri
|
||||
│
|
||||
├── Triple Value Detection
|
||||
│ → V27 + V28 + Implied çapraz kontrol
|
||||
│
|
||||
├── _enrich_prediction() → xG, risk, skor tahmini
|
||||
│
|
||||
├── _build_market_rows() → 13+ ham market row
|
||||
├── _apply_market_consistency()
|
||||
├── _decorate_market_row() → EV, Kelly, grading
|
||||
│
|
||||
├── Guaranteed Pick Selection → main_pick, value_pick, aggressive_pick
|
||||
│
|
||||
└── _build_prediction_package() → Final JSON kontratı
|
||||
```
|
||||
+1
-1
@@ -22,7 +22,7 @@
|
||||
"ai:backtest": "python ai-engine/scripts/backtest_v2_runtime.py",
|
||||
"ai:train:vqwen": "python ai-engine/scripts/train_vqwen_v3.py",
|
||||
"feeder:historical": "ts-node -r tsconfig-paths/register src/scripts/run-feeder.ts",
|
||||
"feeder:previous-day": "ts-node -r tsconfig-paths/register src/scripts/run-feeder.ts",
|
||||
"feeder:previous-day": "ts-node -r tsconfig-paths/register src/scripts/run-feeder-previous-day.ts",
|
||||
"feeder:fill-gaps": "ts-node -r tsconfig-paths/register src/scripts/run-feeder-filtered.ts",
|
||||
"feeder:basketball": "ts-node -r tsconfig-paths/register src/scripts/run-feeder-basketball.ts",
|
||||
"feeder:live": "ts-node -r tsconfig-paths/register src/scripts/run-live-feeder.ts",
|
||||
|
||||
@@ -856,19 +856,46 @@ export class FeederPersistenceService {
|
||||
const matches = await this.prisma.match.findMany({
|
||||
where: {
|
||||
id: { in: matchIds },
|
||||
AND: [
|
||||
{ oddCategories: { some: {} } },
|
||||
{
|
||||
oddCategories: { some: {} },
|
||||
OR: [
|
||||
{ footballTeamStats: { some: {} } },
|
||||
{ basketballTeamStats: { some: {} } },
|
||||
],
|
||||
{
|
||||
sport: "football",
|
||||
footballTeamStats: { some: {} },
|
||||
playerParticipations: { some: { isStarting: true } },
|
||||
},
|
||||
{
|
||||
sport: "basketball",
|
||||
basketballTeamStats: { some: {} },
|
||||
basketballPlayerStats: { some: {} },
|
||||
},
|
||||
],
|
||||
},
|
||||
select: { id: true },
|
||||
select: { id: true, sport: true },
|
||||
});
|
||||
return matches.map((m) => m.id);
|
||||
|
||||
const footballIds = matches
|
||||
.filter((m) => m.sport === "football")
|
||||
.map((m) => m.id);
|
||||
const completeFootballIds = new Set<string>();
|
||||
|
||||
if (footballIds.length > 0) {
|
||||
const starterCounts = await this.prisma.matchPlayerParticipation.groupBy({
|
||||
by: ["matchId"],
|
||||
where: {
|
||||
matchId: { in: footballIds },
|
||||
isStarting: true,
|
||||
},
|
||||
_count: { _all: true },
|
||||
});
|
||||
|
||||
for (const row of starterCounts) {
|
||||
if (row._count._all >= 18) completeFootballIds.add(row.matchId);
|
||||
}
|
||||
}
|
||||
|
||||
return matches
|
||||
.filter((m) => m.sport !== "football" || completeFootballIds.has(m.id))
|
||||
.map((m) => m.id);
|
||||
}
|
||||
|
||||
async hasOdds(matchId: string): Promise<boolean> {
|
||||
|
||||
@@ -168,7 +168,7 @@ export class FeederService {
|
||||
// writing to live_matches. Historical scan should only fill matches table.
|
||||
endDate.setDate(endDate.getDate() - 2);
|
||||
|
||||
const stateKey = `historical_scan_state_${sports.join("_")}${targetLeagueIds.length > 0 ? "_filtered" : ""}_desc`;
|
||||
const stateKey = `historical_full_data_v2_state_${sports.join("_")}${targetLeagueIds.length > 0 ? "_filtered" : ""}_desc`;
|
||||
let currentDate: Date | null = null;
|
||||
|
||||
// Resume from saved state
|
||||
@@ -310,9 +310,20 @@ export class FeederService {
|
||||
const { startTs: targetDateStartTs, endTs: targetDateEndTs } =
|
||||
this.getDayBoundsForTimeZone(dateString, this.DAILY_SYNC_TIME_ZONE);
|
||||
|
||||
// DEBUG: Log sample mstUtc values vs target bounds to diagnose filtering
|
||||
if (allMatches.length > 0) {
|
||||
const sample = allMatches.slice(0, 3);
|
||||
this.logger.warn(
|
||||
`[${sport}] [${dateString}] DEBUG: bounds=[${targetDateStartTs}, ${targetDateEndTs}] ` +
|
||||
`(${new Date(targetDateStartTs * 1000).toISOString()} - ${new Date(targetDateEndTs * 1000).toISOString()}) | ` +
|
||||
`sampleMstUtc=[${sample.map((m) => `${m.mstUtc} (asSec=${new Date(m.mstUtc * 1000).toISOString()}, asMs=${new Date(m.mstUtc).toISOString()})`).join(', ')}]`,
|
||||
);
|
||||
}
|
||||
|
||||
const dateFilteredMatches = allMatches.filter((m) => {
|
||||
const matchTs = m.mstUtc;
|
||||
return matchTs >= targetDateStartTs && matchTs <= targetDateEndTs;
|
||||
// mstUtc is in milliseconds from API, bounds are in seconds
|
||||
const matchTsSec = Math.floor(m.mstUtc / 1000);
|
||||
return matchTsSec >= targetDateStartTs && matchTsSec <= targetDateEndTs;
|
||||
});
|
||||
|
||||
const apiReturnedCount = allMatches.length;
|
||||
@@ -753,10 +764,7 @@ export class FeederService {
|
||||
}
|
||||
|
||||
// Starting Formation & Substitutes (Always for lineups or all)
|
||||
// V20 OPTIMIZATION: Disabled to speed up feeder and reduce 502 errors.
|
||||
// We only use Team Stats for V20 model.
|
||||
/*
|
||||
if (scope === 'all' || scope === 'lineups') {
|
||||
if (scope === "all" || scope === "lineups") {
|
||||
// Starting Formation
|
||||
try {
|
||||
const formationData =
|
||||
@@ -780,7 +788,7 @@ export class FeederService {
|
||||
);
|
||||
}
|
||||
} catch (e: any) {
|
||||
if (e.message?.includes('502')) hasCriticalError = true;
|
||||
if (e.message?.includes("502")) hasCriticalError = true;
|
||||
this.logger.warn(`[${matchId}] Formation failed: ${e.message}`);
|
||||
}
|
||||
|
||||
@@ -807,11 +815,10 @@ export class FeederService {
|
||||
);
|
||||
}
|
||||
} catch (e: any) {
|
||||
if (e.message?.includes('502')) hasCriticalError = true;
|
||||
if (e.message?.includes("502")) hasCriticalError = true;
|
||||
this.logger.warn(`[${matchId}] Subs failed: ${e.message}`);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// Game Stats & Officials
|
||||
if (scope === "all") {
|
||||
@@ -935,6 +942,8 @@ export class FeederService {
|
||||
const missingParts: string[] = [];
|
||||
if (scope === "all" && completedMatch) {
|
||||
if (sport === "football" && !stats) missingParts.push("Stats");
|
||||
if (sport === "football" && participationData.length < 18)
|
||||
missingParts.push("Lineups");
|
||||
if (sport === "basketball" && !basketballTeamStats)
|
||||
missingParts.push("BoxScore");
|
||||
if (oddsArray.length === 0) missingParts.push("Odds");
|
||||
|
||||
@@ -586,7 +586,44 @@ export class MatchesService {
|
||||
date: new Date(Number(liveMatch.mstUtc)),
|
||||
// Fill missing relations with empty arrays
|
||||
teamStats: [],
|
||||
playerParticipations: [],
|
||||
playerParticipations: (() => {
|
||||
const parsed: Array<{ teamId: string; isStarting: boolean; shirtNumber: string | number | null; position: string | null; player: { id: string; name: string } }> = [];
|
||||
const canTrustFeedLineups = displayStatus === "LIVE" || displayStatus === "Finished";
|
||||
if (!canTrustFeedLineups) {
|
||||
return parsed;
|
||||
}
|
||||
if (liveMatch.lineups && typeof liveMatch.lineups === 'object') {
|
||||
const lu = liveMatch.lineups as Record<string, any>;
|
||||
const addPlayers = (teamLu: any, teamId: string | null) => {
|
||||
if (!teamLu || !teamId) return;
|
||||
if (teamLu.xi && Array.isArray(teamLu.xi)) {
|
||||
teamLu.xi.forEach((p: any) => {
|
||||
parsed.push({
|
||||
teamId,
|
||||
isStarting: true,
|
||||
shirtNumber: p.shirtNumber || p.number,
|
||||
position: p.position || p.pos,
|
||||
player: { id: p.personId || p.id || p.playerId || 'unknown', name: p.matchName || p.name || p.playerName || 'Bilinmiyor' }
|
||||
});
|
||||
});
|
||||
}
|
||||
if (teamLu.subs && Array.isArray(teamLu.subs)) {
|
||||
teamLu.subs.forEach((p: any) => {
|
||||
parsed.push({
|
||||
teamId,
|
||||
isStarting: false,
|
||||
shirtNumber: p.shirtNumber || p.number,
|
||||
position: p.position || p.pos,
|
||||
player: { id: p.personId || p.id || p.playerId || 'unknown', name: p.matchName || p.name || p.playerName || 'Bilinmiyor' }
|
||||
});
|
||||
});
|
||||
}
|
||||
};
|
||||
addPlayers(lu.home, liveMatch.homeTeamId);
|
||||
addPlayers(lu.away, liveMatch.awayTeamId);
|
||||
}
|
||||
return parsed;
|
||||
})(),
|
||||
playerEvents: [],
|
||||
oddCategories: [], // Will handle odds parsing below
|
||||
officials: [],
|
||||
@@ -597,6 +634,64 @@ export class MatchesService {
|
||||
|
||||
if (!match) return null;
|
||||
|
||||
const detailDisplayStatus = getDisplayMatchStatus({
|
||||
state: match.state,
|
||||
status: match.status,
|
||||
substate: match.substate,
|
||||
scoreHome: match.scoreHome,
|
||||
scoreAway: match.scoreAway,
|
||||
});
|
||||
const canTrustStoredLineups = this.canTrustStoredLineups(detailDisplayStatus);
|
||||
|
||||
if (Array.isArray(match.playerParticipations)) {
|
||||
if (!canTrustStoredLineups) {
|
||||
match.playerParticipations = [];
|
||||
}
|
||||
|
||||
const hasHomeLineup = match.playerParticipations.some(
|
||||
(p: any) => p.teamId === match.homeTeamId && p.isStarting,
|
||||
);
|
||||
const hasAwayLineup = match.playerParticipations.some(
|
||||
(p: any) => p.teamId === match.awayTeamId && p.isStarting,
|
||||
);
|
||||
|
||||
if (!hasHomeLineup || !hasAwayLineup) {
|
||||
const sidelined =
|
||||
match.sidelined && typeof match.sidelined === "object"
|
||||
? (match.sidelined as Record<string, any>)
|
||||
: {};
|
||||
const matchDateMs = Number(match.mstUtc || Date.now());
|
||||
const probableLineups: any[] = [];
|
||||
|
||||
if (!hasHomeLineup && match.homeTeamId) {
|
||||
probableLineups.push(
|
||||
...(await this.buildProbableLineupForTeam({
|
||||
teamId: match.homeTeamId,
|
||||
beforeDateMs: matchDateMs,
|
||||
sidelinedTeamData: sidelined.homeTeam,
|
||||
})),
|
||||
);
|
||||
}
|
||||
|
||||
if (!hasAwayLineup && match.awayTeamId) {
|
||||
probableLineups.push(
|
||||
...(await this.buildProbableLineupForTeam({
|
||||
teamId: match.awayTeamId,
|
||||
beforeDateMs: matchDateMs,
|
||||
sidelinedTeamData: sidelined.awayTeam,
|
||||
})),
|
||||
);
|
||||
}
|
||||
|
||||
if (probableLineups.length > 0) {
|
||||
match.playerParticipations = canTrustStoredLineups
|
||||
? [...match.playerParticipations, ...probableLineups]
|
||||
: probableLineups;
|
||||
match.lineupSource = "probable_xi";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Structure odds
|
||||
const odds: Record<
|
||||
string,
|
||||
@@ -699,4 +794,211 @@ export class MatchesService {
|
||||
|
||||
return team?.id || null;
|
||||
}
|
||||
|
||||
/**
 * Projects a probable starting XI for a team from its recent completed matches.
 *
 * The query collects the team's starting-lineup rows from up to `matchLimit`
 * completed matches (status 'FT', state 'postGame', or both scores present)
 * played inside the lookback window before `beforeDateMs`, keeping only matches
 * where the team has at least 9 starting rows. Players that show up for a
 * different team in a later completed match (still before the cutoff) are
 * dropped by the NOT EXISTS clause, as are players listed in `sidelinedTeamData`.
 *
 * Remaining players are scored by recency-weighted starts; the best 11 are
 * returned as participation-shaped objects flagged `isProbable`, each carrying
 * the same projection metadata (confidence, age in days, staleness flag).
 *
 * @param params.teamId Team whose lineup is projected.
 * @param params.beforeDateMs Cutoff timestamp (epoch milliseconds); falls back to now when 0/NaN/non-finite.
 * @param params.sidelinedTeamData Optional sidelined payload; player ids found in it are excluded.
 * @param params.matchLimit Number of recent matches to consider (default 5).
 * @param params.lookbackDays Size of the history window in days (default 370).
 * @param params.maxStalenessDays Age after which the projection is flagged stale (default 120).
 * @returns Up to 11 probable starters, or an empty array when no history rows exist.
 */
private async buildProbableLineupForTeam(params: {
  teamId: string;
  beforeDateMs: number;
  sidelinedTeamData?: any;
  matchLimit?: number;
  lookbackDays?: number;
  maxStalenessDays?: number;
}) {
  const DAY_MS = 24 * 60 * 60 * 1000;
  const XI_SIZE = 11;

  const matchLimit = params.matchLimit ?? 5;
  const lookbackDays = params.lookbackDays ?? 370;
  const maxStalenessDays = params.maxStalenessDays ?? 120;

  // BigInt() throws RangeError for non-integer / non-finite numbers, so the
  // cutoff and window start are truncated to whole milliseconds before binding.
  const beforeDateMs =
    Number.isFinite(params.beforeDateMs) && params.beforeDateMs !== 0
      ? Math.trunc(params.beforeDateMs)
      : Date.now();
  const minDateMs = Math.max(
    0,
    Math.trunc(beforeDateMs - lookbackDays * DAY_MS),
  );
  const excluded = this.extractSidelinedPlayerIds(params.sidelinedTeamData);

  // Converts a raw shirt number to a finite number, or null when absent/unparseable.
  const toShirtNumber = (raw: unknown): number | null => {
    if (raw === null || raw === undefined) return null;
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : null;
  };

  const rows = await this.prisma.$queryRaw<any[]>`
    SELECT
      mpp.player_id AS "playerId",
      p.name AS "playerName",
      mpp.position AS "position",
      mpp.shirt_number AS "shirtNumber",
      m.id AS "matchId",
      m.mst_utc AS "mstUtc"
    FROM match_player_participation mpp
    JOIN matches m ON m.id = mpp.match_id
    JOIN players p ON p.id = mpp.player_id
    WHERE mpp.team_id = ${params.teamId}
      AND mpp.is_starting = true
      AND NOT EXISTS (
        SELECT 1
        FROM match_player_participation later_mpp
        JOIN matches later_m ON later_m.id = later_mpp.match_id
        WHERE later_mpp.player_id = mpp.player_id
          AND later_mpp.team_id <> ${params.teamId}
          AND later_m.mst_utc > m.mst_utc
          AND later_m.mst_utc < ${BigInt(beforeDateMs)}
          AND (
            later_m.status = 'FT'
            OR later_m.state = 'postGame'
            OR (later_m.score_home IS NOT NULL AND later_m.score_away IS NOT NULL)
          )
      )
      AND m.id IN (
        SELECT m2.id
        FROM matches m2
        JOIN match_player_participation recent_mpp
          ON recent_mpp.match_id = m2.id
          AND recent_mpp.team_id = ${params.teamId}
          AND recent_mpp.is_starting = true
        WHERE (m2.home_team_id = ${params.teamId} OR m2.away_team_id = ${params.teamId})
          AND (
            m2.status = 'FT'
            OR m2.state = 'postGame'
            OR (m2.score_home IS NOT NULL AND m2.score_away IS NOT NULL)
          )
          AND m2.mst_utc < ${BigInt(beforeDateMs)}
          AND m2.mst_utc >= ${BigInt(minDateMs)}
        GROUP BY m2.id
        HAVING COUNT(recent_mpp.*) >= 9
        ORDER BY MAX(m2.mst_utc) DESC
        LIMIT ${matchLimit}
      )
    ORDER BY m.mst_utc DESC
  `;

  if (!rows.length) return [];

  // Folded with reduce rather than Math.max(...spread) so a large row set
  // cannot exceed the engine's argument-count limit.
  const latestMst = rows.reduce(
    (latest, row) => Math.max(latest, Number(row.mstUtc || 0)),
    0,
  );
  const ageDays =
    latestMst > 0
      ? (beforeDateMs - latestMst) / DAY_MS
      : Number.POSITIVE_INFINITY;
  const staleProjection = ageDays > maxStalenessDays;

  // Rows arrive newest-first, so first-seen order gives each match its recency rank (0 = latest).
  const matchOrder = new Map<string, number>();
  for (const row of rows) {
    const matchId = String(row.matchId);
    if (!matchOrder.has(matchId)) {
      matchOrder.set(matchId, matchOrder.size);
    }
  }

  const playerMap = new Map<
    string,
    {
      playerId: string;
      playerName: string;
      position: string | null;
      shirtNumber: number | null;
      score: number;
      starts: number;
      lastSeenRank: number;
    }
  >();

  for (const row of rows) {
    const playerId = String(row.playerId);
    if (excluded.has(playerId)) continue;

    const rank = matchOrder.get(String(row.matchId)) ?? matchLimit;
    const recencyWeight = Math.max(1, matchLimit - rank);
    // Starts in the two most recent matches get an extra bonus on top of the recency weight.
    const score = recencyWeight + (rank === 0 ? 3 : rank === 1 ? 1.5 : 0);
    const existing = playerMap.get(playerId);

    if (!existing) {
      playerMap.set(playerId, {
        playerId,
        playerName: row.playerName || "Bilinmiyor",
        position: row.position ?? null,
        shirtNumber: toShirtNumber(row.shirtNumber),
        score,
        starts: 1,
        lastSeenRank: rank,
      });
    } else {
      existing.score += score;
      existing.starts += 1;
      existing.lastSeenRank = Math.min(existing.lastSeenRank, rank);
      // Keep the most recent non-empty position / shirt number; fill gaps from older rows.
      existing.position = existing.position || row.position || null;
      existing.shirtNumber =
        existing.shirtNumber ?? toShirtNumber(row.shirtNumber);
    }
  }

  const ranked = [...playerMap.values()]
    .sort((a, b) => {
      if (b.score !== a.score) return b.score - a.score;
      if (b.starts !== a.starts) return b.starts - a.starts;
      return a.lastSeenRank - b.lastSeenRank;
    })
    .slice(0, XI_SIZE);

  // Confidence blends squad coverage, amount of history and how many picks
  // started at least twice, scaled down by staleness and capped (lower cap when stale).
  const coverage = Math.min(1, ranked.length / XI_SIZE);
  const historyScore = Math.min(1, matchOrder.size / matchLimit);
  const stableCore = ranked.filter((p) => p.starts >= 2).length / XI_SIZE;
  const stalenessFactor = Math.max(
    0.35,
    Math.min(1, maxStalenessDays / Math.max(ageDays, 1)),
  );
  const confidence = Math.max(
    0,
    Math.min(
      staleProjection ? 0.58 : 0.88,
      (coverage * 0.45 + historyScore * 0.25 + stableCore * 0.3) *
        stalenessFactor,
    ),
  );

  return ranked.map((p) => ({
    teamId: params.teamId,
    isStarting: true,
    shirtNumber: p.shirtNumber,
    position: p.position,
    isProbable: true,
    lineupSource: "probable_xi",
    projectionConfidence: Number(confidence.toFixed(3)),
    projectionAgeDays: Number(ageDays.toFixed(1)),
    projectionStale: staleProjection,
    projectionMatchLimit: matchLimit,
    projectionLookbackDays: lookbackDays,
    projectionMaxStalenessDays: maxStalenessDays,
    player: {
      id: p.playerId,
      name: p.playerName,
    },
  }));
}
|
||||
|
||||
/**
 * Collects the ids of sidelined players from a team's sidelined payload.
 *
 * Reads `teamData.players` when it is an array and, for each entry, takes the
 * first defined value among `playerId`, `player_id`, `id` and `personId`.
 * Ids are stringified; entries that yield an empty string are ignored.
 *
 * @param teamData Sidelined payload for one team (any shape; non-objects yield an empty set).
 * @returns Set of player ids as strings.
 */
private extractSidelinedPlayerIds(teamData: any): Set<string> {
  const sidelinedIds = new Set<string>();

  if (!teamData || typeof teamData !== "object") {
    return sidelinedIds;
  }
  if (!Array.isArray(teamData.players)) {
    return sidelinedIds;
  }

  for (const entry of teamData.players) {
    const rawId =
      entry?.playerId ?? entry?.player_id ?? entry?.id ?? entry?.personId ?? "";
    const id = String(rawId);
    if (id) {
      sidelinedIds.add(id);
    }
  }

  return sidelinedIds;
}
|
||||
|
||||
/**
 * Tells whether lineups stored for a match may be used as-is, based on its
 * display status. Only "live", "finished" and "ft" (compared case-insensitively)
 * qualify; a missing status never does.
 *
 * @param displayStatus Display status label of the match, if known.
 * @returns True when the status is one of the trusted values.
 */
private canTrustStoredLineups(displayStatus?: string): boolean {
  const trustedStatuses = ["live", "finished", "ft"];
  const statusKey = String(displayStatus || "").toLowerCase();
  return trustedStatuses.includes(statusKey);
}
|
||||
}
|
||||
|
||||
@@ -96,11 +96,10 @@ export class PredictionsController {
|
||||
async getPrediction(
|
||||
@Param("matchId") matchId: string,
|
||||
): Promise<MatchPredictionDto> {
|
||||
// Check cache first - DISABLED per user request to always fetch from scratch
|
||||
// const cached = await this.predictionsService.getCachedPrediction(matchId);
|
||||
// if (cached) {
|
||||
// return cached;
|
||||
// }
|
||||
const cached = await this.predictionsService.getCachedPrediction(matchId);
|
||||
if (cached) {
|
||||
return cached;
|
||||
}
|
||||
|
||||
// Get from AI Engine
|
||||
const prediction = await this.predictionsService.getPredictionById(matchId);
|
||||
|
||||
@@ -223,11 +223,13 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy {
|
||||
`/v20plus/analyze/${matchId}`,
|
||||
{ simulate: true, is_simulation: true, pre_match_only: true },
|
||||
);
|
||||
await this.recordPredictionRun(matchId, response.data);
|
||||
return this.enrichPredictionResponse(
|
||||
response.data as MatchPredictionDto,
|
||||
const prediction = this.enrichPredictionResponse(
|
||||
response.data,
|
||||
matchContext,
|
||||
);
|
||||
await this.recordPredictionRun(matchId, response.data);
|
||||
await this.cachePrediction(matchId, prediction);
|
||||
return prediction;
|
||||
} catch (e: unknown) {
|
||||
const requestError =
|
||||
e instanceof AiEngineRequestError
|
||||
@@ -235,6 +237,20 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy {
|
||||
: new AiEngineRequestError("AI Engine request failed");
|
||||
const status = requestError.status;
|
||||
const detail = requestError.detail || requestError.message;
|
||||
|
||||
if (
|
||||
status === HttpStatus.SERVICE_UNAVAILABLE &&
|
||||
this.hasCooldown(detail)
|
||||
) {
|
||||
const storedPrediction = await this.getStoredPrediction(matchId);
|
||||
if (storedPrediction) {
|
||||
this.logger.warn(
|
||||
`AI Engine cooldown for ${matchId}; returning stored prediction`,
|
||||
);
|
||||
return this.enrichPredictionResponse(storedPrediction, matchContext);
|
||||
}
|
||||
}
|
||||
|
||||
this.logger.error(
|
||||
`Direct AI Engine call failed for ${matchId}: status=${status}, detail=${JSON.stringify(detail)}`,
|
||||
);
|
||||
@@ -674,6 +690,11 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy {
|
||||
odds: this.normalizeDisplayOdds(odds, impliedProb),
|
||||
implied_prob: impliedProb,
|
||||
ev_edge: evEdge,
|
||||
playable: Boolean(record.playable) && interval.threshold_met,
|
||||
stake_units:
|
||||
Boolean(record.playable) && interval.threshold_met
|
||||
? this.asNumber(record.stake_units)
|
||||
: 0,
|
||||
reasons: Array.isArray(record.reasons)
|
||||
? record.reasons.map((reason) => this.translateReason(String(reason)))
|
||||
: [],
|
||||
@@ -919,15 +940,39 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const normalizedPick = pickName.toUpperCase();
|
||||
const normalizedPick = this.normalizePickKey(pickName);
|
||||
for (const [key, value] of Object.entries(probabilities)) {
|
||||
if (key.toUpperCase() === normalizedPick) {
|
||||
if (this.normalizePickKey(key) === normalizedPick) {
|
||||
return this.asNumber(value);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Canonicalises a pick / outcome label so Turkish and English spellings of the
 * same outcome compare equal (e.g. "Üst" and "Over" both become "OVER").
 *
 * The label is trimmed and upper-cased, then looked up in the alias table using
 * a diacritic-folded form. Folding matters because `toUpperCase()` is
 * locale-insensitive: "Çift" upper-cases to "ÇIFT" (dotless I), which would
 * otherwise miss both the "ÇİFT" and "CIFT" spellings.
 *
 * @param value Raw pick name or probability key.
 * @returns The canonical alias when one exists, otherwise the trimmed upper-cased input.
 */
private normalizePickKey(value: string): string {
  const upper = value.trim().toUpperCase();
  // Decompose accented letters and drop the combining marks: Ü -> U, Ç -> C, İ -> I.
  const folded = upper.normalize("NFD").replace(/[\u0300-\u036f]/g, "");

  // Keys are the diacritic-free upper-case spellings.
  const aliases: Record<string, string> = {
    UST: "OVER",
    OVER: "OVER",
    ALT: "UNDER",
    UNDER: "UNDER",
    "KG VAR": "YES",
    VAR: "YES",
    YES: "YES",
    "KG YOK": "NO",
    YOK: "NO",
    NO: "NO",
    TEK: "ODD",
    ODD: "ODD",
    CIFT: "EVEN",
    EVEN: "EVEN",
  };

  // Non-aliased labels keep their original (unfolded) upper-case form.
  return aliases[folded] ?? upper;
}
|
||||
|
||||
private impliedProbabilityFromOdds(odds: number): number {
|
||||
if (odds <= 1) {
|
||||
return 0;
|
||||
@@ -1132,6 +1177,30 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy {
|
||||
return prediction.predictionJson as unknown as MatchPredictionDto;
|
||||
}
|
||||
|
||||
/**
 * Loads the persisted prediction for a match, bypassing the AI Engine.
 *
 * @param matchId Identifier of the match.
 * @returns The stored `predictionJson` payload, or null when no row exists.
 */
private async getStoredPrediction(
  matchId: string,
): Promise<MatchPredictionDto | null> {
  const stored = await this.prisma.prediction.findUnique({
    where: { matchId },
  });

  if (!stored) {
    return null;
  }

  return stored.predictionJson as unknown as MatchPredictionDto;
}
|
||||
|
||||
/**
 * Detects whether an error detail carries a `cooldownRemainingMs` marker.
 *
 * Strings match when they contain the marker text; non-null objects match when
 * they have a property with that name. Anything else does not match.
 *
 * @param detail Error detail of unknown shape.
 * @returns True when the cooldown marker is present.
 */
private hasCooldown(detail: unknown): boolean {
  const marker = "cooldownRemainingMs";

  switch (typeof detail) {
    case "string":
      return detail.includes(marker);
    case "object":
      return detail !== null && marker in detail;
    default:
      return false;
  }
}
|
||||
|
||||
private async ensureSmartCouponDataReady(matchIds: string[]): Promise<void> {
|
||||
const uniqueMatchIds = [...new Set(matchIds.filter((id) => !!id))];
|
||||
if (uniqueMatchIds.length === 0) {
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
/**
|
||||
* Run Previous-Day Completed Match Sync
|
||||
* Usage: npm run feeder:previous-day
|
||||
*/
|
||||
|
||||
import { NestFactory } from "@nestjs/core";
|
||||
import { FeederService } from "../modules/feeder/feeder.service";
|
||||
import { Logger } from "@nestjs/common";
|
||||
|
||||
/**
 * Entry point of the previous-day sync script.
 *
 * Sets FEEDER_MODE before the application module is loaded, creates a
 * standalone Nest application context, runs the previous-day completed-match
 * scan and exits with code 0 on success or 1 on failure. The context is always
 * closed before the process exits.
 */
async function bootstrap(): Promise<void> {
  process.env.FEEDER_MODE = "historical";

  const logger = new Logger("FeederPreviousDayScript");

  logger.log("🚀 Starting previous-day completed match sync...");

  // Load AppModule after FEEDER_MODE is set so cron imports can be disabled.
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const { AppModule } = require("../app.module");
  const app = await NestFactory.createApplicationContext(AppModule, {
    logger: ["log", "error", "warn"],
  });

  // The exit code is recorded instead of calling process.exit() inside the
  // catch block: process.exit() ends the process immediately, which would skip
  // the finally block and leave the application context unclosed.
  let exitCode = 0;

  try {
    const feederService = app.get(FeederService);
    await feederService.runPreviousDayCompletedMatchesScan();
    logger.log("✅ Previous-day completed match sync completed successfully!");
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    logger.error(`❌ Feeder failed: ${message}`);
    if (error instanceof Error && error.stack) {
      logger.error(error.stack);
    }
    exitCode = 1;
  } finally {
    await app.close();
  }

  process.exit(exitCode);
}
|
||||
|
||||
void bootstrap();
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Run Previous-Day Completed Match Sync
|
||||
* Run Full Historical Feeder
|
||||
* Usage: npm run feeder:historical
|
||||
*/
|
||||
|
||||
@@ -12,7 +12,7 @@ async function bootstrap() {
|
||||
|
||||
const logger = new Logger("FeederScript");
|
||||
|
||||
logger.log("🚀 Starting previous-day completed match sync...");
|
||||
logger.log("🚀 Starting full historical feeder...");
|
||||
|
||||
// Load AppModule after FEEDER_MODE is set so cron imports can be disabled.
|
||||
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
||||
@@ -23,8 +23,14 @@ async function bootstrap() {
|
||||
|
||||
try {
|
||||
const feederService = app.get(FeederService);
|
||||
await feederService.runPreviousDayCompletedMatchesScan();
|
||||
logger.log("✅ Previous-day completed match sync completed successfully!");
|
||||
const startDate = process.env.FEEDER_START_DATE || "2023-06-01";
|
||||
const sports = (process.env.FEEDER_SPORTS || "football,basketball")
|
||||
.split(",")
|
||||
.map((sport) => sport.trim())
|
||||
.filter(Boolean) as Array<"football" | "basketball">;
|
||||
|
||||
await feederService.runHistoricalScan(sports, startDate);
|
||||
logger.log("✅ Full historical feeder completed successfully!");
|
||||
} catch (error: any) {
|
||||
logger.error(`❌ Feeder failed: ${error.message}`);
|
||||
logger.error(error.stack);
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { Injectable, Logger } from "@nestjs/common";
|
||||
import { Injectable, Logger } from "@nestjs/common";
|
||||
import { Cron } from "@nestjs/schedule";
|
||||
import { HttpService } from "@nestjs/axios";
|
||||
import { PrismaService } from "../database/prisma.service";
|
||||
@@ -182,7 +182,9 @@ export class DataFetcherTask {
|
||||
this.logger.log("syncLiveMatches START");
|
||||
|
||||
const today = getDateStringInTimeZone(new Date(), this.timeZone);
|
||||
const tomorrow = getShiftedDateStringInTimeZone(1, this.timeZone);
|
||||
await this.syncMatchList(today);
|
||||
await this.syncMatchList(tomorrow);
|
||||
await this.updateLiveScores();
|
||||
await this.fetchOddsForMatches();
|
||||
await this.fillMissingLineups();
|
||||
@@ -432,7 +434,10 @@ export class DataFetcherTask {
|
||||
|
||||
for (const match of toUpdate) {
|
||||
try {
|
||||
const formation = await this.scraper.fetchStartingFormation(match.id);
|
||||
const [formation, substitutions] = await Promise.all([
|
||||
this.scraper.fetchStartingFormation(match.id),
|
||||
this.scraper.fetchSubstitutions(match.id),
|
||||
]);
|
||||
const sidelined = match.matchSlug
|
||||
? await this.scraper.fetchSidelinedPlayers(
|
||||
match.id,
|
||||
@@ -440,11 +445,26 @@ export class DataFetcherTask {
|
||||
)
|
||||
: null;
|
||||
|
||||
// Normalize to same home.xi/away.xi format used by processMatchOdds
|
||||
let normalizedLineups: Record<string, unknown> | null = null;
|
||||
if (formation || substitutions) {
|
||||
normalizedLineups = {
|
||||
home: {
|
||||
xi: formation?.stats?.home || [],
|
||||
subs: substitutions?.stats?.home || [],
|
||||
},
|
||||
away: {
|
||||
xi: formation?.stats?.away || [],
|
||||
subs: substitutions?.stats?.away || [],
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
await this.prisma.liveMatch.update({
|
||||
where: { id: match.id },
|
||||
data: {
|
||||
lineups: formation
|
||||
? JSON.parse(JSON.stringify(formation))
|
||||
lineups: normalizedLineups
|
||||
? JSON.parse(JSON.stringify(normalizedLineups))
|
||||
: Prisma.JsonNull,
|
||||
sidelined: sidelined
|
||||
? JSON.parse(JSON.stringify(sidelined))
|
||||
@@ -810,8 +830,8 @@ export class DataFetcherTask {
|
||||
const matchTime = Number(match.mstUtc);
|
||||
const diffHours = (matchTime - now) / (1000 * 60 * 60);
|
||||
|
||||
// Fetch if between -3 hours (started) and +4 hours (upcoming)
|
||||
if (diffHours < 4 && diffHours > -3) {
|
||||
// Fetch if between -3 hours (started) and +24 hours (upcoming)
|
||||
if (diffHours < 24 && diffHours > -3) {
|
||||
// Lineups
|
||||
try {
|
||||
const [startingFormation, substitutions] = await Promise.all([
|
||||
@@ -1269,3 +1289,4 @@ export class DataFetcherTask {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user