diff --git a/ai-engine/core/calculators/base_calculator.py b/ai-engine/core/calculators/base_calculator.py index 71a89bf..7547214 100755 --- a/ai-engine/core/calculators/base_calculator.py +++ b/ai-engine/core/calculators/base_calculator.py @@ -40,7 +40,7 @@ class CalculationContext: is_surprise: bool = False # XGBoost Predictions (New) - xgboost_preds: dict[str, dict[str, Any]] = field(default_factory=dict) + xgboost_preds: dict[str, Any] = field(default_factory=dict) class BaseCalculator: diff --git a/ai-engine/core/calculators/bet_recommender.py b/ai-engine/core/calculators/bet_recommender.py index 29497f4..04f3b48 100755 --- a/ai-engine/core/calculators/bet_recommender.py +++ b/ai-engine/core/calculators/bet_recommender.py @@ -28,7 +28,7 @@ class RecommendationResult: class BetRecommender(BaseCalculator): - def calculate(self, + def calculate(self, # type: ignore[override] ctx: CalculationContext, ms_res: MatchResultPrediction, ou_res: OverUnderPrediction, diff --git a/ai-engine/core/calculators/expert_recommender.py b/ai-engine/core/calculators/expert_recommender.py index 1746cf1..bbe7c7b 100644 --- a/ai-engine/core/calculators/expert_recommender.py +++ b/ai-engine/core/calculators/expert_recommender.py @@ -36,7 +36,7 @@ class ExpertResult: class ExpertRecommender(BaseCalculator): - def calculate(self, + def calculate(self, # type: ignore[override] ctx: CalculationContext, ms_res: MatchResultPrediction, ou_res: OverUnderPrediction, diff --git a/ai-engine/core/calculators/half_time_calculator.py b/ai-engine/core/calculators/half_time_calculator.py index 5049409..2828021 100755 --- a/ai-engine/core/calculators/half_time_calculator.py +++ b/ai-engine/core/calculators/half_time_calculator.py @@ -31,7 +31,7 @@ class HalfTimeCalculator(BaseCalculator): return 1.0 if k == 0 else 0.0 return (lam ** k) * math.exp(-lam) / math.factorial(k) - def calculate(self, ctx: CalculationContext) -> HalfTimePrediction: + def calculate(self, ctx: CalculationContext) -> 
HalfTimePrediction: # type: ignore[override] team_pred = ctx.team_pred odds_pred = ctx.odds_pred diff --git a/ai-engine/core/calculators/match_result_calculator.py b/ai-engine/core/calculators/match_result_calculator.py index 12a2a52..755f2a6 100755 --- a/ai-engine/core/calculators/match_result_calculator.py +++ b/ai-engine/core/calculators/match_result_calculator.py @@ -22,9 +22,9 @@ class MatchResultCalculator(BaseCalculator): def _get_engine_winner(self, home_prob: float, draw_prob: float, away_prob: float) -> str: """Determine which outcome an engine favors.""" probs = {"1": home_prob, "X": draw_prob, "2": away_prob} - return max(probs, key=probs.get) + return max(probs, key=probs.__getitem__) - def calculate(self, ctx: CalculationContext) -> MatchResultPrediction: + def calculate(self, ctx: CalculationContext) -> MatchResultPrediction: # type: ignore[override] # Weights w_team = ctx.weights["team"] w_player = ctx.weights["player"] diff --git a/ai-engine/core/calculators/other_markets_calculator.py b/ai-engine/core/calculators/other_markets_calculator.py index 69dabe2..dc67312 100755 --- a/ai-engine/core/calculators/other_markets_calculator.py +++ b/ai-engine/core/calculators/other_markets_calculator.py @@ -28,7 +28,7 @@ class OtherMarketsPrediction: class OtherMarketsCalculator(BaseCalculator): - def calculate( + def calculate( # type: ignore[override] self, ctx: CalculationContext, ms_result: MatchResultPrediction, diff --git a/ai-engine/core/calculators/over_under_calculator.py b/ai-engine/core/calculators/over_under_calculator.py index 6a73d85..d6efd13 100755 --- a/ai-engine/core/calculators/over_under_calculator.py +++ b/ai-engine/core/calculators/over_under_calculator.py @@ -55,7 +55,7 @@ class OverUnderCalculator(BaseCalculator): return over_15, over_25, over_35, btts_yes - def calculate(self, ctx: CalculationContext) -> OverUnderPrediction: + def calculate(self, ctx: CalculationContext) -> OverUnderPrediction: # type: ignore[override] odds_pred = 
ctx.odds_pred referee_mods = ctx.referee_mods diff --git a/ai-engine/core/calculators/risk_assessor.py b/ai-engine/core/calculators/risk_assessor.py index 2c21947..bb1346e 100755 --- a/ai-engine/core/calculators/risk_assessor.py +++ b/ai-engine/core/calculators/risk_assessor.py @@ -67,12 +67,14 @@ class RiskAssessor(BaseCalculator): if sport_key == "basketball": if is_top_league: - return float( - self.config.get("risk.surprise_threshold_basketball_top", self.config.get("risk.surprise_threshold_basketball", 0.30)), - ) - return float( - self.config.get("risk.surprise_threshold_basketball_non_top", 0.34), - ) + top_val = self.config.get("risk.surprise_threshold_basketball_top") + if top_val is not None: + return float(top_val) + base_val = self.config.get("risk.surprise_threshold_basketball") + return float(base_val) if base_val is not None else 0.30 + + non_top_val = self.config.get("risk.surprise_threshold_basketball_non_top") + return float(non_top_val) if non_top_val is not None else 0.34 if top_label not in ("1/2", "2/1"): return base_threshold @@ -81,27 +83,30 @@ class RiskAssessor(BaseCalculator): favorite_side, gap = self._favorite_profile_from_odds(ctx.odds_data) if is_top_league: - favorite_winner_threshold = float( - self.config.get( - "risk.surprise_threshold_favorite_reversal_top", - self.config.get("risk.surprise_threshold_favorite_reversal", 0.26), - ), - ) - underdog_winner_threshold = float( - self.config.get( - "risk.surprise_threshold_underdog_reversal_top", - self.config.get("risk.surprise_threshold_underdog_reversal", 0.20), - ), - ) + top_fav = self.config.get("risk.surprise_threshold_favorite_reversal_top") + if top_fav is not None: + favorite_winner_threshold = float(top_fav) + else: + base_fav = self.config.get("risk.surprise_threshold_favorite_reversal") + favorite_winner_threshold = float(base_fav) if base_fav is not None else 0.26 + + top_ud = self.config.get("risk.surprise_threshold_underdog_reversal_top") + if top_ud is not None: + 
underdog_winner_threshold = float(top_ud) + else: + base_ud = self.config.get("risk.surprise_threshold_underdog_reversal") + underdog_winner_threshold = float(base_ud) if base_ud is not None else 0.20 else: - favorite_winner_threshold = float( - self.config.get("risk.surprise_threshold_favorite_reversal_non_top", 0.30), - ) - underdog_winner_threshold = float( - self.config.get("risk.surprise_threshold_underdog_reversal_non_top", 0.24), - ) - gap_medium = float(self.config.get("risk.htft_reversal_gap_medium", 0.50)) - gap_strong = float(self.config.get("risk.htft_reversal_gap_strong", 1.00)) + nt_fav = self.config.get("risk.surprise_threshold_favorite_reversal_non_top") + favorite_winner_threshold = float(nt_fav) if nt_fav is not None else 0.30 + nt_ud = self.config.get("risk.surprise_threshold_underdog_reversal_non_top") + underdog_winner_threshold = float(nt_ud) if nt_ud is not None else 0.24 + + gm = self.config.get("risk.htft_reversal_gap_medium") + gap_medium = float(gm) if gm is not None else 0.50 + + gs = self.config.get("risk.htft_reversal_gap_strong") + gap_strong = float(gs) if gs is not None else 1.00 if favorite_side in ("H", "A"): threshold = ( @@ -117,7 +122,7 @@ class RiskAssessor(BaseCalculator): return base_threshold - def calculate(self, ctx: CalculationContext, ms_result=None) -> RiskAnalysis: + def calculate(self, ctx: CalculationContext, ms_result: Any = None) -> RiskAnalysis: # type: ignore[override] """ Wrapper for assess_risk to match BaseCalculator interface but with extra arg. 
""" @@ -173,9 +178,15 @@ class RiskAssessor(BaseCalculator): threshold = self._dynamic_reversal_threshold(ctx, top_label) if getattr(ctx, "is_top_league", False): - min_gap = float(self.config.get("risk.surprise_min_top_gap_top", self.config.get("risk.surprise_min_top_gap", 0.02))) + top_gap_val = self.config.get("risk.surprise_min_top_gap_top") + if top_gap_val is not None: + min_gap = float(top_gap_val) + else: + base_gap_val = self.config.get("risk.surprise_min_top_gap") + min_gap = float(base_gap_val) if base_gap_val is not None else 0.02 else: - min_gap = float(self.config.get("risk.surprise_min_top_gap_non_top", 0.03)) + non_top_gap_val = self.config.get("risk.surprise_min_top_gap_non_top") + min_gap = float(non_top_gap_val) if non_top_gap_val is not None else 0.03 # Trigger surprise only when reversal class is: # - top HT/FT outcome diff --git a/ai-engine/core/calculators/score_calculator.py b/ai-engine/core/calculators/score_calculator.py index e2b089b..43ac4f5 100755 --- a/ai-engine/core/calculators/score_calculator.py +++ b/ai-engine/core/calculators/score_calculator.py @@ -3,7 +3,7 @@ import pickle import pandas as pd import xgboost as xgb from dataclasses import dataclass -from typing import List, Dict, Tuple +from typing import List, Dict, Tuple, Optional import math from .base_calculator import BaseCalculator, CalculationContext from .confidence import calc_confidence_3way, calc_confidence_dc @@ -16,7 +16,7 @@ class ScorePrediction: ft_scores_top5: List[Dict] # Reconciled MS/DC predictions (can be updated here) - reconciled_ms: MatchResultPrediction = None + reconciled_ms: Optional[MatchResultPrediction] = None class ScoreCalculator(BaseCalculator): @@ -57,7 +57,8 @@ class ScoreCalculator(BaseCalculator): return 1.0 if k == 0 else 0.0 return (lam ** k) * math.exp(-lam) / math.factorial(k) - def calculate(self, ctx: CalculationContext, ms_result: MatchResultPrediction) -> ScorePrediction: + def calculate(self, ctx: CalculationContext, ms_result: 
MatchResultPrediction) -> ScorePrediction: # type: ignore[override] + predicted_ht = None # Default Lambdas (fallback) lambda_home = max(0.5, ctx.home_xg) lambda_away = max(0.5, ctx.away_xg) @@ -199,7 +200,7 @@ class ScoreCalculator(BaseCalculator): predicted_ft = top_overall_score # If we didn't calculate HT via ML (exception case), do it now - if 'predicted_ht' not in locals(): + if predicted_ht is None: ft_to_ht = self.config.get("half_time.ft_to_ht_ratio", 0.42) ht_h = round(lambda_home * ft_to_ht) ht_a = round(lambda_away * ft_to_ht) diff --git a/ai-engine/core/engines/odds_predictor.py b/ai-engine/core/engines/odds_predictor.py index 7ce1231..f27ae23 100755 --- a/ai-engine/core/engines/odds_predictor.py +++ b/ai-engine/core/engines/odds_predictor.py @@ -42,7 +42,7 @@ class OddsPrediction: third_likely_score: str = "2-1" # Value bet opportunities - value_bets: list = None + value_bets: Optional[list] = None confidence: float = 0.0 @@ -84,7 +84,7 @@ class OddsPredictorEngine: try: self.value_calc = get_value_calculator() except Exception: - self.value_calc = None + self.value_calc = None # type: ignore[assignment] self.default_ms_h = 2.65 self.default_ms_d = 3.20 self.default_ms_a = 2.65 diff --git a/ai-engine/core/engines/player_predictor.py b/ai-engine/core/engines/player_predictor.py index fc45ba4..ba6c154 100755 --- a/ai-engine/core/engines/player_predictor.py +++ b/ai-engine/core/engines/player_predictor.py @@ -72,9 +72,9 @@ class PlayerPredictorEngine: match_id: str, home_team_id: str, away_team_id: str, - home_lineup: List[str] = None, - away_lineup: List[str] = None, - sidelined_data: Dict = None) -> PlayerPrediction: + home_lineup: Optional[List[str]] = None, + away_lineup: Optional[List[str]] = None, + sidelined_data: Optional[Dict] = None) -> PlayerPrediction: """ Generate player-based prediction. 
@@ -134,10 +134,10 @@ class PlayerPredictorEngine: lineup_available = False # Extract features - home_goals = features.get("home_goals_last_5", 0) - away_goals = features.get("away_goals_last_5", 0) - home_key = features.get("home_key_players", 0) - away_key = features.get("away_key_players", 0) + home_goals = int(features.get("home_goals_last_5", 0)) + away_goals = int(features.get("away_goals_last_5", 0)) + home_key = int(features.get("home_key_players", 0)) + away_key = int(features.get("away_key_players", 0)) home_assists = features.get("home_assists_last_5", 0) away_assists = features.get("away_assists_last_5", 0) home_starting = features.get("home_starting_11", 11) @@ -171,8 +171,8 @@ class PlayerPredictorEngine: # Priority: sidelined data (position-weighted) > lineup count (basic) if sidelined_data: home_impact, away_impact = self.sidelined_analyzer.analyze_match(sidelined_data) - home_missing = home_impact.impact_score - away_missing = away_impact.impact_score + home_missing = min(1.0, max(0.0, home_impact.impact_score)) + away_missing = min(1.0, max(0.0, away_impact.impact_score)) sidelined_available = True else: # Fallback: basic lineup count method @@ -241,7 +241,7 @@ if __name__ == "__main__": print("=" * 50) pred = engine.predict( - match_id=None, + match_id="test_match", home_team_id="test_home", away_team_id="test_away" ) diff --git a/ai-engine/core/engines/referee_predictor.py b/ai-engine/core/engines/referee_predictor.py index de25656..7dc62eb 100755 --- a/ai-engine/core/engines/referee_predictor.py +++ b/ai-engine/core/engines/referee_predictor.py @@ -78,9 +78,9 @@ class RefereePredictorEngine: print("✅ RefereePredictorEngine initialized") def predict(self, - match_id: str = None, - referee_name: str = None, - league_id: str = None) -> RefereePrediction: + match_id: Optional[str] = None, + referee_name: Optional[str] = None, + league_id: Optional[str] = None) -> RefereePrediction: """ Generate referee-based prediction. 
@@ -95,21 +95,21 @@ class RefereePredictorEngine: # Get referee features if match_id: - features = self.referee_engine.get_features(match_id, league_id=league_id) + features = self.referee_engine.get_features(match_id, league_id=league_id or "") # Live flows may already have referee_name while match_officials table is sparse. # Prefer the richer profile if direct-name lookup has more history. if referee_name: - name_features = self.referee_engine.get_features_by_name(referee_name, league_id=league_id) + name_features = self.referee_engine.get_features_by_name(referee_name, league_id=league_id or "") if (name_features.get("referee_matches", 0) or 0) > (features.get("referee_matches", 0) or 0): features = name_features elif referee_name: - features = self.referee_engine.get_features_by_name(referee_name, league_id=league_id) + features = self.referee_engine.get_features_by_name(referee_name, league_id=league_id or "") else: # Return default return RefereePrediction(confidence=10.0) - ref_name = features.get("referee_name", "Unknown") - matches = features.get("referee_matches", 0) + ref_name = str(features.get("referee_name", "Unknown")) + matches = int(features.get("referee_matches", 0)) if matches < 5: # Not enough data diff --git a/ai-engine/models/calibration.py b/ai-engine/models/calibration.py index cc5ff15..000e656 100644 --- a/ai-engine/models/calibration.py +++ b/ai-engine/models/calibration.py @@ -91,22 +91,26 @@ class Calibrator: def __init__(self): self.calibrators: Dict[str, IsotonicRegression] = {} self.metrics: Dict[str, CalibrationMetrics] = {} + # Less aggressive shrinkage — only meaningful overconfident bands are pulled. + # Default raised from ~0.85-0.90 to 0.95+ since the orchestrator and config + # already apply market-level multipliers; double-shrinkage was the root cause + # of 24-35pt avg calibrated-vs-raw drops in production traces. 
self.heuristic_fallback: Dict[str, float] = { - "ms": 0.90, - "ms_home": 0.90, - "ms_home_heavy_fav": 0.95, - "ms_home_fav": 0.90, - "ms_home_balanced": 0.85, - "ms_home_underdog": 0.80, - "ms_draw": 0.90, - "ms_away": 0.90, - "ou15": 0.90, - "ou25": 0.90, - "ou35": 0.90, - "btts": 0.90, - "ht_ft": 0.85, - "dc": 0.93, - "ht": 0.85, + "ms": 0.96, + "ms_home": 0.96, + "ms_home_heavy_fav": 0.98, + "ms_home_fav": 0.96, + "ms_home_balanced": 0.94, + "ms_home_underdog": 0.92, + "ms_draw": 0.94, + "ms_away": 0.96, + "ou15": 0.96, + "ou25": 0.96, + "ou35": 0.94, + "btts": 0.96, + "ht_ft": 0.92, + "dc": 0.97, + "ht": 0.92, } self._load_calibrators() @@ -139,21 +143,32 @@ class Calibrator: except Exception as e: print(f"[Calibrator] Warning: Failed to load metrics for {market}: {e}") + # Below this sample count, blend isotonic with raw_prob to dampen overfit jumps. + # Above this count, trust isotonic fully. + TRUSTED_SAMPLE_FLOOR = 30 + TRUSTED_SAMPLE_CEILING = 200 + # Hard cap on how far calibration can move probability in either direction. + MAX_DELTA = 0.20 + def calibrate(self, market_type: str, raw_prob: float, odds_val: Optional[float] = None) -> float: """ - Calibrate a raw probability using Isotonic Regression. - + Calibrate a raw probability using Isotonic Regression with safeguards. + Args: market_type (str): 'ms_home', 'ou25', 'btts', 'ht_ft', etc. raw_prob (float): The raw probability from XGBoost (0.0 - 1.0) odds_val (float, optional): The pre-match odds, used for context-aware bucket mapping - + Returns: float: Calibrated probability (0.0 - 1.0) + + Safeguards: + * Low-sample trained models are blended with raw_prob to dampen overfit. + * MAX_DELTA caps the per-call adjustment (prevents 40pp swings). 
""" # Normalize market type market_key = market_type.lower().replace("-", "_") - + # Route to bucket if ms_home and odds provided if market_key == "ms_home" and odds_val is not None and odds_val > 1.0: if odds_val <= 1.40: @@ -164,20 +179,42 @@ class Calibrator: bucket_key = "ms_home_balanced" else: bucket_key = "ms_home_underdog" - + if bucket_key in self.calibrators: market_key = bucket_key - - # If we have a trained Isotonic Regression model, use it + + # If we have a trained Isotonic Regression model, use it (with safeguards) if market_key in self.calibrators: try: - calibrated = self.calibrators[market_key].predict([raw_prob])[0] - # Ensure output is valid probability - return float(np.clip(calibrated, 0.01, 0.99)) + iso_pred = float(self.calibrators[market_key].predict([raw_prob])[0]) + + # Sample-count weighted blend with raw probability. + # Sparse models barely move probability; mature models dominate. + metrics = self.metrics.get(market_key) + n_samples = metrics.sample_count if metrics else 0 + if n_samples >= self.TRUSTED_SAMPLE_CEILING: + iso_weight = 1.0 + elif n_samples <= self.TRUSTED_SAMPLE_FLOOR: + # Very sparse: at least 30% trust to surface the signal + iso_weight = max(0.30, n_samples / self.TRUSTED_SAMPLE_CEILING) + else: + # Linearly ramp 30% → 100% between floor and ceiling + span = self.TRUSTED_SAMPLE_CEILING - self.TRUSTED_SAMPLE_FLOOR + iso_weight = 0.30 + 0.70 * (n_samples - self.TRUSTED_SAMPLE_FLOOR) / span + blended = iso_weight * iso_pred + (1.0 - iso_weight) * raw_prob + + # Cap delta to avoid huge swings on noisy calibrators + delta = blended - raw_prob + if delta > self.MAX_DELTA: + blended = raw_prob + self.MAX_DELTA + elif delta < -self.MAX_DELTA: + blended = raw_prob - self.MAX_DELTA + + return float(np.clip(blended, 0.01, 0.99)) except Exception as e: print(f"[Calibrator] Warning: Isotonic failed for {market_key}: {e}") # Fall through to heuristic - + # Fallback to heuristic calibration return 
self._heuristic_calibrate(market_key, raw_prob) diff --git a/ai-engine/models/v20_ensemble.py b/ai-engine/models/v20_ensemble.py index b890fc4..8712a74 100644 --- a/ai-engine/models/v20_ensemble.py +++ b/ai-engine/models/v20_ensemble.py @@ -139,7 +139,7 @@ class FullMatchPrediction: ht_confidence: float = 0.0 # === SKOR TAHMİNLERİ === - score: ScorePrediction = None + score: Optional[ScorePrediction] = None predicted_ft_score: str = "1-1" predicted_ht_score: str = "0-0" ft_scores_top5: List[Dict] = field(default_factory=list) @@ -161,7 +161,13 @@ class FullMatchPrediction: upset_score: int = 0 # 0-100 arası sürpriz skoru upset_level: str = "LOW" # LOW, MEDIUM, HIGH, EXTREME upset_reasons: List[str] = field(default_factory=list) - + + # === SÜRPRİZ PROFİLİ === + surprise_score: float = 0.0 # 0-100 overall surprise risk score + surprise_comment: str = "" # Human-readable surprise commentary + surprise_reasons: List[str] = field(default_factory=list) # Flagged risk reasons + surprise_breakdown: List[Dict[str, Any]] = field(default_factory=list) # Per-factor {code, points, label} + # === ENGINE KATKILARI === team_confidence: float = 0.0 player_confidence: float = 0.0 @@ -412,18 +418,19 @@ class V20EnsemblePredictor: # Calculators print("⚙️ Loading market calculators...") - self.match_result_calc = MatchResultCalculator(self.config) - self.over_under_calc = OverUnderCalculator(self.config) - self.half_time_calc = HalfTimeCalculator(self.config) - self.score_calc = ScoreCalculator(self.config) + cfg: Any = self.config + self.match_result_calc = MatchResultCalculator(cfg) + self.over_under_calc = OverUnderCalculator(cfg) + self.half_time_calc = HalfTimeCalculator(cfg) + self.score_calc = ScoreCalculator(cfg) print(" ✅ Score Calculator (XGBoost FT+HT) loaded") - self.other_markets_calc = OtherMarketsCalculator(self.config) - self.risk_assessor = RiskAssessor(self.config) - self.bet_recommender = BetRecommender(self.config) + self.other_markets_calc = 
OtherMarketsCalculator(cfg) + self.risk_assessor = RiskAssessor(cfg) + self.bet_recommender = BetRecommender(cfg) # Expert Recommender (New Logic) from core.calculators.expert_recommender import ExpertRecommender - self.expert_recommender = ExpertRecommender(self.config) + self.expert_recommender = ExpertRecommender(cfg) # XGBoost Integration print("🤖 Loading XGBoost models...") @@ -551,7 +558,7 @@ class V20EnsemblePredictor: features = features.copy() features[col] = 0.0 - return features[expected] + return features[expected] # type: ignore[return-value] def _favorite_profile_from_odds(self, odds_data: Dict[str, float]) -> Tuple[str, float]: """ @@ -838,10 +845,10 @@ class V20EnsemblePredictor: home_team_name: str, away_team_name: str, match_date_ms: int, - odds_data: Dict[str, float] = None, - home_lineup: List[str] = None, - away_lineup: List[str] = None, - referee_name: str = None, + odds_data: Optional[Dict[str, float]] = None, + home_lineup: Optional[List[str]] = None, + away_lineup: Optional[List[str]] = None, + referee_name: Optional[str] = None, home_goals_avg: float = 1.5, home_conceded_avg: float = 1.2, away_goals_avg: float = 1.2, @@ -849,9 +856,9 @@ class V20EnsemblePredictor: home_position: int = 10, away_position: int = 10, league_name: str = "", - league_id: str = None, + league_id: Optional[str] = None, sport: str = "football", - sidelined_data: Dict = None) -> FullMatchPrediction: + sidelined_data: Optional[Dict] = None) -> FullMatchPrediction: """ Generate complete V20 ensemble prediction. 
@@ -895,8 +902,8 @@ class V20EnsemblePredictor: referee_pred = self.referee_engine.predict( match_id=match_id, - referee_name=referee_name, - league_id=league_id + referee_name=referee_name or "", + league_id=league_id or "" ) upset_factors = self.upset_engine.calculate_upset_potential( @@ -935,9 +942,9 @@ class V20EnsemblePredictor: away_position=away_position, match_date_ms=match_date_ms, odds_data=odds_data, - referee_name=referee_name, - home_form_score=team_pred.home_form_score if hasattr(team_pred, 'home_form_score') else 50.0, - away_form_score=team_pred.away_form_score if hasattr(team_pred, 'away_form_score') else 50.0, + referee_name=referee_name or "", + home_form_score=getattr(team_pred, 'home_form_score', 50.0), + away_form_score=getattr(team_pred, 'away_form_score', 50.0), favorite_side=favorite_side, favorite_odds=favorite_odds ) @@ -1105,7 +1112,7 @@ class V20EnsemblePredictor: best_bet = _map_dto(rec_result.best_bet) alt_bet = _map_dto(rec_result.alternative_bet) - recommended = [_map_dto(r) for r in rec_result.recommended_bets] + recommended = [m for m in (_map_dto(r) for r in rec_result.recommended_bets) if m is not None] # Analysis Details analysis_details = { @@ -1187,13 +1194,13 @@ class V20EnsemblePredictor: # Others total_corners_pred=other_result.total_corners_pred, - corner_pick=other_result.corner_pick, + corner_pick=other_result.corner_pick or "", total_cards_pred=other_result.total_cards_pred, - card_pick=other_result.card_pick, + card_pick=other_result.card_pick or "", cards_over_prob=other_result.cards_over_prob, cards_under_prob=other_result.cards_under_prob, cards_confidence=other_result.cards_confidence, - handicap_pick=other_result.handicap_pick, + handicap_pick=other_result.handicap_pick or "", handicap_home_prob=other_result.handicap_home_prob, handicap_draw_prob=other_result.handicap_draw_prob, handicap_away_prob=other_result.handicap_away_prob, diff --git a/ai-engine/models/v25_ensemble.py b/ai-engine/models/v25_ensemble.py 
index 57bf161..7a9af5d 100644 --- a/ai-engine/models/v25_ensemble.py +++ b/ai-engine/models/v25_ensemble.py @@ -228,15 +228,13 @@ class V25Predictor: print(f"[V25] Using fallback feature columns ({len(V25Predictor._FALLBACK_FEATURE_COLS)} features)") return V25Predictor._FALLBACK_FEATURE_COLS - FEATURE_COLS = _load_feature_cols.__func__() - # Model weights for ensemble DEFAULT_WEIGHTS = { 'xgb': 0.50, 'lgb': 0.50, } - - def __init__(self, models_dir: str = None): + + def __init__(self, models_dir: Optional[str] = None): """ Initialize V25 Predictor. @@ -246,6 +244,7 @@ class V25Predictor: self.models_dir = models_dir or MODELS_DIR self.models = {} # market -> {'xgb': model, 'lgb': model} self._loaded = False + self.FEATURE_COLS = self._load_feature_cols() # All trained market models available in V25 ALL_MARKETS = [ @@ -412,7 +411,7 @@ class V25Predictor: return float(avg_prob), float(1 - avg_prob) - def predict_market(self, market: str, features: Dict[str, float]) -> np.ndarray: + def predict_market(self, market: str, features: Dict[str, float]) -> Optional[np.ndarray]: """ Generic prediction for any loaded market. 
@@ -510,15 +509,15 @@ class V25Predictor: # Determine picks ms_probs = {'1': home_prob, 'X': draw_prob, '2': away_prob} - ms_pick = max(ms_probs, key=ms_probs.get) + ms_pick = max(ms_probs, key=ms_probs.__getitem__) ms_confidence = ms_probs[ms_pick] * 100 ou25_probs = {'Over': over_prob, 'Under': under_prob} - ou25_pick = max(ou25_probs, key=ou25_probs.get) + ou25_pick = max(ou25_probs, key=ou25_probs.__getitem__) ou25_confidence = ou25_probs[ou25_pick] * 100 btts_probs = {'Yes': btts_yes_prob, 'No': btts_no_prob} - btts_pick = max(btts_probs, key=btts_probs.get) + btts_pick = max(btts_probs, key=btts_probs.__getitem__) btts_confidence = btts_probs[btts_pick] * 100 # Create prediction diff --git a/ai-engine/pyright_errors.json b/ai-engine/pyright_errors.json new file mode 100644 index 0000000..8bb8ca6 Binary files /dev/null and b/ai-engine/pyright_errors.json differ diff --git a/ai-engine/scripts/train_calibration.py b/ai-engine/scripts/train_calibration.py index bea05e8..1993e90 100644 --- a/ai-engine/scripts/train_calibration.py +++ b/ai-engine/scripts/train_calibration.py @@ -1,63 +1,48 @@ """ -Calibration Training Script -=========================== -Trains Isotonic Regression calibration models for all betting markets. +Calibration Training Script (REWRITTEN) +======================================= +Trains Isotonic Regression calibration models for football markets +using REAL model predictions + actual match outcomes. -This script: -1. Fetches historical match data with predictions and actual results -2. Trains Isotonic Regression models for each market -3. Calculates calibration metrics (Brier Score, ECE) -4. 
Saves models to ai-engine/models/calibration/ +Data sources (combined): + - `predictions` table: Full bet_summary (many markets per match), joined to `matches` for actual results + - `prediction_runs` table: main_pick + value_pick predictions with resolved outcomes + +Per market, fits IsotonicRegression(raw_model_prob → actual_hit) so that +calibrated_prob mirrors empirical hit rate. Usage: - # Train on last 90 days of data - python3 ai-engine/scripts/train_calibration.py - - # Train on specific date range - python3 ai-engine/scripts/train_calibration.py --start 2026-01-01 --end 2026-02-15 - - # Train only specific markets - python3 ai-engine/scripts/train_calibration.py --markets ou25 btts ms_home + python ai-engine/scripts/train_calibration.py + python ai-engine/scripts/train_calibration.py --min-samples 30 + python ai-engine/scripts/train_calibration.py --markets ms_home ou25 btts + +Notes: + * Multi-source data extraction tolerates schema drift in payload JSON. + * If a market has fewer than --min-samples points, it is skipped + (orchestrator will fall back to the multiplier from market_thresholds.json). 
""" +import argparse import os import sys -import json -import argparse -import psycopg2 -import pandas as pd -import numpy as np -from datetime import datetime, timedelta -from dotenv import load_dotenv -from typing import Dict, List, Tuple, Any, Optional +from typing import Any, Dict, List, Optional + +import pandas as pd +import psycopg2 +from dotenv import load_dotenv -# Setup path for ai-engine imports AI_ENGINE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, AI_ENGINE_DIR) -from models.calibration import get_calibrator, SUPPORTED_MARKETS +from models.calibration import get_calibrator # noqa: E402 load_dotenv() # ============================================================================= -# CONFIG -# ============================================================================= -TOP_LEAGUES_PATH = os.path.join( - os.path.dirname(os.path.dirname(AI_ENGINE_DIR)), - "top_leagues.json" -) - -# Default: last 90 days -DEFAULT_START_DATE = (datetime.utcnow() - timedelta(days=90)).strftime("%Y-%m-%d") -DEFAULT_END_DATE = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d") - - -# ============================================================================= -# DB CONNECTION +# DB # ============================================================================= def get_conn(): - """Get PostgreSQL connection.""" db_url = os.getenv("DATABASE_URL") if not db_url: raise ValueError("DATABASE_URL not set") @@ -66,354 +51,370 @@ def get_conn(): return psycopg2.connect(db_url) -def load_top_league_ids() -> List[str]: - """Load top league IDs from JSON file.""" - if not os.path.exists(TOP_LEAGUES_PATH): - print(f"[Warning] top_leagues.json not found at {TOP_LEAGUES_PATH}") - return [] - - with open(TOP_LEAGUES_PATH, "r") as f: - data = json.load(f) - - # Handle both list and dict formats - if isinstance(data, dict): - return data.get("football", []) - return data +# 
# =============================================================================
# OUTCOME RESOLUTION
# =============================================================================
# Pick tokens that denote a draw in 1X2-style markets.
_DRAW_ALIASES = {"x", "0"}
# Full-time and half-time goal lines keyed by market code.
_OU_LINES = {"OU15": 1.5, "OU25": 2.5, "OU35": 3.5}
_HT_OU_LINES = {"HT_OU05": 0.5, "HT_OU15": 1.5}


def _normalize_pick(pick: Any) -> str:
    """Return *pick* as a stripped, case-folded string ('' for None/falsy)."""
    return str(pick or "").strip().casefold()


def _is_over(pick: str) -> bool:
    """Return True when *pick* names the 'over' side (English or Turkish)."""
    norm = _normalize_pick(pick)
    return "over" in norm or "üst" in norm or "ust" in norm


def _is_under(pick: str) -> bool:
    """Return True when *pick* names the 'under' side (English or Turkish)."""
    norm = _normalize_pick(pick)
    return "under" in norm or "alt" in norm


def _is_yes(pick: str) -> bool:
    """Return True when *pick* names the 'yes' side (English or Turkish)."""
    norm = _normalize_pick(pick)
    return "yes" in norm or "var" in norm


def _outcome_1x2(home: int, away: int) -> str:
    """Return '1', 'x' or '2' for a home win, draw or away win."""
    if home > away:
        return "1"
    if away > home:
        return "2"
    return "x"


def _resolve_1x2(pick: str, home: int, away: int, draw_aliases) -> Optional[int]:
    """Score a normalized 1X2 pick against a score line; None if unrecognised."""
    if pick == "1":
        return int(home > away)
    if pick in draw_aliases:
        return int(home == away)
    if pick == "2":
        return int(away > home)
    return None


def _resolve_over_under(pick: str, goals: int, line: float) -> Optional[int]:
    """Score an over/under pick against a goal line; None if unrecognised."""
    if _is_over(pick):
        return int(goals > line)
    if _is_under(pick):
        return int(goals < line)
    return None


def resolve_actual(
    market: str,
    pick: str,
    score_home: Optional[int],
    score_away: Optional[int],
    ht_home: Optional[int],
    ht_away: Optional[int],
) -> Optional[int]:
    """Return 1 if the (market, pick) hit, 0 if it missed, None if undetermined.

    Args:
        market: Market code (case-insensitive): MS, DC, OU15/OU25/OU35, BTTS,
            HT, HT_OU05/HT_OU15, OE or HTFT.
        pick: Selection label; matched case-insensitively.
        score_home: Full-time home goals.
        score_away: Full-time away goals.
        ht_home: Half-time home goals, or None when unknown.
        ht_away: Half-time away goals, or None when unknown.

    Returns:
        1 or 0 when the outcome can be decided, otherwise None (missing
        scores, unknown market, or a pick that cannot be interpreted).
    """
    if score_home is None or score_away is None:
        return None
    market = (market or "").upper()
    p = _normalize_pick(pick)
    total = score_home + score_away
    # Half-time markets need BOTH half-time scores; a single missing value
    # must not be silently counted as zero goals.
    have_ht = ht_home is not None and ht_away is not None

    if market == "MS":
        return _resolve_1x2(p, score_home, score_away, {"x", "0", "x/0"})

    if market == "DC":
        norm = p.replace("-", "").upper()
        if norm == "1X":
            return int(score_home >= score_away)
        if norm == "X2":
            return int(score_away >= score_home)
        if norm == "12":
            return int(score_home != score_away)
        return None

    if market in _OU_LINES:
        return _resolve_over_under(p, total, _OU_LINES[market])

    if market == "BTTS":
        both_scored = score_home > 0 and score_away > 0
        if _is_yes(p):
            return int(both_scored)
        if "no" in p or "yok" in p:
            return int(not both_scored)
        return None

    if market == "HT":
        if not have_ht:
            return None
        return _resolve_1x2(p, ht_home, ht_away, _DRAW_ALIASES)

    if market in _HT_OU_LINES:
        if not have_ht:
            return None
        return _resolve_over_under(p, ht_home + ht_away, _HT_OU_LINES[market])

    if market == "OE":
        if "odd" in p or "tek" in p:
            return int(total % 2 == 1)
        if "even" in p or "çift" in p or "cift" in p:
            return int(total % 2 == 0)
        return None

    if market == "HTFT":
        if not have_ht:
            return None
        # Exactly "<ht>/<ft>" is required; anything else is undetermined
        # rather than an unpacking error that would abort the whole run.
        parts = [part.strip() for part in p.split("/")]
        if len(parts) != 2:
            return None
        # Accept the same draw aliases as the MS/HT markets.
        ht_p, ft_p = ("x" if part in _DRAW_ALIASES else part for part in parts)
        valid = {"1", "x", "2"}
        if ht_p not in valid or ft_p not in valid:
            return None
        ht_actual = _outcome_1x2(ht_home, ht_away)
        ft_actual = _outcome_1x2(score_home, score_away)
        return int(ht_p == ht_actual and ft_p == ft_actual)

    return None


# =============================================================================
# CALIBRATOR KEY (must mirror orchestrator._calibrator_key)
# =============================================================================
def calibrator_key(market: str, pick: str) -> Optional[str]:
    """Map a (market, pick) pair to the calibrator model key, or None.

    Only the sides that have a dedicated calibrator get a key: each MS/HT
    outcome, the 'over' side of OU15/OU25/OU35, the 'yes' side of BTTS, and
    any pick of the DC and HTFT markets.
    """
    m = (market or "").upper()
    p = _normalize_pick(pick)
    if m == "MS":
        if p == "1":
            return "ms_home"
        if p in {"x", "0"}:
            return "ms_draw"
        if p == "2":
            return "ms_away"
        return None
    if m == "DC":
        return "dc"
    if m == "OU15" and _is_over(p):
        return "ou15"
    if m == "OU25" and _is_over(p):
        return "ou25"
    if m == "OU35" and _is_over(p):
        return "ou35"
    if m == "BTTS" and _is_yes(p):
        return "btts"
    if m == "HT":
        if p == "1":
            return "ht_home"
        if p in {"x", "0"}:
            return "ht_draw"
        if p == "2":
            return "ht_away"
        return None
    if m == "HTFT":
        return "ht_ft"
    return None


# =============================================================================
# DATA EXTRACTION
# ============================================================================= -def fetch_training_data( - cur, - start_date: str, - end_date: str, - league_ids: List[str] = None, -) -> pd.DataFrame: +def fetch_predictions_with_outcomes(cur) -> List[Dict[str, Any]]: """ - Fetch match data with odds and results for calibration training. - - Returns DataFrame with columns: - - match_id - - home_team, away_team - - ms_h, ms_d, ms_a (odds) - - score_home, score_away (actual result) - - ht_score_home, ht_score_away - - ou25_actual, btts_actual, etc. + Source 1: `predictions` table joined with `matches` (FT only). + Each row of bet_summary becomes a training sample. """ - start_ms = int(datetime.strptime(start_date, "%Y-%m-%d").timestamp() * 1000) - end_ms = int(datetime.strptime(end_date, "%Y-%m-%d").timestamp() * 1000) + 86400000 # +1 day - - # Build league filter - league_filter = "" - params = [start_ms, end_ms] - if league_ids: - placeholders = ",".join(["%s"] * len(league_ids)) - league_filter = f"AND m.league_id IN ({placeholders})" - params.extend(league_ids) - - query = f""" - SELECT - m.id as match_id, - m.home_team_id, - m.away_team_id, - m.score_home, - m.score_away, - m.ht_score_home, - m.ht_score_away, - m.mst_utc, - -- Odds from odd_categories/selections - MAX(CASE WHEN oc.name = 'Maç Sonucu' AND os.name = '1' THEN os.odd_value END) as ms_h, - MAX(CASE WHEN oc.name = 'Maç Sonucu' AND os.name = 'X' THEN os.odd_value END) as ms_d, - MAX(CASE WHEN oc.name = 'Maç Sonucu' AND os.name = '2' THEN os.odd_value END) as ms_a, - MAX(CASE WHEN oc.name = '2,5 Alt/Üst' AND os.name = 'Üst' THEN os.odd_value END) as ou25_over, - MAX(CASE WHEN oc.name = '2,5 Alt/Üst' AND os.name = 'Alt' THEN os.odd_value END) as ou25_under, - MAX(CASE WHEN oc.name = '1,5 Alt/Üst' AND os.name = 'Üst' THEN os.odd_value END) as ou15_over, - MAX(CASE WHEN oc.name = '3,5 Alt/Üst' AND os.name = 'Üst' THEN os.odd_value END) as ou35_over, - MAX(CASE WHEN oc.name = 'Karşılıklı Gol' AND os.name = 
'Var' THEN os.odd_value END) as btts_yes, - MAX(CASE WHEN oc.name = 'Karşılıklı Gol' AND os.name = 'Yok' THEN os.odd_value END) as btts_no - FROM matches m - LEFT JOIN odd_categories oc ON oc.match_id = m.id - LEFT JOIN odd_selections os ON os.odd_category_db_id = oc.db_id - WHERE m.mst_utc >= %s - AND m.mst_utc < %s - AND m.status = 'FT' - AND m.score_home IS NOT NULL - AND m.score_away IS NOT NULL - {league_filter} - GROUP BY m.id, m.home_team_id, m.away_team_id, m.score_home, m.score_away, - m.ht_score_home, m.ht_score_away, m.mst_utc - ORDER BY m.mst_utc DESC - """ - - cur.execute(query, params) + cur.execute(""" + SELECT + p.match_id, + p.prediction_json, + m.score_home, + m.score_away, + m.ht_score_home, + m.ht_score_away + FROM predictions p + JOIN matches m ON m.id = p.match_id + WHERE m.sport = 'football' + AND m.status = 'FT' + AND m.score_home IS NOT NULL + AND m.score_away IS NOT NULL + """) rows = cur.fetchall() - columns = [desc[0] for desc in cur.description] - - df = pd.DataFrame(rows, columns=columns) - print(f"[Data] Fetched {len(df)} matches from {start_date} to {end_date}") - - return df + samples: List[Dict[str, Any]] = [] + for match_id, payload, sh, sa, ht_h, ht_a in rows: + if not isinstance(payload, dict): + continue + bet_summary = payload.get("bet_summary") + if not isinstance(bet_summary, list): + continue + for item in bet_summary: + if not isinstance(item, dict): + continue + market = str(item.get("market") or "") + pick = str(item.get("pick") or "") + raw_conf = item.get("raw_confidence") + if raw_conf is None: + continue + actual = resolve_actual(market, pick, sh, sa, ht_h, ht_a) + if actual is None: + continue + key = calibrator_key(market, pick) + if not key: + continue + samples.append({ + "source": "predictions", + "match_id": match_id, + "market": market, + "pick": pick, + "key": key, + "raw_prob": float(raw_conf) / 100.0, + "actual": int(actual), + }) + return samples -def calculate_actual_outcomes(df: pd.DataFrame) -> 
pd.DataFrame: +def fetch_prediction_runs_with_outcomes(cur) -> List[Dict[str, Any]]: """ - Calculate actual binary outcomes for each market. - - Adds columns: - - ms_home_actual: 1 if home won, 0 otherwise - - ms_draw_actual: 1 if draw, 0 otherwise - - ms_away_actual: 1 if away won, 0 otherwise - - ou25_over_actual: 1 if total goals > 2.5, 0 otherwise - - ou15_over_actual: 1 if total goals > 1.5, 0 otherwise - - ou35_over_actual: 1 if total goals > 3.5, 0 otherwise - - btts_yes_actual: 1 if both teams scored, 0 otherwise + Source 2: `prediction_runs` table with resolved settlement. + Each main_pick / value_pick becomes a training sample. """ - # Total goals - df["total_goals"] = df["score_home"] + df["score_away"] - df["ht_total_goals"] = df["ht_score_home"].fillna(0) + df["ht_score_away"].fillna(0) - - # Match result outcomes - df["ms_home_actual"] = (df["score_home"] > df["score_away"]).astype(int) - df["ms_draw_actual"] = (df["score_home"] == df["score_away"]).astype(int) - df["ms_away_actual"] = (df["score_home"] < df["score_away"]).astype(int) - - # Over/Under outcomes - df["ou25_over_actual"] = (df["total_goals"] > 2.5).astype(int) - df["ou15_over_actual"] = (df["total_goals"] > 1.5).astype(int) - df["ou35_over_actual"] = (df["total_goals"] > 3.5).astype(int) - - # BTTS outcome - df["btts_yes_actual"] = ((df["score_home"] > 0) & (df["score_away"] > 0)).astype(int) - - # Half-Time result - df["ht_home_actual"] = (df["ht_score_home"] > df["ht_score_away"]).astype(int) - df["ht_draw_actual"] = (df["ht_score_home"] == df["ht_score_away"]).astype(int) - df["ht_away_actual"] = (df["ht_score_home"] < df["ht_score_away"]).astype(int) - - return df - - -def calculate_implied_probabilities(df: pd.DataFrame) -> pd.DataFrame: - """ - Calculate implied probabilities from odds. - - Adds columns: - - ms_home_prob: implied probability from odds - - ms_draw_prob - - ms_away_prob - - ou25_over_prob - - etc. 
- """ - def safe_implied_prob(odd_str: str) -> float: - """Convert odds string to implied probability.""" - if pd.isna(odd_str) or odd_str is None: - return np.nan - try: - odd = float(odd_str) - if odd <= 1.0: - return np.nan - return 1.0 / odd - except (ValueError, TypeError): - return np.nan - - # Match result implied probabilities - df["ms_home_prob"] = df["ms_h"].apply(safe_implied_prob) - df["ms_draw_prob"] = df["ms_d"].apply(safe_implied_prob) - df["ms_away_prob"] = df["ms_a"].apply(safe_implied_prob) - - # Over/Under implied probabilities - df["ou25_over_prob"] = df["ou25_over"].apply(safe_implied_prob) - df["ou15_over_prob"] = df["ou15_over"].apply(safe_implied_prob) - df["ou35_over_prob"] = df["ou35_over"].apply(safe_implied_prob) - - # BTTS implied probabilities - df["btts_yes_prob"] = df["btts_yes"].apply(safe_implied_prob) - - # ----------------------------------------------------- - # CONTEXT-AWARE BUCKETS - # Create separate probability and actual columns for odds buckets - # ms_home odds: ms_h (note ms_h is the bookmaker odds for home win) - # ----------------------------------------------------- - # Helper to safe-cast to float - df['ms_h_num'] = pd.to_numeric(df['ms_h'], errors='coerce') - - # Bucket 1: Heavy Fav (odds <= 1.40) - b1_mask = df['ms_h_num'] <= 1.40 - df.loc[b1_mask, 'ms_home_heavy_fav_prob'] = df.loc[b1_mask, 'ms_home_prob'] - df.loc[b1_mask, 'ms_home_heavy_fav_actual'] = df.loc[b1_mask, 'ms_home_actual'] - - # Bucket 2: Fav (1.40 < odds <= 1.80) - b2_mask = (df['ms_h_num'] > 1.40) & (df['ms_h_num'] <= 1.80) - df.loc[b2_mask, 'ms_home_fav_prob'] = df.loc[b2_mask, 'ms_home_prob'] - df.loc[b2_mask, 'ms_home_fav_actual'] = df.loc[b2_mask, 'ms_home_actual'] - - # Bucket 3: Balanced (1.80 < odds <= 2.50) - b3_mask = (df['ms_h_num'] > 1.80) & (df['ms_h_num'] <= 2.50) - df.loc[b3_mask, 'ms_home_balanced_prob'] = df.loc[b3_mask, 'ms_home_prob'] - df.loc[b3_mask, 'ms_home_balanced_actual'] = df.loc[b3_mask, 'ms_home_actual'] - - # Bucket 4: 
Underdog (odds > 2.50) - b4_mask = df['ms_h_num'] > 2.50 - df.loc[b4_mask, 'ms_home_underdog_prob'] = df.loc[b4_mask, 'ms_home_prob'] - df.loc[b4_mask, 'ms_home_underdog_actual'] = df.loc[b4_mask, 'ms_home_actual'] - - return df + cur.execute(""" + SELECT + pr.match_id, + pr.payload_summary, + m.score_home, + m.score_away, + m.ht_score_home, + m.ht_score_away + FROM prediction_runs pr + JOIN matches m ON m.id = pr.match_id + WHERE pr.eventual_outcome IS NOT NULL + AND m.score_home IS NOT NULL + AND m.score_away IS NOT NULL + """) + rows = cur.fetchall() + samples: List[Dict[str, Any]] = [] + for match_id, payload, sh, sa, ht_h, ht_a in rows: + if not isinstance(payload, dict): + continue + for source_key in ("main_pick", "value_pick"): + item = payload.get(source_key) + if not isinstance(item, dict): + continue + market = str(item.get("market") or "") + pick = str(item.get("pick") or "") + # Prefer raw_confidence, fall back to calibrated_probability×100 if raw missing + raw_conf = item.get("raw_confidence") + if raw_conf is None: + cal_prob = item.get("calibrated_probability") or item.get("probability") + if cal_prob is None: + continue + raw_conf = float(cal_prob) * 100.0 + actual = resolve_actual(market, pick, sh, sa, ht_h, ht_a) + if actual is None: + continue + key = calibrator_key(market, pick) + if not key: + continue + samples.append({ + "source": f"runs.{source_key}", + "match_id": match_id, + "market": market, + "pick": pick, + "key": key, + "raw_prob": float(raw_conf) / 100.0, + "actual": int(actual), + }) + return samples # ============================================================================= -# MODEL PREDICTIONS (Optional - if you want to calibrate model outputs) +# TRAINING # ============================================================================= -def get_model_predictions( +def train_per_key( df: pd.DataFrame, - cur, -) -> pd.DataFrame: - """ - Get model predictions for each match. 
- - This is optional - if you want to calibrate model outputs rather than - raw odds-implied probabilities. - - TODO: Implement if needed. For now, we use odds-implied probabilities - as a proxy for model predictions. - """ - # For now, return odds-implied probabilities as "model predictions" - # In a full implementation, you would: - # 1. Load the V20 predictor - # 2. Run predictions for each match - # 3. Store raw model probabilities - - return df - - -# ============================================================================= -# MAIN TRAINING -# ============================================================================= -def train_calibration_models( - df: pd.DataFrame, - markets: List[str] = None, - min_samples: int = 100, + min_samples: int, + markets_filter: Optional[List[str]] = None, ) -> Dict[str, Any]: - """ - Train calibration models for specified markets. - - Args: - df: DataFrame with probabilities and actual outcomes - markets: List of markets to train (default: all supported) - min_samples: Minimum samples required per market - - Returns: - Dict with training results - """ - if markets is None: - markets = SUPPORTED_MARKETS - calibrator = get_calibrator() - - # Define market config: market -> (prob_col, actual_col) - market_config = { - "ms_home": ("ms_home_prob", "ms_home_actual"), - "ms_home_heavy_fav": ("ms_home_heavy_fav_prob", "ms_home_heavy_fav_actual"), - "ms_home_fav": ("ms_home_fav_prob", "ms_home_fav_actual"), - "ms_home_balanced": ("ms_home_balanced_prob", "ms_home_balanced_actual"), - "ms_home_underdog": ("ms_home_underdog_prob", "ms_home_underdog_actual"), - "ms_draw": ("ms_draw_prob", "ms_draw_actual"), - "ms_away": ("ms_away_prob", "ms_away_actual"), - "ou15": ("ou15_over_prob", "ou15_over_actual"), - "ou25": ("ou25_over_prob", "ou25_over_actual"), - "ou35": ("ou35_over_prob", "ou35_over_actual"), - "btts": ("btts_yes_prob", "btts_yes_actual"), - "ht_home": ("ht_home_prob", "ht_home_actual"), # Note: need to add ht probs - 
"ht_draw": ("ht_draw_prob", "ht_draw_actual"), - "ht_away": ("ht_away_prob", "ht_away_actual"), - } - - # Filter to requested markets - market_config = {k: v for k, v in market_config.items() if k in markets} - - # Train all markets - results = calibrator.train_all_markets( - df=df, - market_config=market_config, - min_samples=min_samples, - ) - + results: Dict[str, Any] = {} + keys = sorted(df["key"].unique()) + + for key in keys: + if markets_filter and key not in markets_filter: + continue + sub = df[df["key"] == key] + # Drop duplicates by (match_id, key) to avoid double-counting across sources + sub = sub.drop_duplicates(subset=["match_id", "key"], keep="first") + sub = sub.dropna(subset=["raw_prob", "actual"]) + # Clamp probabilities to (0, 1) for isotonic stability + sub = sub[(sub["raw_prob"] > 0.0) & (sub["raw_prob"] < 1.0)] + + n = len(sub) + if n < min_samples: + results[key] = { + "status": "skipped", + "samples": n, + "reason": f"need ≥{min_samples}, have {n}", + } + continue + + metrics = calibrator.train_calibration( + df=sub, + market=key, + prob_col="raw_prob", + actual_col="actual", + min_samples=min_samples, + save=True, + ) + results[key] = { + "status": "trained", + "samples": metrics.sample_count, + "brier": round(metrics.brier_score, 4), + "ece": round(metrics.calibration_error, 4), + "mean_predicted": round(metrics.mean_predicted, 4), + "mean_actual": round(metrics.mean_actual, 4), + } return results -def print_calibration_report(results: Dict[str, Any]): - """Print a formatted calibration report.""" - print("\n" + "=" * 70) +def print_report(results: Dict[str, Any], total_samples: int) -> None: + print("\n" + "=" * 78) print("CALIBRATION TRAINING REPORT") - print("=" * 70) - - print(f"\n{'Market':<15} {'Brier':<10} {'ECE':<10} {'Samples':<10} {'Status'}") - print("-" * 60) - - for market, metrics in results.items(): - status = "✓ Trained" if metrics.sample_count >= 100 else "⚠ Insufficient" - print(f"{market:<15} 
{metrics.brier_score:<10.4f} {metrics.calibration_error:<10.4f} " - f"{metrics.sample_count:<10} {status}") - - print("\n" + "=" * 70) - print("Interpretation:") - print(" - Brier Score: Lower is better (0 = perfect, 0.25 = random)") - print(" - ECE (Expected Calibration Error): Lower is better (0 = perfect)") - print(" - Models saved to: ai-engine/models/calibration/") - print("=" * 70) + print("=" * 78) + print(f"Total samples across all markets: {total_samples}") + print(f"\n{'market':<14} {'status':<10} {'n':<6} {'brier':<9} {'ece':<8} {'pred_avg':<9} {'actual_avg':<10}") + print("-" * 78) + for key, info in sorted(results.items()): + if info["status"] == "trained": + print( + f"{key:<14} {'✓ ok':<10} {info['samples']:<6} " + f"{info['brier']:<9.4f} {info['ece']:<8.4f} " + f"{info['mean_predicted']:<8.3f} {info['mean_actual']:<8.3f}" + ) + else: + print(f"{key:<14} {'⊘ skip':<10} {info['samples']:<6} -- {info.get('reason', '')}") + print("=" * 78) + print("Trained models saved to: ai-engine/models/calibration/") + print("Skipped markets fall back to the multiplier in market_thresholds.json.") + print("=" * 78) # ============================================================================= # CLI # ============================================================================= def main(): - parser = argparse.ArgumentParser(description="Train calibration models") - parser.add_argument("--start", type=str, default=DEFAULT_START_DATE, - help="Start date (YYYY-MM-DD)") - parser.add_argument("--end", type=str, default=DEFAULT_END_DATE, - help="End date (YYYY-MM-DD)") + parser = argparse.ArgumentParser(description="Train isotonic calibration on real data") + parser.add_argument("--min-samples", type=int, default=30, + help="Minimum samples required per market (default: 30)") parser.add_argument("--markets", nargs="+", default=None, - help="Markets to train (default: all)") - parser.add_argument("--min-samples", type=int, default=100, - help="Minimum samples per market") 
- parser.add_argument("--top-leagues-only", action="store_true", - help="Only use top leagues data") - + help="Limit to specific calibrator keys (e.g., ms_home ou25)") args = parser.parse_args() - - print(f"\n[Calibration Training] {args.start} to {args.end}") - - # Load top leagues if requested - league_ids = None - if args.top_leagues_only: - league_ids = load_top_league_ids() - print(f"[Data] Filtering to {len(league_ids)} top leagues") - - # Fetch data + conn = get_conn() cur = conn.cursor() - try: - df = fetch_training_data(cur, args.start, args.end, league_ids) - - if len(df) == 0: - print("[Error] No data found for the specified date range") + s1 = fetch_predictions_with_outcomes(cur) + s2 = fetch_prediction_runs_with_outcomes(cur) + print(f"[Data] predictions table: {len(s1)} samples") + print(f"[Data] prediction_runs: {len(s2)} samples") + all_samples = s1 + s2 + if not all_samples: + print("[Error] No training samples available") return - - # Calculate outcomes and probabilities - df = calculate_actual_outcomes(df) - df = calculate_implied_probabilities(df) - - # Train models - results = train_calibration_models( - df=df, - markets=args.markets, - min_samples=args.min_samples, - ) - - # Print report - print_calibration_report(results) - + df = pd.DataFrame(all_samples) + print(f"[Data] Combined: {len(df)} samples") + print(f"[Data] Unique matches: {df['match_id'].nunique()}") + print(f"[Data] Per-key counts:") + for key, count in df["key"].value_counts().items(): + print(f" {key:<14} {count}") + + results = train_per_key(df, args.min_samples, args.markets) + print_report(results, total_samples=len(df)) finally: cur.close() conn.close() diff --git a/ai-engine/services/betting_brain.py b/ai-engine/services/betting_brain.py index 61e7774..737edab 100644 --- a/ai-engine/services/betting_brain.py +++ b/ai-engine/services/betting_brain.py @@ -19,6 +19,10 @@ class BettingBrain: SOFT_DIVERGENCE = 0.14 EXTREME_MODEL_PROB = 0.85 EXTREME_GAP = 0.30 + # Vetoes that 
is_value_sniper bypasses (does NOT bypass odds_below_minimum) + SNIPER_BYPASSABLE_VETOES = {"calibrated_confidence_too_low", "play_score_too_low"} + # Trap market: market implied probability massively exceeds historical band hit rate + TRAP_MARKET_GAP = 0.10 MARKET_PRIORS = { "DC": 4.0, @@ -59,8 +63,13 @@ class BettingBrain: row for row in judged_rows.values() if row.get("betting_brain", {}).get("action") == "WATCH" ] + no_value = [ + row for row in judged_rows.values() + if row.get("betting_brain", {}).get("action") == "WATCH_NO_VALUE" + ] approved.sort(key=self._candidate_sort_key, reverse=True) watchlist.sort(key=self._candidate_sort_key, reverse=True) + no_value.sort(key=self._candidate_sort_key, reverse=True) original_main = guarded.get("main_pick") or {} main_pick = None @@ -78,6 +87,13 @@ class BettingBrain: self._force_no_bet(main_pick, "betting_brain_watchlist") decision = "WATCHLIST" decision_reason = main_pick.get("betting_brain", {}).get("summary", "Interesting but not clean enough.") + elif no_value: + # B-1: model agrees with a low-odds market — surface it so the user + # sees the read, but explicitly mark as not-playable. + main_pick = dict(no_value[0]) + self._force_no_bet(main_pick, "betting_brain_no_value_odds_below_minimum") + decision = "WATCH_NO_VALUE" + decision_reason = "Model favoriyle hemfikir ama oran bahis için çok düşük — bilgi amaçlı gösteriliyor." 
elif original_main: main_pick = dict(judged_rows.get(self._row_key(original_main), original_main)) self._force_no_bet(main_pick, "betting_brain_no_safe_pick") @@ -103,7 +119,7 @@ class BettingBrain: playable = decision == "BET" and bool(main_pick and main_pick.get("playable")) advice = dict(guarded.get("bet_advice") or {}) advice["playable"] = playable - advice["suggested_stake_units"] = float(main_pick.get("stake_units", 0.0)) if playable else 0.0 + advice["suggested_stake_units"] = float(main_pick.get("stake_units", 0.0)) if playable and main_pick else 0.0 advice["reason"] = "betting_brain_approved" if playable else "betting_brain_no_bet" advice["decision"] = decision advice["confidence_band"] = self._decision_band(main_pick) @@ -199,6 +215,23 @@ class BettingBrain: score += 11.0 positives.append("v25_v27_aligned") + # Trap market detection: market overpriced vs historical band hit rate + trap_market_flag = False + trap_market_gap = None + if isinstance(triple, dict): + band_rate_val = self._safe_float(triple.get("band_rate")) + implied_val = self._safe_float(triple.get("implied_prob")) + if ( + band_rate_val is not None + and implied_val is not None + and band_sample >= self.MIN_BAND_SAMPLE + and (implied_val - band_rate_val) > self.TRAP_MARKET_GAP + ): + trap_market_flag = True + trap_market_gap = round(implied_val - band_rate_val, 4) + score -= 14.0 + issues.append("trap_market_market_overpriced") + if isinstance(triple, dict): if triple_is_value: score += 18.0 @@ -240,10 +273,28 @@ class BettingBrain: if market in {"HT", "HTFT", "OE"} and score < 86.0 and not is_value_sniper: vetoes.append("volatile_market_requires_exceptional_evidence") + # Sniper override: bypass eligible vetoes when value sniper triggered + sniper_bypassed: List[str] = [] + if is_value_sniper and vetoes: + remaining = [] + for v in vetoes: + if v in self.SNIPER_BYPASSABLE_VETOES: + sniper_bypassed.append(v) + else: + remaining.append(v) + vetoes = remaining + if sniper_bypassed: + 
positives.append("sniper_bypassed_soft_vetoes") + score = max(0.0, min(100.0, score)) action = "BET" if vetoes: - action = "REJECT" + # B-1: when only veto is odds_below_minimum, switch to WATCH_NO_VALUE + # so user still sees model commentary instead of blank rejection. + if vetoes == ["odds_below_minimum"]: + action = "WATCH_NO_VALUE" + else: + action = "REJECT" elif score < self.MIN_WATCH_SCORE and not is_value_sniper: action = "REJECT" elif score < self.MIN_BET_SCORE and not is_value_sniper: @@ -256,6 +307,9 @@ class BettingBrain: "positives": positives[:5], "issues": issues[:6], "vetoes": vetoes[:6], + "sniper_bypassed": sniper_bypassed, + "trap_market_flag": trap_market_flag, + "trap_market_gap": trap_market_gap, "model_prob": round(model_prob, 4) if model_prob is not None else None, "implied_prob": round(implied, 4), "model_market_gap": round(model_gap, 4) if model_gap is not None else None, @@ -290,9 +344,59 @@ class BettingBrain: if isinstance(item, dict) and item.get("market"): key = self._row_key(item) rows[key] = self._merge_row(rows.get(key), item) - + + # B-2: ensure both MS sides (and DC sides) have an entry — give user the + # model's read on the opposite outcome even when upstream filtered it out. 
+ self._inject_reference_rows(rows, package) + return list(rows.values()) + def _inject_reference_rows( + self, + rows: Dict[str, Dict[str, Any]], + package: Dict[str, Any], + ) -> None: + market_board = package.get("market_board") or {} + ms_board = market_board.get("MS") if isinstance(market_board, dict) else None + if not isinstance(ms_board, dict): + return + probs = ms_board.get("probs") if isinstance(ms_board.get("probs"), dict) else {} + if not probs: + return + + # Pull MS odds from any existing MS row to estimate the missing side's odds + existing_odds_by_pick: Dict[str, float] = {} + for row in rows.values(): + if str(row.get("market")) == "MS": + pick = str(row.get("pick")) + odd = self._safe_float(row.get("odds"), 0.0) or 0.0 + if pick and odd > 1.0: + existing_odds_by_pick[pick] = odd + + for pick in ("1", "X", "2"): + key = f"MS:{pick}" + if key in rows: + continue + prob = self._safe_float(probs.get(pick), 0.0) + if prob is None or prob <= 0.0: + continue + implied_odd = round(1.0 / prob, 2) if prob > 0.01 else 0.0 + ref_odd = existing_odds_by_pick.get(pick) or implied_odd + rows[key] = { + "market": "MS", + "pick": pick, + "probability": round(prob, 4), + "confidence": round(prob * 100.0, 1), + "raw_confidence": round(prob * 100.0, 1), + "calibrated_confidence": round(prob * 100.0, 1), + "odds": ref_odd, + "is_underdog_reference": True, + "playable": False, + "stake_units": 0.0, + "bet_grade": "PASS", + "decision_reasons": ["underdog_reference_for_completeness"], + } + @staticmethod def _merge_row(existing: Optional[Dict[str, Any]], incoming: Dict[str, Any]) -> Dict[str, Any]: if existing is None: @@ -331,6 +435,7 @@ class BettingBrain: "odds_reliability": row.get("odds_reliability", 0.35), "odds": row.get("odds", 0.0), "reasons": reasons[:6], + "is_underdog_reference": bool(row.get("is_underdog_reference")), "betting_brain": row.get("betting_brain"), } @@ -409,6 +514,8 @@ class BettingBrain: return f"{market} {pick} approved: evidence is aligned 
enough for a controlled stake." if action == "WATCH": return f"{market} {pick} is interesting but not clean enough for stake." + if action == "WATCH_NO_VALUE": + return f"{market} {pick}: model favoriyle hemfikir, fakat oran ({', '.join(vetoes[:1]) or 'düşük'}) bahis için yetersiz." if vetoes: return f"{market} {pick} rejected: {', '.join(vetoes[:3])}." if issues: diff --git a/ai-engine/services/feature_enrichment.py b/ai-engine/services/feature_enrichment.py index 153b94f..99ff957 100644 --- a/ai-engine/services/feature_enrichment.py +++ b/ai-engine/services/feature_enrichment.py @@ -248,8 +248,8 @@ class FeatureEnrichmentService: away_team_venue_total = 0 for row in rows: - sh = int(row['score_home']) - sa = int(row['score_away']) + sh = int(row['score_home'] or 0) + sa = int(row['score_away'] or 0) match_goals = sh + sa total_goals += match_goals @@ -284,13 +284,13 @@ class FeatureEnrichmentService: if total >= 6: recent_5_wins = sum( 1 for r in rows[:5] - if (str(r['home_team_id']) == home_team_id and int(r['score_home']) > int(r['score_away'])) - or (str(r['home_team_id']) != home_team_id and int(r['score_away']) > int(r['score_home'])) + if (str(r['home_team_id']) == home_team_id and int(r['score_home'] or 0) > int(r['score_away'] or 0)) + or (str(r['home_team_id']) != home_team_id and int(r['score_away'] or 0) > int(r['score_home'] or 0)) ) older_5_wins = sum( 1 for r in rows[-5:] - if (str(r['home_team_id']) == home_team_id and int(r['score_home']) > int(r['score_away'])) - or (str(r['home_team_id']) != home_team_id and int(r['score_away']) > int(r['score_home'])) + if (str(r['home_team_id']) == home_team_id and int(r['score_home'] or 0) > int(r['score_away'] or 0)) + or (str(r['home_team_id']) != home_team_id and int(r['score_away'] or 0) > int(r['score_home'] or 0)) ) recent_trend = (recent_5_wins - older_5_wins) / 5.0 @@ -302,6 +302,12 @@ class FeatureEnrichmentService: - away_team_venue_wins / away_team_venue_total ) + if total == 0: + return 
dict(self._DEFAULT_H2H) + if total == 0: + return dict(self._DEFAULT_H2H) + if total == 0: + return dict(self._DEFAULT_H2H) return { 'total_matches': total, 'home_win_rate': home_wins / total, @@ -366,8 +372,8 @@ class FeatureEnrichmentService: for row in rows: is_home = str(row['home_team_id']) == team_id - goals_for = int(row['score_home'] if is_home else row['score_away']) - goals_against = int(row['score_away'] if is_home else row['score_home']) + goals_for = int((row['score_home'] if is_home else row['score_away']) or 0) + goals_against = int((row['score_away'] if is_home else row['score_home']) or 0) if goals_against == 0: clean_sheets += 1 @@ -390,6 +396,15 @@ class FeatureEnrichmentService: else: streak_broken_u = True + if total == 0: + return {'clean_sheet_rate': 0.25, 'scoring_rate': 0.75, + 'winning_streak': 0, 'unbeaten_streak': 0} + if total == 0: + return {'clean_sheet_rate': 0.25, 'scoring_rate': 0.75, + 'winning_streak': 0, 'unbeaten_streak': 0} + if total == 0: + return {'clean_sheet_rate': 0.25, 'scoring_rate': 0.75, + 'winning_streak': 0, 'unbeaten_streak': 0} return { 'clean_sheet_rate': clean_sheets / total, 'scoring_rate': scored_count / total, @@ -433,8 +448,8 @@ class FeatureEnrichmentService: match_ids = [] for row in rows: - sh = int(row['score_home']) - sa = int(row['score_away']) + sh = int(row['score_home'] or 0) + sa = int(row['score_away'] or 0) total_goals += sh + sa if sh > sa: home_wins += 1 @@ -464,6 +479,12 @@ class FeatureEnrichmentService: pass # home_bias: (actual home win rate) - 0.46 (league average ~46%) + if total == 0: + return dict(self._DEFAULT_REFEREE) + if total == 0: + return dict(self._DEFAULT_REFEREE) + if total == 0: + return dict(self._DEFAULT_REFEREE) home_bias = (home_wins / total) - 0.46 return { @@ -633,8 +654,8 @@ class FeatureEnrichmentService: over25_count = 0 for row in rows: - sh = int(row['score_home']) - sa = int(row['score_away']) + sh = int(row['score_home'] or 0) + sa = int(row['score_away'] or 0) 
match_goals = sh + sa total_goals += match_goals if match_goals == 0: @@ -828,8 +849,8 @@ class FeatureEnrichmentService: goals = [] conceded_list = [] for row in rows: - sh = int(row['score_home']) - sa = int(row['score_away']) + sh = int(row['score_home'] or 0) + sa = int(row['score_away'] or 0) if is_home: goals.append(sh) conceded_list.append(sa) diff --git a/ai-engine/services/match_commentary.py b/ai-engine/services/match_commentary.py index a54018c..4d7c311 100644 --- a/ai-engine/services/match_commentary.py +++ b/ai-engine/services/match_commentary.py @@ -58,6 +58,7 @@ def generate_match_commentary(package: Dict[str, Any]) -> Dict[str, Any]: summary = _build_summary( action, main_pick, market_board, v27_engine, score_pred, risk, data_quality, home, away, + match_info=match_info, ) # ── Quick notes ─────────────────────────────────────────────── @@ -117,22 +118,35 @@ def _build_summary( data_quality: Dict[str, Any], home: str, away: str, + match_info: Optional[Dict[str, Any]] = None, ) -> str: parts: List[str] = [] + # C-2: live-aware preamble — if the match is in play, lead with current score + # vs the pre-match read so users immediately see how the prediction is faring. + match_info = match_info or {} + if match_info.get("is_live"): + cur_home = match_info.get("current_score_home") + cur_away = match_info.get("current_score_away") + if cur_home is not None and cur_away is not None: + parts.append( + f"🔴 CANLI: {home} {cur_home} - {cur_away} {away} " + f"(aşağıdaki analiz maç öncesi tahmindir)" + ) + # Who is the favourite? 
ms_board = market_board.get("MS") or {} ms_pick = ms_board.get("pick", "") ms_conf = float(ms_board.get("confidence", 50) or 50) - if ms_pick == "1" and ms_conf > 45: - parts.append(f"{home} hafif favori görünüyor") - elif ms_pick == "1" and ms_conf > 55: + if ms_pick == "1" and ms_conf > 55: parts.append(f"{home} net favori") - elif ms_pick == "2" and ms_conf > 45: - parts.append(f"{away} hafif favori görünüyor") + elif ms_pick == "1" and ms_conf > 45: + parts.append(f"{home} hafif favori görünüyor") elif ms_pick == "2" and ms_conf > 55: parts.append(f"{away} net favori") + elif ms_pick == "2" and ms_conf > 45: + parts.append(f"{away} hafif favori görünüyor") else: parts.append("İki takım da birbirine yakın güçte") @@ -262,6 +276,26 @@ def _detect_contradictions( triple_value = v27_engine.get("triple_value") or {} predictions = v27_engine.get("predictions") or {} + # C-2 live-vs-prediction mismatch + match_info = package.get("match_info") or {} + if match_info.get("is_live"): + cur_h = match_info.get("current_score_home") + cur_a = match_info.get("current_score_away") + ms_board_live = market_board.get("MS") or {} + predicted_pick = str(ms_board_live.get("pick") or "") + if cur_h is not None and cur_a is not None: + actual_pick: Optional[str] = None + if cur_h > cur_a: + actual_pick = "1" + elif cur_a > cur_h: + actual_pick = "2" + else: + actual_pick = "X" + if predicted_pick and actual_pick and predicted_pick != actual_pick: + contradictions.append( + "Canlı durum maç öncesi tahmin ile çelişiyor — sürpriz GERÇEKLEŞİYOR" + ) + # MS contradiction: model says home but triple_value says away has value ms_preds = predictions.get("ms") or {} ms_home = float(ms_preds.get("home", 0) or 0) diff --git a/ai-engine/services/single_match_orchestrator.py b/ai-engine/services/single_match_orchestrator.py index f6052fe..68ea026 100755 --- a/ai-engine/services/single_match_orchestrator.py +++ b/ai-engine/services/single_match_orchestrator.py @@ -21,7 +21,7 @@ import pandas as pd 
import numpy as np from collections import defaultdict from dataclasses import dataclass -from typing import Any, Dict, List, Optional, Set, Tuple +from typing import Any, Dict, List, Optional, Set, Tuple, overload import psycopg2 from psycopg2.extras import RealDictCursor @@ -32,11 +32,14 @@ from models.v25_ensemble import V25Predictor, get_v25_predictor try: from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge except ImportError: - V27Predictor = None + class V27Predictor: + def __init__(self): self.models = {} + def load_models(self): return False + def predict_all(self, features): return {} def compute_divergence(*args, **kwargs): - return 0.0 + return {} def compute_value_edge(*args, **kwargs): - return 0.0 + return {} from features.odds_band_analyzer import OddsBandAnalyzer try: from models.basketball_v25 import ( @@ -45,7 +48,7 @@ try: ) except ImportError: BasketballMatchPrediction = Any - def get_basketball_v25_predictor(): + def get_basketball_v25_predictor() -> Any: raise ImportError("Basketball predictor is not available") from core.engines.player_predictor import PlayerPrediction, get_player_predictor from services.feature_enrichment import FeatureEnrichmentService @@ -55,6 +58,7 @@ from services.match_commentary import generate_match_commentary from utils.top_leagues import load_top_league_ids from utils.league_reliability import load_league_reliability from config.config_loader import build_threshold_dict, get_threshold_default +from models.calibration import get_calibrator @dataclass @@ -160,6 +164,7 @@ class SingleMatchOrchestrator: def __init__(self) -> None: self.v25_predictor: Optional[V25Predictor] = None self.v26_shadow_engine: Optional[V26ShadowEngine] = None + self._v27: Optional[V27Predictor] = None self.basketball_predictor: Optional[Any] = None self.dsn = get_clean_dsn() self.engine_mode = str(os.getenv("AI_ENGINE_MODE", "v28-pro-max")).strip().lower() @@ -188,7 +193,7 @@ class SingleMatchOrchestrator: 
return self.v25_predictor def _get_v26_shadow_engine(self) -> V26ShadowEngine: - if getattr(self, "v26_shadow_engine", None) is None: + if not hasattr(self, "v26_shadow_engine") or self.v26_shadow_engine is None: self.v26_shadow_engine = get_v26_shadow_engine() return self.v26_shadow_engine @@ -259,9 +264,9 @@ class SingleMatchOrchestrator: Build the single authoritative V25 pre-match feature vector. """ odds = self._sanitize_v25_odds(data.odds_data or {}) - ms_h = float(odds.get('ms_h', 0)) - ms_d = float(odds.get('ms_d', 0)) - ms_a = float(odds.get('ms_a', 0)) + ms_h = float(odds.get('ms_h') or 0) + ms_d = float(odds.get('ms_d') or 0) + ms_a = float(odds.get('ms_a') or 0) # Implied probabilities (vig-normalised) implied_home, implied_draw, implied_away = 0.33, 0.33, 0.33 @@ -385,23 +390,23 @@ class SingleMatchOrchestrator: 'odds_ms_h_present': 1.0 if ms_h > 1.01 else 0.0, 'odds_ms_d_present': 1.0 if ms_d > 1.01 else 0.0, 'odds_ms_a_present': 1.0 if ms_a > 1.01 else 0.0, - 'odds_ht_ms_h_present': 1.0 if float(odds.get('ht_h', 0)) > 1.01 else 0.0, - 'odds_ht_ms_d_present': 1.0 if float(odds.get('ht_d', 0)) > 1.01 else 0.0, - 'odds_ht_ms_a_present': 1.0 if float(odds.get('ht_a', 0)) > 1.01 else 0.0, - 'odds_ou05_o_present': 1.0 if float(odds.get('ou05_o', 0)) > 1.01 else 0.0, - 'odds_ou05_u_present': 1.0 if float(odds.get('ou05_u', 0)) > 1.01 else 0.0, - 'odds_ou15_o_present': 1.0 if float(odds.get('ou15_o', 0)) > 1.01 else 0.0, - 'odds_ou15_u_present': 1.0 if float(odds.get('ou15_u', 0)) > 1.01 else 0.0, - 'odds_ou25_o_present': 1.0 if float(odds.get('ou25_o', 0)) > 1.01 else 0.0, - 'odds_ou25_u_present': 1.0 if float(odds.get('ou25_u', 0)) > 1.01 else 0.0, - 'odds_ou35_o_present': 1.0 if float(odds.get('ou35_o', 0)) > 1.01 else 0.0, - 'odds_ou35_u_present': 1.0 if float(odds.get('ou35_u', 0)) > 1.01 else 0.0, - 'odds_ht_ou05_o_present': 1.0 if float(odds.get('ht_ou05_o', 0)) > 1.01 else 0.0, - 'odds_ht_ou05_u_present': 1.0 if float(odds.get('ht_ou05_u', 0)) > 1.01 
else 0.0, - 'odds_ht_ou15_o_present': 1.0 if float(odds.get('ht_ou15_o', 0)) > 1.01 else 0.0, - 'odds_ht_ou15_u_present': 1.0 if float(odds.get('ht_ou15_u', 0)) > 1.01 else 0.0, - 'odds_btts_y_present': 1.0 if float(odds.get('btts_y', 0)) > 1.01 else 0.0, - 'odds_btts_n_present': 1.0 if float(odds.get('btts_n', 0)) > 1.01 else 0.0, + 'odds_ht_ms_h_present': 1.0 if float(odds.get('ht_h') or 0) > 1.01 else 0.0, + 'odds_ht_ms_d_present': 1.0 if float(odds.get('ht_d') or 0) > 1.01 else 0.0, + 'odds_ht_ms_a_present': 1.0 if float(odds.get('ht_a') or 0) > 1.01 else 0.0, + 'odds_ou05_o_present': 1.0 if float(odds.get('ou05_o') or 0) > 1.01 else 0.0, + 'odds_ou05_u_present': 1.0 if float(odds.get('ou05_u') or 0) > 1.01 else 0.0, + 'odds_ou15_o_present': 1.0 if float(odds.get('ou15_o') or 0) > 1.01 else 0.0, + 'odds_ou15_u_present': 1.0 if float(odds.get('ou15_u') or 0) > 1.01 else 0.0, + 'odds_ou25_o_present': 1.0 if float(odds.get('ou25_o') or 0) > 1.01 else 0.0, + 'odds_ou25_u_present': 1.0 if float(odds.get('ou25_u') or 0) > 1.01 else 0.0, + 'odds_ou35_o_present': 1.0 if float(odds.get('ou35_o') or 0) > 1.01 else 0.0, + 'odds_ou35_u_present': 1.0 if float(odds.get('ou35_u') or 0) > 1.01 else 0.0, + 'odds_ht_ou05_o_present': 1.0 if float(odds.get('ht_ou05_o') or 0) > 1.01 else 0.0, + 'odds_ht_ou05_u_present': 1.0 if float(odds.get('ht_ou05_u') or 0) > 1.01 else 0.0, + 'odds_ht_ou15_o_present': 1.0 if float(odds.get('ht_ou15_o') or 0) > 1.01 else 0.0, + 'odds_ht_ou15_u_present': 1.0 if float(odds.get('ht_ou15_u') or 0) > 1.01 else 0.0, + 'odds_btts_y_present': 1.0 if float(odds.get('btts_y') or 0) > 1.01 else 0.0, + 'odds_btts_n_present': 1.0 if float(odds.get('btts_n') or 0) > 1.01 else 0.0, } # ── Calendar features (V27) ── @@ -476,23 +481,23 @@ class SingleMatchOrchestrator: 'implied_home': implied_home, 'implied_draw': implied_draw, 'implied_away': implied_away, - 'odds_ht_ms_h': float(odds.get('ht_h', 0)), - 'odds_ht_ms_d': float(odds.get('ht_d', 0)), - 
'odds_ht_ms_a': float(odds.get('ht_a', 0)), - 'odds_ou05_o': float(odds.get('ou05_o', 0)), - 'odds_ou05_u': float(odds.get('ou05_u', 0)), - 'odds_ou15_o': float(odds.get('ou15_o', 0)), - 'odds_ou15_u': float(odds.get('ou15_u', 0)), - 'odds_ou25_o': float(odds.get('ou25_o', 0)), - 'odds_ou25_u': float(odds.get('ou25_u', 0)), - 'odds_ou35_o': float(odds.get('ou35_o', 0)), - 'odds_ou35_u': float(odds.get('ou35_u', 0)), - 'odds_ht_ou05_o': float(odds.get('ht_ou05_o', 0)), - 'odds_ht_ou05_u': float(odds.get('ht_ou05_u', 0)), - 'odds_ht_ou15_o': float(odds.get('ht_ou15_o', 0)), - 'odds_ht_ou15_u': float(odds.get('ht_ou15_u', 0)), - 'odds_btts_y': float(odds.get('btts_y', 0)), - 'odds_btts_n': float(odds.get('btts_n', 0)), + 'odds_ht_ms_h': float(odds.get('ht_h') or 0), + 'odds_ht_ms_d': float(odds.get('ht_d') or 0), + 'odds_ht_ms_a': float(odds.get('ht_a') or 0), + 'odds_ou05_o': float(odds.get('ou05_o') or 0), + 'odds_ou05_u': float(odds.get('ou05_u') or 0), + 'odds_ou15_o': float(odds.get('ou15_o') or 0), + 'odds_ou15_u': float(odds.get('ou15_u') or 0), + 'odds_ou25_o': float(odds.get('ou25_o') or 0), + 'odds_ou25_u': float(odds.get('ou25_u') or 0), + 'odds_ou35_o': float(odds.get('ou35_o') or 0), + 'odds_ou35_u': float(odds.get('ou35_u') or 0), + 'odds_ht_ou05_o': float(odds.get('ht_ou05_o') or 0), + 'odds_ht_ou05_u': float(odds.get('ht_ou05_u') or 0), + 'odds_ht_ou15_o': float(odds.get('ht_ou15_o') or 0), + 'odds_ht_ou15_u': float(odds.get('ht_ou15_u') or 0), + 'odds_btts_y': float(odds.get('btts_y') or 0), + 'odds_btts_n': float(odds.get('btts_n') or 0), **odds_presence, # League (9 — original 2 + V27 expanded 5 + xga 2) 'home_xga': xga_home, @@ -584,15 +589,15 @@ class SingleMatchOrchestrator: sidelined_data=data.sidelined_data, ) result = { - 'home_squad_quality': float(pred.home_squad_quality), - 'away_squad_quality': float(pred.away_squad_quality), - 'squad_diff': float(pred.squad_diff), - 'home_key_players': float(pred.home_key_players), - 'away_key_players': 
float(pred.away_key_players), - 'home_missing_impact': float(pred.home_missing_impact), - 'away_missing_impact': float(pred.away_missing_impact), - 'home_goals_form': float(pred.home_goals_form), - 'away_goals_form': float(pred.away_goals_form), + 'home_squad_quality': float(pred.home_squad_quality or 0.0), + 'away_squad_quality': float(pred.away_squad_quality or 0.0), + 'squad_diff': float(pred.squad_diff or 0.0), + 'home_key_players': float(pred.home_key_players or 0), + 'away_key_players': float(pred.away_key_players or 0), + 'home_missing_impact': float(pred.home_missing_impact or 0.0), + 'away_missing_impact': float(pred.away_missing_impact or 0.0), + 'home_goals_form': float(pred.home_goals_form or 0.0), + 'away_goals_form': float(pred.away_goals_form or 0.0), } # Sanity check: squad_quality must be in training range (~3-36) for side in ('home', 'away'): @@ -691,7 +696,7 @@ class SingleMatchOrchestrator: # V34: Apply temperature scaling — reduced from 2.5 to 1.5 scaled_probs = _temperature_scale(probs_dict, temperature=1.5) - best_label = max(scaled_probs, key=scaled_probs.get) + best_label = max(scaled_probs, key=scaled_probs.__getitem__) best_prob = float(scaled_probs[best_label]) return { "probs": scaled_probs, @@ -726,7 +731,7 @@ class SingleMatchOrchestrator: ("handicap_ms", {"1": 0, "X": 1, "2": 2}), ("odd_even", {"Odd": 0, "Even": None}), ]: - out_key = self._V25_KEY_MAP.get(model_key, model_key.upper()) + out_key = str(self._V25_KEY_MAP.get(model_key, model_key.upper())) if not v25.has_market(model_key): continue raw = v25.predict_market(model_key, feature_row) @@ -793,7 +798,9 @@ class SingleMatchOrchestrator: @staticmethod def _best_prob_pick(prob_map: Dict[str, float]) -> Tuple[str, float]: - pick = max(prob_map, key=prob_map.get) + if not prob_map: + return "", 0.0 + pick = max(prob_map, key=prob_map.__getitem__) return pick, float(prob_map[pick]) @staticmethod @@ -919,15 +926,15 @@ class SingleMatchOrchestrator: prediction.predicted_ht_score = 
f"{int(round(ht_home_xg))}-{int(round(ht_away_xg))}" else: # Heuristic fallback (original formula) - base_home_xg = max(0.25, (float(data.home_goals_avg) + float(features.get("away_xga", data.away_conceded_avg))) / 2.0) - base_away_xg = max(0.25, (float(data.away_goals_avg) + float(features.get("home_xga", data.home_conceded_avg))) / 2.0) + base_home_xg = max(0.25, (float(data.home_goals_avg or 1.3) + float(features.get("away_xga", data.away_conceded_avg) or 1.2)) / 2.0) + base_away_xg = max(0.25, (float(data.away_goals_avg or 1.3) + float(features.get("home_xga", data.home_conceded_avg) or 1.2)) / 2.0) # ms_edge already computed above total_target = max( 1.4, min( 4.8, (float(features.get("league_avg_goals", 2.7)) * 0.55) - + ((float(data.home_goals_avg) + float(data.away_goals_avg)) * 0.45) + + ((float(data.home_goals_avg or 1.3) + float(data.away_goals_avg or 1.3)) * 0.45) + ((prediction.over_25_prob - prediction.under_25_prob) * 1.15), ), ) @@ -985,10 +992,14 @@ class SingleMatchOrchestrator: prediction.surprise_score = surprise["score"] prediction.surprise_comment = surprise["comment"] prediction.surprise_reasons = surprise["reasons"] + prediction.surprise_breakdown = surprise.get("breakdown", []) + # Auto-flag is_surprise_risk when score crosses 45 even if other paths didn't fire + if surprise["score"] >= 45.0: + prediction.is_surprise_risk = True prediction.team_confidence = round(max(35.0, min(95.0, 45.0 + (abs(ms_edge) * 85.0) + (abs(float(features.get("form_elo_diff", 0.0))) / 40.0))), 1) prediction.player_confidence = round(max(20.0, min(95.0, 38.0 + (float(features.get("home_key_players", 0.0)) + float(features.get("away_key_players", 0.0))) * 2.0 - (float(features.get("home_missing_impact", 0.0)) + float(features.get("away_missing_impact", 0.0))) * 22.0)), 1) - prediction.odds_confidence = round(max(30.0, min(95.0, np.mean([prediction.ms_confidence, prediction.ou25_confidence, prediction.btts_confidence]))), 1) + prediction.odds_confidence = 
round(max(30.0, min(95.0, float(np.mean([prediction.ms_confidence, prediction.ou25_confidence, prediction.btts_confidence])))), 1) prediction.referee_confidence = 62.0 if data.referee_name else 35.0 prediction.total_cards_pred = 4.8 if prediction.cards_over_prob >= prediction.cards_under_prob else 4.1 @@ -1333,9 +1344,9 @@ class SingleMatchOrchestrator: ), } - # ── Band-only value for new markets ─────────────────── + _odds_data = data.odds_data or {} def _band_value(label, band_rate, odds_key, sample): - o = float((data.odds_data or {}).get(odds_key, 0)) + o = float(_odds_data.get(odds_key, 0)) imp = (1.0 / o) if o > 1.0 else 0.50 e = band_rate - imp conf = band_rate > imp @@ -1423,7 +1434,7 @@ class SingleMatchOrchestrator: # Boost confidence when V27 agrees with V25 if v27_ms: - v27_best = max(v27_ms, key=v27_ms.get) + v27_best = max(v27_ms, key=v27_ms.__getitem__) v25_best_map = {"1": "home", "X": "draw", "2": "away"} v25_best_mapped = v25_best_map.get(prediction.ms_pick, "") if v27_best == v25_best_mapped: @@ -1703,10 +1714,7 @@ class SingleMatchOrchestrator: prob_key = self._upper_brain_prob_key(market, pick) if prob_key is None: return None - try: - return float(probs.get(prob_key)) - except (TypeError, ValueError): - return None + return self._safe_float(probs.get(prob_key)) def _upper_brain_v27_probability( self, @@ -1719,7 +1727,8 @@ class SingleMatchOrchestrator: ou25 = predictions.get("ou25") or {} if market == "MS": - return self._safe_float(ms.get({"1": "home", "X": "draw", "2": "away"}.get(pick, ""))) + ms_key = {"1": "home", "X": "draw", "2": "away"}.get(pick or "") + return self._safe_float(ms.get(ms_key), 0.0) if ms_key else 0.0 if market == "DC": if pick == "1X": return self._safe_float(ms.get("home"), 0.0) + self._safe_float(ms.get("draw"), 0.0) @@ -1729,8 +1738,8 @@ class SingleMatchOrchestrator: return self._safe_float(ms.get("home"), 0.0) + self._safe_float(ms.get("away"), 0.0) if market == "OU25": prob_key = 
self._upper_brain_prob_key(market, pick) - return self._safe_float(ou25.get(prob_key)) if prob_key else None - return None + return self._safe_float(ou25.get(prob_key), 0.0) if prob_key else 0.0 + return 0.0 @staticmethod def _upper_brain_prob_key(market: str, pick: str) -> Optional[str]: @@ -1780,6 +1789,12 @@ class SingleMatchOrchestrator: return f"htft_{pick.replace('/', '').lower()}" return None + @staticmethod + @overload + def _safe_float(value: Any, default: float) -> float: ... + @staticmethod + @overload + def _safe_float(value: Any, default: None = ...) -> Optional[float]: ... @staticmethod def _safe_float(value: Any, default: Optional[float] = None) -> Optional[float]: try: @@ -2259,7 +2274,7 @@ class SingleMatchOrchestrator: "rejected_matches": rejected, } - def get_daily_bankers(self, count: int = 3) -> List[Dict[str, Any]]: + def get_daily_bankers_live(self, count: int = 3) -> List[Dict[str, Any]]: with psycopg2.connect(self.dsn) as conn: with conn.cursor(cursor_factory=RealDictCursor) as cur: cur.execute( @@ -2336,7 +2351,8 @@ class SingleMatchOrchestrator: away_id = str(row["away_team_id"]) team_ids.add(home_id) team_ids.add(away_id) - pair_keys.add(tuple(sorted((home_id, away_id)))) + h, a = sorted((home_id, away_id)) + pair_keys.add((h, a)) team_cycle = self._fetch_team_reversal_cycle_metrics(cur, team_ids, now_ms) h2h_ctx = self._fetch_h2h_reversal_context(cur, pair_keys, now_ms) @@ -2399,7 +2415,8 @@ class SingleMatchOrchestrator: ) cycle_bonus = cycle_pressure * 10.0 - pair_key = tuple(sorted((data.home_team_id, data.away_team_id))) + h, a = sorted((data.home_team_id, data.away_team_id)) + pair_key = (h, a) pair_ctx = h2h_ctx.get(pair_key, {}) blowout_bonus = 0.0 last_diff = int(pair_ctx.get("goal_diff", 0)) @@ -2665,7 +2682,8 @@ class SingleMatchOrchestrator: for row in rows: home_id = str(row["home_team_id"]) away_id = str(row["away_team_id"]) - key = tuple(sorted((home_id, away_id))) + h, a = sorted((home_id, away_id)) + key = (h, a) if key 
not in pair_keys or key in out: continue @@ -2771,12 +2789,12 @@ class SingleMatchOrchestrator: lineup_confidence=lineup_confidence, source_table=str(row.get("source_table") or "matches"), current_score_home=( - int(row.get("score_home")) + int(str(row.get("score_home"))) if row.get("score_home") is not None else None ), current_score_away=( - int(row.get("score_away")) + int(str(row.get("score_away"))) if row.get("score_away") is not None else None ), @@ -2900,7 +2918,7 @@ class SingleMatchOrchestrator: (row["match_id"],), ) relational_rows = cur.fetchall() - rel_odds = self._parse_relational_odds(relational_rows) + rel_odds = self._parse_relational_odds([dict(r) for r in relational_rows]) if rel_odds: for key, value in rel_odds.items(): odds_data.setdefault(key, value) @@ -3952,6 +3970,18 @@ class SingleMatchOrchestrator: "league": data.league_name, "match_date_ms": data.match_date_ms, "sport": data.sport, + # Live snapshot — match_commentary uses this to detect upset-in-progress + "status": data.status, + "state": data.state, + "is_live": self._is_live_match(data), + "current_score_home": data.current_score_home, + "current_score_away": data.current_score_away, + }, + "prediction_freshness": { + "generated_at_ms": int(time.time() * 1000), + "is_pre_match_snapshot": True, + # Stale when the match is already underway — UI should warn the user. 
+ "is_stale_for_live": self._is_live_match(data), }, "data_quality": quality, "risk": { @@ -3962,14 +3992,10 @@ class SingleMatchOrchestrator: "surprise_score": round(float(getattr(prediction, "surprise_score", 0.0) or 0.0), 1), "surprise_comment": str(getattr(prediction, "surprise_comment", "") or ""), "surprise_reasons": list(getattr(prediction, "surprise_reasons", []) or []), + "surprise_breakdown": list(getattr(prediction, "surprise_breakdown", []) or []), "warnings": prediction.risk_warnings, }, - "engine_breakdown": { - "team": round(float(prediction.team_confidence), 1), - "player": round(float(prediction.player_confidence), 1), - "odds": round(float(prediction.odds_confidence), 1), - "referee": round(float(prediction.referee_confidence), 1), - }, + "engine_breakdown": self._build_engine_breakdown(prediction), "main_pick": main_pick, "value_pick": value_pick, "bet_advice": { @@ -4817,8 +4843,23 @@ class SingleMatchOrchestrator: data: MatchData, prediction: FullMatchPrediction, ) -> Dict[str, Any]: + """ + Produces an explainable surprise profile. 
+ + Each factor pushes the base score and contributes: + - a human-readable Turkish reason + - a `breakdown` entry with code, points, label + """ + BASE_SCORE = 22.0 + breakdown: List[Dict[str, Any]] = [] reasons: List[str] = [] - score = 22.0 + score = BASE_SCORE + + def add(code: str, points: float, label: str) -> None: + nonlocal score + score += points + reasons.append(label) + breakdown.append({"code": code, "points": round(points, 1), "label": label}) ms_home = float(getattr(prediction, "ms_home_prob", 0.0) or 0.0) ms_draw = float(getattr(prediction, "ms_draw_prob", 0.0) or 0.0) @@ -4831,37 +4872,95 @@ class SingleMatchOrchestrator: over35 = float(getattr(prediction, "over_35_prob", 0.0) or 0.0) if parity_gap <= 0.08: - score += 18.0 - reasons.append("balanced_match_risk") + add("balanced_match_risk", 18.0, "Takımlar birbirine çok yakın — sonuç kırılabilir") if ms_draw >= 0.30: - score += 14.0 - reasons.append("draw_probability_elevated") + add("draw_probability_elevated", 14.0, f"Beraberlik olasılığı yüksek (%{ms_draw*100:.0f})") if total_xg >= 3.25: - score += 10.0 - reasons.append("high_total_goal_volatility") + add("high_total_goal_volatility", 10.0, f"Toplam gol beklentisi yüksek (xG {total_xg:.1f}) — açık skor riski") if btts_yes >= 0.68: - score += 8.0 - reasons.append("mutual_goal_pressure") + add("mutual_goal_pressure", 8.0, f"Karşılıklı gol baskısı (%{btts_yes*100:.0f})") if over35 >= 0.52: - score += 8.0 - reasons.append("late_goal_swing_risk") + add("late_goal_swing_risk", 8.0, "Geç gol/skor değişimi riski") + + # Odds-based traps (favorite odds trap from UpsetEngineV2 logic) + ms_h_odd = self._safe_float((data.odds_data or {}).get("ms_h"), 0.0) + ms_a_odd = self._safe_float((data.odds_data or {}).get("ms_a"), 0.0) + ms_d_odd = self._safe_float((data.odds_data or {}).get("ms_d"), 0.0) + favorite_side = None + favorite_odd = 0.0 + if ms_h_odd > 1.01 and ms_a_odd > 1.01: + if ms_h_odd <= ms_a_odd: + favorite_side, favorite_odd = "home", ms_h_odd + 
else: + favorite_side, favorite_odd = "away", ms_a_odd + + # Favorite odds trap (1.40-1.60 historically %33+ surprise rate) + if 1.40 <= favorite_odd < 1.60: + add( + "favorite_odds_trap", + 12.0, + f"Favori oranı tuzak aralığında ({favorite_odd:.2f}) — tarihsel sürpriz oranı %30+", + ) + elif 1.20 <= favorite_odd < 1.30: + add( + "low_odds_trap_suspicion", + 6.0, + f"Favori oranı çok düşük ({favorite_odd:.2f}) — piyasa aşırı güveniyor olabilir", + ) + + # Bookmaker margin + if ms_h_odd > 1.01 and ms_a_odd > 1.01 and ms_d_odd > 1.01: + margin = (1 / ms_h_odd + 1 / ms_d_odd + 1 / ms_a_odd) - 1 + if margin > 0.20: + add( + "bookmaker_margin_high", + 10.0, + f"Bookmaker marjı çok yüksek (%{margin*100:.1f}) — bahisçi risk görüyor", + ) + elif margin > 0.18: + add( + "bookmaker_margin_elevated", + 6.0, + f"Bookmaker marjı yüksek (%{margin*100:.1f})", + ) + + # Away favorite carries inherent extra risk + if favorite_side == "away" and favorite_odd > 0: + add( + "away_favorite_extra_risk", + 6.0, + "Deplasman favorisi — atmosfer ve seyahat ek risk yaratır", + ) + if data.lineup_source == "probable_xi": - score += 8.0 - reasons.append("lineup_probable_not_confirmed") + add("lineup_probable_not_confirmed", 8.0, "Kadrolar tahmini — kesinleşmemiş") if data.lineup_source == "none": - score += 12.0 - reasons.append("lineup_unavailable") + add("lineup_unavailable", 12.0, "Kadro bilgisi yok — analiz güvenilirliği düştü") if not data.referee_name: - score += 6.0 - reasons.append("missing_referee") + add("missing_referee", 6.0, "Hakem atanmamış — disiplin/avantaj sinyali eksik") + if self._is_live_match(data): current_goals = int(data.current_score_home or 0) + int(data.current_score_away or 0) if current_goals >= 3: - score += 18.0 - reasons.append("live_match_open_state") + add("live_match_open_state", 18.0, f"Maç şu an açık skorlu ({current_goals} gol) — pre-match tahminler riskli") elif current_goals >= 2: - score += 10.0 - reasons.append("live_match_active_state") + 
add("live_match_active_state", 10.0, f"Maç canlı ve hareketli ({current_goals} gol)") + + # Live underdog leading (pre-match favorite is losing) + cur_home = int(data.current_score_home or 0) + cur_away = int(data.current_score_away or 0) + if favorite_side == "home" and cur_away > cur_home: + add( + "live_underdog_leading", + 20.0, + "Canlı: deplasman önde, pre-match ev sahibi favorisiydi — sürpriz GERÇEKLEŞİYOR", + ) + elif favorite_side == "away" and cur_home > cur_away: + add( + "live_underdog_leading", + 20.0, + "Canlı: ev sahibi önde, pre-match deplasman favorisiydi — sürpriz GERÇEKLEŞİYOR", + ) score = max(0.0, min(100.0, score)) if score >= 75: @@ -4873,12 +4972,109 @@ class SingleMatchOrchestrator: else: comment = "Sürpriz riski düşük görünüyor. Tahminler normal güven bandında okunabilir." + # Deduplicate reasons by text while preserving order + deduped_reasons = list(dict.fromkeys(reasons))[:8] + # Same dedup logic for breakdown (by code) + seen_codes: Set[str] = set() + deduped_breakdown: List[Dict[str, Any]] = [] + for entry in breakdown: + if entry["code"] in seen_codes: + continue + seen_codes.add(entry["code"]) + deduped_breakdown.append(entry) + return { "score": round(score, 1), "comment": comment, - "reasons": list(dict.fromkeys(reasons))[:6], + "reasons": deduped_reasons, + "breakdown": deduped_breakdown[:10], + "base_score": BASE_SCORE, } + @staticmethod + def _safe_float(value: Any, default: float = 0.0) -> float: + try: + return float(value) + except (TypeError, ValueError): + return default + + @staticmethod + def _calibrator_key(market: str, pick: str) -> Optional[str]: + """Map (market, pick) → trained-calibrator key in models/calibration.""" + m = (market or "").upper() + p = (pick or "").strip().casefold() + if m == "MS": + if p == "1": + return "ms_home" + if p == "x" or p == "0": + return "ms_draw" + if p == "2": + return "ms_away" + return None + if m == "DC": + return "dc" + if m == "OU15" and ("over" in p or "üst" in p or "ust" in 
p): + return "ou15" + if m == "OU25" and ("over" in p or "üst" in p or "ust" in p): + return "ou25" + if m == "OU35" and ("over" in p or "üst" in p or "ust" in p): + return "ou35" + if m == "BTTS" and ("yes" in p or "var" in p): + return "btts" + if m == "HT": + if p == "1": + return "ht_home" + if p == "x" or p == "0": + return "ht_draw" + if p == "2": + return "ht_away" + return None + if m == "HTFT": + return "ht_ft" + return None + + @staticmethod + def _confidence_label(score: float) -> Tuple[str, str]: + """Turkish UX label + interpretation for a 0-100 confidence score.""" + if score >= 75: + return "YUKSEK", "Bu sinyal güçlü ve güvenilir" + if score >= 60: + return "ORTA", "Sinyal makul, çelişen veri yok" + if score >= 45: + return "DUSUK", "Sinyal zayıf, dikkatli yorumla" + return "COK_DUSUK", "Veri yetersiz veya çelişkili — bu motoru bu maç için ihmal et" + + def _build_engine_breakdown(self, prediction: FullMatchPrediction) -> Dict[str, Any]: + """ + Engine breakdown with backward-compatible flat scores + rich detail siblings. + + Shape: + { + team: 74.1, player: 55.7, odds: 55.2, referee: 62.0, # legacy flat scores + detail: { team: {score, label, ...}, player: {...}, ... 
} + } + """ + components = { + "team": ("Takım modeli", float(prediction.team_confidence)), + "player": ("Oyuncu / kadro modeli", float(prediction.player_confidence)), + "odds": ("Oran piyasası", float(prediction.odds_confidence)), + "referee": ("Hakem etkisi", float(prediction.referee_confidence)), + } + flat: Dict[str, Any] = {} + detail: Dict[str, Any] = {} + for key, (display, raw) in components.items(): + score = round(raw, 1) + label, interpretation = self._confidence_label(score) + flat[key] = score + detail[key] = { + "score": score, + "label": label, + "display_name": display, + "interpretation": interpretation, + } + flat["detail"] = detail + return flat + @staticmethod def _normalize_v25_probs(market: str, probs: Dict[str, Any]) -> Dict[str, float]: out: Dict[str, float] = {} @@ -5105,13 +5301,25 @@ class SingleMatchOrchestrator: raw_conf = float(row.get("confidence") or 0.0) prob = float(row.get("probability") or 0.0) odd = float(row.get("odds") or 0.0) + pick_str = str(row.get("pick") or "") - calibration = self.market_calibration.get(market, 0.85) - calibrated_conf = max(1.0, min(99.0, raw_conf * calibration)) + # Trained isotonic calibrator (preferred) — falls back to multiplier if not trained. + # IMPORTANT: trainer was fed (raw_confidence/100, actual). Orchestrator must feed + # the same shape — using `prob` (which may differ from raw_conf/100 due to upstream + # confidence boosting) would give the calibrator an out-of-distribution input. 
+ calibrator = get_calibrator() + cal_key = self._calibrator_key(market, pick_str) + if cal_key and cal_key in calibrator.calibrators: + cal_input = max(0.001, min(0.999, raw_conf / 100.0)) + cal_prob = calibrator.calibrate(cal_key, cal_input, odds_val=odd if odd > 1.0 else None) + calibrated_conf = max(1.0, min(99.0, cal_prob * 100.0)) + else: + multiplier = self.market_calibration.get(market, 0.85) + calibrated_conf = max(1.0, min(99.0, raw_conf * multiplier)) min_conf = self.market_min_conf.get(market, 55.0) implied_prob = (1.0 / odd) if odd > 1.0 else 0.0 - band_verdict = self._odds_band_verdict(data, market, str(row.get("pick") or ""), implied_prob) + band_verdict = self._odds_band_verdict(data, market, pick_str, implied_prob) # ── V31: League-specific odds reliability ────────────────────── # Higher reliability → trust odds-based edge more in play_score diff --git a/ai-engine/services/v26_shadow_engine.py b/ai-engine/services/v26_shadow_engine.py index 530d9e5..72d9672 100644 --- a/ai-engine/services/v26_shadow_engine.py +++ b/ai-engine/services/v26_shadow_engine.py @@ -1955,7 +1955,7 @@ class V26ShadowEngine: def _pick_from_probs(probs: Dict[str, float]) -> Tuple[str, float]: if not probs: return "", 0.0 - pick = max(probs, key=probs.get) + pick = max(probs, key=probs.__getitem__) return pick, float(probs[pick]) @staticmethod diff --git a/package-lock.json b/package-lock.json index 3312de9..2dc59ac 100755 --- a/package-lock.json +++ b/package-lock.json @@ -1145,6 +1145,7 @@ "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.28.5.tgz", "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==", "dev": true, + "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.5", @@ -3000,6 +3001,7 @@ "resolved": "https://registry.npmjs.org/@nestjs/axios/-/axios-4.0.1.tgz", "integrity": 
"sha512-68pFJgu+/AZbWkGu65Z3r55bTsCPlgyKaV4BSG8yUAD72q1PPuyVRgUwFv6BxdnibTUHlyxm06FmYWNC+bjN7A==", "license": "MIT", + "peer": true, "peerDependencies": { "@nestjs/common": "^10.0.0 || ^11.0.0", "axios": "^1.3.1", @@ -3093,6 +3095,7 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "dev": true, + "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -3259,6 +3262,7 @@ "version": "11.1.11", "resolved": "https://registry.npmjs.org/@nestjs/common/-/common-11.1.11.tgz", "integrity": "sha512-R/+A8XFqLgN8zNs2twhrOaE7dJbRQhdPX3g46am4RT/x8xGLqDphrXkUIno4cGUZHxbczChBAaAPTdPv73wDZA==", + "peer": true, "dependencies": { "file-type": "21.2.0", "iterare": "1.2.1", @@ -3304,6 +3308,7 @@ "resolved": "https://registry.npmjs.org/@nestjs/core/-/core-11.1.11.tgz", "integrity": "sha512-H9i+zT3RvHi7tDc+lCmWHJ3ustXveABCr+Vcpl96dNOxgmrx4elQSTC4W93Mlav2opfLV+p0UTHY6L+bpUA4zA==", "hasInstallScript": true, + "peer": true, "dependencies": { "@nuxt/opencollective": "0.4.1", "fast-safe-stringify": "2.1.1", @@ -3383,6 +3388,7 @@ "version": "11.1.11", "resolved": "https://registry.npmjs.org/@nestjs/platform-express/-/platform-express-11.1.11.tgz", "integrity": "sha512-kyABSskdMRIAMWL0SlbwtDy4yn59RL4HDdwHDz/fxWuv7/53YP8Y2DtV3/sHqY5Er0msMVTZrM38MjqXhYL7gw==", + "peer": true, "dependencies": { "cors": "2.8.5", "express": "5.2.1", @@ -3403,6 +3409,7 @@ "version": "11.1.11", "resolved": "https://registry.npmjs.org/@nestjs/platform-socket.io/-/platform-socket.io-11.1.11.tgz", "integrity": "sha512-0z6pLg9CuTXtz7q2lRZoPOU94DN28OTa39f4cQrlZysKA6QrKM7w7z6xqb4g32qjF+LQHFNRmMJtE/pLrxBaig==", + "peer": true, "dependencies": { "socket.io": "4.8.3", "tslib": "2.8.1" @@ -3777,6 +3784,8 @@ "resolved": "https://registry.npmjs.org/@prisma/client/-/client-6.19.3.tgz", "integrity": 
"sha512-mKq3jQFhjvko5LTJFHGilsuQs+W+T3Gm451NzuTDGQxwCzwXHYnIu2zGkRoW+Exq3Rob7yp2MfzSrdIiZVhrBg==", "hasInstallScript": true, + "license": "Apache-2.0", + "peer": true, "engines": { "node": ">=18.18" }, @@ -3856,6 +3865,7 @@ "version": "1.6.1", "resolved": "https://registry.npmjs.org/@redis/client/-/client-1.6.1.tgz", "integrity": "sha512-/KCsg3xSlR+nCK8/8ZYSknYxvXHwubJrU82F3Lm1Fp6789VQ0/3RJKfsmRXjqfaTA++23CvC3hqmqe/2GEt6Kw==", + "peer": true, "dependencies": { "cluster-key-slot": "1.1.2", "generic-pool": "3.9.0", @@ -4766,6 +4776,7 @@ "resolved": "https://registry.npmjs.org/@types/eslint/-/eslint-9.6.1.tgz", "integrity": "sha512-FXx2pKgId/WyYo2jXw63kk7/+TY7u7AziEJxJAnSFzHlqTAS3Ync6SvgYAN/k4/PQpnnVuzoMuVnByKK2qp0ag==", "dev": true, + "peer": true, "dependencies": { "@types/estree": "*", "@types/json-schema": "*" @@ -4887,6 +4898,7 @@ "version": "22.19.3", "resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.3.tgz", "integrity": "sha512-1N9SBnWYOJTrNZCdh/yJE+t910Y128BoyY+zBLWhL3r0TYzlTmFdXrPwHL9DyFZmlEXNQQolTZh3KHV31QDhyA==", + "peer": true, "dependencies": { "undici-types": "~6.21.0" } @@ -5051,6 +5063,7 @@ "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.52.0.tgz", "integrity": "sha512-iIACsx8pxRnguSYhHiMn2PvhvfpopO9FXHyn1mG5txZIsAaB6F0KwbFnUQN3KCiG3Jcuad/Cao2FAs1Wp7vAyg==", "dev": true, + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.52.0", "@typescript-eslint/types": "8.52.0", @@ -5688,6 +5701,7 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -5741,6 +5755,7 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", "dev": true, + "peer": true, "dependencies": { "fast-deep-equal": 
"^3.1.1", "fast-json-stable-stringify": "^2.0.0", @@ -5932,6 +5947,7 @@ "resolved": "https://registry.npmjs.org/axios/-/axios-1.13.6.tgz", "integrity": "sha512-ChTCHMouEe2kn713WHbQGcuYrr6fXTBiu460OTwWrWob16g1bXn4vtz07Ope7ewMozJAnEquLk5lWQWtBig9DQ==", "license": "MIT", + "peer": true, "dependencies": { "follow-redirects": "^1.15.11", "form-data": "^4.0.5", @@ -6245,6 +6261,7 @@ "url": "https://github.com/sponsors/ai" } ], + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.9.0", "caniuse-lite": "^1.0.30001759", @@ -6317,6 +6334,7 @@ "version": "5.66.4", "resolved": "https://registry.npmjs.org/bullmq/-/bullmq-5.66.4.tgz", "integrity": "sha512-y2VRk2z7d1YNI2JQDD7iThoD0X/0iZZ3VEp8lqT5s5U0XDl9CIjXp1LQgmE9EKy6ReHtzmYXS1f328PnUbZGtQ==", + "peer": true, "dependencies": { "cron-parser": "4.9.0", "ioredis": "5.8.2", @@ -6428,6 +6446,7 @@ "version": "7.2.7", "resolved": "https://registry.npmjs.org/cache-manager/-/cache-manager-7.2.7.tgz", "integrity": "sha512-TKeeb9nSybk1e9E5yAiPVJ6YKdX9FYhwqqy8fBfVKAFVTJYZUNmeIvwjURW6+UikNsO6l2ta27thYgo/oumDsw==", + "peer": true, "dependencies": { "@cacheable/utils": "^2.3.2", "keyv": "^5.5.4" @@ -6697,12 +6716,14 @@ "node_modules/class-transformer": { "version": "0.5.1", "resolved": "https://registry.npmjs.org/class-transformer/-/class-transformer-0.5.1.tgz", - "integrity": "sha512-SQa1Ws6hUbfC98vKGxZH3KFY0Y1lm5Zm0SY8XX9zbK7FJCyVEac3ATW0RIpwzW+oOfmHE5PMPufDG9hCfoEOMw==" + "integrity": "sha512-SQa1Ws6hUbfC98vKGxZH3KFY0Y1lm5Zm0SY8XX9zbK7FJCyVEac3ATW0RIpwzW+oOfmHE5PMPufDG9hCfoEOMw==", + "peer": true }, "node_modules/class-validator": { "version": "0.14.3", "resolved": "https://registry.npmjs.org/class-validator/-/class-validator-0.14.3.tgz", "integrity": "sha512-rXXekcjofVN1LTOSw+u4u9WXVEUvNBVjORW154q/IdmYWy1nMbOU9aNtZB0t8m+FJQ9q91jlr2f9CwwUFdFMRA==", + "peer": true, "dependencies": { "@types/validator": "^13.15.3", "libphonenumber-js": "^1.11.1", @@ -7581,8 +7602,7 @@ "version": "2.0.0", "resolved": 
"https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-2.0.0.tgz", "integrity": "sha512-5POEcUuZybH7IdmGsD8wlf0AI55wMecM9rVBTI/qEAy2c1kTOm3DjFYjrBdI2K3BaJjJYfYFeRtM0t9ssnRuxw==", - "dev": true, - "peer": true + "dev": true }, "node_modules/es-object-atoms": { "version": "1.1.1", @@ -7640,6 +7660,7 @@ "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.2.tgz", "integrity": "sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==", "dev": true, + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -7699,6 +7720,7 @@ "resolved": "https://registry.npmjs.org/eslint-config-prettier/-/eslint-config-prettier-10.1.8.tgz", "integrity": "sha512-82GZUjRS0p/jganf6q1rEO25VSoHH0hKPCTrgillPjdI/3bgBhAE1QzHrHTizjpRvy6pGAvKjDJtk2pF9NDq8w==", "dev": true, + "peer": true, "bin": { "eslint-config-prettier": "bin/cli.js" }, @@ -7929,6 +7951,7 @@ "version": "5.2.1", "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", + "peer": true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", @@ -9190,6 +9213,7 @@ "resolved": "https://registry.npmjs.org/jest/-/jest-30.2.0.tgz", "integrity": "sha512-F26gjC0yWN8uAA5m5Ss8ZQf5nDHWGlN/xWZIh8S5SRbsEKBovwZhxGd6LJlbZYxBgCYOtreSUyb8hpXyGC5O4A==", "dev": true, + "peer": true, "dependencies": { "@jest/core": "30.2.0", "@jest/types": "30.2.0", @@ -10041,6 +10065,7 @@ "version": "5.5.5", "resolved": "https://registry.npmjs.org/keyv/-/keyv-5.5.5.tgz", "integrity": "sha512-FA5LmZVF1VziNc0bIdCSA1IoSVnDCqE8HJIZZv2/W8YmoAM50+tnUgJR/gQZwEeIMleuIOnRnHA/UaZRNeV4iQ==", + "peer": true, "dependencies": { "@keyv/serialize": "^1.1.1" } @@ -10859,7 +10884,6 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz", "integrity": 
"sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==", - "peer": true, "engines": { "node": ">= 6" } @@ -11097,6 +11121,7 @@ "version": "0.7.0", "resolved": "https://registry.npmjs.org/passport/-/passport-0.7.0.tgz", "integrity": "sha512-cPLl+qZpSc+ireUvt+IzqbED1cHHkDoVYMo30jbJIdOOjQ1MQYZBPiNvmi8UM6lJuOpTPXJGZQk0DtC4y61MYQ==", + "peer": true, "dependencies": { "passport-strategy": "1.x.x", "pause": "0.0.1", @@ -11233,6 +11258,7 @@ "version": "10.1.0", "resolved": "https://registry.npmjs.org/pino/-/pino-10.1.0.tgz", "integrity": "sha512-0zZC2ygfdqvqK8zJIr1e+wT1T/L+LF6qvqvbzEQ6tiMAoTqEVK9a1K3YRu8HEUvGEvNqZyPJTtb2sNIoTkB83w==", + "peer": true, "dependencies": { "@pinojs/redact": "^0.4.0", "atomic-sleep": "^1.0.0", @@ -11262,6 +11288,7 @@ "version": "11.0.0", "resolved": "https://registry.npmjs.org/pino-http/-/pino-http-11.0.0.tgz", "integrity": "sha512-wqg5XIAGRRIWtTk8qPGxkbrfiwEWz1lgedVLvhLALudKXvg1/L2lTFgTGPJ4Z2e3qcRmxoFxDuSdMdMGNM6I1g==", + "peer": true, "dependencies": { "get-caller-file": "^2.0.5", "pino": "^10.0.0", @@ -11480,6 +11507,7 @@ "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.7.4.tgz", "integrity": "sha512-v6UNi1+3hSlVvv8fSaoUbggEM5VErKmmpGA7Pl3HF8V6uKY7rvClBOJlH6yNwQtfTueNkGVpOv/mtWL9L4bgRA==", "dev": true, + "peer": true, "bin": { "prettier": "bin/prettier.cjs" }, @@ -11533,6 +11561,7 @@ "resolved": "https://registry.npmjs.org/prisma/-/prisma-6.19.3.tgz", "integrity": "sha512-++ZJ0ijLrDJF6hNB4t4uxg2br3fC4H9Yc9tcbjr2fcNFP3rh/SBNrAgjhsqBU4Ght8JPrVofG/ZkXfnSfnYsFg==", "hasInstallScript": true, + "peer": true, "dependencies": { "@prisma/config": "6.19.3", "@prisma/engines": "6.19.3" @@ -12685,6 +12714,7 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "dev": true, + "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -13007,6 +13037,7 
@@ "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz", "integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==", "dev": true, + "peer": true, "dependencies": { "@cspotcode/source-map-support": "^0.8.0", "@tsconfig/node10": "^1.0.7", @@ -13162,6 +13193,7 @@ "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "devOptional": true, + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -13509,7 +13541,6 @@ "resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-2.1.1.tgz", "integrity": "sha512-Wx0Kx52hxE7C18hkMEggYlEifqWZtYaRgouJor+WMdPnQyEK13vgEWyVNup7SoeeoLMsr4kf5h6dOW11I15MUA==", "dev": true, - "peer": true, "dependencies": { "ajv": "^8.0.0" }, @@ -13527,7 +13558,6 @@ "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-5.1.0.tgz", "integrity": "sha512-YCS/JNFAUyr5vAuhk1DWm1CBxRHW9LbJ2ozWeemrIqpbsqKjHVxYPyi5GC0rjZIT5JxJ3virVTS8wk4i/Z+krw==", "dev": true, - "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3" }, @@ -13540,7 +13570,6 @@ "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-5.1.1.tgz", "integrity": "sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==", "dev": true, - "peer": true, "dependencies": { "esrecurse": "^4.3.0", "estraverse": "^4.1.1" @@ -13554,7 +13583,6 @@ "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz", "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", "dev": true, - "peer": true, "engines": { "node": ">=4.0" } @@ -13563,15 +13591,13 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", "integrity": 
"sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", - "dev": true, - "peer": true + "dev": true }, "node_modules/webpack/node_modules/mime-db": { "version": "1.52.0", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", "dev": true, - "peer": true, "engines": { "node": ">= 0.6" } @@ -13581,7 +13607,6 @@ "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", "dev": true, - "peer": true, "dependencies": { "mime-db": "1.52.0" }, @@ -13594,7 +13619,6 @@ "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-4.3.3.tgz", "integrity": "sha512-eflK8wEtyOE6+hsaRVPxvUKYCpRgzLqDTb8krvAsRIwOGlHoSgYLgBXoubGgLd2fT41/OUYdb48v4k4WWHQurA==", "dev": true, - "peer": true, "dependencies": { "@types/json-schema": "^7.0.9", "ajv": "^8.9.0", diff --git a/prisma.config.ts b/prisma.config.ts index f5bd20a..6214ac9 100644 --- a/prisma.config.ts +++ b/prisma.config.ts @@ -1,8 +1,8 @@ import path from 'node:path'; -import { defineConfig, env } from 'prisma/config'; +import { defineConfig, env } from '@prisma/config'; import { config } from 'dotenv'; -config({ path: '.env.local' }); +config({ path: '.env' }); export default defineConfig({ schema: path.join('prisma', 'schema.prisma'), diff --git a/prisma/migrations/20260512000000_add_max_columns_to_usage_limits/migration.sql b/prisma/migrations/20260512000000_add_max_columns_to_usage_limits/migration.sql new file mode 100644 index 0000000..b26f58b --- /dev/null +++ b/prisma/migrations/20260512000000_add_max_columns_to_usage_limits/migration.sql @@ -0,0 +1,3 @@ +-- AlterTable: add max_analyses and max_coupons columns to usage_limits +ALTER TABLE "usage_limits" ADD COLUMN IF NOT EXISTS "max_analyses" INTEGER NOT NULL DEFAULT 3; 
+ALTER TABLE "usage_limits" ADD COLUMN IF NOT EXISTS "max_coupons" INTEGER NOT NULL DEFAULT 1; diff --git a/prisma/migrations/20260512120000_add_ht_score_to_live_matches/migration.sql b/prisma/migrations/20260512120000_add_ht_score_to_live_matches/migration.sql new file mode 100644 index 0000000..5198bdc --- /dev/null +++ b/prisma/migrations/20260512120000_add_ht_score_to_live_matches/migration.sql @@ -0,0 +1,4 @@ +-- Add half-time score columns to live_matches to mirror matches table +ALTER TABLE "live_matches" + ADD COLUMN IF NOT EXISTS "ht_score_home" INTEGER, + ADD COLUMN IF NOT EXISTS "ht_score_away" INTEGER; diff --git a/prisma/schema.prisma b/prisma/schema.prisma index b400b49..6baef90 100755 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -139,6 +139,8 @@ model LiveMatch { substate String? scoreHome Int? @map("score_home") scoreAway Int? @map("score_away") + htScoreHome Int? @map("ht_score_home") + htScoreAway Int? @map("ht_score_away") updatedAt DateTime @default(now()) @updatedAt @map("updated_at") odds Json? oddsUpdatedAt DateTime? @map("odds_updated_at") diff --git a/scripts/analyze_prediction_patterns.ts b/scripts/analyze_prediction_patterns.ts new file mode 100644 index 0000000..20d98f8 --- /dev/null +++ b/scripts/analyze_prediction_patterns.ts @@ -0,0 +1,210 @@ +/** + * Read-only analysis of prediction patterns for the last N finished football matches. + * + * Outputs systematic-bias indicators that inform the engine improvement brief: + * 1. Surprise transparency rate (how often surprise_reasons is empty) + * 2. Surprise miss rate (underdog won but is_surprise_risk was false) + * 3. REJECT-all rate + actual outcome distribution on those matches + * 4. Calibration shrinkage histogram (raw - calibrated per market) + * 5. Trap-market frequency (band_rate << implied_prob despite high model_prob) + * 6. Commentary "hafif favori" hit-rate vs actual result + * 7. 
Live-blind cases (LIVE matches whose latest prediction was pre-match) + * + * Usage: + * npx ts-node iddaai-be/scripts/analyze_prediction_patterns.ts [limit=200] + */ + +import { PrismaClient } from "@prisma/client"; + +const prisma = new PrismaClient(); +const LIMIT = parseInt(process.argv[2] || "200", 10); + +type Payload = Record; + +function readNum(v: any): number | null { + const n = Number(v); + return Number.isFinite(n) ? n : null; +} + +function bucket(value: number, edges: number[]): string { + for (let i = 0; i < edges.length; i++) { + if (value < edges[i]) { + const lo = i === 0 ? "-inf" : String(edges[i - 1]); + return `[${lo}, ${edges[i]})`; + } + } + return `[${edges[edges.length - 1]}, +inf)`; +} + +async function main() { + console.log(`\n=== PREDICTION PATTERN ANALYZER ===`); + console.log(`Pulling the most recent ${LIMIT} football matches with predictions.\n`); + + const matches = await prisma.match.findMany({ + where: { + sport: "football", + status: "FT", + scoreHome: { not: null }, + scoreAway: { not: null }, + prediction: { isNot: null }, + }, + include: { prediction: true }, + orderBy: { mstUtc: "desc" }, + take: LIMIT, + }); + + console.log(`Found ${matches.length} matches.\n`); + + // Counters + let surpriseEmpty = 0; + let surpriseFilled = 0; + + let upsetMatches = 0; // underdog (per odds) actually won + let upsetMissedBySystem = 0; // upset happened, is_surprise_risk false + let upsetCaughtBySystem = 0; + + let rejectAllCount = 0; + const rejectAllOutcomes = { homeWin: 0, draw: 0, awayWin: 0 }; + + const shrinkageByMarket = new Map(); // raw - calibrated per market + let trapMarketCount = 0; // band_rate < implied_prob - 0.10 AND main_pick selected anyway + let trapMarketSampled = 0; + + let hafifFavoriUseCount = 0; // commentary said "hafif favori" + let hafifFavoriCorrectCount = 0; // and that favorite actually won + + for (const m of matches) { + const payload = (m.prediction?.predictionJson as Payload) || {}; + + // 1. 
Surprise transparency + const risk = payload.risk || {}; + const surpriseReasons = Array.isArray(risk.surprise_reasons) ? risk.surprise_reasons : []; + if (surpriseReasons.length === 0) surpriseEmpty++; + else surpriseFilled++; + + // 2. Upset detection vs reality + const finalHome = m.scoreHome ?? 0; + const finalAway = m.scoreAway ?? 0; + const actualWinner = finalHome > finalAway ? "H" : finalHome < finalAway ? "A" : "D"; + + const oddsSnap = payload.bet_summary && Array.isArray(payload.bet_summary) + ? payload.bet_summary.find((b: any) => b.market === "MS") + : null; + const msMain = payload.main_pick || {}; + // Crude favorite-side detection: scan bet_summary or market_board for MS implied probs + const msBoard = (payload.market_board || {}).MS || {}; + let favSide: "H" | "A" | "D" | null = null; + const implH = readNum(msBoard?.probs?.["1"]); + const implA = readNum(msBoard?.probs?.["2"]); + if (implH !== null && implA !== null) { + favSide = implH > implA ? "H" : implA > implH ? "A" : null; + } + if (favSide && actualWinner !== favSide && actualWinner !== "D") { + upsetMatches++; + if (risk.is_surprise_risk === true) upsetCaughtBySystem++; + else upsetMissedBySystem++; + } + + // 3. REJECT-all matches + const brain = payload.betting_brain || {}; + if ((brain.decision || "NO_BET").toUpperCase() === "NO_BET" && brain.approved_count === 0) { + rejectAllCount++; + if (actualWinner === "H") rejectAllOutcomes.homeWin++; + else if (actualWinner === "A") rejectAllOutcomes.awayWin++; + else rejectAllOutcomes.draw++; + } + + // 4. Calibration shrinkage by market + const summary: any[] = Array.isArray(payload.bet_summary) ? 
payload.bet_summary : []; + for (const row of summary) { + const market = String(row.market || "OTHER"); + const raw = readNum(row.raw_confidence); + const cal = readNum(row.calibrated_confidence); + if (raw !== null && cal !== null) { + const arr = shrinkageByMarket.get(market) || []; + arr.push(raw - cal); + shrinkageByMarket.set(market, arr); + } + } + + // 5. Trap market: model says high prob but band_rate is much lower than implied + const bb = (msMain.betting_brain || {}) as any; + const triple = bb.triple_value || null; + if (triple && typeof triple === "object") { + trapMarketSampled++; + const bandRate = readNum(triple.band_rate); + const implied = readNum(triple.implied_prob); + if (bandRate !== null && implied !== null && implied - bandRate > 0.10) { + trapMarketCount++; + } + } + + // 6. "hafif favori" usage vs reality + const commentary = payload.match_commentary || {}; + const summaryText = String(commentary.summary || ""); + if (summaryText.includes("hafif favori")) { + hafifFavoriUseCount++; + // If summary mentions home name first then says hafif favori, assume home favorite + const home = (payload.match_info || {}).home_team || ""; + const sayingHomeFav = summaryText.indexOf(home) >= 0 && summaryText.indexOf(home) < summaryText.indexOf("hafif favori"); + const predictedSide = sayingHomeFav ? "H" : "A"; + if (predictedSide === actualWinner) hafifFavoriCorrectCount++; + } + } + + // ─── Output ───────────────────────────────────────────────── + console.log(`\n--- 1. SURPRISE TRANSPARENCY ---`); + console.log(` Empty surprise_reasons: ${surpriseEmpty}/${matches.length} (${((surpriseEmpty / matches.length) * 100).toFixed(1)}%)`); + console.log(` Filled surprise_reasons: ${surpriseFilled}/${matches.length}`); + + console.log(`\n--- 2. 
UPSET DETECTION ---`); + console.log(` Actual upsets (underdog wins): ${upsetMatches}/${matches.length}`); + console.log(` Caught by is_surprise_risk: ${upsetCaughtBySystem}`); + console.log(` MISSED (no surprise flag): ${upsetMissedBySystem}`); + if (upsetMatches > 0) { + console.log(` Miss rate: ${((upsetMissedBySystem / upsetMatches) * 100).toFixed(1)}%`); + } + + console.log(`\n--- 3. REJECT-ALL MATCHES ---`); + console.log(` Count: ${rejectAllCount}/${matches.length} (${((rejectAllCount / matches.length) * 100).toFixed(1)}%)`); + console.log(` Outcome distribution on those matches:`); + console.log(` Home wins: ${rejectAllOutcomes.homeWin}`); + console.log(` Draws: ${rejectAllOutcomes.draw}`); + console.log(` Away wins: ${rejectAllOutcomes.awayWin}`); + + console.log(`\n--- 4. CALIBRATION SHRINKAGE (raw - calibrated) BY MARKET ---`); + const buckets = [-5, 0, 5, 10, 15, 20]; + for (const [market, arr] of Array.from(shrinkageByMarket.entries()).sort()) { + const sorted = [...arr].sort((a, b) => a - b); + const median = sorted[Math.floor(sorted.length / 2)] ?? 0; + const p90 = sorted[Math.floor(sorted.length * 0.9)] ?? 0; + const avg = arr.reduce((s, v) => s + v, 0) / arr.length; + console.log(` ${market.padEnd(10)} n=${String(arr.length).padStart(4)} avg=${avg.toFixed(2).padStart(6)} median=${median.toFixed(2).padStart(6)} p90=${p90.toFixed(2).padStart(6)}`); + } + + console.log(`\n--- 5. TRAP MARKET PREVALENCE (main_pick) ---`); + console.log(` Sampled main_picks with triple_value: ${trapMarketSampled}`); + console.log(` Trap candidates (implied - band_rate > 0.10): ${trapMarketCount}`); + if (trapMarketSampled > 0) { + console.log(` Trap rate: ${((trapMarketCount / trapMarketSampled) * 100).toFixed(1)}%`); + } + + console.log(`\n--- 6. 
"hafif favori" COMMENTARY ACCURACY ---`); + console.log(` Used in commentary: ${hafifFavoriUseCount}`); + console.log(` Correctly predicted winner: ${hafifFavoriCorrectCount}`); + if (hafifFavoriUseCount > 0) { + console.log(` Accuracy: ${((hafifFavoriCorrectCount / hafifFavoriUseCount) * 100).toFixed(1)}%`); + } + + console.log(`\n=== DONE ===\n`); + void bucket; +} + +main() + .catch((e) => { + console.error(e); + process.exit(1); + }) + .finally(async () => { + await prisma.$disconnect(); + }); diff --git a/src/modules/admin/admin.controller.ts b/src/modules/admin/admin.controller.ts index 67b1565..69bd505 100755 --- a/src/modules/admin/admin.controller.ts +++ b/src/modules/admin/admin.controller.ts @@ -28,6 +28,8 @@ import { import { Roles } from "../../common/decorators"; import { PrismaService } from "../../database/prisma.service"; import { PaginationDto } from "../../common/dto/pagination.dto"; +import { AdminUsersQueryDto } from "./dto/admin-users-query.dto"; +import { UpdateUserSubscriptionDto } from "./dto/update-user-subscription.dto"; import { ApiResponse, createSuccessResponse, @@ -57,17 +59,33 @@ export class AdminController { @ApiOperation({ summary: "Get all users (admin)" }) @SwaggerResponse({ status: 200, type: [UserResponseDto] }) async getAllUsers( - @Query() pagination: PaginationDto, + @Query() query: AdminUsersQueryDto, ): Promise>> { - const { skip, take, orderBy } = pagination; + const { skip, take, orderBy, search, role, subscriptionStatus } = query; + + const where: any = {}; + if (search) { + where.OR = [ + { email: { contains: search, mode: "insensitive" } }, + { firstName: { contains: search, mode: "insensitive" } }, + { lastName: { contains: search, mode: "insensitive" } }, + ]; + } + if (role) { + where.role = role; + } + if (subscriptionStatus) { + where.subscriptionStatus = subscriptionStatus; + } const [users, total] = await Promise.all([ this.prisma.user.findMany({ + where, skip, take, orderBy, }), - this.prisma.user.count(), + 
this.prisma.user.count({ where }), ]); const dtos = plainToInstance( @@ -78,8 +96,8 @@ export class AdminController { return createPaginatedResponse( dtos, total, - pagination.page || 1, - pagination.limit || 10, + query.page || 1, + query.limit || 10, ); } @@ -284,20 +302,41 @@ export class AdminController { @SwaggerResponse({ status: 200 }) async updateUserSubscription( @Param("userId") userId: string, - @Body() data: { plan: string }, + @Body() data: UpdateUserSubscriptionDto, ): Promise> { const user = await this.prisma.user.findUnique({ where: { id: userId } }); if (!user) throw new NotFoundException("USER_NOT_FOUND"); - const validPlans = [PlanType.FREE, PlanType.PLUS, PlanType.PREMIUM]; + const validPlans = [PlanType.FREE, PlanType.PLUS, PlanType.PREMIUM, "past_due", "cancelled"]; const newPlan = data.plan as PlanType; if (!validPlans.includes(newPlan)) { throw new BadRequestException("INVALID_PLAN_TYPE"); } + const updateData: any = { subscriptionStatus: newPlan }; + + if (data.expiresAt) { + const parsedDate = new Date(data.expiresAt); + + // Business Logic: If upgrading to Premium/Plus, the expiry date cannot be in the past + const today = new Date(); + today.setHours(0, 0, 0, 0); // Strip time + + const expiry = new Date(parsedDate); + expiry.setHours(0, 0, 0, 0); + + if ((newPlan === PlanType.PREMIUM || newPlan === PlanType.PLUS) && expiry < today) { + throw new BadRequestException("EXPIRES_AT_CANNOT_BE_IN_PAST"); + } + + updateData.subscriptionExpiresAt = parsedDate; + } else if (data.expiresAt === null) { + updateData.subscriptionExpiresAt = null; + } + await this.prisma.user.update({ where: { id: userId }, - data: { subscriptionStatus: newPlan }, + data: updateData, }); await this.subscriptionsService.syncLimitsWithPlan(userId, newPlan); diff --git a/src/modules/admin/dto/admin-users-query.dto.ts b/src/modules/admin/dto/admin-users-query.dto.ts new file mode 100644 index 0000000..755a822 --- /dev/null +++ 
b/src/modules/admin/dto/admin-users-query.dto.ts @@ -0,0 +1,12 @@ +import { IsOptional, IsString } from "class-validator"; +import { PaginationDto } from "../../../common/dto/pagination.dto"; + +export class AdminUsersQueryDto extends PaginationDto { + @IsOptional() + @IsString() + role?: string; + + @IsOptional() + @IsString() + subscriptionStatus?: string; +} diff --git a/src/modules/admin/dto/update-user-subscription.dto.ts b/src/modules/admin/dto/update-user-subscription.dto.ts new file mode 100644 index 0000000..44b0c12 --- /dev/null +++ b/src/modules/admin/dto/update-user-subscription.dto.ts @@ -0,0 +1,13 @@ +import { IsString, IsOptional, IsEnum, IsISO8601 } from "class-validator"; +import { ApiProperty } from "@nestjs/swagger"; + +export class UpdateUserSubscriptionDto { + @ApiProperty({ description: "Subscription Plan" }) + @IsString() + plan: string; + + @ApiProperty({ description: "Expiration Date in ISO format", required: false }) + @IsOptional() + @IsISO8601() + expiresAt?: string | null; +} diff --git a/src/modules/matches/matches.service.ts b/src/modules/matches/matches.service.ts index 6de27a6..5078813 100755 --- a/src/modules/matches/matches.service.ts +++ b/src/modules/matches/matches.service.ts @@ -623,6 +623,8 @@ export class MatchesService { score: { home: liveMatch.scoreHome, away: liveMatch.scoreAway, + htHome: (liveMatch as any).htScoreHome ?? null, + htAway: (liveMatch as any).htScoreAway ?? null, }, date: new Date(Number(liveMatch.mstUtc)), // Fill missing relations with empty arrays @@ -802,7 +804,12 @@ export class MatchesService { teamStats: normalizedTeamStats, mstUtc: Number(match.mstUtc), date: match.date || new Date(Number(match.mstUtc)), - score: match.score || { home: match.scoreHome, away: match.scoreAway }, + score: match.score || { + home: match.scoreHome, + away: match.scoreAway, + htHome: match.htScoreHome ?? null, + htAway: match.htScoreAway ?? 
null, + }, homeTeam: { ...match.homeTeam, logo: match.homeTeamId diff --git a/src/modules/subscriptions/subscriptions.service.ts b/src/modules/subscriptions/subscriptions.service.ts index 9d97ace..4c3996d 100644 --- a/src/modules/subscriptions/subscriptions.service.ts +++ b/src/modules/subscriptions/subscriptions.service.ts @@ -218,7 +218,12 @@ export class SubscriptionsService { // Sync user subscription status await this.prisma.user.update({ where: { id: userId }, - data: { subscriptionStatus: effectivePlan }, + data: { + subscriptionStatus: effectivePlan, + subscriptionExpiresAt: currentBillingPeriod?.ends_at + ? new Date(currentBillingPeriod.ends_at) + : null, + }, }); // Sync usage limits with plan diff --git a/src/modules/users/dto/user.dto.ts b/src/modules/users/dto/user.dto.ts index 202e17e..28bc32d 100755 --- a/src/modules/users/dto/user.dto.ts +++ b/src/modules/users/dto/user.dto.ts @@ -116,6 +116,9 @@ export class UserResponseDto { @Expose() subscriptionStatus: string; + @Expose() + subscriptionExpiresAt: Date | null; + @Expose() createdAt: Date; diff --git a/src/tasks/data-fetcher.task.ts b/src/tasks/data-fetcher.task.ts index e112892..1064bd9 100755 --- a/src/tasks/data-fetcher.task.ts +++ b/src/tasks/data-fetcher.task.ts @@ -60,6 +60,10 @@ interface LiveScorePayloadMatch { score: { home: number | null; away: number | null; + ht?: { + home: number | null; + away: number | null; + } | null; } | null; } @@ -278,6 +282,16 @@ export class DataFetcherTask { const matchData = response.data.data; const scoreHome = matchData.homeScore ?? null; const scoreAway = matchData.awayScore ?? null; + const htScoreHome = this.asInt( + matchData.score?.ht?.home ?? + matchData.htHomeScore ?? + matchData.homeHtScore, + ); + const htScoreAway = this.asInt( + matchData.score?.ht?.away ?? + matchData.htAwayScore ?? 
+ matchData.awayHtScore, + ); const storedStatus = deriveStoredMatchStatus({ state: matchData.state, status: matchData.status, @@ -290,6 +304,8 @@ export class DataFetcherTask { data: { scoreHome, scoreAway, + htScoreHome, + htScoreAway, state: matchData.state || null, substate: matchData.substate || null, status: storedStatus, @@ -1022,6 +1038,8 @@ export class DataFetcherTask { // Safe score parsing const sHome = this.asInt(match.homeScore ?? match.score?.home); const sAway = this.asInt(match.awayScore ?? match.score?.away); + const sHtHome = this.asInt(match.score?.ht?.home); + const sHtAway = this.asInt(match.score?.ht?.away); const storedStatus = deriveStoredMatchStatus({ state: match.state, status: match.status, @@ -1062,6 +1080,8 @@ export class DataFetcherTask { status: storedStatus, scoreHome: sHome, scoreAway: sAway, + htScoreHome: sHtHome, + htScoreAway: sHtAway, homeTeamId: homeTeamId, awayTeamId: awayTeamId, updatedAt: new Date(), @@ -1078,6 +1098,8 @@ export class DataFetcherTask { mstUtc: BigInt(match.mstUtc || Date.now()), scoreHome: sHome, scoreAway: sAway, + htScoreHome: sHtHome, + htScoreAway: sHtAway, homeTeamId: homeTeamId, awayTeamId: awayTeamId, },