main

2026-05-17 02:17:22 +03:00
parent 17ace9bd12
commit 94c7a4481a
53 changed files with 29602 additions and 7832 deletions
@@ -0,0 +1,231 @@
+"""HT/MS Mixin — analyze_match_htms endpoint and helpers.
+
+Auto-extracted mixin module — split from services/single_match_orchestrator.py.
+All methods here are composed into SingleMatchOrchestrator via inheritance.
+`self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are
+initialised in the main __init__.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+import time
+import math
+import os
+import pickle
+from collections import defaultdict
+from typing import Any, Dict, List, Optional, Set, Tuple, overload
+
+import pandas as pd
+import numpy as np
+
+import psycopg2
+from psycopg2.extras import RealDictCursor
+
+from data.db import get_clean_dsn
+from schemas.prediction import FullMatchPrediction
+from schemas.match_data import MatchData
+from models.v25_ensemble import V25Predictor, get_v25_predictor
+try:
+    from models.v27_predictor import (
+        V27Predictor,
+        compute_divergence,
+        compute_value_edge,
+    )
+except ImportError:
+    # models.v27_predictor could not be imported: define inert stand-ins
+    # under the same three names so this module still imports.
+
+    class V27Predictor:  # type: ignore[no-redef]
+        """Stand-in predictor: holds no models and predicts nothing."""
+
+        def __init__(self):
+            # Empty mapping: no models are available in the fallback.
+            self.models = {}
+
+        def load_models(self):
+            """Return ``False`` because the fallback loads nothing."""
+            return False
+
+        def predict_all(self, features):
+            """Ignore *features* and return an empty dict."""
+            return {}
+
+    def compute_divergence(*args, **kwargs):
+        """Fallback: accept any arguments and return an empty dict."""
+        return {}
+
+    def compute_value_edge(*args, **kwargs):
+        """Fallback: accept any arguments and return an empty dict."""
+        return {}
+from features.odds_band_analyzer import OddsBandAnalyzer
+try:
+    from models.basketball_v25 import BasketballMatchPrediction, get_basketball_v25_predictor
+except ImportError:
+    # models.basketball_v25 could not be imported: keep both names defined so
+    # this module still imports; the factory fails only when it is called.
+
+    def get_basketball_v25_predictor() -> Any:
+        """Fallback factory: always raise ``ImportError``."""
+        raise ImportError("Basketball predictor is not available")
+
+    # Placeholder so annotations that mention the type still resolve.
+    BasketballMatchPrediction = Any  # type: ignore[misc]
+from core.engines.player_predictor import PlayerPrediction, get_player_predictor
+from services.feature_enrichment import FeatureEnrichmentService
+from services.betting_brain import BettingBrain
+from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine
+from services.match_commentary import generate_match_commentary
+from utils.top_leagues import load_top_league_ids
+from utils.league_reliability import load_league_reliability
+from config.config_loader import build_threshold_dict, get_threshold_default
+from models.calibration import get_calibrator
+
+
+class HtmsMixin:
+    """HT/MS analysis endpoint plus its private helpers.
+
+    The methods here read ``self.top_league_ids`` and call
+    ``self._load_match_data``, ``self.analyze_match`` and ``self._to_float``.
+    None of those are defined in this class, so the class this mixin is
+    composed into must provide them.
+    """
+
+    @staticmethod
+    def _htms_safe_float(value: Any, default: float = 0.0) -> float:
+        """Convert *value* to a finite float, or return *default*.
+
+        ``None``, non-numeric strings, NaN and +/-infinity all map to
+        *default*, so callers can compare the result without extra guards.
+        """
+        try:
+            number = float(value)
+        except (TypeError, ValueError, OverflowError):
+            return default
+        return number if math.isfinite(number) else default
+
+    def analyze_match_htms(self, match_id: str) -> Optional[Dict[str, Any]]:
+        """
+        HT/MS focused response for upset-hunting workflows.
+
+        This endpoint is intentionally additive and does not mutate the
+        standard /v20plus/analyze package contract.
+
+        Args:
+            match_id: Identifier forwarded to ``self._load_match_data`` and
+                ``self.analyze_match``.
+
+        Returns:
+            ``None`` when the match data or the base analysis package is
+            unavailable. Otherwise a dict whose ``status`` is either
+            ``"skip"`` (``reason`` is ``unsupported_sport``,
+            ``out_of_training_scope`` or ``missing_critical_data``) or
+            ``"ok"`` (with ``htms_core`` and ``surprise_hunter`` sections).
+        """
+        data = self._load_match_data(match_id)
+        if data is None:
+            return None
+
+        if str(data.sport or "").lower() != "football":
+            return {
+                "status": "skip",
+                "match_id": match_id,
+                "reason": "unsupported_sport",
+                "engine_used": "htms_router",
+            }
+
+        is_top_league = self._is_top_league(data.league_id)
+        engine_used = "v20plus_top_htms"
+
+        # Hard gate: HT/MS upset model is trained on top leagues only.
+        if not is_top_league:
+            return {
+                "status": "skip",
+                "match_id": match_id,
+                "reason": "out_of_training_scope",
+                "engine_used": engine_used,
+                "data_quality": {
+                    "label": "LOW",
+                    "flags": ["league_out_of_scope"],
+                },
+            }
+
+        missing_requirements = self._missing_htms_requirements(data)
+        if missing_requirements:
+            return {
+                "status": "skip",
+                "match_id": match_id,
+                "reason": "missing_critical_data",
+                "missing": missing_requirements,
+                "engine_used": engine_used,
+                "data_quality": {
+                    "label": "LOW",
+                    "flags": [f"missing_{item}" for item in missing_requirements],
+                },
+            }
+
+        base_package = self.analyze_match(match_id)
+        if not base_package:
+            return None
+        # ``or {}`` (rather than a ``.get`` default) also covers keys that
+        # are present but explicitly set to None.
+        data_quality = base_package.get("data_quality") or {}
+        market_board = base_package.get("market_board") or {}
+        ms_market = market_board.get("MS") or {}
+        ht_market = market_board.get("HT") or {}
+        htft_probs = (market_board.get("HTFT") or {}).get("probs") or {}
+
+        # The four HT/FT outcomes tracked as reversals; missing or
+        # unparseable probabilities count as 0.0.
+        reversal_probs = {
+            outcome: self._htms_safe_float(htft_probs.get(outcome))
+            for outcome in ("1/2", "2/1", "X/1", "X/2")
+        }
+        top_reversal = max(reversal_probs.items(), key=lambda item: item[1])
+
+        ms_conf = self._htms_safe_float(ms_market.get("confidence"))
+        ht_conf = self._htms_safe_float(ht_market.get("confidence"))
+        base_conf = (ms_conf + ht_conf) / 2.0
+
+        # Each penalty can only lower the cap; the lowest one wins.
+        confidence_cap = 100.0
+        penalties: List[str] = []
+        if data.lineup_source == "probable_xi":
+            confidence_cap = min(confidence_cap, 72.0)
+            penalties.append("lineup_probable_xi")
+        if data.lineup_source == "none":
+            confidence_cap = min(confidence_cap, 58.0)
+            penalties.append("lineup_unavailable")
+        # A missing, None or empty label is treated as LOW.
+        if str(data_quality.get("label") or "LOW").upper() == "LOW":
+            confidence_cap = min(confidence_cap, 55.0)
+            penalties.append("low_data_quality")
+
+        final_conf = min(base_conf, confidence_cap)
+
+        upset_score = self._compute_htms_upset_score(
+            reversal_probs=reversal_probs,
+            odds_data=data.odds_data,
+            is_top_league=is_top_league,
+        )
+        # is_top_league is always True past the hard gate above, so this
+        # currently evaluates to 58.0.
+        upset_threshold = 58.0 if is_top_league else 54.0
+        upset_playable = (
+            upset_score >= upset_threshold
+            and top_reversal[1] >= 0.045
+            and final_conf >= 45.0
+            and "low_data_quality" not in penalties
+        )
+
+        return {
+            "status": "ok",
+            "engine_used": engine_used,
+            "match_info": base_package.get("match_info", {}),
+            "data_quality": data_quality,
+            "htms_core": {
+                "ms_pick": ms_market.get("pick"),
+                "ms_confidence": round(ms_conf, 1),
+                "ht_pick": ht_market.get("pick"),
+                "ht_confidence": round(ht_conf, 1),
+                "combined_confidence": round(final_conf, 1),
+                "confidence_cap": round(confidence_cap, 1),
+                "penalties": penalties,
+            },
+            "surprise_hunter": {
+                "upset_score": round(upset_score, 1),
+                "threshold": upset_threshold,
+                "playable": upset_playable,
+                "top_reversal_pick": top_reversal[0],
+                "top_reversal_prob": round(top_reversal[1], 4),
+                "reversal_probs": {
+                    key: round(value, 4) for key, value in reversal_probs.items()
+                },
+            },
+            "risk": base_package.get("risk", {}),
+            "reasoning_factors": base_package.get("reasoning_factors", []),
+        }
+
+    def _is_top_league(self, league_id: Optional[str]) -> bool:
+        """Return True when ``str(league_id)`` is in ``self.top_league_ids``.
+
+        A falsy *league_id* (``None``, empty string) is never a top league.
+        """
+        if not league_id:
+            return False
+        return str(league_id) in self.top_league_ids
+
+    def _missing_htms_requirements(self, data: MatchData) -> List[str]:
+        """List the odds groups that lack usable prices.
+
+        A group is usable only when all three of its odds parse to a finite
+        number greater than 1.0. Absent, ``None`` or non-numeric values make
+        the group missing instead of raising.
+
+        Returns:
+            A list containing ``"ms_odds"`` and/or ``"ht_odds"``, in that
+            order; empty when both groups are complete.
+        """
+        odds = data.odds_data or {}
+        required_groups = (
+            ("ms_odds", ("ms_h", "ms_d", "ms_a")),
+            ("ht_odds", ("ht_h", "ht_d", "ht_a")),
+        )
+        return [
+            label
+            for label, keys in required_groups
+            if not all(self._htms_safe_float(odds.get(key)) > 1.0 for key in keys)
+        ]
+
+    def _compute_htms_upset_score(
+        self,
+        reversal_probs: Dict[str, float],
+        odds_data: Dict[str, float],
+        is_top_league: bool,
+    ) -> float:
+        """Blend reversal probabilities and the odds gap into a 0-100 score.
+
+        The score weights the largest reversal probability at 60%, the sum
+        of all reversal probabilities at 25% and the home/away odds gap
+        (saturating at a difference of 2.0) at 15%. It is scaled by 0.92
+        when *is_top_league* is false.
+
+        Args:
+            reversal_probs: Probability per reversal outcome; non-finite or
+                unparseable values count as 0.0.
+            odds_data: Odds mapping read for ``ms_h`` and ``ms_a``; ``None``
+                is treated as empty.
+            is_top_league: Whether the match belongs to a top league.
+
+        Returns:
+            The score clamped to the range [0.0, 100.0].
+        """
+        odds = odds_data or {}
+        ms_h = self._to_float(odds.get("ms_h"), 0.0)
+        ms_a = self._to_float(odds.get("ms_a"), 0.0)
+        # The gap only counts when both prices are real odds (> 1.0). The
+        # finite check keeps NaN/inf from saturating the gap factor.
+        odds_usable = (
+            math.isfinite(ms_h)
+            and math.isfinite(ms_a)
+            and ms_h > 1.0
+            and ms_a > 1.0
+        )
+        favorite_gap = abs(ms_h - ms_a) if odds_usable else 0.0
+
+        # Sanitise first: a NaN here would otherwise survive the final clamp
+        # as 100.0, because min(100.0, nan) returns 100.0.
+        probs = [
+            self._htms_safe_float(value)
+            for value in (reversal_probs or {}).values()
+        ]
+        reversal_max = max(probs, default=0.0)
+        reversal_sum = sum(probs)
+
+        # Strong favorite + reversal probability is the core upset signal.
+        gap_factor = min(1.0, favorite_gap / 2.0)
+        score = (
+            (reversal_max * 100.0 * 0.60)
+            + (reversal_sum * 100.0 * 0.25)
+            + (gap_factor * 100.0 * 0.15)
+        )
+
+        if not is_top_league:
+            # Non-top leagues are noisier; keep it slightly conservative.
+            score *= 0.92
+        return max(0.0, min(100.0, score))