"""HT/MS Mixin — analyze_match_htms endpoint and helpers. Auto-extracted mixin module — split from services/single_match_orchestrator.py. All methods here are composed into SingleMatchOrchestrator via inheritance. `self` attributes (self.dsn, self.enrichment, self.v25_predictor, etc.) are initialised in the main __init__. """ from __future__ import annotations import json import re import time import math import os import pickle from collections import defaultdict from typing import Any, Dict, List, Optional, Set, Tuple, overload import pandas as pd import numpy as np import psycopg2 from psycopg2.extras import RealDictCursor from data.db import get_clean_dsn from schemas.prediction import FullMatchPrediction from schemas.match_data import MatchData from models.v25_ensemble import V25Predictor, get_v25_predictor try: from models.v27_predictor import V27Predictor, compute_divergence, compute_value_edge except ImportError: class V27Predictor: # type: ignore[no-redef] def __init__(self): self.models = {} def load_models(self): return False def predict_all(self, features): return {} def compute_divergence(*args, **kwargs): return {} def compute_value_edge(*args, **kwargs): return {} from features.odds_band_analyzer import OddsBandAnalyzer try: from models.basketball_v25 import ( BasketballMatchPrediction, get_basketball_v25_predictor, ) except ImportError: BasketballMatchPrediction = Any # type: ignore[misc] def get_basketball_v25_predictor() -> Any: raise ImportError("Basketball predictor is not available") from core.engines.player_predictor import PlayerPrediction, get_player_predictor from services.feature_enrichment import FeatureEnrichmentService from services.betting_brain import BettingBrain from services.v26_shadow_engine import V26ShadowEngine, get_v26_shadow_engine from services.match_commentary import generate_match_commentary from utils.top_leagues import load_top_league_ids from utils.league_reliability import load_league_reliability from 
class HtmsMixin:
    """HT/MS analysis endpoint and helpers, meant to be mixed into a host class.

    The methods below use members that this class does not define and that
    the host class must supply: ``_load_match_data``, ``analyze_match``,
    ``_to_float`` and the ``top_league_ids`` collection.
    """

    def analyze_match_htms(self, match_id: str) -> Optional[Dict[str, Any]]:
        """Build the HT/MS focused response for upset-hunting workflows.

        This endpoint is intentionally additive and does not mutate the
        standard /v20plus/analyze package contract.

        Args:
            match_id: Identifier forwarded to ``_load_match_data`` and
                ``analyze_match``.

        Returns:
            ``None`` when the match data or the base analysis package is
            unavailable. A ``{"status": "skip", ...}`` dict when the match is
            not football, is outside the top leagues, or lacks usable MS/HT
            odds. Otherwise a ``{"status": "ok", ...}`` dict carrying the
            ``htms_core`` and ``surprise_hunter`` sections.
        """
        data = self._load_match_data(match_id)
        if data is None:
            return None

        if str(data.sport or "").lower() != "football":
            return {
                "status": "skip",
                "match_id": match_id,
                "reason": "unsupported_sport",
                "engine_used": "htms_router",
            }

        is_top_league = self._is_top_league(data.league_id)
        engine_used = "v20plus_top_htms"

        # Hard gate: HT/MS upset model is trained on top leagues only.
        if not is_top_league:
            return {
                "status": "skip",
                "match_id": match_id,
                "reason": "out_of_training_scope",
                "engine_used": engine_used,
                "data_quality": {
                    "label": "LOW",
                    "flags": ["league_out_of_scope"],
                },
            }

        missing_requirements = self._missing_htms_requirements(data)
        if missing_requirements:
            return {
                "status": "skip",
                "match_id": match_id,
                "reason": "missing_critical_data",
                "missing": missing_requirements,
                "engine_used": engine_used,
                "data_quality": {
                    "label": "LOW",
                    "flags": [f"missing_{item}" for item in missing_requirements],
                },
            }

        base_package = self.analyze_match(match_id)
        if not base_package:
            return None

        # ``or {}`` (rather than a ``.get`` default) also covers keys that are
        # present but explicitly set to None.
        data_quality = base_package.get("data_quality") or {}
        market_board = base_package.get("market_board") or {}
        ms_market = market_board.get("MS") or {}
        ht_market = market_board.get("HT") or {}
        htft_probs = (market_board.get("HTFT") or {}).get("probs") or {}

        # Key order matters: on ties ``max`` keeps the first entry.
        reversal_probs = {
            outcome: self._htms_float(htft_probs.get(outcome))
            for outcome in ("1/2", "2/1", "X/1", "X/2")
        }
        top_reversal = max(reversal_probs.items(), key=lambda item: item[1])

        ms_conf = self._htms_float(ms_market.get("confidence"))
        ht_conf = self._htms_float(ht_market.get("confidence"))
        base_conf = (ms_conf + ht_conf) / 2.0

        # Each penalty lowers the ceiling applied to the combined confidence.
        confidence_cap = 100.0
        penalties: List[str] = []
        if data.lineup_source == "probable_xi":
            confidence_cap = min(confidence_cap, 72.0)
            penalties.append("lineup_probable_xi")
        elif data.lineup_source == "none":
            confidence_cap = min(confidence_cap, 58.0)
            penalties.append("lineup_unavailable")
        if str(data_quality.get("label", "LOW")).upper() == "LOW":
            confidence_cap = min(confidence_cap, 55.0)
            penalties.append("low_data_quality")

        final_conf = min(base_conf, confidence_cap)
        upset_score = self._compute_htms_upset_score(
            reversal_probs=reversal_probs,
            odds_data=data.odds_data,
            is_top_league=is_top_league,
        )
        # The non-top value is unreachable while the hard gate above is in
        # place; it is kept so the threshold mirrors the scorer's two modes.
        upset_threshold = 58.0 if is_top_league else 54.0
        upset_playable = (
            upset_score >= upset_threshold
            and top_reversal[1] >= 0.045
            and final_conf >= 45.0
            and "low_data_quality" not in penalties
        )

        return {
            "status": "ok",
            "engine_used": engine_used,
            "match_info": base_package.get("match_info", {}),
            "data_quality": data_quality,
            "htms_core": {
                "ms_pick": ms_market.get("pick"),
                "ms_confidence": round(ms_conf, 1),
                "ht_pick": ht_market.get("pick"),
                "ht_confidence": round(ht_conf, 1),
                "combined_confidence": round(final_conf, 1),
                "confidence_cap": round(confidence_cap, 1),
                "penalties": penalties,
            },
            "surprise_hunter": {
                "upset_score": round(upset_score, 1),
                "threshold": upset_threshold,
                "playable": upset_playable,
                "top_reversal_pick": top_reversal[0],
                "top_reversal_prob": round(top_reversal[1], 4),
                "reversal_probs": {
                    key: round(value, 4)
                    for key, value in reversal_probs.items()
                },
            },
            "risk": base_package.get("risk", {}),
            "reasoning_factors": base_package.get("reasoning_factors", []),
        }

    @staticmethod
    def _htms_float(value: Any, default: float = 0.0) -> float:
        """Coerce *value* to a finite float, returning *default* otherwise.

        ``None``, unparsable strings, NaN and +/-inf all yield *default*, so
        one malformed feed value cannot raise out of the endpoint or leak
        into the response.
        """
        try:
            number = float(value)
        except (TypeError, ValueError):
            return default
        return number if math.isfinite(number) else default

    def _is_top_league(self, league_id: Optional[str]) -> bool:
        """Return True when *league_id* (compared as a string) is in ``self.top_league_ids``."""
        if not league_id:
            return False
        return str(league_id) in self.top_league_ids

    # The annotation is quoted so it is never evaluated when the class is defined.
    def _missing_htms_requirements(self, data: "MatchData") -> List[str]:
        """List the odds groups that are absent or unusable for HT/MS analysis.

        A group counts as present only when each of its three odds parses to
        a finite number strictly greater than 1.0.

        Returns:
            A list containing ``"ms_odds"`` and/or ``"ht_odds"``; empty when
            both groups are usable.
        """
        odds = data.odds_data or {}
        required = (
            ("ms_odds", ("ms_h", "ms_d", "ms_a")),
            ("ht_odds", ("ht_h", "ht_d", "ht_a")),
        )
        return [
            label
            for label, keys in required
            if not all(self._htms_float(odds.get(key)) > 1.0 for key in keys)
        ]

    def _compute_htms_upset_score(
        self,
        reversal_probs: Dict[str, float],
        odds_data: Dict[str, float],
        is_top_league: bool,
    ) -> float:
        """Score the upset potential of a match on a 0-100 scale.

        The score blends the largest reversal probability (60%), the sum of
        all reversal probabilities (25%) and the home/away odds gap (15%,
        saturating at a gap of 2.0).

        Args:
            reversal_probs: Probability per HT/FT reversal outcome.
            odds_data: Odds mapping; only ``ms_h`` and ``ms_a`` are read.
            is_top_league: When False the score is scaled by 0.92.

        Returns:
            The blended score clamped to ``[0.0, 100.0]``.
        """
        odds = odds_data or {}
        ms_h = self._to_float(odds.get("ms_h"), 0.0)
        ms_a = self._to_float(odds.get("ms_a"), 0.0)
        # Without both sides priced above 1.0 the gap carries no signal.
        if ms_h <= 1.0 or ms_a <= 1.0:
            favorite_gap = 0.0
        else:
            favorite_gap = abs(ms_h - ms_a)

        reversal_max = max(reversal_probs.values()) if reversal_probs else 0.0
        reversal_sum = sum(reversal_probs.values())

        # Strong favorite + reversal probability is the core upset signal.
        gap_factor = min(1.0, favorite_gap / 2.0)
        score = (
            (reversal_max * 100.0 * 0.60)
            + (reversal_sum * 100.0 * 0.25)
            + (gap_factor * 100.0 * 0.15)
        )

        if not is_top_league:
            # Non-top leagues are noisier; keep it slightly conservative.
            score *= 0.92

        return max(0.0, min(100.0, score))