This commit is contained in:
@@ -91,22 +91,26 @@ class Calibrator:
|
||||
def __init__(self) -> None:
    """Create empty registries, define heuristic fallback factors, then load.

    Attributes initialised here:
        calibrators: market key -> IsotonicRegression model; starts empty.
        metrics: market key -> CalibrationMetrics; starts empty.
        heuristic_fallback: market key -> float factor for the heuristic
            path (presumably read by ``_heuristic_calibrate`` -- confirm
            against that method).

    Ends by calling ``self._load_calibrators()``.
    """
    self.calibrators: Dict[str, IsotonicRegression] = {}
    self.metrics: Dict[str, CalibrationMetrics] = {}

    # Less aggressive shrinkage: only meaningfully overconfident bands are
    # pulled. Factors were raised from the previous 0.80-0.95 range to
    # 0.92-0.98 because the orchestrator and config already apply
    # market-level multipliers; double-shrinkage was the root cause of
    # 24-35pt average calibrated-vs-raw drops in production traces.
    #
    # Each key appears exactly once. A duplicated key in a dict literal is
    # silently overridden by its last occurrence, so superseded values must
    # not be left in place.
    self.heuristic_fallback: Dict[str, float] = {
        "ms": 0.96,
        "ms_home": 0.96,
        "ms_home_heavy_fav": 0.98,
        "ms_home_fav": 0.96,
        "ms_home_balanced": 0.94,
        "ms_home_underdog": 0.92,
        "ms_draw": 0.94,
        "ms_away": 0.96,
        "ou15": 0.96,
        "ou25": 0.96,
        "ou35": 0.94,
        "btts": 0.96,
        "ht_ft": 0.92,
        "dc": 0.97,
        "ht": 0.92,
    }

    # NOTE(review): presumably fills self.calibrators / self.metrics from
    # persisted artifacts; the method body is not visible here -- confirm.
    self._load_calibrators()
|
||||
|
||||
@@ -139,21 +143,32 @@ class Calibrator:
|
||||
except Exception as e:
|
||||
print(f"[Calibrator] Warning: Failed to load metrics for {market}: {e}")
|
||||
|
||||
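# At or below TRUSTED_SAMPLE_FLOOR samples, isotonic output is blended with raw_prob at a 30% weight (the minimum) to dampen overfit jumps.
|
||||
# The isotonic weight then ramps linearly up to 100% at TRUSTED_SAMPLE_CEILING samples; at or above the ceiling, isotonic is trusted fully.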
|
||||
TRUSTED_SAMPLE_FLOOR = 30
|
||||
TRUSTED_SAMPLE_CEILING = 200
|
||||
# Hard cap on how far calibration can move probability in either direction.
|
||||
MAX_DELTA = 0.20
|
||||
|
||||
def calibrate(self, market_type: str, raw_prob: float, odds_val: Optional[float] = None) -> float:
|
||||
"""
|
||||
Calibrate a raw probability using Isotonic Regression.
|
||||
|
||||
Calibrate a raw probability using Isotonic Regression with safeguards.
|
||||
|
||||
Args:
|
||||
market_type (str): 'ms_home', 'ou25', 'btts', 'ht_ft', etc.
|
||||
raw_prob (float): The raw probability from XGBoost (0.0 - 1.0)
|
||||
odds_val (float, optional): The pre-match odds, used for context-aware bucket mapping
|
||||
|
||||
|
||||
Returns:
|
||||
float: Calibrated probability (0.0 - 1.0)
|
||||
|
||||
Safeguards:
|
||||
* Low-sample trained models are blended with raw_prob to dampen overfit.
|
||||
* MAX_DELTA caps the per-call adjustment (prevents 40pp swings).
|
||||
"""
|
||||
# Normalize market type
|
||||
market_key = market_type.lower().replace("-", "_")
|
||||
|
||||
|
||||
# Route to bucket if ms_home and odds provided
|
||||
if market_key == "ms_home" and odds_val is not None and odds_val > 1.0:
|
||||
if odds_val <= 1.40:
|
||||
@@ -164,20 +179,42 @@ class Calibrator:
|
||||
bucket_key = "ms_home_balanced"
|
||||
else:
|
||||
bucket_key = "ms_home_underdog"
|
||||
|
||||
|
||||
if bucket_key in self.calibrators:
|
||||
market_key = bucket_key
|
||||
|
||||
# If we have a trained Isotonic Regression model, use it
|
||||
|
||||
# If we have a trained Isotonic Regression model, use it (with safeguards)
|
||||
if market_key in self.calibrators:
|
||||
try:
|
||||
calibrated = self.calibrators[market_key].predict([raw_prob])[0]
|
||||
# Ensure output is valid probability
|
||||
return float(np.clip(calibrated, 0.01, 0.99))
|
||||
iso_pred = float(self.calibrators[market_key].predict([raw_prob])[0])
|
||||
|
||||
# Sample-count weighted blend with raw probability.
|
||||
# Sparse models barely move probability; mature models dominate.
|
||||
metrics = self.metrics.get(market_key)
|
||||
n_samples = metrics.sample_count if metrics else 0
|
||||
if n_samples >= self.TRUSTED_SAMPLE_CEILING:
|
||||
iso_weight = 1.0
|
||||
elif n_samples <= self.TRUSTED_SAMPLE_FLOOR:
|
||||
# Very sparse: at least 30% trust to surface the signal
|
||||
iso_weight = max(0.30, n_samples / self.TRUSTED_SAMPLE_CEILING)
|
||||
else:
|
||||
# Linearly ramp 30% → 100% between floor and ceiling
|
||||
span = self.TRUSTED_SAMPLE_CEILING - self.TRUSTED_SAMPLE_FLOOR
|
||||
iso_weight = 0.30 + 0.70 * (n_samples - self.TRUSTED_SAMPLE_FLOOR) / span
|
||||
blended = iso_weight * iso_pred + (1.0 - iso_weight) * raw_prob
|
||||
|
||||
# Cap delta to avoid huge swings on noisy calibrators
|
||||
delta = blended - raw_prob
|
||||
if delta > self.MAX_DELTA:
|
||||
blended = raw_prob + self.MAX_DELTA
|
||||
elif delta < -self.MAX_DELTA:
|
||||
blended = raw_prob - self.MAX_DELTA
|
||||
|
||||
return float(np.clip(blended, 0.01, 0.99))
|
||||
except Exception as e:
|
||||
print(f"[Calibrator] Warning: Isotonic failed for {market_key}: {e}")
|
||||
# Fall through to heuristic
|
||||
|
||||
|
||||
# Fallback to heuristic calibration
|
||||
return self._heuristic_calibrate(market_key, raw_prob)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user