""" League Odds Reliability Calculator =================================== Computes per-league Brier Score from historical match results + odds, then derives an odds_reliability factor (0.0 – 1.0) for each league. Output: ai-engine/data/league_reliability.json Used by: SingleMatchOrchestrator to weight odds-based edge calculations. Usage: python3 scripts/compute_league_reliability.py """ from __future__ import annotations import json import os import sys from typing import Any, Dict, List import psycopg2 import psycopg2.extras # ─── Config ────────────────────────────────────────────────────────────── SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) AI_ENGINE_DIR = os.path.join(SCRIPT_DIR, "..") OUTPUT_PATH = os.path.join(AI_ENGINE_DIR, "data", "league_reliability.json") MIN_MATCHES = 50 # Minimum completed matches to compute reliability BRIER_BASELINE = 0.50 # Random-guess Brier Score for 3-way (worst case) BRIER_PERFECT = 0.33 # Theoretical best for well-calibrated 3-way odds def get_dsn() -> str: """Build DSN from environment, matching the AI Engine's own config.""" from dotenv import load_dotenv env_path = os.path.join(AI_ENGINE_DIR, "..", ".env") load_dotenv(env_path) raw = os.getenv("DATABASE_URL", "") if raw.startswith("postgresql://"): return raw.split("?")[0] host = os.getenv("DB_HOST", "localhost") port = os.getenv("DB_PORT", "15432") user = os.getenv("DB_USER", "suggestbet") pw = os.getenv("DB_PASS", "SuGGesT2026SecuRe") db = os.getenv("DB_NAME", "boilerplate_db") return f"postgresql://{user}:{pw}@{host}:{port}/{db}" def compute_league_reliability(conn: Any) -> List[Dict[str, Any]]: """ For each league with enough data, compute: - brier_score: calibration quality of the odds - heavy_fav_win_pct: how often <1.50 favorites actually win - upset_rate: how often heavy favorites lose - odds_reliability: composite 0.0-1.0 score """ cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) print("📊 Computing per-league Brier Scores from match results + odds...") cur.execute(""" WITH ms_odds AS ( SELECT oc.match_id, MAX(CASE WHEN os.name = '1' THEN os.odd_value::float END) AS odds_h, MAX(CASE WHEN os.name = 'X' THEN os.odd_value::float END) AS odds_d, MAX(CASE WHEN os.name = '2' THEN os.odd_value::float END) AS odds_a FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id WHERE oc.name = 'Maç Sonucu' GROUP BY oc.match_id HAVING MAX(CASE WHEN os.name = '1' THEN os.odd_value::float END) > 1.0 AND MAX(CASE WHEN os.name = '2' THEN os.odd_value::float END) > 1.0 ), match_results AS ( SELECT m.league_id, l.name AS league_name, CASE WHEN m.score_home > m.score_away THEN '1' WHEN m.score_home = m.score_away THEN 'X' ELSE '2' END AS result, o.odds_h, o.odds_d, o.odds_a, -- Normalized implied probabilities (1.0 / o.odds_h) / ( (1.0 / o.odds_h) + (1.0 / COALESCE(o.odds_d, 3.3)) + (1.0 / o.odds_a) ) AS ip_home, (1.0 / o.odds_a) / ( (1.0 / o.odds_h) + (1.0 / COALESCE(o.odds_d, 3.3)) + (1.0 / o.odds_a) ) AS ip_away, CASE WHEN o.odds_h < o.odds_a THEN 'H' ELSE 'A' END AS fav_side, LEAST(o.odds_h, o.odds_a) AS fav_odds FROM matches m JOIN ms_odds o ON o.match_id = m.id JOIN leagues l ON m.league_id = l.id WHERE m.status = 'FT' AND m.score_home IS NOT NULL AND m.sport = 'football' ) SELECT league_id, league_name, COUNT(*) AS match_count, -- Brier Score (lower = better odds calibration) AVG( POWER(ip_home - CASE WHEN result = '1' THEN 1.0 ELSE 0.0 END, 2) + POWER(ip_away - CASE WHEN result = '2' THEN 1.0 ELSE 0.0 END, 2) ) AS brier_score, -- Heavy favorite metrics COUNT(CASE WHEN fav_odds < 1.50 THEN 1 END) AS heavy_fav_count, AVG(CASE WHEN fav_odds < 1.50 AND ((fav_side = 'H' AND result = '1') OR (fav_side = 'A' AND result = '2')) THEN 1.0 WHEN fav_odds < 1.50 THEN 0.0 END) AS heavy_fav_win_rate, -- Overall favorite win rate AVG(CASE WHEN (fav_side = 'H' AND result = '1') OR (fav_side = 'A' AND result = '2') THEN 1.0 ELSE 0.0 END) AS fav_win_rate, -- Chaos metric STDDEV( CASE WHEN result = '1' THEN 1 WHEN result = '2' THEN -1 ELSE 0 END ) AS result_volatility FROM match_results GROUP BY league_id, league_name HAVING COUNT(*) >= %s ORDER BY COUNT(*) DESC """, (MIN_MATCHES,)) rows = cur.fetchall() cur.close() print(f" ✅ Found {len(rows)} leagues with >= {MIN_MATCHES} matches") # ── Compute composite odds_reliability ────────────────────────────── results: List[Dict[str, Any]] = [] for row in rows: brier = float(row["brier_score"]) match_count = int(row["match_count"]) heavy_fav_win = float(row["heavy_fav_win_rate"] or 0.65) fav_win = float(row["fav_win_rate"]) # Component 1: Brier-based reliability (0-1, higher = better) # Maps [BRIER_BASELINE .. BRIER_PERFECT] → [0.0 .. 1.0] brier_reliability = max(0.0, min(1.0, (BRIER_BASELINE - brier) / (BRIER_BASELINE - BRIER_PERFECT) )) # Component 2: Sample size confidence (log scale, caps at 500 matches) import math sample_confidence = min(1.0, math.log(max(1, match_count)) / math.log(500)) # Component 3: Heavy favorite predictability # If heavy fav wins 80%+ → odds are very reliable; if 55% → chaotic fav_reliability = max(0.0, min(1.0, (heavy_fav_win - 0.55) / (0.80 - 0.55))) # Composite: weighted blend # Brier is the primary signal (60%), sample size (20%), fav reliability (20%) odds_reliability = ( brier_reliability * 0.60 + sample_confidence * 0.20 + fav_reliability * 0.20 ) results.append({ "league_id": row["league_id"], "league_name": row["league_name"], "match_count": match_count, "brier_score": round(brier, 4), "heavy_fav_win_pct": round(heavy_fav_win * 100, 1), "fav_win_pct": round(fav_win * 100, 1), "odds_reliability": round(odds_reliability, 4), }) # Sort by reliability descending results.sort(key=lambda x: x["odds_reliability"], reverse=True) return results def build_lookup(results: List[Dict[str, Any]]) -> Dict[str, float]: """Build league_id → odds_reliability lookup for the orchestrator.""" return {r["league_id"]: r["odds_reliability"] for r in results} def main() -> None: dsn = get_dsn() print(f"🔗 Connecting to database...") conn = psycopg2.connect(dsn) try: results = compute_league_reliability(conn) # Build output structure output = { "version": "v1", "description": "Per-league odds reliability scores computed from Brier Score analysis", "min_matches_threshold": MIN_MATCHES, "total_leagues": len(results), "default_reliability": 0.35, # fallback for unknown leagues "lookup": build_lookup(results), "details": results[:50], # top 50 for human reference } # Ensure output directory exists os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True) with open(OUTPUT_PATH, "w", encoding="utf-8") as f: json.dump(output, f, indent=2, ensure_ascii=False) print(f"\n✅ Saved {len(results)} league reliability scores to {OUTPUT_PATH}") print(f"\n📈 Top 10 most reliable leagues:") for i, r in enumerate(results[:10], 1): print(f" {i:2d}. {r['league_name']:25s} | Brier: {r['brier_score']:.4f} | " f"Reliability: {r['odds_reliability']:.4f} | " f"Heavy Fav: {r['heavy_fav_win_pct']:.1f}% | " f"N={r['match_count']}") print(f"\n📉 Bottom 10 (least reliable):") for i, r in enumerate(results[-10:], 1): print(f" {i:2d}. {r['league_name']:25s} | Brier: {r['brier_score']:.4f} | " f"Reliability: {r['odds_reliability']:.4f} | " f"Heavy Fav: {r['heavy_fav_win_pct']:.1f}% | " f"N={r['match_count']}") finally: conn.close() if __name__ == "__main__": main()