This commit is contained in:
@@ -0,0 +1,248 @@
|
||||
"""
|
||||
League Odds Reliability Calculator
|
||||
===================================
|
||||
Computes per-league Brier Score from historical match results + odds,
|
||||
then derives an odds_reliability factor (0.0 – 1.0) for each league.
|
||||
|
||||
Output: ai-engine/data/league_reliability.json
|
||||
Used by: SingleMatchOrchestrator to weight odds-based edge calculations.
|
||||
|
||||
Usage:
|
||||
python3 scripts/compute_league_reliability.py
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
|
||||
# ─── Config ──────────────────────────────────────────────────────────────
# All paths are derived from this script's own location, so the output file
# ends up in the same place regardless of the current working directory.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
AI_ENGINE_DIR = os.path.join(SCRIPT_DIR, os.pardir)
OUTPUT_PATH = os.path.join(AI_ENGINE_DIR, "data", "league_reliability.json")

# Leagues with fewer finished matches than this are excluded from the results.
MIN_MATCHES = 50

# Brier score at (or above) which the Brier-based reliability component is 0.0.
BRIER_BASELINE = 0.50

# Brier score at (or below) which the Brier-based reliability component is 1.0.
BRIER_PERFECT = 0.33
|
||||
|
||||
|
||||
def get_dsn() -> str:
    """Build the PostgreSQL connection URI from the environment.

    Loads ``<AI_ENGINE_DIR>/../.env`` with python-dotenv, then prefers
    ``DATABASE_URL`` with its query string removed. If that variable is unset
    or is not a PostgreSQL URI, the DSN is assembled from ``DB_HOST``,
    ``DB_PORT``, ``DB_USER``, ``DB_PASS`` and ``DB_NAME``.

    Returns:
        A ``postgresql://`` (or ``postgres://``) connection URI.
    """
    from urllib.parse import quote

    from dotenv import load_dotenv

    load_dotenv(os.path.join(AI_ENGINE_DIR, "..", ".env"))

    raw = os.getenv("DATABASE_URL", "")
    # libpq accepts both URI schemes; previously a "postgres://" URL was
    # silently ignored in favour of the hard-coded fallbacks below.
    if raw.startswith(("postgresql://", "postgres://")):
        # Everything after "?" is dropped.
        # NOTE(review): presumably this discards ORM-specific parameters
        # (e.g. "?schema=...") that libpq would reject; confirm.
        return raw.split("?")[0]

    host = os.getenv("DB_HOST", "localhost")
    port = os.getenv("DB_PORT", "15432")
    # Percent-encode the credentials so characters such as "@", ":" or "/"
    # cannot corrupt the URI structure.
    user = quote(os.getenv("DB_USER", "suggestbet"), safe="")
    # NOTE(security): a default password is committed in source. It is kept
    # for backward compatibility; prefer supplying DB_PASS via the environment
    # and rotating this value.
    pw = quote(os.getenv("DB_PASS", "SuGGesT2026SecuRe"), safe="")
    db = os.getenv("DB_NAME", "boilerplate_db")
    return f"postgresql://{user}:{pw}@{host}:{port}/{db}"
|
||||
|
||||
|
||||
def compute_league_reliability(conn: Any) -> List[Dict[str, Any]]:
    """Compute an odds-reliability score for every league with enough data.

    Runs one aggregate query over finished football matches that have
    'Maç Sonucu' (match result) odds, then blends three components into a
    composite score per league:

    - Brier-based reliability (60%): the league's Brier score mapped from
      ``[BRIER_BASELINE .. BRIER_PERFECT]`` onto ``[0.0 .. 1.0]``.
    - Sample-size confidence (20%): log-scaled match count, capped at 500.
    - Heavy-favorite predictability (20%): win rate of favorites priced
      below 1.50, mapped from ``[0.55 .. 0.80]`` onto ``[0.0 .. 1.0]``.

    Args:
        conn: An open psycopg2 connection. It is not closed here.

    Returns:
        One dict per league with at least ``MIN_MATCHES`` matches, sorted by
        ``odds_reliability`` descending. Keys: ``league_id``, ``league_name``,
        ``match_count``, ``brier_score``, ``heavy_fav_win_pct``,
        ``fav_win_pct`` and ``odds_reliability``.
    """
    import math

    # Win rate assumed for leagues that have no heavy-favorite matches at all.
    default_heavy_fav_win = 0.65
    # Loop-invariant denominator of the sample-size component.
    log_sample_cap = math.log(500)

    cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    try:
        print("📊 Computing per-league Brier Scores from match results + odds...")

        # The Brier score below sums the squared error of the home and away
        # probabilities only; the draw outcome has no term of its own.
        # heavy_fav_count and result_volatility are selected but not used by
        # the Python code that follows.
        cur.execute("""
            WITH ms_odds AS (
                SELECT
                    oc.match_id,
                    MAX(CASE WHEN os.name = '1' THEN os.odd_value::float END) AS odds_h,
                    MAX(CASE WHEN os.name = 'X' THEN os.odd_value::float END) AS odds_d,
                    MAX(CASE WHEN os.name = '2' THEN os.odd_value::float END) AS odds_a
                FROM odd_categories oc
                JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
                WHERE oc.name = 'Maç Sonucu'
                GROUP BY oc.match_id
                HAVING MAX(CASE WHEN os.name = '1' THEN os.odd_value::float END) > 1.0
                   AND MAX(CASE WHEN os.name = '2' THEN os.odd_value::float END) > 1.0
            ),
            match_results AS (
                SELECT
                    m.league_id,
                    l.name AS league_name,
                    CASE
                        WHEN m.score_home > m.score_away THEN '1'
                        WHEN m.score_home = m.score_away THEN 'X'
                        ELSE '2'
                    END AS result,
                    o.odds_h, o.odds_d, o.odds_a,
                    -- Normalized implied probabilities
                    (1.0 / o.odds_h) / (
                        (1.0 / o.odds_h) +
                        (1.0 / COALESCE(o.odds_d, 3.3)) +
                        (1.0 / o.odds_a)
                    ) AS ip_home,
                    (1.0 / o.odds_a) / (
                        (1.0 / o.odds_h) +
                        (1.0 / COALESCE(o.odds_d, 3.3)) +
                        (1.0 / o.odds_a)
                    ) AS ip_away,
                    CASE WHEN o.odds_h < o.odds_a THEN 'H' ELSE 'A' END AS fav_side,
                    LEAST(o.odds_h, o.odds_a) AS fav_odds
                FROM matches m
                JOIN ms_odds o ON o.match_id = m.id
                JOIN leagues l ON m.league_id = l.id
                WHERE m.status = 'FT'
                  AND m.score_home IS NOT NULL
                  AND m.sport = 'football'
            )
            SELECT
                league_id,
                league_name,
                COUNT(*) AS match_count,

                -- Brier Score (lower = better odds calibration)
                AVG(
                    POWER(ip_home - CASE WHEN result = '1' THEN 1.0 ELSE 0.0 END, 2) +
                    POWER(ip_away - CASE WHEN result = '2' THEN 1.0 ELSE 0.0 END, 2)
                ) AS brier_score,

                -- Heavy favorite metrics
                COUNT(CASE WHEN fav_odds < 1.50 THEN 1 END) AS heavy_fav_count,
                AVG(CASE
                    WHEN fav_odds < 1.50
                        AND ((fav_side = 'H' AND result = '1') OR (fav_side = 'A' AND result = '2'))
                    THEN 1.0
                    WHEN fav_odds < 1.50 THEN 0.0
                END) AS heavy_fav_win_rate,

                -- Overall favorite win rate
                AVG(CASE
                    WHEN (fav_side = 'H' AND result = '1') OR (fav_side = 'A' AND result = '2')
                    THEN 1.0 ELSE 0.0
                END) AS fav_win_rate,

                -- Chaos metric
                STDDEV(
                    CASE WHEN result = '1' THEN 1 WHEN result = '2' THEN -1 ELSE 0 END
                ) AS result_volatility

            FROM match_results
            GROUP BY league_id, league_name
            HAVING COUNT(*) >= %s
            ORDER BY COUNT(*) DESC
        """, (MIN_MATCHES,))

        rows = cur.fetchall()
    finally:
        # Release the cursor even when the query or the fetch raises.
        cur.close()

    print(f" ✅ Found {len(rows)} leagues with >= {MIN_MATCHES} matches")

    # ── Compute composite odds_reliability ──────────────────────────────
    results: List[Dict[str, Any]] = []

    for row in rows:
        brier = float(row["brier_score"])
        match_count = int(row["match_count"])
        fav_win = float(row["fav_win_rate"])

        # The rate is NULL only when the league has no heavy favorites. A real
        # 0.0 (heavy favorites never won) must be kept, so test for None
        # explicitly instead of relying on truthiness.
        raw_heavy_fav_win = row["heavy_fav_win_rate"]
        if raw_heavy_fav_win is None:
            heavy_fav_win = default_heavy_fav_win
        else:
            heavy_fav_win = float(raw_heavy_fav_win)

        # Component 1: Brier-based reliability (0-1, higher = better).
        # Maps [BRIER_BASELINE .. BRIER_PERFECT] → [0.0 .. 1.0], clamped.
        brier_reliability = max(0.0, min(1.0,
            (BRIER_BASELINE - brier) / (BRIER_BASELINE - BRIER_PERFECT)
        ))

        # Component 2: Sample size confidence (log scale, caps at 500 matches).
        sample_confidence = min(1.0, math.log(max(1, match_count)) / log_sample_cap)

        # Component 3: Heavy favorite predictability.
        # An 80%+ win rate maps to 1.0 and 55% or lower maps to 0.0.
        fav_reliability = max(0.0, min(1.0, (heavy_fav_win - 0.55) / (0.80 - 0.55)))

        # Composite: Brier is the primary signal (60%), then sample size (20%)
        # and heavy-favorite reliability (20%).
        odds_reliability = (
            brier_reliability * 0.60 +
            sample_confidence * 0.20 +
            fav_reliability * 0.20
        )

        results.append({
            "league_id": row["league_id"],
            "league_name": row["league_name"],
            "match_count": match_count,
            "brier_score": round(brier, 4),
            "heavy_fav_win_pct": round(heavy_fav_win * 100, 1),
            "fav_win_pct": round(fav_win * 100, 1),
            "odds_reliability": round(odds_reliability, 4),
        })

    # Most reliable leagues first.
    results.sort(key=lambda x: x["odds_reliability"], reverse=True)

    return results
|
||||
|
||||
|
||||
def build_lookup(results: List[Dict[str, Any]]) -> Dict[str, float]:
    """Map each league's id to its ``odds_reliability`` score.

    Entries are processed in order, so if a ``league_id`` appears more than
    once the last occurrence wins.
    """
    lookup: Dict[str, float] = {}
    for entry in results:
        lookup[entry["league_id"]] = entry["odds_reliability"]
    return lookup
|
||||
|
||||
|
||||
def main() -> None:
    """Compute league reliability scores, save them as JSON and print a summary.

    Connects with the DSN from ``get_dsn()``, writes the result document to
    ``OUTPUT_PATH`` (creating its directory if needed), then prints the ten
    highest- and ten lowest-scoring leagues. The connection is always closed.
    """

    def print_ranked(leagues: List[Dict[str, Any]]) -> None:
        # One numbered summary line per league, in the order given.
        for rank, league in enumerate(leagues, 1):
            line = (
                f" {rank:2d}. {league['league_name']:25s} | Brier: {league['brier_score']:.4f} | "
                f"Reliability: {league['odds_reliability']:.4f} | "
                f"Heavy Fav: {league['heavy_fav_win_pct']:.1f}% | "
                f"N={league['match_count']}"
            )
            print(line)

    dsn = get_dsn()
    print("🔗 Connecting to database...")
    conn = psycopg2.connect(dsn)

    try:
        results = compute_league_reliability(conn)

        # Document consumed downstream: the full lookup plus a human-readable
        # excerpt of the 50 best-scoring leagues.
        output = {
            "version": "v1",
            "description": "Per-league odds reliability scores computed from Brier Score analysis",
            "min_matches_threshold": MIN_MATCHES,
            "total_leagues": len(results),
            "default_reliability": 0.35,  # fallback for unknown leagues
            "lookup": build_lookup(results),
            "details": results[:50],
        }

        # The data directory may not exist on a fresh checkout.
        target_dir = os.path.dirname(OUTPUT_PATH)
        os.makedirs(target_dir, exist_ok=True)

        with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
            json.dump(output, f, indent=2, ensure_ascii=False)

        print(f"\n✅ Saved {len(results)} league reliability scores to {OUTPUT_PATH}")

        print("\n📈 Top 10 most reliable leagues:")
        print_ranked(results[:10])

        print("\n📉 Bottom 10 (least reliable):")
        print_ranked(results[-10:])
    finally:
        conn.close()


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user