first (part 2: other directories)
Deploy Iddaai Backend / build-and-deploy (push) Failing after 18s

This commit is contained in:
2026-04-16 15:11:25 +03:00
parent 7814e0bc6b
commit 2f0b85a0c7
203 changed files with 59989 additions and 0 deletions
@@ -0,0 +1,248 @@
"""
League Odds Reliability Calculator
===================================
Computes per-league Brier Score from historical match results + odds,
then derives an odds_reliability factor (0.0–1.0) for each league.
Output: ai-engine/data/league_reliability.json
Used by: SingleMatchOrchestrator to weight odds-based edge calculations.
Usage:
python3 scripts/compute_league_reliability.py
"""
from __future__ import annotations
import json
import os
import sys
from typing import Any, Dict, List
import psycopg2
import psycopg2.extras
# ─── Config ──────────────────────────────────────────────────────────────
# All paths are derived from this script's own location, not from the CWD.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Parent of the scripts directory (left un-normalised, i.e. ".../scripts/..").
AI_ENGINE_DIR = os.path.join(SCRIPT_DIR, "..")
# Destination of the generated JSON file.
OUTPUT_PATH = os.path.join(SCRIPT_DIR, "..", "data", "league_reliability.json")

# Leagues with fewer matching rows than this are dropped by the SQL HAVING clause.
MIN_MATCHES = 50

# Anchors of the linear Brier → reliability mapping used in
# compute_league_reliability(): a Brier score of BRIER_BASELINE maps to 0.0 and
# BRIER_PERFECT maps to 1.0 (values outside the range are clamped).
# NOTE(review): the original comments describe these as the random-guess and
# best-case scores for 3-way odds — confirm against the scoring formula in use.
BRIER_BASELINE = 0.5
BRIER_PERFECT = 0.33
def get_dsn() -> str:
    """Build the PostgreSQL connection URI from the environment.

    Loads the ``.env`` file located one directory above ``AI_ENGINE_DIR``
    (via python-dotenv) and then resolves the DSN in this order:

    1. If ``DATABASE_URL`` starts with ``postgresql://`` or ``postgres://``,
       it is returned with everything from the first ``?`` onwards removed.
    2. Otherwise the URI is assembled from ``DB_HOST``, ``DB_PORT``,
       ``DB_USER``, ``DB_PASS`` and ``DB_NAME``, each falling back to a
       built-in default. User name and password are percent-encoded so that
       reserved characters (``@``, ``/``, ``:``, ``?`` …) cannot corrupt the URI.

    Returns:
        A connection URI string; ``main()`` passes it to ``psycopg2.connect``.
    """
    from urllib.parse import quote

    from dotenv import load_dotenv

    env_path = os.path.join(AI_ENGINE_DIR, "..", ".env")
    load_dotenv(env_path)

    raw = os.getenv("DATABASE_URL", "")
    # libpq accepts both URI scheme spellings.
    if raw.startswith(("postgresql://", "postgres://")):
        # The whole query string is discarded (this also drops options such as
        # sslmode). Presumably this removes ORM-specific parameters that the
        # driver would reject — TODO confirm.
        return raw.split("?", 1)[0]

    host = os.getenv("DB_HOST", "localhost")
    port = os.getenv("DB_PORT", "15432")
    user = quote(os.getenv("DB_USER", "suggestbet"), safe="")
    # SECURITY NOTE(review): a credential is hard-coded as the fallback value.
    # It is kept for backward compatibility; consider requiring DB_PASS instead.
    pw = quote(os.getenv("DB_PASS", "SuGGesT2026SecuRe"), safe="")
    db = os.getenv("DB_NAME", "boilerplate_db")
    return f"postgresql://{user}:{pw}@{host}:{port}/{db}"
def compute_league_reliability(conn: Any) -> List[Dict[str, Any]]:
    """Compute a composite odds-reliability score for every eligible league.

    One SQL query aggregates, per league, the finished football matches
    (``status = 'FT'``, non-null home score) that have 'Maç Sonucu' odds with
    home and away prices above 1.0. Leagues with fewer than ``MIN_MATCHES``
    such matches are excluded.

    Per league the query yields:
      - brier_score: mean of the squared errors of the normalised implied
        home and away probabilities (the draw outcome contributes no term of
        its own; a missing draw price is replaced by 3.3 when normalising).
      - heavy_fav_win_rate: share of matches with a favourite priced below
        1.50 that the favourite won (NULL when the league has no such match).
      - fav_win_rate: share of all matches won by the shorter-priced side.
      - heavy_fav_count / result_volatility: computed by the query but not
        used by the Python post-processing.

    The composite ``odds_reliability`` (0.0-1.0) blends:
      - 60% Brier reliability (linear map BRIER_BASELINE → 0, BRIER_PERFECT → 1)
      - 20% sample-size confidence (log scale, saturating at 500 matches)
      - 20% heavy-favourite predictability (55% → 0, 80% → 1)

    Args:
        conn: An open psycopg2 connection.

    Returns:
        A list of dicts with keys ``league_id``, ``league_name``,
        ``match_count``, ``brier_score``, ``heavy_fav_win_pct``,
        ``fav_win_pct`` and ``odds_reliability``, sorted by
        ``odds_reliability`` in descending order.
    """
    import math

    # The context manager closes the cursor even if the query raises.
    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
        print("📊 Computing per-league Brier Scores from match results + odds...")
        cur.execute("""
WITH ms_odds AS (
SELECT
oc.match_id,
MAX(CASE WHEN os.name = '1' THEN os.odd_value::float END) AS odds_h,
MAX(CASE WHEN os.name = 'X' THEN os.odd_value::float END) AS odds_d,
MAX(CASE WHEN os.name = '2' THEN os.odd_value::float END) AS odds_a
FROM odd_categories oc
JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
WHERE oc.name = 'Maç Sonucu'
GROUP BY oc.match_id
HAVING MAX(CASE WHEN os.name = '1' THEN os.odd_value::float END) > 1.0
AND MAX(CASE WHEN os.name = '2' THEN os.odd_value::float END) > 1.0
),
match_results AS (
SELECT
m.league_id,
l.name AS league_name,
CASE
WHEN m.score_home > m.score_away THEN '1'
WHEN m.score_home = m.score_away THEN 'X'
ELSE '2'
END AS result,
o.odds_h, o.odds_d, o.odds_a,
-- Normalized implied probabilities
(1.0 / o.odds_h) / (
(1.0 / o.odds_h) +
(1.0 / COALESCE(o.odds_d, 3.3)) +
(1.0 / o.odds_a)
) AS ip_home,
(1.0 / o.odds_a) / (
(1.0 / o.odds_h) +
(1.0 / COALESCE(o.odds_d, 3.3)) +
(1.0 / o.odds_a)
) AS ip_away,
CASE WHEN o.odds_h < o.odds_a THEN 'H' ELSE 'A' END AS fav_side,
LEAST(o.odds_h, o.odds_a) AS fav_odds
FROM matches m
JOIN ms_odds o ON o.match_id = m.id
JOIN leagues l ON m.league_id = l.id
WHERE m.status = 'FT'
AND m.score_home IS NOT NULL
AND m.sport = 'football'
)
SELECT
league_id,
league_name,
COUNT(*) AS match_count,
-- Brier Score (lower = better odds calibration)
AVG(
POWER(ip_home - CASE WHEN result = '1' THEN 1.0 ELSE 0.0 END, 2) +
POWER(ip_away - CASE WHEN result = '2' THEN 1.0 ELSE 0.0 END, 2)
) AS brier_score,
-- Heavy favorite metrics
COUNT(CASE WHEN fav_odds < 1.50 THEN 1 END) AS heavy_fav_count,
AVG(CASE
WHEN fav_odds < 1.50
AND ((fav_side = 'H' AND result = '1') OR (fav_side = 'A' AND result = '2'))
THEN 1.0
WHEN fav_odds < 1.50 THEN 0.0
END) AS heavy_fav_win_rate,
-- Overall favorite win rate
AVG(CASE
WHEN (fav_side = 'H' AND result = '1') OR (fav_side = 'A' AND result = '2')
THEN 1.0 ELSE 0.0
END) AS fav_win_rate,
-- Chaos metric
STDDEV(
CASE WHEN result = '1' THEN 1 WHEN result = '2' THEN -1 ELSE 0 END
) AS result_volatility
FROM match_results
GROUP BY league_id, league_name
HAVING COUNT(*) >= %s
ORDER BY COUNT(*) DESC
""", (MIN_MATCHES,))
        rows = cur.fetchall()

    print(f" ✅ Found {len(rows)} leagues with >= {MIN_MATCHES} matches")

    # ── Compute composite odds_reliability ──────────────────────────────
    sample_cap_log = math.log(500)  # loop-invariant denominator
    results: List[Dict[str, Any]] = []
    for row in rows:
        brier = float(row["brier_score"])
        match_count = int(row["match_count"])
        fav_win = float(row["fav_win_rate"])

        # Fall back to 0.65 only when the league has NO heavy-favourite matches
        # (SQL NULL → None). A genuine 0.0 win rate must be kept: using
        # `value or 0.65` would silently replace it with the fallback.
        raw_heavy_fav_win = row["heavy_fav_win_rate"]
        heavy_fav_win = 0.65 if raw_heavy_fav_win is None else float(raw_heavy_fav_win)

        # Component 1: Brier-based reliability (0-1, higher = better)
        # Maps [BRIER_BASELINE .. BRIER_PERFECT] → [0.0 .. 1.0], clamped.
        brier_reliability = max(0.0, min(1.0,
            (BRIER_BASELINE - brier) / (BRIER_BASELINE - BRIER_PERFECT)
        ))

        # Component 2: Sample size confidence (log scale, caps at 500 matches)
        sample_confidence = min(1.0, math.log(max(1, match_count)) / sample_cap_log)

        # Component 3: Heavy favorite predictability
        # 80%+ heavy-favourite wins → 1.0; 55% or less → 0.0
        fav_reliability = max(0.0, min(1.0, (heavy_fav_win - 0.55) / (0.80 - 0.55)))

        # Composite: weighted blend
        # Brier is the primary signal (60%), sample size (20%), fav reliability (20%)
        odds_reliability = (
            brier_reliability * 0.60 +
            sample_confidence * 0.20 +
            fav_reliability * 0.20
        )

        results.append({
            "league_id": row["league_id"],
            "league_name": row["league_name"],
            "match_count": match_count,
            "brier_score": round(brier, 4),
            "heavy_fav_win_pct": round(heavy_fav_win * 100, 1),
            "fav_win_pct": round(fav_win * 100, 1),
            "odds_reliability": round(odds_reliability, 4),
        })

    # Sort by reliability descending
    results.sort(key=lambda x: x["odds_reliability"], reverse=True)
    return results
def build_lookup(results: List[Dict[str, Any]]) -> Dict[str, float]:
    """Collapse the per-league result dicts into a league_id → odds_reliability map.

    Each entry must provide the keys ``league_id`` and ``odds_reliability``.
    If the same ``league_id`` occurs more than once, the last entry wins.
    """
    lookup: Dict[str, float] = {}
    for entry in results:
        # Read the key before the value, matching dict-comprehension order.
        league_id = entry["league_id"]
        lookup[league_id] = entry["odds_reliability"]
    return lookup
def main() -> None:
    """Run the full pipeline: connect, score the leagues, write JSON, print a summary.

    The connection obtained from ``get_dsn()`` is always closed, even when
    computing or writing the results fails. The JSON written to
    ``OUTPUT_PATH`` holds the complete ``lookup`` for all leagues plus the
    first 50 detail records.
    """

    def _print_ranking(rows):
        # One formatted line per league, numbered from 1.
        for i, r in enumerate(rows, 1):
            print(f" {i:2d}. {r['league_name']:25s} | Brier: {r['brier_score']:.4f} | "
                  f"Reliability: {r['odds_reliability']:.4f} | "
                  f"Heavy Fav: {r['heavy_fav_win_pct']:.1f}% | "
                  f"N={r['match_count']}")

    dsn = get_dsn()
    print(f"🔗 Connecting to database...")
    conn = psycopg2.connect(dsn)
    try:
        results = compute_league_reliability(conn)
        league_total = len(results)

        # Build output structure
        output = {
            "version": "v1",
            "description": "Per-league odds reliability scores computed from Brier Score analysis",
            "min_matches_threshold": MIN_MATCHES,
            "total_leagues": league_total,
            "default_reliability": 0.35,  # fallback for unknown leagues
            "lookup": build_lookup(results),
            "details": results[:50],  # top 50 for human reference
        }

        # Ensure output directory exists
        output_dir = os.path.dirname(OUTPUT_PATH)
        os.makedirs(output_dir, exist_ok=True)
        with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
            json.dump(output, f, indent=2, ensure_ascii=False)

        print(f"\n✅ Saved {league_total} league reliability scores to {OUTPUT_PATH}")

        print(f"\n📈 Top 10 most reliable leagues:")
        _print_ranking(results[:10])

        # The tail is printed in the same (descending) order as the list itself.
        print(f"\n📉 Bottom 10 (least reliable):")
        _print_ranking(results[-10:])
    finally:
        conn.close()


if __name__ == "__main__":
    main()