""" Multi-market hit-rate backtest. Runs the orchestrator against historical finished matches and measures raw V25 pick accuracy per market — independent of the "playable" gate. This isolates model quality from the value-detection thresholds. Usage: python scripts/backtest_hitrate.py --start 2026-05-01 --end 2026-05-09 [--limit 500] """ from __future__ import annotations import argparse import json import os import sys import time from collections import defaultdict from typing import Any, Dict, List, Optional, Tuple import psycopg2 from psycopg2.extras import RealDictCursor sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from data.db import get_clean_dsn from services.single_match_orchestrator import SingleMatchOrchestrator def fetch_matches(cur, start_date: str, end_date: str, limit: Optional[int]) -> List[Dict[str, Any]]: cur.execute( """ SELECT m.id, m.score_home, m.score_away, m.ht_score_home, m.ht_score_away, m.mst_utc, t1.name as home_name, t2.name as away_name FROM matches m LEFT JOIN teams t1 ON m.home_team_id = t1.id LEFT JOIN teams t2 ON m.away_team_id = t2.id WHERE m.status IN ('FT', 'AET', 'PEN') AND m.sport = 'football' AND to_timestamp(m.mst_utc / 1000.0)::date BETWEEN %s::date AND %s::date AND m.score_home IS NOT NULL AND m.score_away IS NOT NULL ORDER BY m.mst_utc ASC """ + (f" LIMIT {int(limit)}" if limit else ""), (start_date, end_date), ) return cur.fetchall() def actual_ms(h: int, a: int) -> str: return "1" if h > a else ("X" if h == a else "2") def actual_ht(hh: Optional[int], ha: Optional[int]) -> Optional[str]: if hh is None or ha is None: return None return "1" if hh > ha else ("X" if hh == ha else "2") OVER_TOKENS = {"over", "üst", "ust"} UNDER_TOKENS = {"under", "alt"} YES_TOKENS = {"yes", "var", "kg var"} NO_TOKENS = {"no", "yok", "kg yok"} ODD_TOKENS = {"odd", "tek"} EVEN_TOKENS = {"even", "çift", "cift"} def _norm(s: str) -> str: return str(s or "").strip().lower() def score_pick(market: str, predicted: str, h: int, a: int, hh: Optional[int], ha: Optional[int]) -> Optional[bool]: """Return True/False for hit, or None if cannot evaluate.""" total = h + a ht_total = (hh + ha) if hh is not None and ha is not None else None p = _norm(predicted) if market == "MS": return p.upper() == actual_ms(h, a) if market in ("OU15", "OU25", "OU35"): line = {"OU15": 1.5, "OU25": 2.5, "OU35": 3.5}[market] if p in OVER_TOKENS: return total > line if p in UNDER_TOKENS: return total < line return None if market == "BTTS": btts = h > 0 and a > 0 if p in YES_TOKENS: return btts if p in NO_TOKENS: return not btts return None if market == "HT": ht = actual_ht(hh, ha) return None if ht is None else p.upper() == ht if market in ("HT_OU05", "HT_OU15"): if ht_total is None: return None line = 0.5 if market == "HT_OU05" else 1.5 if p in OVER_TOKENS: return ht_total > line if p in UNDER_TOKENS: return ht_total < line return None if market == "HTFT": ht = actual_ht(hh, ha) if ht is None: return None full = actual_ms(h, a) norm = p.replace(" ", "").upper().replace("0", "X") return norm == f"{ht}/{full}" if market == "OE": odd = total % 2 == 1 if p in ODD_TOKENS: return odd if p in EVEN_TOKENS: return not odd return None if market == "DC": ms = actual_ms(h, a) compact = p.replace("-", "").upper() if compact == "1X": return ms in ("1", "X") if compact == "X2": return ms in ("X", "2") if compact == "12": return ms in ("1", "2") return None # CARDS / HCAP cannot be scored without extra data return None def top_pick(probs: Dict[str, float]) -> Tuple[Optional[str], float]: if not probs: return None, 0.0 key = max(probs, key=lambda k: float(probs.get(k, 0) or 0)) return key, float(probs.get(key, 0) or 0) def run(start_date: str, end_date: str, limit: Optional[int], out_path: Optional[str]) -> None: dsn = get_clean_dsn() print(f"DSN host={dsn.split('@')[-1].split('/')[0]}") conn = psycopg2.connect(dsn) cur = conn.cursor(cursor_factory=RealDictCursor) matches = fetch_matches(cur, start_date, end_date, limit) print(f"Found {len(matches)} matches between {start_date} and {end_date}") if not matches: return orchestrator = SingleMatchOrchestrator() market_stats: Dict[str, Dict[str, Any]] = defaultdict(lambda: { "total": 0, "hits": 0, "skipped": 0, "playable_total": 0, "playable_hits": 0, "conf_sum": 0.0, }) detailed_rows: List[Dict[str, Any]] = [] errors = 0 started = time.time() for idx, m in enumerate(matches, 1): try: pkg = orchestrator.analyze_match(m["id"]) except Exception as e: errors += 1 if errors <= 5: print(f"[ERR] {m['id']}: {e}") continue if not pkg: continue board = pkg.get("market_board", {}) or {} h = int(m["score_home"]) a = int(m["score_away"]) hh = m.get("ht_score_home") ha = m.get("ht_score_away") for market, entry in board.items(): if not isinstance(entry, dict): continue probs = entry.get("probs") or {} pick, prob = top_pick(probs) if pick is None: continue hit = score_pick(market, pick, h, a, hh, ha) stats = market_stats[market] if hit is None: stats["skipped"] += 1 continue stats["total"] += 1 stats["conf_sum"] += prob if hit: stats["hits"] += 1 if entry.get("playable") is True: stats["playable_total"] += 1 if hit: stats["playable_hits"] += 1 detailed_rows.append({ "match_id": m["id"], "market": market, "pick": pick, "prob": round(prob, 4), "hit": hit, "playable": bool(entry.get("playable")), "score": f"{h}-{a}", "ht_score": f"{hh}-{ha}" if hh is not None else None, }) if idx % 25 == 0: elapsed = time.time() - started print(f" ... processed {idx}/{len(matches)} ({elapsed:.1f}s)") elapsed = time.time() - started print("\n" + "=" * 72) print(f"BACKTEST {start_date} .. {end_date} | matches={len(matches)} errors={errors} elapsed={elapsed:.1f}s") print("=" * 72) header = f"{'Market':<10} {'N':>5} {'Hit':>5} {'Rate':>7} {'AvgConf':>8} | {'PlayN':>6} {'PlayHit':>7} {'PlayRate':>8}" print(header) print("-" * 72) for market in sorted(market_stats.keys()): s = market_stats[market] n = s["total"] rate = (s["hits"] / n * 100) if n else 0.0 avg_conf = (s["conf_sum"] / n * 100) if n else 0.0 pn = s["playable_total"] prate = (s["playable_hits"] / pn * 100) if pn else 0.0 print(f"{market:<10} {n:>5} {s['hits']:>5} {rate:>6.1f}% {avg_conf:>7.1f}% | {pn:>6} {s['playable_hits']:>7} {prate:>7.1f}%") if out_path: payload = { "range": {"start": start_date, "end": end_date}, "match_count": len(matches), "errors": errors, "elapsed_sec": round(elapsed, 1), "market_stats": {k: dict(v) for k, v in market_stats.items()}, "rows": detailed_rows, } with open(out_path, "w") as f: json.dump(payload, f, indent=2, ensure_ascii=False) print(f"\nSaved details to {out_path}") def main() -> None: p = argparse.ArgumentParser() p.add_argument("--start", required=True, help="YYYY-MM-DD") p.add_argument("--end", required=True, help="YYYY-MM-DD") p.add_argument("--limit", type=int, default=None) p.add_argument("--out", default=None, help="Optional JSON output path") args = p.parse_args() run(args.start, args.end, args.limit, args.out) if __name__ == "__main__": main()