# iddaai-be/ai-engine/scripts/backtest_hitrate.py

"""
Multi-market hit-rate backtest.

Runs the orchestrator against historical finished matches and measures raw V25
pick accuracy per market — independent of the "playable" gate. This isolates
model quality from the value-detection thresholds.

Usage:
    python scripts/backtest_hitrate.py --start 2026-05-01 --end 2026-05-09 [--limit 500]
"""

from __future__ import annotations

import argparse
import json
import os
import sys
import time
from collections import defaultdict
from typing import Any, Dict, List, Optional, Tuple

import psycopg2
from psycopg2.extras import RealDictCursor

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from data.db import get_clean_dsn
from services.single_match_orchestrator import SingleMatchOrchestrator


def fetch_matches(cur, start_date: str, end_date: str, limit: Optional[int]) -> List[Dict[str, Any]]:
    """Load finished football matches whose kick-off date lies in a date range.

    The query keeps rows with status FT/AET/PEN, sport 'football' and both
    full-time scores present. ``mst_utc`` is divided by 1000 and fed to
    ``to_timestamp`` before the date comparison, and rows come back ordered
    by ``mst_utc`` ascending.

    Args:
        cur: Database cursor providing ``execute`` and ``fetchall``.
        start_date: First date of the range (inclusive), bound as ``%s::date``.
        end_date: Last date of the range (inclusive), bound as ``%s::date``.
        limit: Maximum number of rows; a falsy value (``None`` or ``0``)
            means no LIMIT clause is added.

    Returns:
        Whatever ``cur.fetchall()`` yields for the query.
    """
    query = """
        SELECT m.id, m.score_home, m.score_away, m.ht_score_home, m.ht_score_away,
               m.mst_utc, t1.name as home_name, t2.name as away_name
        FROM matches m
        LEFT JOIN teams t1 ON m.home_team_id = t1.id
        LEFT JOIN teams t2 ON m.away_team_id = t2.id
        WHERE m.status IN ('FT', 'AET', 'PEN')
          AND m.sport = 'football'
          AND to_timestamp(m.mst_utc / 1000.0)::date BETWEEN %s::date AND %s::date
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
        ORDER BY m.mst_utc ASC
        """
    if limit:
        # int() guarantees only digits (and a sign) reach the SQL text.
        query += f" LIMIT {int(limit)}"
    date_bounds = (start_date, end_date)
    cur.execute(query, date_bounds)
    rows = cur.fetchall()
    return rows


def actual_ms(h: int, a: int) -> str:
    """Translate a final score into a result code.

    Returns "1" when the home side scored more, "X" when the scores are
    equal, and "2" otherwise.
    """
    if h > a:
        return "1"
    if h == a:
        return "X"
    return "2"


def actual_ht(hh: Optional[int], ha: Optional[int]) -> Optional[str]:
    """Translate a half-time score into a result code.

    Returns "1" / "X" / "2" using the same convention as the full-time
    result, or ``None`` when either half-time score is missing.
    """
    if hh is not None and ha is not None:
        if hh > ha:
            return "1"
        return "X" if hh == ha else "2"
    return None


# Lower-case pick labels recognised when scoring a prediction. Each set mixes
# English spellings with Turkish ones (plus ASCII fallbacks for the Turkish
# letters); score_pick() compares the normalised pick against these sets.

# Over / under a goal line.
OVER_TOKENS = {
    "over",
    "üst",
    "ust",
}
UNDER_TOKENS = {
    "under",
    "alt",
}

# Both teams to score: yes / no.
YES_TOKENS = {
    "yes",
    "var",
    "kg var",
}
NO_TOKENS = {
    "no",
    "yok",
    "kg yok",
}

# Odd / even total goals.
ODD_TOKENS = {
    "odd",
    "tek",
}
EVEN_TOKENS = {
    "even",
    "çift",
    "cift",
}


def _norm(s: str) -> str:
    return str(s or "").strip().lower()


def _outcome_code(pick: str) -> str:
    """Canonicalise a 1/X/2-style pick: upper-case, read "0" as draw, drop separators."""
    code = pick.upper().replace("0", "X")
    for separator in (" ", "/", "-"):
        code = code.replace(separator, "")
    return code


def _over_under_hit(pick: str, goals: int, line: float) -> Optional[bool]:
    """Grade an over/under pick against a goal line; None for an unknown label."""
    if pick in OVER_TOKENS:
        return goals > line
    if pick in UNDER_TOKENS:
        return goals < line
    return None


def score_pick(market: str, predicted: str, h: int, a: int, hh: Optional[int], ha: Optional[int]) -> Optional[bool]:
    """Return True/False for hit, or None if cannot evaluate.

    Supported markets: MS (full-time result), OU15/OU25/OU35 (total goals
    over/under), BTTS, HT (half-time result), HT_OU05/HT_OU15 (half-time
    goals over/under), HTFT, OE (odd/even total) and DC (double chance).

    A pick is "not evaluable" (``None``) when the market is unsupported, the
    half-time score needed by the market is missing, or the predicted label is
    not a recognised outcome for that market. Result-style picks (MS, HT,
    HTFT, DC) are compared after upper-casing, treating "0" as the draw code
    "X" and ignoring " ", "/" and "-" separators.

    Args:
        market: Market key as listed above.
        predicted: Predicted outcome label for that market.
        h: Full-time home goals.
        a: Full-time away goals.
        hh: Half-time home goals, or None when unknown.
        ha: Half-time away goals, or None when unknown.

    Returns:
        True for a hit, False for a miss, None when the pick cannot be graded.
    """
    total = h + a
    ht_total = (hh + ha) if hh is not None and ha is not None else None
    p = _norm(predicted)
    single_outcomes = ("1", "X", "2")

    if market == "MS":
        code = _outcome_code(p)
        # An unknown label is "cannot evaluate", not a miss — same rule as
        # every other market in this function.
        if code not in single_outcomes:
            return None
        return code == actual_ms(h, a)
    if market in ("OU15", "OU25", "OU35"):
        line = {"OU15": 1.5, "OU25": 2.5, "OU35": 3.5}[market]
        return _over_under_hit(p, total, line)
    if market == "BTTS":
        btts = h > 0 and a > 0
        if p in YES_TOKENS:
            return btts
        if p in NO_TOKENS:
            return not btts
        return None
    if market == "HT":
        ht = actual_ht(hh, ha)
        code = _outcome_code(p)
        if ht is None or code not in single_outcomes:
            return None
        return code == ht
    if market in ("HT_OU05", "HT_OU15"):
        if ht_total is None:
            return None
        line = 0.5 if market == "HT_OU05" else 1.5
        return _over_under_hit(p, ht_total, line)
    if market == "HTFT":
        ht = actual_ht(hh, ha)
        if ht is None:
            return None
        code = _outcome_code(p)
        # Exactly two result codes are expected: half-time then full-time.
        if len(code) != 2 or any(ch not in single_outcomes for ch in code):
            return None
        return code == ht + actual_ms(h, a)
    if market == "OE":
        odd = total % 2 == 1
        if p in ODD_TOKENS:
            return odd
        if p in EVEN_TOKENS:
            return not odd
        return None
    if market == "DC":
        code = _outcome_code(p)
        # Double chance names two distinct results; order does not matter.
        if len(code) != 2 or code[0] == code[1] or any(ch not in single_outcomes for ch in code):
            return None
        return actual_ms(h, a) in code
    # CARDS / HCAP cannot be scored without extra data
    return None


def top_pick(probs: Dict[str, float]) -> Tuple[Optional[str], float]:
    """Select the outcome with the highest probability.

    Values are converted with ``float()``; falsy values (``None``, ``0``)
    count as 0.0. When several outcomes share the maximum, the one that
    appears first in ``probs`` wins.

    Args:
        probs: Mapping of outcome label to probability.

    Returns:
        ``(label, probability)`` for the best outcome, or ``(None, 0.0)``
        when ``probs`` is empty.
    """
    if not probs:
        return None, 0.0
    weights = {label: float(value or 0) for label, value in probs.items()}
    best = max(weights, key=weights.__getitem__)
    return best, weights[best]


def run(start_date: str, end_date: str, limit: Optional[int], out_path: Optional[str]) -> None:
    """Backtest the orchestrator's top picks over a date range and report hit rates.

    Steps:
      1. Load the finished matches for the range via ``fetch_matches``.
      2. For each match call ``SingleMatchOrchestrator.analyze_match`` and, for
         every entry of the returned ``market_board``, take the
         highest-probability outcome and grade it with ``score_pick``.
      3. Print a per-market table (overall and "playable"-only hit rates).
      4. Optionally dump the stats and per-pick rows to a JSON file.

    Matches whose analysis raises are counted as errors (only the first five
    are printed) and skipped; picks that cannot be graded are counted under
    ``skipped`` for their market.

    Args:
        start_date: First date of the range, ``YYYY-MM-DD``.
        end_date: Last date of the range, ``YYYY-MM-DD``.
        limit: Optional cap on the number of matches.
        out_path: Optional path of the JSON report to write.
    """
    dsn = get_clean_dsn()
    print(f"DSN host={dsn.split('@')[-1].split('/')[0]}")

    # The connection is only needed to load the match list, which fetchall()
    # fully materialises. Close cursor and connection right away so nothing
    # stays open during the analysis loop or leaks on the early return below.
    conn = psycopg2.connect(dsn)
    try:
        cur = conn.cursor(cursor_factory=RealDictCursor)
        try:
            matches = fetch_matches(cur, start_date, end_date, limit)
        finally:
            cur.close()
    finally:
        conn.close()

    print(f"Found {len(matches)} matches between {start_date} and {end_date}")
    if not matches:
        return

    orchestrator = SingleMatchOrchestrator()

    market_stats: Dict[str, Dict[str, Any]] = defaultdict(lambda: {
        "total": 0, "hits": 0, "skipped": 0,
        "playable_total": 0, "playable_hits": 0,
        "conf_sum": 0.0,
    })
    detailed_rows: List[Dict[str, Any]] = []
    errors = 0
    started = time.time()

    for idx, m in enumerate(matches, 1):
        try:
            pkg = orchestrator.analyze_match(m["id"])
        except Exception as e:
            # Best-effort: one failing match must not abort the whole backtest.
            errors += 1
            if errors <= 5:
                print(f"[ERR] {m['id']}: {e}")
            continue
        if not pkg:
            continue

        board = pkg.get("market_board", {}) or {}
        h = int(m["score_home"])
        a = int(m["score_away"])
        # Coerce half-time scores the same way as full-time ones so that
        # comparisons and sums in score_pick are numeric.
        hh = m.get("ht_score_home")
        ha = m.get("ht_score_away")
        hh = int(hh) if hh is not None else None
        ha = int(ha) if ha is not None else None

        for market, entry in board.items():
            if not isinstance(entry, dict):
                continue
            probs = entry.get("probs") or {}
            pick, prob = top_pick(probs)
            if pick is None:
                continue
            hit = score_pick(market, pick, h, a, hh, ha)
            stats = market_stats[market]
            if hit is None:
                stats["skipped"] += 1
                continue
            stats["total"] += 1
            stats["conf_sum"] += prob
            if hit:
                stats["hits"] += 1
            if entry.get("playable") is True:
                stats["playable_total"] += 1
                if hit:
                    stats["playable_hits"] += 1
            detailed_rows.append({
                "match_id": m["id"],
                "market": market,
                "pick": pick,
                "prob": round(prob, 4),
                "hit": hit,
                "playable": bool(entry.get("playable")),
                "score": f"{h}-{a}",
                # Require both halves so a partial score never renders as "1-None".
                "ht_score": f"{hh}-{ha}" if hh is not None and ha is not None else None,
            })

        if idx % 25 == 0:
            elapsed = time.time() - started
            print(f"  ... processed {idx}/{len(matches)} ({elapsed:.1f}s)")

    elapsed = time.time() - started
    print("\n" + "=" * 72)
    print(f"BACKTEST {start_date} .. {end_date}  |  matches={len(matches)}  errors={errors}  elapsed={elapsed:.1f}s")
    print("=" * 72)
    header = f"{'Market':<10} {'N':>5} {'Hit':>5} {'Rate':>7} {'AvgConf':>8} | {'PlayN':>6} {'PlayHit':>7} {'PlayRate':>8}"
    print(header)
    print("-" * 72)
    for market in sorted(market_stats.keys()):
        s = market_stats[market]
        n = s["total"]
        rate = (s["hits"] / n * 100) if n else 0.0
        avg_conf = (s["conf_sum"] / n * 100) if n else 0.0
        pn = s["playable_total"]
        prate = (s["playable_hits"] / pn * 100) if pn else 0.0
        print(f"{market:<10} {n:>5} {s['hits']:>5} {rate:>6.1f}% {avg_conf:>7.1f}% | {pn:>6} {s['playable_hits']:>7} {prate:>7.1f}%")

    if out_path:
        payload = {
            "range": {"start": start_date, "end": end_date},
            "match_count": len(matches),
            "errors": errors,
            "elapsed_sec": round(elapsed, 1),
            "market_stats": {k: dict(v) for k, v in market_stats.items()},
            "rows": detailed_rows,
        }
        # ensure_ascii=False can emit non-ASCII pick labels; pin UTF-8 so the
        # write does not depend on the platform's locale encoding.
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2, ensure_ascii=False)
        print(f"\nSaved details to {out_path}")


def main() -> None:
    """Command-line entry point: parse the options and start the backtest.

    Options: ``--start`` and ``--end`` (both required), ``--limit`` (integer,
    optional) and ``--out`` (optional JSON output path).
    """
    option_specs = (
        ("--start", {"required": True, "help": "YYYY-MM-DD"}),
        ("--end", {"required": True, "help": "YYYY-MM-DD"}),
        ("--limit", {"type": int, "default": None}),
        ("--out", {"default": None, "help": "Optional JSON output path"}),
    )
    parser = argparse.ArgumentParser()
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    opts = parser.parse_args()
    run(
        start_date=opts.start,
        end_date=opts.end,
        limit=opts.limit,
        out_path=opts.out,
    )


# Run the backtest only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()