# iddaai-be/ai-engine/scripts/clv_report.py

"""
CLV Report — the single most important edge metric.
===================================================
Closing Line Value = did we bet at better odds than the market's closing line?
Consistently positive CLV is the only reliable proof of a real betting edge;
negative CLV means no edge, regardless of short-term wins/losses.

This codebase stores the BET-TIME odds for ~92% of runs (prediction_runs.
odds_snapshot.source = 'live_match' with the live odds blob, and the pick's
odds in payload main_pick.odds). For the closing line we use, in order:
  1. odds_snapshot.closing_odds  (captured by capture_closing_odds.py, forward)
  2. odd_selections current value (the static near-final capture — a proxy)

CLV per bet = bet_odds / closing_odds - 1   (positive = beat the close = good).

Read-only. SELECT only.
Usage:
  python scripts/clv_report.py
  python scripts/clv_report.py --staked-only
"""
from __future__ import annotations

import argparse
import json
import math
import os
import sys
from collections import defaultdict
from typing import Any, Dict, Optional, Tuple

# Switch stdout to UTF-8 when the stream supports reconfigure(): the report
# prints non-ASCII text (the em dash in the header, Turkish market labels).
if sys.stdout and hasattr(sys.stdout, "reconfigure"):
    try:
        sys.stdout.reconfigure(encoding="utf-8")
    except Exception:
        # Best effort only: keep whatever encoding stdout already has.
        pass

# Directory containing this script, and its parent directory. The parent is
# pushed to the front of sys.path; presumably that is where the ``data``
# package imported further down lives -- confirm against the repo layout.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
AI_ENGINE_DIR = os.path.dirname(SCRIPT_DIR)
sys.path.insert(0, AI_ENGINE_DIR)

from data.db import get_clean_dsn  # noqa: E402
import psycopg2  # noqa: E402
from psycopg2.extras import RealDictCursor  # noqa: E402

# Over/under market code -> Turkish odds-category name used as the lookup key
# in the odds JSON and in odd_categories.name. The selection key ("Üst" or
# "Alt") is derived separately from the pick text.
OU_CATS = {
    "OU05": "0,5 Alt/Üst",
    "OU15": "1,5 Alt/Üst",
    "OU25": "2,5 Alt/Üst",
    "OU35": "3,5 Alt/Üst",
    "OU45": "4,5 Alt/Üst",
}


def _f(x: Any, d: Optional[float] = None) -> Optional[float]:
    """Coerce ``x`` to a finite float, falling back to ``d``.

    Args:
        x: Value to convert (number, numeric string, ``None``, ...).
        d: Fallback returned when ``x`` is ``None``, cannot be converted,
            or converts to NaN or +/-infinity.

    Returns:
        The finite float value of ``x``, otherwise ``d``.
    """
    if x is None:
        return d
    try:
        value = float(x)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an int too large for a float (e.g. 10 ** 400).
        return d
    # NaN compares False against everything, so it would slip past guards
    # such as ``odds <= 1.0`` and poison every sum/mean computed afterwards.
    return value if math.isfinite(value) else d


def _parse(j: Any) -> Dict[str, Any]:
    """Return ``j`` as a dict, decoding it from JSON text when necessary.

    Args:
        j: A dict, JSON text (``str``/``bytes``/``bytearray``), or anything
            else (``None``, list, number, ...).

    Returns:
        The dict itself, or the dict decoded from the JSON text. Anything
        that is not (or does not decode to) a JSON object yields ``{}``, so
        callers can always use ``.get`` on the result.
    """
    if isinstance(j, (str, bytes, bytearray)):
        try:
            j = json.loads(j)
        except (ValueError, RecursionError):
            # Malformed or undecodable JSON: treat as "no data".
            return {}
    # Valid JSON such as ``null``, ``[1, 2]`` or ``3`` is not an object.
    return j if isinstance(j, dict) else {}


def map_pick(market: str, pick: str) -> Optional[Tuple[str, str]]:
    """Return (category_name, selection_key) for the live-odds JSON / odd_selections.

    Args:
        market: Market code, matched case-insensitively ("MS"/"ML"/"1X2",
            "HT", "DC", "BTTS", "OE", or a key of ``OU_CATS``).
        pick: Pick text. Result markets expect "1"/"X"/"2"; the other
            markets are recognised by Turkish or English keywords contained
            in the text.

    Returns:
        ``(category_name, selection_key)``, or ``None`` when the market is
        unknown or the pick cannot be mapped to a selection.
    """
    # Payload values come from JSON, so a pick may arrive as a number (1, 2).
    m = str(market or "").upper()
    p = str(pick or "").strip()
    # "İ".casefold() yields "i" + U+0307 (combining dot above); drop the mark
    # so upper-case Turkish text such as "ÇİFT" still contains "çift".
    pl = p.casefold().replace("\u0307", "")

    if m in ("MS", "ML", "1X2") or m == "HT":
        category = "1. Yarı Sonucu" if m == "HT" else "Maç Sonucu"
        outcome = p.upper()  # accept a lower-case "x" for the draw
        return (category, outcome) if outcome in ("1", "X", "2") else None

    if m == "DC":
        key = p.upper().replace(" ", "").replace("/", "-")
        norm = {"1X": "1-X", "X1": "1-X", "X2": "X-2", "2X": "X-2",
                "12": "1-2", "21": "1-2", "1-X": "1-X", "X-2": "X-2", "1-2": "1-2"}.get(key)
        return ("Çifte Şans", norm) if norm else None

    if m == "BTTS":
        if "var" in pl or "yes" in pl:
            return ("Karşılıklı Gol", "Var")
        if "yok" in pl or "no" in pl:
            return ("Karşılıklı Gol", "Yok")
        return None

    if m == "OE":
        if "tek" in pl or "odd" in pl:
            return ("Tek/Çift", "Tek")
        if "çift" in pl or "cift" in pl or "even" in pl:
            return ("Tek/Çift", "Çift")
        return None

    # Over/under lines: the category name comes from the module-level table.
    ou_category = OU_CATS.get(m)
    if ou_category is None:
        return None
    if "üst" in pl or "ust" in pl or "over" in pl:
        return (ou_category, "Üst")
    if "alt" in pl or "under" in pl:
        return (ou_category, "Alt")
    return None


def closing_from_blob(blob: Any, cat: str, sel: str) -> Optional[float]:
    """Look up the odds stored for one selection inside an odds blob.

    Args:
        blob: Odds blob (dict or JSON text); it is passed through ``_parse``.
        cat: Category name used as the first-level key.
        sel: Selection key looked up inside that category's mapping.

    Returns:
        The stored value converted by ``_f``, or ``None`` when the blob or
        the category entry is not a dict, or the selection is missing or not
        convertible.
    """
    parsed = _parse(blob)
    if not isinstance(parsed, dict):
        return None
    selections = parsed.get(cat)
    if not isinstance(selections, dict):
        return None
    return _f(selections.get(sel))


def _collect_clv_rows(staked_only: bool) -> list:
    """Query live-odds prediction runs and build one CLV row per usable pick.

    A run is skipped when its main pick has no mappable market/selection,
    when its bet-time odds are missing or not above 1.0, or when no closing
    odds above 1.0 can be found.

    Args:
        staked_only: When true, runs whose main pick is not flagged
            ``playable`` are skipped.

    Returns:
        A list of dicts with the keys ``market``, ``playable``, ``bet_odds``,
        ``closing``, ``clv``, ``src``, ``profit`` and ``settled``.
    """
    rows_out = []
    conn = psycopg2.connect(get_clean_dsn())
    try:
        # ``with conn`` only commits / rolls back the transaction; psycopg2
        # leaves the connection open, hence the explicit close() below.
        with conn:
            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                cur.execute("""
                    SELECT match_id, engine_version, odds_snapshot, payload_summary,
                           eventual_outcome, unit_profit
                    FROM prediction_runs
                    WHERE odds_snapshot->>'source' = 'live_match'
                    ORDER BY generated_at ASC
                """)
                # Materialise the runs first: the same cursor is reused for
                # the per-run fallback query inside the loop.
                runs = cur.fetchall()

                for r in runs:
                    snap = _parse(r["odds_snapshot"])
                    ps = _parse(r["payload_summary"])
                    mp = ps.get("main_pick") if isinstance(ps, dict) else None
                    if not isinstance(mp, dict):
                        # Missing or malformed main pick: nothing to evaluate.
                        continue
                    market = mp.get("market")
                    pick = mp.get("pick")
                    bet_odds = _f(mp.get("odds"))
                    playable = bool(mp.get("playable"))
                    if staked_only and not playable:
                        continue
                    # ``not (x > 1.0)`` rather than ``x <= 1.0`` so that a NaN
                    # value is rejected as well.
                    if not market or not pick or bet_odds is None or not (bet_odds > 1.0):
                        continue
                    mapped = map_pick(market, pick)
                    if not mapped or not mapped[1]:
                        continue
                    cat, sel = mapped

                    # closing line: prefer captured closing_odds, else static odd_selections
                    captured_blob = snap.get("closing_odds") if isinstance(snap, dict) else None
                    closing = closing_from_blob(captured_blob, cat, sel)
                    src = "captured"
                    if closing is None:
                        cur.execute("""
                            SELECT os.odd_value FROM odd_categories oc
                            JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
                            WHERE oc.match_id = %s AND oc.name = %s AND os.name = %s
                            LIMIT 1
                        """, (r["match_id"], cat, sel))
                        row = cur.fetchone()
                        closing = _f(row["odd_value"]) if row else None
                        src = "static_proxy"
                    if closing is None or not (closing > 1.0):
                        continue

                    clv = bet_odds / closing - 1.0
                    rows_out.append({
                        "market": market, "playable": playable,
                        "bet_odds": bet_odds, "closing": closing, "clv": clv,
                        "src": src, "profit": _f(r["unit_profit"], 0.0) or 0.0,
                        "settled": r["eventual_outcome"] is not None
                        and not str(r["eventual_outcome"]).startswith("NO_BET"),
                    })
    finally:
        conn.close()
    return rows_out


def _aggregate(rs: list) -> Dict[str, Any]:
    """Summarise a non-empty list of CLV rows (count, mean CLV %, % positive)."""
    n = len(rs)
    clvs = [x["clv"] for x in rs]
    pos = sum(1 for c in clvs if c > 0)
    return {
        "n": n,
        "mean_clv_pct": round(100.0 * sum(clvs) / n, 2),
        "pct_positive": round(100.0 * pos / n, 1),
        "captured": sum(1 for x in rs if x["src"] == "captured"),
    }


def _print_report(rows_out: list) -> None:
    """Print the overall, staked-only, per-market and CLV-vs-P/L sections."""
    print("=" * 70)
    print("CLV REPORT  —  did we beat the closing line? (the edge compass)")
    print("=" * 70)
    o = _aggregate(rows_out)
    print(f"runs analyzed: {o['n']}   (closing source: {o['captured']} captured, "
          f"{o['n'] - o['captured']} static-proxy)")
    print(f"\nOVERALL mean CLV: {o['mean_clv_pct']}%   "
          f"bets beating close: {o['pct_positive']}%")
    print("  (positive mean CLV = real edge; ~0 or negative = no edge)\n")

    staked = [x for x in rows_out if x["playable"]]
    if staked:
        s = _aggregate(staked)
        print(f"STAKED only: n={s['n']}  mean CLV={s['mean_clv_pct']}%  "
              f"beating close={s['pct_positive']}%\n")

    print("BY MARKET")
    by_m = defaultdict(list)
    for x in rows_out:
        by_m[x["market"]].append(x)
    # Largest sample first.
    for m, rs in sorted(by_m.items(), key=lambda kv: -len(kv[1])):
        a = _aggregate(rs)
        print(f"  {m:<8} n={a['n']:>4}  mean CLV={a['mean_clv_pct']:>7}%  "
              f"beating close={a['pct_positive']:>5}%")

    # CLV vs outcome sanity: do positive-CLV bets actually win more / lose less?
    print("\nCLV vs realized P/L (settled staked)")
    ss = [x for x in rows_out if x["playable"] and x["settled"]]
    if ss:
        posc = [x for x in ss if x["clv"] > 0]
        negc = [x for x in ss if x["clv"] <= 0]
        for label, grp in (("CLV>0", posc), ("CLV<=0", negc)):
            if grp:
                pr = sum(x["profit"] for x in grp)
                print(f"  {label:<7} n={len(grp):>3}  profit={pr:>7.2f}u  "
                      f"ROI(flat1u)={round(100*pr/len(grp),1)}%")
    print("=" * 70)


def main() -> int:
    """Parse the command line, compute CLV rows and print the report.

    Returns:
        Process exit status; always 0, including when no usable runs exist.
    """
    # The module docstring is pre-formatted (underlined title, indented
    # usage lines), so keep argparse from re-wrapping it in ``--help``.
    ap = argparse.ArgumentParser(description=__doc__,
                                 formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument("--staked-only", action="store_true",
                    help="Only playable/staked bets (default: all picks with a mappable market)")
    args = ap.parse_args()

    rows_out = _collect_clv_rows(args.staked_only)
    if not rows_out:
        print("No mappable runs with both bet-time and closing odds found.")
        return 0

    _print_report(rows_out)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())