# iddaai-be/ai-engine/scripts/analyze_backtest_csv.py

"""
Deep root-cause analysis on diagnostic_backtest CSV.
Tests specific hypotheses with hard numbers and proposes actionable
filter rules with estimated impact (units saved, ROI shift).
"""

import sys, os, glob
import pandas as pd
import numpy as np

REPORTS_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "reports")

def latest_csv():
    """Return the path of the newest diagnostic backtest CSV, or ``None``.

    Searches ``REPORTS_DIR`` for files matching
    ``diagnostic_backtest_*.csv`` and picks the one with the most recent
    modification time. ``None`` is returned when nothing matches.
    """
    pattern = os.path.join(REPORTS_DIR, "diagnostic_backtest_*.csv")
    candidates = glob.glob(pattern)
    if not candidates:
        return None
    # max() keeps the first candidate on mtime ties, like a stable
    # descending sort followed by taking element 0.
    return max(candidates, key=os.path.getmtime)

def fmt_pct(x):
    """Format *x* as a percentage string with two decimals.

    The number is right-aligned in a 6-character field and followed by
    ``%``. Missing values (``None`` / NaN, as judged by pandas) yield a
    dash placeholder instead.
    """
    if pd.isna(x):
        return "  ----"
    return f"{x:>6.2f}%"

def cell(df, label, mask):
    """Build one report line summarising the rows of *df* selected by *mask*.

    The line contains the label (left-aligned, 60 wide), the row count,
    the hit rate over rows whose ``won`` is True or False, the summed
    ``unit_profit`` and the ROI (profit over summed ``stake_units``).
    When the selection is empty only ``n=0`` is reported. Hit rate and ROI
    fall back to 0 when their denominator is zero.
    """
    subset = df[mask]
    count = len(subset)
    prefix = f"  {label:<60} n="
    if not count:
        return prefix + "0"

    outcome = subset["won"]
    # Explicit comparisons so that values which are neither True nor False
    # are left out of the hit-rate denominator.
    n_won = (outcome == True).sum()  # noqa: E712
    n_lost = (outcome == False).sum()  # noqa: E712
    decided = n_won + n_lost
    hit_rate = 100.0 * n_won / decided if decided else 0

    total_profit = subset["unit_profit"].sum()
    total_stake = subset["stake_units"].sum()
    roi_pct = 100.0 * total_profit / total_stake if total_stake else 0

    stats = (f"{count:>4} hit={hit_rate:>6.2f}% "
             f"profit={total_profit:>+7.2f}u roi={roi_pct:>+7.2f}%")
    return prefix + stats

def hypothesis_block(title, rows):
    """Print a titled report section.

    Emits a blank line, a 78-character horizontal rule, the indented
    *title*, another rule, and then each item of *rows* on its own line.
    """
    rule = "─" * 78
    print("\n" + rule)
    print("  " + f"{title}")
    print(rule)
    for line in rows:
        print(line)

def main():
    """Print the root-cause hypothesis report for the newest backtest CSV.

    Loads the file returned by ``latest_csv()``, keeps only rows that are
    playable and have a non-null ``won`` value, and prints hypothesis
    tables H1-H8 to stdout. Returns ``None``. Exits early with a message
    when no CSV is found or when no playable, settled rows remain.

    The reliability, EV-edge and odds bands are half-open ``[low, high)``
    so that boundary values land in the band their label names and agree
    with the ``>=`` / ``<`` thresholds used by the H8 filter simulation.
    """
    csv_path = latest_csv()
    if not csv_path:
        print("No backtest CSV found")
        return
    print(f"Reading {csv_path}")
    df = pd.read_csv(csv_path)
    print(f"Loaded {len(df)} rows")

    # Filter only playable + settled
    pdf = df[(df["playable"] == True) & (df["won"].notna())].copy()  # noqa: E712
    pdf["won"] = pdf["won"].astype(bool)
    print(f"Playable + settled: {len(pdf)}")
    if pdf.empty:
        # Nothing to analyse; every ratio below would divide by zero.
        print("No playable + settled rows to analyze")
        return

    overall_hr = (pdf["won"].sum() / len(pdf)) * 100
    total_staked = pdf["stake_units"].sum()
    # Same zero-stake fallback as cell() uses for per-row-group ROI.
    overall_roi = 100.0 * pdf["unit_profit"].sum() / total_staked if total_staked else 0
    print(f"\nOVERALL: hit={overall_hr:.2f}% roi={overall_roi:.2f}%")

    # ─────────────────────────────────────────────────────────────────────
    # H1: TRIPLE VALUE CONFIRMATION
    # ─────────────────────────────────────────────────────────────────────
    # Literal substring match; rows with no issues text count as confirmed.
    triple_confirmed_mask = ~pdf["bb_issues"].fillna("").str.contains(
        "triple_value_not_confirmed", na=False, regex=False
    )
    hypothesis_block(
        "H1: TRIPLE VALUE CONFIRMED vs NOT CONFIRMED",
        [
            cell(pdf, "triple_value CONFIRMED", triple_confirmed_mask),
            cell(pdf, "triple_value NOT CONFIRMED", ~triple_confirmed_mask),
        ]
    )

    # ─────────────────────────────────────────────────────────────────────
    # H2: TRAP MARKET FLAG
    # ─────────────────────────────────────────────────────────────────────
    trap_mask = pdf["bb_trap_market"] == True  # noqa: E712
    hypothesis_block(
        "H2: TRAP MARKET FLAG (model says band rate < implied → market overpriced)",
        [
            cell(pdf, "trap_market_flag = TRUE  (model warned)", trap_mask),
            cell(pdf, "trap_market_flag = FALSE", ~trap_mask),
        ]
    )

    # ─────────────────────────────────────────────────────────────────────
    # H3: V25/V27 CONSENSUS
    # ─────────────────────────────────────────────────────────────────────
    agree_mask = pdf["v27_consensus"] == "AGREE"
    disagree_mask = pdf["v27_consensus"] == "DISAGREE"
    hypothesis_block(
        "H3: V25 ↔ V27 CONSENSUS",
        [
            cell(pdf, "AGREE", agree_mask),
            cell(pdf, "DISAGREE", disagree_mask),
            cell(pdf, "neither/null", ~(agree_mask | disagree_mask)),
        ]
    )

    # ─────────────────────────────────────────────────────────────────────
    # H4: ODDS RELIABILITY (league quality)
    # ─────────────────────────────────────────────────────────────────────
    # right=False gives [low, high) bins so 0.45 is "mid" and 0.55 is
    # "high", matching the labels and the ">= 0.45" filter in H8. The outer
    # edges are infinite because the outer labels are open-ended.
    # NOTE(review): missing reliability is shown in the "low" band here
    # (filled with 0.35) but passes the H8 reliability filter (filled with
    # 1.0) — confirm which treatment is intended.
    pdf["rel_band"] = pd.cut(
        pdf["odds_reliability"].fillna(0.35),
        [-np.inf, 0.30, 0.45, 0.55, np.inf],
        labels=["<0.30 verylow", "0.30-0.45 low", "0.45-0.55 mid", ">=0.55 high"],
        right=False,
    )
    hypothesis_block(
        "H4: LEAGUE ODDS RELIABILITY",
        [cell(pdf, str(b), pdf["rel_band"] == b) for b in pdf["rel_band"].cat.categories]
    )

    # ─────────────────────────────────────────────────────────────────────
    # H5: CALIBRATOR IMPACT (raw vs calibrated)
    # ─────────────────────────────────────────────────────────────────────
    pdf["calib_delta"] = pdf["calibrated_confidence"] - pdf["raw_confidence"]
    # Bins keep pandas' default (low, high] edges: the labels do not say
    # which side is closed and no H8 filter depends on them.
    pdf["delta_band"] = pd.cut(
        pdf["calib_delta"].fillna(0),
        [-100, -10, -3, 3, 10, 100],
        labels=["cal<<raw (-10+)", "cal<raw (-3..-10)", "≈equal (±3)",
                "cal>raw (3..10)", "cal>>raw (+10+)"]
    )
    hypothesis_block(
        "H5: CALIBRATOR DELTA (calibrated_conf - raw_conf)",
        [cell(pdf, str(b), pdf["delta_band"] == b) for b in pdf["delta_band"].cat.categories]
    )

    # ─────────────────────────────────────────────────────────────────────
    # H6: EV EDGE
    # ─────────────────────────────────────────────────────────────────────
    # [low, high) bins: an edge of exactly 0 (including missing values
    # filled with 0) belongs to "0-5%", in line with the "ev_edge >= 0"
    # filter in H8 that keeps those rows.
    edge_filled = pdf["ev_edge"].fillna(0)
    pdf["edge_band"] = pd.cut(
        edge_filled,
        [-np.inf, -0.05, 0.0, 0.05, 0.10, 0.20, np.inf],
        labels=["edge<-5%", "-5%-0%", "0-5%", "5-10%", "10-20%", ">20%"],
        right=False,
    )
    hypothesis_block(
        "H6: EV EDGE (model_prob - implied_prob)",
        [cell(pdf, str(b), pdf["edge_band"] == b) for b in pdf["edge_band"].cat.categories]
    )

    # ─────────────────────────────────────────────────────────────────────
    # H7: ODDS x MARKET cross
    # ─────────────────────────────────────────────────────────────────────
    # [low, high) bins so odds of exactly 1.80 sit in "1.80-2.20", the same
    # range the H8 odds filter rejects. Missing or non-positive odds are
    # masked to NaN and therefore belong to no band.
    pdf["odds_band"] = pd.cut(
        pdf["odds"].where(pdf["odds"] > 0),
        [0, 1.30, 1.50, 1.80, 2.20, 3.00, np.inf],
        labels=["<1.30", "1.30-1.50", "1.50-1.80", "1.80-2.20", "2.20-3.00", ">3.00"],
        right=False,
    )

    print(f"\n{'─' * 78}")
    print("  H7: ODDS BAND × MARKET (per cell hit% / roi% / n)")
    print(f"{'─' * 78}")
    pivot_n = pdf.pivot_table(index="market", columns="odds_band",
                                values="match_id", aggfunc="count", fill_value=0,
                                observed=False)
    pivot_roi = pdf.pivot_table(index="market", columns="odds_band",
                                  values="unit_profit", aggfunc="sum", fill_value=0,
                                  observed=False)
    pivot_stake = pdf.pivot_table(index="market", columns="odds_band",
                                    values="stake_units", aggfunc="sum", fill_value=0,
                                    observed=False)
    # Zero stake becomes NaN so empty cells show NaN instead of dividing by 0.
    pivot_roi_pct = (100.0 * pivot_roi / pivot_stake.replace(0, np.nan)).round(1)
    print("\n  Bet count per cell:")
    print(pivot_n.to_string())
    print("\n  ROI% per cell:")
    print(pivot_roi_pct.to_string())

    # ─────────────────────────────────────────────────────────────────────
    # H8: COMBINED FILTER SIMULATION
    # ─────────────────────────────────────────────────────────────────────
    print(f"\n{'─' * 78}")
    print("  H8: COMBINED FILTER SIMULATION (what if we add rules)")
    print(f"{'─' * 78}")

    def simulate(filter_name, keep_mask):
        """Return a report line for the bets that *keep_mask* retains.

        Shows the kept count, hit rate and ROI, plus ``saved``: the
        negated profit of the rejected bets (positive when the rejected
        bets lost money overall).
        """
        kept = pdf[keep_mask]
        rejected = pdf[~keep_mask]
        if len(kept) == 0:
            return f"  {filter_name:<55} → 0 bet remain"
        kept_hr = 100.0 * kept["won"].sum() / len(kept)
        kept_profit = kept["unit_profit"].sum()
        kept_staked = kept["stake_units"].sum()
        kept_roi = 100.0 * kept_profit / kept_staked if kept_staked else 0
        # Subtract from 0.0 rather than negate: negating an empty sum gives
        # -0.0, which would print as "-0.00" on the baseline row.
        saved = 0.0 - rejected["unit_profit"].sum()  # money we WOULD HAVE LOST
        return (f"  {filter_name:<55} keep={len(kept):>3} hit={kept_hr:>5.1f}% "
                f"roi={kept_roi:>+6.2f}% saved={saved:>+6.2f}u")

    # Keep-masks shared by the single-rule and stacked simulations.
    reliable_mask = pdf["odds_reliability"].fillna(1.0) >= 0.45
    odds_filled = pdf["odds"].fillna(0)
    outside_mid_odds_mask = (odds_filled < 1.80) | (odds_filled >= 2.20)
    non_negative_edge_mask = edge_filled >= 0

    print(simulate("BASELINE (no extra filter)", pd.Series(True, index=pdf.index)))
    print(simulate("REJECT triple_value_not_confirmed", triple_confirmed_mask))
    print(simulate("REJECT trap_market_flag", ~trap_mask))
    print(simulate("REJECT v27 DISAGREE", ~disagree_mask))
    print(simulate("REJECT odds_reliability < 0.45", reliable_mask))
    print(simulate("REJECT odds in 1.80-2.20", outside_mid_odds_mask))
    print(simulate("REJECT ev_edge < 0", non_negative_edge_mask))
    print(simulate("REJECT ev_edge < 0.05", edge_filled >= 0.05))
    print()
    print("  COMBINED rules:")
    # Stack 1: drop triple_not_confirmed + trap_market + DISAGREE
    s1 = triple_confirmed_mask & ~trap_mask & ~disagree_mask
    print(simulate("STACK1: !triple_not_conf & !trap & !disagree", s1))
    # Stack 2: + edge>=0
    s2 = s1 & non_negative_edge_mask
    print(simulate("STACK2: STACK1 + edge >= 0", s2))
    # Stack 3: + reliability>=0.45
    s3 = s2 & reliable_mask
    print(simulate("STACK3: STACK2 + reliability >= 0.45", s3))
    # Stack 4: + odds outside 1.80-2.20
    s4 = s3 & outside_mid_odds_mask
    print(simulate("STACK4: STACK3 + odds NOT in 1.80-2.20", s4))

    print(f"\n{'─' * 78}")
    print("DONE.")

if __name__ == "__main__":
    main()