Files
iddaai-be/ai-engine/scripts/analyze_backtest_csv.py
T
fahricansecer 988ee2f50d Add backtest pipeline, betting_brain filters, score coherence + social v3
betting_brain.py:
- HARD_MIN_SAMPLES=50 floor for calibrator bypass
- Hard vetoes when ev_edge < 0 or ev_edge >= 0.20
- BTTS muted (grid search found no profitable config)
- Per-market optimal envelopes (MS, OU25)
- Score coherence filter: main_pick must agree with score prediction
- HTFT reversal cross-check for MS picks

feature_builder.py / data_loader.py:
- Real home/away_position from data (was hardcoded 10)
- Cup detection wired into UpsetEngine
- _estimate_league_position with 300-day season filter

New scripts:
- diagnostic_backtest.py: per-bet diagnostic backtest with loss patterns
- optimize_filters.py: grid search per-market optimal thresholds
- analyze_backtest_csv.py: root-cause hypothesis testing on CSV
- compare_backtests.py: side-by-side validation with verdict
- test_score_coherence.py: smoke test for coherence filter (20/20 pass)

Reports:
- diagnostic_backtest_20260525_024437 (50-match smoke)
- diagnostic_backtest_20260525_035649 (1000-match in-sample)
- filter_optimization_patch.json (grid search winners per market)

Social poster v3:
- satori + resvg HTML/CSS rendering pipeline
- Twemoji football/basketball + flag SVGs
- caption SEO: 12 curated hashtags per post
- image SEO: descriptive filenames + .json metadata sidecar
- /health, /preview-png, /run-now endpoints

Docs:
- mds/SESSION_HANDOFF.md: full session state for cross-machine continuity
- mds/SOCIAL_POSTER_SETUP.md: API keys + test commands

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 20:43:28 +03:00

228 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Deep root-cause analysis on diagnostic_backtest CSV.
Tests specific hypotheses with hard numbers and proposes actionable
filter rules with estimated impact (units saved, ROI shift).
"""
import sys, os, glob
import pandas as pd
import numpy as np
# Absolute path of the "reports" folder located one level above the
# directory that contains this script.
REPORTS_DIR = os.path.join(
    os.path.normpath(
        os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)
    ),
    "reports",
)
def latest_csv():
    """Return the newest diagnostic backtest CSV path, or None if there is none.

    Candidates are the files matching ``diagnostic_backtest_*.csv`` directly
    inside ``REPORTS_DIR``; "newest" means the largest modification time.
    """
    pattern = os.path.join(REPORTS_DIR, "diagnostic_backtest_*.csv")
    candidates = glob.glob(pattern)
    if not candidates:
        return None
    # max() returns the first of several equally-recent files, which is the
    # same pick as taking element 0 of a stable descending sort.
    return max(candidates, key=os.path.getmtime)
def fmt_pct(x):
    """Render *x* as a right-aligned percentage with two decimals.

    Values that ``pd.notna`` reports as missing (``None``, NaN) are rendered
    as a dash placeholder instead of a number.
    """
    if pd.notna(x):
        return f"{x:>6.2f}%"
    return " ----"
def cell(df, label, mask):
    """Summarise the rows of *df* selected by *mask* as one report line.

    Args:
        df: DataFrame with ``won``, ``unit_profit`` and ``stake_units`` columns.
        label: Row caption, left-aligned in a 60-character field.
        mask: Boolean indexer applied to *df*.

    Returns:
        A formatted string with the row count, hit rate (wins over
        wins + losses), summed profit in units and ROI (profit over stake).
        When no row is selected only ``n=0`` is reported.
    """
    subset = df[mask]
    count = len(subset)
    if not count:
        return f" {label:<60} n=0"

    # Only explicit True/False outcomes count as settled.
    outcome = subset["won"]
    n_won = outcome.eq(True).sum()
    n_lost = outcome.eq(False).sum()
    decided = n_won + n_lost
    hit_rate = 100.0 * n_won / decided if decided else 0

    total_profit = subset["unit_profit"].sum()
    total_stake = subset["stake_units"].sum()
    roi = 100.0 * total_profit / total_stake if total_stake else 0

    stats = (
        f"n={count:>4} hit={hit_rate:>6.2f}% "
        f"profit={total_profit:>+7.2f}u roi={roi:>+7.2f}%"
    )
    return f" {label:<60} {stats}"
def hypothesis_block(title, rows):
    """Print one titled report section.

    Output is a blank line, a horizontal rule, the indented title, another
    rule, and then every entry of *rows* on its own line.

    Args:
        title: Section heading shown between the two rules.
        rows: Iterable of already-formatted strings, printed in order.
    """
    # The rule used to be built as '' * 78, which is always the empty string,
    # so the banner degenerated into blank lines. Draw a visible ASCII rule.
    rule = "=" * 78
    print(f"\n{rule}")
    print(f" {title}")
    print(rule)
    for row in rows:
        print(row)
def main():
    """Print the root-cause hypothesis report for the newest backtest CSV.

    Loads the file returned by ``latest_csv()``, keeps rows that are playable
    and have a non-null ``won`` value, then prints:

    * the overall hit rate and ROI,
    * H1-H6: one ``cell`` line per bucket for triple-value confirmation,
      trap-market flag, V25/V27 consensus, odds reliability, calibrator delta
      and EV edge,
    * H7: bet count and ROI% pivoted by market and odds band,
    * H8: a what-if simulation of single and stacked rejection rules.

    Columns read from the CSV: ``playable``, ``won``, ``unit_profit``,
    ``stake_units``, ``bb_issues``, ``bb_trap_market``, ``v27_consensus``,
    ``odds_reliability``, ``calibrated_confidence``, ``raw_confidence``,
    ``ev_edge``, ``odds``, ``market`` and ``match_id``.

    Returns early (after printing a message) when no CSV exists or when no
    playable + settled row is left to analyse.
    """
    # '' * 78 is always empty, so the section rules were never drawn.
    rule = "=" * 78

    csv_path = latest_csv()
    if not csv_path:
        print("No backtest CSV found")
        return
    print(f"Reading {csv_path}")
    df = pd.read_csv(csv_path)
    print(f"Loaded {len(df)} rows")

    # Filter only playable + settled
    pdf = df[(df["playable"] == True) & (df["won"].notna())].copy()  # noqa: E712
    pdf["won"] = pdf["won"].astype(bool)
    print(f"Playable + settled: {len(pdf)}")
    if pdf.empty:
        # Every statistic below divides by the row count or the total stake.
        print("No playable + settled bets to analyse")
        return

    total_staked = pdf["stake_units"].sum()
    overall_hr = (pdf["won"].sum() / len(pdf)) * 100
    overall_roi = 100.0 * pdf["unit_profit"].sum() / total_staked if total_staked else 0
    print(f"\nOVERALL: hit={overall_hr:.2f}% roi={overall_roi:.2f}%")

    # Masks shared by the per-hypothesis tables and the H8 simulation.
    triple_confirmed_mask = ~pdf["bb_issues"].fillna("").str.contains(
        "triple_value_not_confirmed", na=False
    )
    trap_mask = pdf["bb_trap_market"] == True  # noqa: E712
    agree_mask = pdf["v27_consensus"] == "AGREE"
    disagree_mask = pdf["v27_consensus"] == "DISAGREE"
    edge = pdf["ev_edge"].fillna(0)
    # NOTE(review): missing reliability is treated as 1.0 by the H8 filter but
    # as 0.35 by the H4 banding below - confirm which default is intended.
    reliable_mask = pdf["odds_reliability"].fillna(1.0) >= 0.45
    odds_filled = pdf["odds"].fillna(0)
    outside_180_220 = (odds_filled < 1.80) | (odds_filled >= 2.20)

    # --- H1: TRIPLE VALUE CONFIRMATION ---
    hypothesis_block(
        "H1: TRIPLE VALUE CONFIRMED vs NOT CONFIRMED",
        [
            cell(pdf, "triple_value CONFIRMED", triple_confirmed_mask),
            cell(pdf, "triple_value NOT CONFIRMED", ~triple_confirmed_mask),
        ],
    )

    # --- H2: TRAP MARKET FLAG ---
    hypothesis_block(
        "H2: TRAP MARKET FLAG (model says band rate < implied → market overpriced)",
        [
            cell(pdf, "trap_market_flag = TRUE (model warned)", trap_mask),
            cell(pdf, "trap_market_flag = FALSE", ~trap_mask),
        ],
    )

    # --- H3: V25/V27 CONSENSUS ---
    hypothesis_block(
        "H3: V25 ↔ V27 CONSENSUS",
        [
            cell(pdf, "AGREE", agree_mask),
            cell(pdf, "DISAGREE", disagree_mask),
            cell(pdf, "neither/null", ~(agree_mask | disagree_mask)),
        ],
    )

    # --- H4: ODDS RELIABILITY (league quality) ---
    # Bands are left-closed ([lo, hi)) so that they match their own labels
    # (">=0.55") and the ">= 0.45" rule simulated in H8; the last edge is open
    # so a reliability of exactly 1.0 still lands in the top band.
    pdf["rel_band"] = pd.cut(
        pdf["odds_reliability"].fillna(0.35),
        [0, 0.30, 0.45, 0.55, np.inf],
        labels=["<0.30 verylow", "0.30-0.45 low", "0.45-0.55 mid", ">=0.55 high"],
        right=False,
    )
    hypothesis_block(
        "H4: LEAGUE ODDS RELIABILITY",
        [cell(pdf, str(b), pdf["rel_band"] == b) for b in pdf["rel_band"].cat.categories],
    )

    # --- H5: CALIBRATOR IMPACT (raw vs calibrated) ---
    # Boundaries are kept right-closed here: the labels do not say which side
    # of -3 / 3 / 10 is inclusive, so the original bucketing is preserved.
    pdf["calib_delta"] = pdf["calibrated_confidence"] - pdf["raw_confidence"]
    pdf["delta_band"] = pd.cut(
        pdf["calib_delta"].fillna(0),
        [-100, -10, -3, 3, 10, 100],
        labels=["cal<<raw (-10+)", "cal<raw (-3..-10)", "≈equal (±3)",
                "cal>raw (3..10)", "cal>>raw (+10+)"],
    )
    hypothesis_block(
        "H5: CALIBRATOR DELTA (calibrated_conf - raw_conf)",
        [cell(pdf, str(b), pdf["delta_band"] == b) for b in pdf["delta_band"].cat.categories],
    )

    # --- H6: EV EDGE ---
    # Left-closed so an edge of exactly 0 is reported in "0-5%", the same side
    # of the line on which the "REJECT ev_edge < 0" rule keeps it.
    pdf["edge_band"] = pd.cut(
        edge,
        [-np.inf, -0.05, 0.0, 0.05, 0.10, 0.20, np.inf],
        labels=["edge<-5%", "-5%-0%", "0-5%", "5-10%", "10-20%", ">20%"],
        right=False,
    )
    hypothesis_block(
        "H6: EV EDGE (model_prob - implied_prob)",
        [cell(pdf, str(b), pdf["edge_band"] == b) for b in pdf["edge_band"].cat.categories],
    )

    # --- H7: ODDS x MARKET cross ---
    # Left-closed so odds of exactly 1.80 fall in "1.80-2.20", the same range
    # the H8 rule rejects. Missing odds are left unbanded (NaN), as before.
    pdf["odds_band"] = pd.cut(
        pdf["odds"],
        [0, 1.30, 1.50, 1.80, 2.20, 3.00, np.inf],
        labels=["<1.30", "1.30-1.50", "1.50-1.80", "1.80-2.20", "2.20-3.00", ">3.00"],
        right=False,
    )
    print(f"\n{rule}")
    print(" H7: ODDS BAND × MARKET (per cell hit% / roi% / n)")
    print(rule)
    pivot_n = pdf.pivot_table(index="market", columns="odds_band",
                              values="match_id", aggfunc="count", fill_value=0,
                              observed=False)
    pivot_roi = pdf.pivot_table(index="market", columns="odds_band",
                                values="unit_profit", aggfunc="sum", fill_value=0,
                                observed=False)
    pivot_stake = pdf.pivot_table(index="market", columns="odds_band",
                                  values="stake_units", aggfunc="sum", fill_value=0,
                                  observed=False)
    # Cells with zero stake become NaN instead of dividing by zero.
    pivot_roi_pct = (100.0 * pivot_roi / pivot_stake.replace(0, np.nan)).round(1)
    print("\n Bet count per cell:")
    print(pivot_n.to_string())
    print("\n ROI% per cell:")
    print(pivot_roi_pct.to_string())

    # --- H8: COMBINED FILTER SIMULATION ---
    print(f"\n{rule}")
    print(" H8: COMBINED FILTER SIMULATION (what if we add rules)")
    print(rule)

    def simulate(filter_name, keep_mask):
        """Format hit rate / ROI of the kept bets and the profit of the rejected ones."""
        kept = pdf[keep_mask]
        rejected = pdf[~keep_mask]
        if len(kept) == 0:
            return f" {filter_name:<55} → 0 bet remain"
        kept_hr = 100.0 * kept["won"].sum() / len(kept)
        kept_profit = kept["unit_profit"].sum()
        kept_staked = kept["stake_units"].sum()
        kept_roi = 100.0 * kept_profit / kept_staked if kept_staked else 0
        saved = -rejected["unit_profit"].sum()  # money we WOULD HAVE LOST
        return (f" {filter_name:<55} keep={len(kept):>3} hit={kept_hr:>5.1f}% "
                f"roi={kept_roi:>+6.2f}% saved={saved:>+6.2f}u")

    print(simulate("BASELINE (no extra filter)", pd.Series(True, index=pdf.index)))
    print(simulate("REJECT triple_value_not_confirmed", triple_confirmed_mask))
    print(simulate("REJECT trap_market_flag", ~trap_mask))
    print(simulate("REJECT v27 DISAGREE", ~disagree_mask))
    print(simulate("REJECT odds_reliability < 0.45", reliable_mask))
    print(simulate("REJECT odds in 1.80-2.20", outside_180_220))
    print(simulate("REJECT ev_edge < 0", edge >= 0))
    print(simulate("REJECT ev_edge < 0.05", edge >= 0.05))
    print()
    print(" COMBINED rules:")
    # Stack 1: drop triple_not_confirmed + trap_market + DISAGREE
    s1 = triple_confirmed_mask & ~trap_mask & ~disagree_mask
    print(simulate("STACK1: !triple_not_conf & !trap & !disagree", s1))
    # Stack 2: + edge>=0
    s2 = s1 & (edge >= 0)
    print(simulate("STACK2: STACK1 + edge >= 0", s2))
    # Stack 3: + reliability>=0.45
    s3 = s2 & reliable_mask
    print(simulate("STACK3: STACK2 + reliability >= 0.45", s3))
    # Stack 4: + odds outside 1.80-2.20
    s4 = s3 & outside_180_220
    print(simulate("STACK4: STACK3 + odds NOT in 1.80-2.20", s4))
    print(f"\n{rule}")
    print("DONE.")
# Script entry point: run the report only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()