988ee2f50d
betting_brain.py: - HARD_MIN_SAMPLES=50 floor for calibrator bypass - ev_edge < 0 + >= 0.20 hard vetoes - BTTS muted (grid search found no profitable config) - Per-market optimal envelopes (MS, OU25) - Score coherence filter: main_pick must agree with score prediction - HTFT reversal cross-check for MS picks feature_builder.py / data_loader.py: - Real home/away_position from data (was hardcoded 10) - Cup detection wired into UpsetEngine - _estimate_league_position with 300-day season filter New scripts: - diagnostic_backtest.py: per-bet diagnostic backtest with loss patterns - optimize_filters.py: grid search per-market optimal thresholds - analyze_backtest_csv.py: root-cause hypothesis testing on CSV - compare_backtests.py: side-by-side validation with verdict - test_score_coherence.py: smoke test for coherence filter (20/20 pass) Reports: - diagnostic_backtest_20260525_024437 (50-match smoke) - diagnostic_backtest_20260525_035649 (1000-match in-sample) - filter_optimization_patch.json (grid search winners per market) Social poster v3: - satori + resvg HTML/CSS rendering pipeline - Twemoji football/basketball + flag SVGs - caption SEO: 12 curated hashtags per post - image SEO: descriptive filenames + .json metadata sidecar - /health, /preview-png, /run-now endpoints Docs: - mds/SESSION_HANDOFF.md: full session state for cross-machine continuity - mds/SOCIAL_POSTER_SETUP.md: API keys + test commands Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
675 lines
26 KiB
Python
675 lines
26 KiB
Python
"""
Diagnostic Backtest
===================
Run the full V28 orchestrator (in-process — no HTTP) on a window of completed
matches, capture the recommendation, the key signal features and the actual
outcome of each match, and produce a *diagnostic* report: not just "what was
the hit rate" but "which feature clusters drive the losing bets".

Outputs (one timestamped set per run):
  - reports/diagnostic_backtest_YYYYMMDD_HHMMSS.csv   (per-bet detail)
  - reports/diagnostic_backtest_YYYYMMDD_HHMMSS.json  (aggregate metrics)
  - reports/diagnostic_backtest_YYYYMMDD_HHMMSS.txt   (human-readable summary)

Usage:
    python scripts/diagnostic_backtest.py --days 14 --max-matches 2000
    python scripts/diagnostic_backtest.py --start 2026-05-10 --end 2026-05-24
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sys
|
|
import time
|
|
import traceback
|
|
from collections import defaultdict, Counter
|
|
from datetime import datetime, timedelta, timezone
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
import psycopg2
|
|
from psycopg2.extras import RealDictCursor
|
|
|
|
# Path bootstrap so we can import the ai-engine package from anywhere:
# the parent of this script's directory is pushed to the front of sys.path.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
AI_ENGINE_DIR = os.path.dirname(SCRIPT_DIR)
sys.path.insert(0, AI_ENGINE_DIR)

# These imports must stay below the sys.path tweak above.
from data.db import get_clean_dsn
from services.single_match_orchestrator import get_single_match_orchestrator

# Directory that receives the CSV / JSON / TXT reports; created at import time.
REPORTS_DIR = os.path.join(AI_ENGINE_DIR, "reports")
os.makedirs(REPORTS_DIR, exist_ok=True)

# Days with confirmed feeder gaps — exclude from sample
EXCLUDED_DATES = {"2026-05-03", "2026-04-29"}
|
|
|
|
|
|
# ── Outcome resolution ────────────────────────────────────────────────
|
|
def _norm_pick(pick: Optional[str]) -> str:
    """Return *pick* stripped and case-folded; ``None`` becomes ``""``."""
    return str(pick or "").strip().casefold()


# Goal line per over/under market code (keys are already-normalised codes).
_FT_GOAL_LINES = {"OU05": 0.5, "OU15": 1.5, "OU25": 2.5, "OU35": 3.5,
                  "OU45": 4.5, "TOTAL": 2.5}
_HT_GOAL_LINES = {"OU05_HT": 0.5, "OU15_HT": 1.5, "OU25_HT": 2.5,
                  "HT_OU05": 0.5, "HT_OU15": 1.5, "HT_OU25": 2.5}


def _result_symbol(home: int, away: int) -> str:
    """Return "1", "2" or "x" for a home win, away win or draw."""
    if home > away:
        return "1"
    if away > home:
        return "2"
    return "x"


def _canon_result(token: str) -> str:
    """Canonicalise one result token; "0" is accepted as the draw symbol."""
    token = token.strip()
    return "x" if token == "0" else token


def _resolve_over_under(p: str, total: int, line: float) -> Optional[bool]:
    """Settle an over/under pick; None on a push or an unrecognised pick."""
    if total == line:
        return None
    is_over = total > line
    if "over" in p or "üst" in p or "ust" in p:
        return is_over
    if "alt" in p or "under" in p:
        return not is_over
    return None


def resolve_outcome(market: str, pick: str, sh: int, sa: int,
                    htsh: Optional[int], htsa: Optional[int]) -> Optional[bool]:
    """Mirror of prediction-settlement.market-resolver.ts (TS side).

    Args:
        market: Market code; upper-cased, spaces removed and "-" turned
            into "_" before matching.
        pick: Selection text; matched case-insensitively.
        sh, sa: Full-time home / away goals.
        htsh, htsa: Half-time home / away goals, or None when unknown.

    Returns:
        True/False on settle, None if cannot resolve (unknown market,
        unrecognised pick, or missing half-time score for a market that
        needs it).

    The draw may be written "x" or "0" in every result-based market
    (MS, HT, HTFT, DC); previously HTFT and DC only understood "x".
    """
    m = (market or "").upper().replace(" ", "").replace("-", "_")
    p = _norm_pick(pick)
    have_ht = htsh is not None and htsa is not None

    if m in ("MS", "ML", "1X2"):
        return _canon_result(p) == _result_symbol(sh, sa)

    if m in ("HT", "IY"):
        if not have_ht:
            return None
        return _canon_result(p) == _result_symbol(htsh, htsa)

    if m in _FT_GOAL_LINES:
        return _resolve_over_under(p, sh + sa, _FT_GOAL_LINES[m])

    if m in _HT_GOAL_LINES:
        if not have_ht:
            return None
        return _resolve_over_under(p, htsh + htsa, _HT_GOAL_LINES[m])

    if m in ("BTTS", "KG"):
        both = sh > 0 and sa > 0
        if "yes" in p or "var" in p:
            return both
        if "no" in p or "yok" in p:
            return not both
        return None

    if m in ("HTFT", "IYMS"):
        if not have_ht or "/" not in p:
            return None
        ht_p, ft_p = p.split("/", 1)
        return (_canon_result(ht_p) == _result_symbol(htsh, htsa)
                and _canon_result(ft_p) == _result_symbol(sh, sa))

    if m in ("DC", "CIFTE_SANS"):
        ft = _result_symbol(sh, sa).upper()
        # Drop separators and fold the "0" draw symbol into "X" so that
        # "1-0" / "0-2" are read the same way as "1-X" / "X-2".
        raw = p.upper().replace("-", "").replace("/", "").replace("0", "X")
        if raw in ("1X", "X1"):
            pair = ("1", "X")
        elif raw in ("X2", "2X"):
            pair = ("X", "2")
        elif raw in ("12", "21"):
            pair = ("1", "2")
        else:
            return None
        return ft in pair

    if m in ("OE", "TEKCIFT"):
        is_odd = (sh + sa) % 2 == 1
        if "tek" in p or "odd" in p:
            return is_odd
        if "cift" in p or "çift" in p or "even" in p:
            return not is_odd
        return None

    return None
|
|
|
|
|
|
def compute_unit_profit(won: Optional[bool], stake: float, odds: Optional[float]) -> float:
    """Return the profit in units for a single bet.

    An unsettled bet (``won is None``) yields 0.0. A lost bet costs the
    absolute stake, or one unit when the stake is falsy. A won bet pays
    ``stake * (odds - 1)`` rounded to 4 decimals, or 0.0 when the odds
    are missing or not above 1.0.
    """
    if won is None:
        return 0.0

    if won:
        has_usable_odds = bool(odds) and odds > 1.0
        if has_usable_odds:
            return round(stake * (odds - 1.0), 4)
        return 0.0

    # Lost bet: fall back to a one-unit loss when no stake was recorded.
    if stake:
        return -abs(stake)
    return -1.0
|
|
|
|
|
|
# ── Data fetch ────────────────────────────────────────────────────────
|
|
def _window_bounds_ms(args) -> Tuple[int, int]:
    """Return the half-open sample window [start, end) as epoch milliseconds.

    Dates given via --start / --end are interpreted as UTC calendar days and
    --end is inclusive. Either flag may be given on its own: a missing end
    defaults to "now", a missing start to ``end - args.days``.
    """
    fmt = "%Y-%m-%d"
    if args.end:
        end = (datetime.strptime(args.end, fmt).replace(tzinfo=timezone.utc)
               + timedelta(days=1))
    else:
        end = datetime.now(timezone.utc)
    if args.start:
        start = datetime.strptime(args.start, fmt).replace(tzinfo=timezone.utc)
    else:
        start = end - timedelta(days=args.days)
    # The datetimes are timezone-aware on purpose: calling .timestamp() on a
    # naive datetime interprets it as *local* time, which shifted the window
    # by the host's UTC offset.
    return int(start.timestamp() * 1000), int(end.timestamp() * 1000)


def fetch_match_window(args) -> List[Dict]:
    """Fetch finished football matches inside the requested sample window.

    Args:
        args: Parsed CLI namespace; reads ``start``, ``end``, ``days`` and
            ``max_matches``.

    Returns:
        Rows (newest first, at most ``args.max_matches``) with the keys
        match_id, score_home, score_away, ht_score_home, ht_score_away,
        league_id and match_date. Days listed in EXCLUDED_DATES are skipped.
    """
    dsn = get_clean_dsn()
    if "?schema=" in dsn:
        dsn = dsn.split("?schema=")[0]

    start_ms, end_ms = _window_bounds_ms(args)

    # The clause is assembled from the module-level EXCLUDED_DATES constant
    # only — never from user input.
    excluded = sorted(EXCLUDED_DATES)
    excluded_clause = ""
    if excluded:
        ex_csv = ",".join(f"'{d}'" for d in excluded)
        excluded_clause = (
            f" AND to_timestamp(mst_utc/1000)::date "
            f"NOT IN ({ex_csv})"
        )

    query = f"""
        SELECT id AS match_id,
               score_home, score_away,
               ht_score_home, ht_score_away,
               league_id,
               to_timestamp(mst_utc/1000)::date AS match_date
        FROM matches
        WHERE sport='football'
          AND status='FT'
          AND score_home IS NOT NULL
          AND score_away IS NOT NULL
          AND mst_utc >= %s
          AND mst_utc < %s
          {excluded_clause}
        ORDER BY mst_utc DESC
        LIMIT %s
    """

    # psycopg2's connection context manager only ends the transaction; it
    # does not close the connection, so close it explicitly.
    conn = psycopg2.connect(dsn)
    try:
        with conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(query, (start_ms, end_ms, args.max_matches))
            return cur.fetchall()
    finally:
        conn.close()
|
|
|
|
|
|
# ── Per-bet capture ───────────────────────────────────────────────────
|
|
def capture_bet_row(match: Dict, package: Dict) -> Dict[str, Any]:
    """Flatten one orchestrator package plus the real score into a flat row.

    Args:
        match: DB row for the match (scores, ids, date).
        package: Orchestrator response for the same match.

    Returns:
        A flat dict (one CSV line) with the pick, its settlement, the
        betting-brain signals and contextual flags.
    """
    def section(key: str):
        # Missing or falsy sections collapse to an empty dict.
        return package.get(key) or {}

    main = section("main_pick")
    brain = main.get("betting_brain") or {}
    advice = section("bet_advice")
    v27 = section("v27_engine")
    risk = section("risk")
    quality = section("data_quality")
    htft_probs = (section("market_board").get("HTFT") or {}).get("probs") or {}
    league_name = section("match_info").get("league_name")

    sh = match["score_home"]
    sa = match["score_away"]
    htsh = match["ht_score_home"]
    htsa = match["ht_score_away"]

    market = main.get("market")
    pick = main.get("pick")
    odds_val = _f(main.get("odds"))
    stake = _f(main.get("stake_units"), 1.0)
    playable = bool(main.get("playable")) and bool(advice.get("playable"))

    won = None
    if market and pick:
        won = resolve_outcome(market, pick, sh, sa, htsh, htsa)
    profit = 0.0
    if playable:
        profit = compute_unit_profit(won, stake, odds_val)

    # Reversal context (only meaningful for MS picks): probability that the
    # picked side leads at half time but does not win at full time.
    rev_prob = None
    if market == "MS" and pick in ("1", "2"):
        first, second = ("1/2", "1/X") if pick == "1" else ("2/1", "2/X")
        rev_prob = _f(htft_probs.get(first), 0.0) + _f(htft_probs.get(second), 0.0)

    def tags(source: Dict, key: str) -> str:
        return ";".join(source.get(key) or [])

    row: Dict[str, Any] = {
        "match_id": match["match_id"],
        "match_date": str(match["match_date"]),
        "league_id": match.get("league_id"),
        "score_home": sh,
        "score_away": sa,
        "ht_score_home": htsh,
        "ht_score_away": htsa,
    }
    # Pick and settlement.
    row.update({
        "market": market,
        "pick": pick,
        "odds": odds_val,
        "stake_units": stake,
        "playable": playable,
        "won": won,
        "unit_profit": profit,
        "raw_confidence": _f(main.get("raw_confidence")),
        "calibrated_confidence": _f(main.get("calibrated_confidence")),
        "play_score": _f(main.get("play_score")),
        "ev_edge": _f(main.get("ev_edge")),
        "bet_grade": main.get("bet_grade"),
        "is_value_sniper": bool(main.get("is_value_sniper")),
    })
    # Betting-brain signals.
    row.update({
        "bb_score": _f(brain.get("score")),
        "bb_action": brain.get("action"),
        "bb_vetoes": tags(brain, "vetoes"),
        "bb_issues": tags(brain, "issues"),
        "bb_positives": tags(brain, "positives"),
        "bb_model_prob": _f(brain.get("model_prob")),
        "bb_implied_prob": _f(brain.get("implied_prob")),
        "bb_model_market_gap": _f(brain.get("model_market_gap")),
        "bb_divergence": _f(brain.get("divergence")),
        "bb_trap_market": bool(brain.get("trap_market_flag")),
    })
    # Context.
    row.update({
        "v27_consensus": v27.get("consensus"),
        "data_quality_score": _f(quality.get("score")),
        "data_quality_flags": tags(quality, "flags"),
        "risk_level": (risk.get("level") if isinstance(risk, dict) else None),
        "odds_reliability": _f(main.get("odds_reliability")),
        "htft_reversal_prob": rev_prob,
        "htft_top_pick": _argmax(htft_probs),
        "league_name": league_name,
        "is_cup": _is_cup(league_name or ""),
        "model_version": package.get("model_version"),
        "decision_reason": main.get("pick_reason") or advice.get("reason"),
    })
    return row
|
|
|
|
|
|
def _f(x: Any, default: Optional[float] = None) -> Optional[float]:
|
|
try:
|
|
return float(x) if x is not None else default
|
|
except (TypeError, ValueError):
|
|
return default
|
|
|
|
|
|
def _argmax(d: Dict[str, Any]) -> Optional[str]:
    """Return the key with the largest numeric value, or None.

    Values that are missing or not convertible to float count as 0.0; the
    first key wins ties. None is returned for an empty mapping or when no
    value exceeds the -1.0 starting threshold.
    """
    winner: Optional[str] = None
    top = -1.0
    for key in d:
        score = _f(d[key], 0.0) or 0.0
        if score > top:
            winner = key
            top = score
    return winner
|
|
|
|
|
|
_CUP_KEYWORDS = ("kupa", "cup", "coupe", "copa", "coppa", "pokal", "trophy",
|
|
"shield", "ziraat", "süper kupa", "super cup", "beker", "taça", "taca")
|
|
|
|
|
|
def _is_cup(name: str) -> bool:
|
|
n = (name or "").lower()
|
|
return any(kw in n for kw in _CUP_KEYWORDS)
|
|
|
|
|
|
# ── Aggregation helpers ────────────────────────────────────────────────
|
|
def _bucket(value: Optional[float], edges: List[float]) -> Optional[str]:
|
|
if value is None:
|
|
return None
|
|
for i, edge in enumerate(edges):
|
|
if value < edge:
|
|
if i == 0:
|
|
return f"<{edge}"
|
|
return f"{edges[i-1]}-{edge}"
|
|
return f">={edges[-1]}"
|
|
|
|
|
|
def _summary_stats(rows: List[Dict]) -> Dict[str, Any]:
|
|
if not rows:
|
|
return {"n": 0}
|
|
settled = [r for r in rows if r["playable"] and r["won"] is not None]
|
|
won = sum(1 for r in settled if r["won"])
|
|
lost = sum(1 for r in settled if not r["won"])
|
|
profit = sum(float(r["unit_profit"]) for r in settled)
|
|
staked = sum(float(r["stake_units"]) for r in settled)
|
|
return {
|
|
"n_total": len(rows),
|
|
"n_playable_settled": len(settled),
|
|
"wins": won,
|
|
"losses": lost,
|
|
"hit_rate_pct": round(100.0 * won / len(settled), 2) if settled else None,
|
|
"unit_profit": round(profit, 3),
|
|
"staked": round(staked, 3),
|
|
"roi_pct": round(100.0 * profit / staked, 2) if staked else None,
|
|
}
|
|
|
|
|
|
def aggregate(rows: List[Dict]) -> Dict[str, Any]:
    """Build overall and per-dimension summary statistics.

    Args:
        rows: Per-bet rows as produced by ``capture_bet_row``.

    Returns:
        A dict with ``overall`` (all rows) plus ``by_market``,
        ``by_confidence``, ``by_odds``, ``by_grade`` and ``by_competition``
        breakdowns computed over the playable rows only. Rows whose
        confidence or odds is missing are left out of the respective band
        breakdown.
    """
    out: Dict[str, Any] = {"overall": _summary_stats(rows)}

    market_buckets: Dict[Any, List[Dict]] = defaultdict(list)
    conf_buckets: Dict[Any, List[Dict]] = defaultdict(list)
    odds_buckets: Dict[Any, List[Dict]] = defaultdict(list)
    grade_buckets: Dict[Any, List[Dict]] = defaultdict(list)
    cup_buckets: Dict[Any, List[Dict]] = defaultdict(list)

    for r in rows:
        if not r["playable"]:
            continue
        market_buckets[r["market"] or "?"].append(r)
        conf_buckets[_bucket(r["calibrated_confidence"],
                             [45, 50, 55, 60, 65, 70, 80])].append(r)
        odds_buckets[_bucket(r["odds"], [1.3, 1.5, 1.8, 2.2, 3.0, 5.0])].append(r)
        grade_buckets[r["bet_grade"] or "?"].append(r)
        cup_buckets["cup" if r["is_cup"] else "league"].append(r)

    out["by_market"] = {k: _summary_stats(v) for k, v in market_buckets.items()}
    # `if k` drops the None bucket that collects rows without a value.
    out["by_confidence"] = {k: _summary_stats(v) for k, v in conf_buckets.items() if k}
    out["by_odds"] = {k: _summary_stats(v) for k, v in odds_buckets.items() if k}
    out["by_grade"] = {k: _summary_stats(v) for k, v in grade_buckets.items()}
    out["by_competition"] = {k: _summary_stats(v) for k, v in cup_buckets.items()}
    return out
|
|
|
|
|
|
def loss_diagnostics(rows: List[Dict]) -> Dict[str, Any]:
    """Describe which signals are over-represented among the losing bets.

    Args:
        rows: Per-bet rows; only playable rows with ``won is False`` are
            analysed.

    Returns:
        ``{"n_losses": 0}`` when there is no loss. Otherwise a dict with
        the loss count, the summed ``unit_profit`` of the losses, a
        ``patterns`` mapping of label -> (count, percent of losses), loss
        counts by market and by league (top 10), and the 15 most common
        betting-brain issue and veto tags.
    """
    losses = [r for r in rows if r["playable"] and r["won"] is False]
    if not losses:
        return {"n_losses": 0}
    n = len(losses)

    def share(predicate) -> Tuple[int, float]:
        c = sum(1 for r in losses if predicate(r))
        return c, round(100.0 * c / n, 2)

    def known_and_below(field: str, threshold: float):
        # A missing score is not counted as low, but a genuine 0.0 is.
        # (`value or 1` used to turn 0.0 into 1 and hide the worst cases.)
        def predicate(r: Dict) -> bool:
            value = r.get(field)
            return value is not None and value < threshold
        return predicate

    diagnostics = {
        "n_losses": n,
        "total_loss_units": round(sum(float(r["unit_profit"]) for r in losses), 3),
        "patterns": {
            "high_htft_reversal_prob (>=0.20)": share(
                lambda r: (r.get("htft_reversal_prob") or 0) >= 0.20
            ),
            "cup_match": share(lambda r: r["is_cup"]),
            "low_league_reliability (<0.45)": share(
                known_and_below("odds_reliability", 0.45)
            ),
            "v27_disagree": share(lambda r: r.get("v27_consensus") == "DISAGREE"),
            "trap_market_flagged": share(lambda r: r.get("bb_trap_market")),
            # A missing confidence is deliberately counted as low here.
            "low_calibrated_conf (<55)": share(
                lambda r: (r.get("calibrated_confidence") or 0) < 55
            ),
            "high_odds_underdog (>=2.5)": share(
                lambda r: (r.get("odds") or 0) >= 2.5
            ),
            "low_data_quality (<0.55)": share(
                known_and_below("data_quality_score", 0.55)
            ),
            "high_risk_level": share(
                lambda r: r.get("risk_level") in ("HIGH", "EXTREME")
            ),
            "inferred_features": share(
                lambda r: "ai_features_inferred_from_history" in (r.get("data_quality_flags") or "")
            ),
        },
        "by_market": Counter(r["market"] for r in losses).most_common(),
        "by_league": Counter(r.get("league_name") for r in losses).most_common(10),
    }

    # Top issue / veto tags from betting_brain across losses. The row fields
    # are ";"-joined strings; empty fragments are skipped.
    issue_counter: Counter = Counter()
    veto_counter: Counter = Counter()
    for r in losses:
        issue_counter.update(t for t in (r.get("bb_issues") or "").split(";") if t)
        veto_counter.update(t for t in (r.get("bb_vetoes") or "").split(";") if t)
    diagnostics["top_bb_issues_in_losses"] = issue_counter.most_common(15)
    diagnostics["top_bb_vetoes_in_losses"] = veto_counter.most_common(15)
    return diagnostics
|
|
|
|
|
|
# ── Recommendations ────────────────────────────────────────────────────
|
|
def make_recommendations(rows: List[Dict], agg: Dict[str, Any],
                         diag: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Turn aggregate and loss statistics into a list of tuning suggestions.

    Args:
        rows: Per-bet rows (not read by the current rules).
        agg: Output of ``aggregate``.
        diag: Output of ``loss_diagnostics``.

    Returns:
        Recommendation dicts in rule order: losing markets, losing
        confidence bands, then the loss-pattern rules. Empty when no
        playable bet was settled.
    """
    overall = agg.get("overall") or {}
    if not overall.get("n_playable_settled"):
        return []

    recs: List[Dict[str, Any]] = []

    # Markets that trail the overall hit rate by more than 10 points AND
    # lose more than 10% ROI on at least 30 settled bets.
    overall_hit = overall.get("hit_rate_pct") or 0.0
    for market, stats in (agg.get("by_market") or {}).items():
        n = stats.get("n_playable_settled") or 0
        hit = stats.get("hit_rate_pct")
        roi = stats.get("roi_pct")
        if n < 30 or hit is None or roi is None:
            continue
        if not (roi < -10 and hit < overall_hit - 10):
            continue
        recs.append({
            "type": "drop_market",
            "market": market,
            "evidence": f"hit={hit}%, roi={roi}%, n={n} — chronic loser",
            "suggested_fix": f"Add veto in betting_brain when market=={market} unless overwhelming evidence",
            "estimated_loss_prevented_units": round(-(stats.get("unit_profit") or 0), 2),
        })

    # Confidence bands that lose more than 8% ROI on at least 40 settled
    # bets even though they passed the play threshold.
    for band, stats in (agg.get("by_confidence") or {}).items():
        n = stats.get("n_playable_settled") or 0
        roi = stats.get("roi_pct")
        if n < 40 or roi is None or not roi < -8:
            continue
        recs.append({
            "type": "raise_confidence_threshold",
            "confidence_band": band,
            "evidence": f"n={n}, roi={roi}%",
            "suggested_fix": f"Raise MIN_BET_SCORE or market_min_conf above {band.split('-')[0]}",
        })

    # Loss-pattern rules: (pattern label, minimum share of losses in %,
    # recommendation type, evidence template, suggested fix).
    pattern_rules = (
        ("cup_match", 25, "cup_match_filter",
         "{share}% of losses are cup matches",
         "Tighten betting_brain thresholds for is_cup_match=True picks"),
        ("high_htft_reversal_prob (>=0.20)", 15, "tighten_reversal_check",
         "{share}% of losses had HTFT reversal prob >=0.20 (already partial fix)",
         "Lower reversal threshold in betting_brain from 0.25 to 0.20 for veto trigger"),
        ("low_league_reliability (<0.45)", 20, "league_reliability_filter",
         "{share}% of losses in low-reliability leagues (<0.45)",
         "Add hard veto when odds_reliability<0.45 for non-value-sniper picks"),
    )
    patterns = diag.get("patterns") or {}
    for label, min_share, rec_type, evidence, fix in pattern_rules:
        pct = patterns.get(label, (0, 0))[1]
        if pct >= min_share:
            recs.append({
                "type": rec_type,
                "evidence": evidence.format(share=pct),
                "suggested_fix": fix,
            })

    return recs
|
|
|
|
|
|
# ── CSV / report writers ───────────────────────────────────────────────
|
|
def write_csv(rows: List[Dict], path: str):
    """Write *rows* to *path* as UTF-8 CSV, one line per row plus a header.

    The column order is taken from the keys of the first row. Nothing is
    written (and no file is created) when *rows* is empty.
    """
    import csv

    if len(rows) == 0:
        return

    columns = [key for key in rows[0]]
    with open(path, "w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        writer.writerows(rows)
|
|
|
|
|
|
def write_text_summary(rows: List[Dict], agg: Dict, diag: Dict,
                       recs: List[Dict], path: str, args):
    """Write the human-readable report to *path* (UTF-8).

    Args:
        rows: Per-bet rows (not read here; kept for interface symmetry).
        agg: Output of ``aggregate``.
        diag: Output of ``loss_diagnostics``.
        recs: Output of ``make_recommendations``.
        path: Destination file; overwritten if it exists.
        args: Parsed CLI namespace; reads ``start``, ``end``, ``days`` and
            ``max_matches``.
    """
    lines: List[str] = []
    push = lines.append
    rule = "-" * 78

    def band_order(item):
        # Band labels look like "<45", "45-50" or ">=80". A plain string
        # sort puts "<45" after every numeric band, so order by position.
        label = item[0]
        if label.startswith("<"):
            return (0, 0.0)
        if label.startswith(">="):
            return (2, 0.0)
        try:
            return (1, float(label.split("-", 1)[0]))
        except ValueError:
            return (3, 0.0)

    def push_band_table(title: str, agg_key: str):
        push(title)
        push(rule)
        push(f" {'band':<10} {'n':>6} {'hit%':>7} {'roi%':>7}")
        for band, s in sorted((agg.get(agg_key) or {}).items(), key=band_order):
            push(f" {band:<10} {s.get('n_playable_settled',0):>6} "
                 f"{str(s.get('hit_rate_pct','')):>7} "
                 f"{str(s.get('roi_pct','')):>7}")
        push("")

    push("=" * 78)
    push("DIAGNOSTIC BACKTEST REPORT")
    push("=" * 78)
    push(f"Generated: {datetime.now().isoformat(timespec='seconds')}")
    push(f"Sample window: start={args.start or f'-{args.days}d'}, end={args.end or 'now'}")
    push(f"Max matches: {args.max_matches}")
    push(f"Excluded days: {sorted(EXCLUDED_DATES)}")
    push("")

    push("OVERALL")
    push(rule)
    overall = agg.get("overall") or {}
    for k in ("n_total", "n_playable_settled", "wins", "losses",
              "hit_rate_pct", "unit_profit", "staked", "roi_pct"):
        push(f" {k:25}: {overall.get(k)}")
    push("")

    push("PER MARKET")
    push(rule)
    push(f" {'market':<8} {'n':>6} {'hit%':>7} {'profit':>9} {'roi%':>7}")
    # Largest markets first.
    for market, s in sorted((agg.get("by_market") or {}).items(),
                            key=lambda kv: -(kv[1].get("n_playable_settled") or 0)):
        push(f" {market:<8} {s.get('n_playable_settled',0):>6} "
             f"{str(s.get('hit_rate_pct','')):>7} "
             f"{str(s.get('unit_profit','')):>9} "
             f"{str(s.get('roi_pct','')):>7}")
    push("")

    push_band_table("PER CALIBRATED CONFIDENCE BAND", "by_confidence")
    push_band_table("PER ODDS BAND", "by_odds")

    push("LEAGUE vs CUP")
    push(rule)
    for k, s in (agg.get("by_competition") or {}).items():
        push(f" {k:<8} n={s.get('n_playable_settled',0):>4} "
             f"hit={s.get('hit_rate_pct','-')}% roi={s.get('roi_pct','-')}%")
    push("")

    push("LOSS DIAGNOSTICS")
    push(rule)
    push(f" total losses: {diag.get('n_losses')}")
    push(f" total lost units: {diag.get('total_loss_units')}")
    push(f" By market: {diag.get('by_market')}")
    push(" Loss patterns (count, % of losses):")
    for pattern, (c, pct) in (diag.get("patterns") or {}).items():
        push(f" {pattern:<55} {c:>4} ({pct}%)")
    push(" Top betting_brain issues seen in losses:")
    for issue, c in (diag.get("top_bb_issues_in_losses") or []):
        push(f" {issue:<55} {c}")
    push(" Top betting_brain vetoes (in losses — i.e. veto fired but bet still went through value-sniper override):")
    for veto, c in (diag.get("top_bb_vetoes_in_losses") or []):
        push(f" {veto:<55} {c}")
    push("")

    push("RECOMMENDATIONS")
    push(rule)
    if not recs:
        push(" (none surfaced — sample too small or no clear pattern)")
    for r in recs:
        push(f" • [{r['type']}]")
        for k, v in r.items():
            if k == "type":
                continue
            push(f" {k}: {v}")
        push("")
    push("=" * 78)

    with open(path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
|
|
|
|
|
|
# ── Main loop ─────────────────────────────────────────────────────────
|
|
def main():
    """CLI entry point: run the backtest and write the CSV/JSON/TXT reports.

    Steps: parse arguments, load and warm the orchestrator, fetch the match
    window, analyse every match in-process, aggregate, then write the
    reports into REPORTS_DIR. Ctrl-C during the match loop stops processing
    and still writes the partial results.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--days", type=int, default=14,
                        help="Backwards window from now (default 14)")
    parser.add_argument("--max-matches", type=int, default=2000,
                        help="Hard cap on matches processed (default 2000)")
    parser.add_argument("--start", help="Start date YYYY-MM-DD (overrides --days)")
    parser.add_argument("--end", help="End date YYYY-MM-DD")
    parser.add_argument("--progress-interval", type=int, default=50)
    args = parser.parse_args()

    print("=" * 70)
    print("DIAGNOSTIC BACKTEST")
    print("=" * 70)
    print("Loading orchestrator...")
    orch = get_single_match_orchestrator()
    # Warm the V25 and V27 predictors so the first match doesn't pay the
    # load cost. Best effort: a failed warm-up is reported, not fatal.
    try:
        orch._get_v25_predictor()
    except Exception as e:
        print(f" v25 warmup: {e}")
    try:
        orch._get_v27_predictor()
    except Exception as e:
        print(f" v27 warmup: {e}")

    print("Fetching match window...")
    matches = fetch_match_window(args)
    n = len(matches)
    print(f" {n} matches selected")

    if not matches:
        print("No matches to process. Exiting.")
        return

    rows: List[Dict[str, Any]] = []
    errors: List[Tuple[str, str]] = []
    interval = args.progress_interval
    t0 = time.time()

    for i, m in enumerate(matches, start=1):
        mid = str(m["match_id"])
        try:
            pkg = orch.analyze_match(mid)
            if pkg is not None:
                rows.append(capture_bet_row(m, pkg))
        except KeyboardInterrupt:
            print("\nInterrupted, writing partial results...")
            break
        except Exception as e:
            # One bad match must not abort the run; keep the message and
            # show a full traceback for the first few failures only.
            errors.append((mid, str(e)))
            if len(errors) <= 5:
                traceback.print_exc()

        # A non-positive interval disables progress output (and avoids a
        # modulo-by-zero); a zero elapsed time must not divide either.
        if interval > 0 and i % interval == 0:
            elapsed = time.time() - t0
            rate = i / elapsed if elapsed > 0 else 0.0
            eta = (n - i) / rate if rate else 0
            playable_so_far = sum(1 for r in rows if r["playable"])
            print(f" [{i}/{n}] rate={rate:.1f}/s eta={eta/60:.1f}min "
                  f"playable={playable_so_far} errors={len(errors)}")

    print(f"\nProcessed {len(rows)} rows in {(time.time()-t0):.1f}s "
          f"({len(errors)} errors)")

    # Aggregate
    print("Aggregating...")
    agg = aggregate(rows)
    diag = loss_diagnostics(rows)
    recs = make_recommendations(rows, agg, diag)

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_path = os.path.join(REPORTS_DIR, f"diagnostic_backtest_{stamp}.csv")
    json_path = os.path.join(REPORTS_DIR, f"diagnostic_backtest_{stamp}.json")
    txt_path = os.path.join(REPORTS_DIR, f"diagnostic_backtest_{stamp}.txt")

    write_csv(rows, csv_path)
    with open(json_path, "w", encoding="utf-8") as f:
        # default=str stringifies anything json cannot encode natively.
        json.dump({"args": vars(args), "aggregate": agg, "loss_diagnostics": diag,
                   "recommendations": recs, "errors_sample": errors[:20]},
                  f, indent=2, default=str)
    write_text_summary(rows, agg, diag, recs, txt_path, args)

    print("\nOutputs:")
    if rows:
        print(f" CSV: {csv_path}")
    else:
        # write_csv creates no file for an empty row list.
        print(" CSV: (not written — no rows)")
    print(f" JSON: {json_path}")
    print(f" TXT: {txt_path}")
    print("\nOverall:", agg.get("overall"))
|
|
|
|
|
|
# Run the backtest only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|