"""Backtest HT/FT reversal and upset-oriented picks on recent finished matches.

The script loads the most recent finished football matches from the database,
runs ``SingleMatchOrchestrator`` in its ``v25`` and ``v26`` engine modes for
each match, grades every HT/FT pick against the stored half-time / full-time
scores and writes JSON, CSV and Markdown reports into the ``reports``
directory under ``AI_ENGINE_DIR``.
"""
from __future__ import annotations

import argparse
import csv
import json
import math
import sys
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Optional

import psycopg2
from psycopg2.extras import RealDictCursor

# The engine package lives one directory above this script; make it importable
# before the project import below.
AI_ENGINE_DIR = Path(__file__).resolve().parents[1]
if str(AI_ENGINE_DIR) not in sys.path:
    sys.path.insert(0, str(AI_ENGINE_DIR))

from services.single_match_orchestrator import SingleMatchOrchestrator  # noqa: E402

STRATEGIES = ("v25_aggressive", "v26_surprise", "v26_aggressive", "v26_main_htft")
REVERSAL_LABELS = ("1/2", "2/1", "X/1", "X/2")

# Maps (market, pick) to the key under which the odds payload stores the price.
_ODDS_KEYS: Dict[str, Dict[str, str]] = {
    "HTFT": {
        "1/1": "htft_11",
        "1/X": "htft_1x",
        "1/2": "htft_12",
        "X/1": "htft_x1",
        "X/X": "htft_xx",
        "X/2": "htft_x2",
        "2/1": "htft_21",
        "2/X": "htft_2x",
        "2/2": "htft_22",
    },
    "MS": {"1": "ms_h", "X": "ms_d", "2": "ms_a"},
}

# Column order of the CSV report; every key a strategy row can carry is listed.
_CSV_FIELDS = (
    "match_id",
    "date",
    "league",
    "match",
    "ht_score",
    "final_score",
    "strategy",
    "market",
    "pick",
    "odds",
    "playable",
    "confidence",
    "result",
    "counted_in_roi",
    "profit_flat",
    "resolution_note",
    "source",
    "reversal_pick",
    "reversal_prob",
    "favorite_gap",
    "favorite_odd",
    "support_score",
    "odds_band_score",
    "odds_band_label",
    "league_reversal_rate",
    "league_strict_rev_rate",
    "referee_strict_rev_rate",
    "surprise_score",
    "reason_codes",
    "pick_reason",
)


@dataclass
class MatchContext:
    """Identity and final/half-time scores of one finished match."""

    match_id: str
    # Kick-off timestamp; `_format_date` treats it as epoch milliseconds.
    match_date_ms: int
    league: str
    home_team: str
    away_team: str
    final_home: int
    final_away: int
    # Half-time goals; None when the half-time score is unknown.
    ht_home: Optional[int]
    ht_away: Optional[int]

    @property
    def match_name(self) -> str:
        """Return ``"<home> vs <away>"``."""
        return f"{self.home_team} vs {self.away_team}"

    @property
    def final_score(self) -> str:
        """Return the full-time score as ``"H-A"``."""
        return f"{self.final_home}-{self.final_away}"

    @property
    def ht_score(self) -> str:
        """Return the half-time score as ``"H-A"``, or ``"-"`` when unknown."""
        if self.ht_home is None or self.ht_away is None:
            return "-"
        return f"{self.ht_home}-{self.ht_away}"


def _strip_schema_param(url: str) -> str:
    """Return *url* without a ``schema=...`` query parameter.

    Only the ``schema`` parameter is removed; any other query parameters are
    kept in their original order and spelling.
    """
    base, sep, query = url.partition("?")
    if not sep:
        return url
    kept = [part for part in query.split("&") if part and not part.startswith("schema=")]
    return f"{base}?{'&'.join(kept)}" if kept else base


def _resolve_dsn() -> str:
    """Read ``DATABASE_URL`` from ``AI_ENGINE_DIR/.env`` and return it as a DSN.

    Surrounding quotes are removed from the value and the ``schema`` query
    parameter is dropped before the URL is returned.

    Raises:
        SystemExit: if the file is missing or has no ``DATABASE_URL=`` line.
    """
    env_path = AI_ENGINE_DIR / ".env"
    if env_path.exists():
        for raw_line in env_path.read_text(encoding="utf-8").splitlines():
            line = raw_line.strip()
            if not line.startswith("DATABASE_URL="):
                continue
            value = line.split("=", 1)[1].strip()
            # .env values are often quoted; the quotes are not part of the URL.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1]
            return _strip_schema_param(value)
    raise SystemExit("DATABASE_URL not found in ai-engine/.env")


def _fetch_matches(dsn: str, limit: int) -> list[MatchContext]:
    """Return up to *limit* most recent finished football matches.

    Only rows with status ``FT``, sport ``football`` and non-null final and
    half-time scores are selected, newest first by ``mst_utc``.
    """
    query = """
        SELECT
            m.id,
            m.mst_utc,
            COALESCE(l.name, 'Unknown League') AS league,
            COALESCE(ht.name, 'Home') AS home_team,
            COALESCE(at.name, 'Away') AS away_team,
            COALESCE(m.score_home, 0) AS score_home,
            COALESCE(m.score_away, 0) AS score_away,
            m.ht_score_home,
            m.ht_score_away
        FROM matches m
        LEFT JOIN leagues l ON l.id = m.league_id
        LEFT JOIN teams ht ON ht.id = m.home_team_id
        LEFT JOIN teams at ON at.id = m.away_team_id
        WHERE m.status = 'FT'
          AND m.sport = 'football'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
          AND m.ht_score_home IS NOT NULL
          AND m.ht_score_away IS NOT NULL
        ORDER BY m.mst_utc DESC
        LIMIT %s
    """
    # psycopg2's `with connection:` only ends the transaction; it does not
    # close the connection, so close it explicitly.
    conn = psycopg2.connect(dsn)
    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(query, (limit,))
            rows = cur.fetchall()
    finally:
        conn.close()

    return [
        MatchContext(
            match_id=str(row["id"]),
            match_date_ms=int(row["mst_utc"] or 0),
            league=str(row["league"] or "Unknown League"),
            home_team=str(row["home_team"] or "Home"),
            away_team=str(row["away_team"] or "Away"),
            final_home=int(row["score_home"] or 0),
            final_away=int(row["score_away"] or 0),
            ht_home=int(row["ht_score_home"]) if row.get("ht_score_home") is not None else None,
            ht_away=int(row["ht_score_away"]) if row.get("ht_score_away") is not None else None,
        )
        for row in rows
    ]


def _safe_float(value: Any) -> float:
    """Convert *value* to a finite float, returning 0.0 when that is impossible.

    NaN and infinities are also mapped to 0.0 so they can never reach the
    profit arithmetic or the JSON report.
    """
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        return 0.0
    return number if math.isfinite(number) else 0.0


def _outcome_symbol(home: int, away: int) -> str:
    """Return ``"1"`` for a home lead, ``"2"`` for an away lead, ``"X"`` for a draw."""
    if home > away:
        return "1"
    if home < away:
        return "2"
    return "X"


def _resolve_htft(pick: str, context: MatchContext) -> Dict[str, Any]:
    """Grade an HT/FT *pick* (e.g. ``"X/1"``) against the match scores.

    Returns a dict with ``result`` (``"WON"``, ``"LOST"`` or ``"UNRESOLVED"``),
    ``won`` (bool, or None when unresolved) and a short ``note``.
    """
    if not pick or "/" not in str(pick):
        return {"result": "UNRESOLVED", "won": None, "note": "htft_pick_invalid"}
    # Without a half-time score the bet cannot be graded; treating the missing
    # score as 0-0 would silently settle it as an "X" half.
    if context.ht_home is None or context.ht_away is None:
        return {"result": "UNRESOLVED", "won": None, "note": "ht_score_missing"}

    half = _outcome_symbol(context.ht_home, context.ht_away)
    full = _outcome_symbol(context.final_home, context.final_away)
    actual = f"{half}/{full}"
    won = str(pick).strip().upper() == actual
    return {"result": "WON" if won else "LOST", "won": won, "note": f"actual={actual}"}


def _market_odds(odds: Dict[str, Any], market: str, pick: str) -> float:
    """Look up the price for *pick* in *market* inside the *odds* payload.

    Returns 0.0 when the market/pick is unknown, the price is missing, or the
    price is not greater than 1.0.
    """
    # Normalise the pick the same way `_resolve_htft` does before comparing.
    key = _ODDS_KEYS.get(market, {}).get(str(pick).strip().upper())
    if not key:
        return 0.0
    value = _safe_float((odds or {}).get(key))
    return value if value > 1.0 else 0.0


def _evaluate_pick(
    *,
    strategy: str,
    market: str,
    pick: str,
    odds: Any,
    playable: bool,
    confidence: Any,
    extra: Optional[Dict[str, Any]],
    context: MatchContext,
) -> Dict[str, Any]:
    """Build the report row for one strategy pick.

    Only HT/FT picks are graded; any other market is reported as UNRESOLVED.
    A pick counts towards ROI when it is playable, is an HT/FT pick, has odds
    above 1.01 and resolved to WON or LOST. Profit is flat-stake: ``odds - 1``
    for a win, ``-1`` for a loss, ``0`` when not counted. Keys from *extra*
    are merged into the row last.
    """
    odds_value = _safe_float(odds)
    if market == "HT/FT":
        market = "HTFT"

    if market == "HTFT":
        resolution = _resolve_htft(pick, context)
    else:
        resolution = {"result": "UNRESOLVED", "won": None, "note": "non_htft_market"}

    counted = bool(
        playable
        and market == "HTFT"
        and odds_value > 1.01
        and resolution["result"] in {"WON", "LOST"}
    )
    profit = 0.0
    if counted:
        profit = (odds_value - 1.0) if resolution["result"] == "WON" else -1.0

    row = {
        "strategy": strategy,
        "market": market,
        "pick": pick,
        "odds": round(odds_value, 2),
        "playable": playable,
        "confidence": round(_safe_float(confidence), 1),
        "result": resolution["result"],
        "counted_in_roi": counted,
        "profit_flat": round(profit, 4),
        "resolution_note": resolution["note"],
    }
    if extra:
        row.update(extra)
    return row


def _extract_strategy_rows(
    *,
    context: MatchContext,
    odds_data: Dict[str, Any],
    v25: Dict[str, Any],
    v26: Dict[str, Any],
) -> Dict[str, Optional[Dict[str, Any]]]:
    """Turn the v25/v26 analysis payloads into one graded row per strategy.

    The result has one entry per name in ``STRATEGIES``; a strategy without a
    pick in the payloads maps to None.
    """
    strategies: Dict[str, Optional[Dict[str, Any]]] = {name: None for name in STRATEGIES}

    v25_aggressive = v25.get("aggressive_pick") or {}
    if v25_aggressive.get("pick"):
        pick = str(v25_aggressive.get("pick"))
        strategies["v25_aggressive"] = _evaluate_pick(
            strategy="v25_aggressive",
            market=str(v25_aggressive.get("market") or "HTFT"),
            pick=pick,
            odds=_market_odds(odds_data, "HTFT", pick),
            playable=True,
            confidence=v25_aggressive.get("confidence"),
            extra={
                "source": "v25.aggressive_pick",
                "reversal_pick": pick,
            },
            context=context,
        )

    v26_surprise = v26.get("surprise_pick") or {}
    v26_hunter = v26.get("surprise_hunter") or {}
    if v26_surprise.get("pick"):
        pick = str(v26_surprise.get("raw_pick") or v26_surprise.get("pick"))
        # reason_codes may be absent, None, or hold non-string items.
        reason_codes = ",".join(str(code) for code in (v26_hunter.get("reason_codes") or []))
        strategies["v26_surprise"] = _evaluate_pick(
            strategy="v26_surprise",
            market=str(v26_surprise.get("market") or "HTFT"),
            pick=pick,
            odds=v26_surprise.get("odds") or _market_odds(odds_data, "HTFT", pick),
            playable=bool(v26_surprise.get("playable")),
            confidence=v26_surprise.get("calibrated_confidence", v26_surprise.get("confidence")),
            extra={
                "source": "v26.surprise_pick",
                "surprise_score": round(_safe_float(v26_surprise.get("surprise_score")), 1),
                "support_score": round(_safe_float(v26_surprise.get("support_score")), 1),
                "reversal_pick": v26_hunter.get("reversal_pick"),
                "reversal_prob": round(_safe_float(v26_hunter.get("reversal_prob")), 4),
                "favorite_gap": round(_safe_float(v26_hunter.get("favorite_gap")), 3),
                "favorite_odd": round(_safe_float(v26_hunter.get("favorite_odd")), 2),
                "odds_band_score": round(_safe_float(v26_hunter.get("odds_band_score")), 3),
                "odds_band_label": str(v26_hunter.get("odds_band_label") or ""),
                "league_reversal_rate": round(_safe_float(v26_hunter.get("league_reversal_rate")), 4),
                "league_strict_rev_rate": round(_safe_float(v26_hunter.get("league_strict_rev_rate")), 4),
                "referee_strict_rev_rate": round(_safe_float(v26_hunter.get("referee_strict_rev_rate")), 4),
                "reason_codes": reason_codes,
            },
            context=context,
        )

    v26_aggressive = v26.get("aggressive_pick") or {}
    if v26_aggressive.get("pick"):
        pick = str(v26_aggressive.get("pick"))
        strategies["v26_aggressive"] = _evaluate_pick(
            strategy="v26_aggressive",
            market=str(v26_aggressive.get("market") or "HTFT"),
            pick=pick,
            odds=v26_aggressive.get("odds") or _market_odds(odds_data, "HTFT", pick),
            playable=True,
            confidence=v26_aggressive.get("confidence"),
            extra={
                "source": "v26.aggressive_pick",
                "reversal_pick": pick,
            },
            context=context,
        )

    v26_main = v26.get("main_pick") or {}
    if str(v26_main.get("market") or "") == "HTFT" and v26_main.get("pick"):
        pick = str(v26_main.get("raw_pick") or v26_main.get("pick"))
        strategies["v26_main_htft"] = _evaluate_pick(
            strategy="v26_main_htft",
            market="HTFT",
            pick=pick,
            odds=v26_main.get("odds") or _market_odds(odds_data, "HTFT", pick),
            playable=bool(v26_main.get("playable")),
            confidence=v26_main.get("calibrated_confidence", v26_main.get("confidence")),
            extra={
                "source": "v26.main_pick",
                "pick_reason": v26_main.get("pick_reason"),
                "surprise_score": round(_safe_float(v26_main.get("surprise_score")), 1),
            },
            context=context,
        )

    return strategies


def _accumulate(bucket: Dict[str, float], payload: Dict[str, Any]) -> None:
    """Add one graded strategy row to a running ``defaultdict(float)`` bucket."""
    bucket["candidate"] += 1
    if not payload["counted_in_roi"]:
        return
    bucket["played"] += 1
    bucket["won" if payload["result"] == "WON" else "lost"] += 1
    bucket["profit"] += payload["profit_flat"]


def _summarize_bucket(bucket: Dict[str, float]) -> Dict[str, Any]:
    """Turn a running bucket into report totals plus ROI and hit rate.

    Missing counters are treated as zero; ROI and hit rate are percentages of
    played picks and are 0.0 when nothing was played.
    """
    played = int(bucket.get("played", 0))
    won = int(bucket.get("won", 0))
    lost = int(bucket.get("lost", 0))
    candidate = int(bucket.get("candidate", 0))
    profit = round(bucket.get("profit", 0.0), 4)
    roi = round((profit / played) * 100.0, 2) if played else 0.0
    hit = round((won / played) * 100.0, 2) if played else 0.0
    return {
        "candidates": candidate,
        "played": played,
        "won": won,
        "lost": lost,
        "profit_flat": profit,
        "roi_flat_pct": roi,
        "hit_rate_pct": hit,
    }


def _format_date(ms: int) -> str:
    """Format an epoch-milliseconds timestamp as a UTC ``YYYY-MM-DD`` date."""
    return datetime.fromtimestamp(ms / 1000, tz=timezone.utc).strftime("%Y-%m-%d")


def _md_cell(value: Any) -> str:
    """Render *value* as table-cell text, escaping ``|`` so the row stays intact."""
    return str(value).replace("|", "\\|")


def _build_markdown(report: Dict[str, Any]) -> str:
    """Render *report* as a Markdown document with summary and per-match tables."""
    lines: list[str] = [
        "# HT/FT + Upset Backtest",
        "",
        f"- Sample: last {report['sample_size']} finished football matches",
        "- Scope: only HT/FT reversal and upset-oriented picks",
        "- ROI: flat `1 unit` per played pick",
        f"- Generated at: {report['generated_at']}",
        "",
        "## Strategy Summary",
        "",
        "| Strategy | Candidates | Played | Won | Lost | Hit Rate | Profit | ROI |",
        "|---|---:|---:|---:|---:|---:|---:|---:|",
    ]
    for strategy in STRATEGIES:
        payload = report["summary"]["strategies"][strategy]
        lines.append(
            f"| {strategy} | {payload['candidates']} | {payload['played']} | {payload['won']} | "
            f"{payload['lost']} | {payload['hit_rate_pct']}% | {payload['profit_flat']:+.2f} | {payload['roi_flat_pct']:+.2f}% |"
        )

    lines.extend(
        [
            "",
            "## v26 Surprise By Reversal Type",
            "",
            "| Reversal | Candidates | Played | Won | Lost | Profit | ROI |",
            "|---|---:|---:|---:|---:|---:|---:|",
        ]
    )
    for reversal, payload in report["summary"]["v26_surprise_by_pick"].items():
        lines.append(
            f"| {reversal} | {payload['candidates']} | {payload['played']} | {payload['won']} | "
            f"{payload['lost']} | {payload['profit_flat']:+.2f} | {payload['roi_flat_pct']:+.2f}% |"
        )

    lines.extend(
        [
            "",
            "## Match Detail",
            "",
            "| Date | Match | HT | FT | v25 aggressive | v26 surprise | v26 aggressive | v26 main HTFT |",
            "|---|---|---|---|---|---|---|---|",
        ]
    )
    for match in report["matches"]:
        lines.append(
            f"| {_format_date(match['match_date_ms'])} | {_md_cell(match['match_name'])} | {match['ht_score']} | {match['final_score']} | "
            f"{_md_cell(match['v25_aggressive'])} | {_md_cell(match['v26_surprise'])} | "
            f"{_md_cell(match['v26_aggressive'])} | {_md_cell(match['v26_main_htft'])} |"
        )
    lines.append("")
    return "\n".join(lines)


def main() -> None:
    """Run the backtest and write the JSON, CSV and Markdown reports."""
    parser = argparse.ArgumentParser(description="HT/FT + upset focused backtest.")
    parser.add_argument("--limit", type=int, default=120, help="Number of finished matches to analyze.")
    args = parser.parse_args()

    dsn = _resolve_dsn()
    orchestrator = SingleMatchOrchestrator()
    matches = _fetch_matches(dsn, max(1, args.limit))

    strategy_buckets: Dict[str, Dict[str, float]] = {name: defaultdict(float) for name in STRATEGIES}
    v26_reversal_buckets: Dict[str, Dict[str, float]] = {label: defaultdict(float) for label in REVERSAL_LABELS}
    report_matches: list[Dict[str, Any]] = []
    csv_rows: list[Dict[str, Any]] = []

    for context in matches:
        data = orchestrator._load_match_data(context.match_id)  # noqa: SLF001
        if data is None:
            # Matches the orchestrator cannot load are left out of the sample.
            continue

        # The same orchestrator is reused; the engine mode is switched per run.
        orchestrator.engine_mode = "v25"
        v25 = orchestrator.analyze_match(context.match_id) or {}
        orchestrator.engine_mode = "v26"
        v26 = orchestrator.analyze_match(context.match_id) or {}

        extracted = _extract_strategy_rows(
            context=context,
            odds_data=data.odds_data or {},
            v25=v25,
            v26=v26,
        )

        match_row: Dict[str, Any] = {
            "match_id": context.match_id,
            "match_name": context.match_name,
            "league": context.league,
            "match_date_ms": context.match_date_ms,
            "ht_score": context.ht_score,
            "final_score": context.final_score,
        }

        for strategy, payload in extracted.items():
            if not payload:
                match_row[strategy] = "-"
                continue

            _accumulate(strategy_buckets[strategy], payload)
            if strategy == "v26_surprise":
                # Break the surprise strategy down by its reversal label as well.
                reversal_label = str(payload.get("reversal_pick") or "")
                if reversal_label in v26_reversal_buckets:
                    _accumulate(v26_reversal_buckets[reversal_label], payload)

            match_row[strategy] = (
                f"{payload['pick']} ({payload['result']}, {'played' if payload['counted_in_roi'] else 'not played'}, {payload['profit_flat']:+.2f})"
            )
            csv_rows.append(
                {
                    "match_id": context.match_id,
                    "date": _format_date(context.match_date_ms),
                    "league": context.league,
                    "match": context.match_name,
                    "ht_score": context.ht_score,
                    "final_score": context.final_score,
                    **payload,
                }
            )

        report_matches.append(match_row)

    report = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "sample_size": len(report_matches),
        "summary": {
            "strategies": {
                strategy: _summarize_bucket(bucket)
                for strategy, bucket in strategy_buckets.items()
            },
            "v26_surprise_by_pick": {
                label: _summarize_bucket(bucket)
                for label, bucket in v26_reversal_buckets.items()
            },
        },
        "matches": report_matches,
    }

    report_dir = AI_ENGINE_DIR / "reports"
    # A fresh checkout may not have the directory yet; writing would fail.
    report_dir.mkdir(parents=True, exist_ok=True)
    json_path = report_dir / "backtest_v26_shadow_htft_upset.json"
    csv_path = report_dir / "backtest_v26_shadow_htft_upset.csv"
    md_path = report_dir / "backtest_v26_shadow_htft_upset.md"

    json_path.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8")
    with csv_path.open("w", encoding="utf-8", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=list(_CSV_FIELDS))
        writer.writeheader()
        writer.writerows(csv_rows)
    md_path.write_text(_build_markdown(report), encoding="utf-8")

    print(f"[OK] JSON report written to {json_path}")
    print(f"[OK] CSV report written to {csv_path}")
    print(f"[OK] Markdown report written to {md_path}")


if __name__ == "__main__":
    main()