""" Backtest the live V2 predictor stack against recent finished football matches. This script uses the same path as production: database -> feature extractor -> betting predictor -> quant ranking. """ from __future__ import annotations import argparse import asyncio import sys from dataclasses import dataclass from pathlib import Path from sqlalchemy import text ROOT_DIR = Path(__file__).resolve().parents[1] if str(ROOT_DIR) not in sys.path: sys.path.insert(0, str(ROOT_DIR)) from core.quant import MarketPick, analyze_market from data.database import dispose_engine, get_session from features.extractor import extract_features from models.betting_engine import get_predictor @dataclass class BacktestStats: sampled_matches: int = 0 analyzed_matches: int = 0 skipped_matches: int = 0 ms_correct: int = 0 ou25_correct: int = 0 btts_correct: int = 0 main_pick_count: int = 0 main_pick_correct: int = 0 playable_pick_count: int = 0 playable_pick_correct: int = 0 playable_units_staked: float = 0.0 playable_units_profit: float = 0.0 def _parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser() parser.add_argument("--limit", type=int, default=50) parser.add_argument("--days", type=int, default=45) return parser.parse_args() def _actual_ms(score_home: int, score_away: int) -> str: if score_home > score_away: return "1" if score_home < score_away: return "2" return "X" def _actual_ou25(score_home: int, score_away: int) -> str: return "Over" if (score_home + score_away) > 2 else "Under" def _actual_btts(score_home: int, score_away: int) -> str: return "Yes" if score_home > 0 and score_away > 0 else "No" def _odds_map_from_features(feats) -> dict[str, dict[str, float]]: return { "MS": {"1": feats.odds_home, "X": feats.odds_draw, "2": feats.odds_away}, "OU25": {"Under": feats.odds_under25, "Over": feats.odds_over25}, "BTTS": {"No": feats.odds_btts_no, "Yes": feats.odds_btts_yes}, } def _best_pick(feats, all_probs: dict[str, dict[str, float]]) -> MarketPick | None: odds_map = _odds_map_from_features(feats) picks = [ analyze_market("MS", all_probs["MS"], odds_map["MS"], feats.data_quality_score), analyze_market("OU25", all_probs["OU25"], odds_map["OU25"], feats.data_quality_score), analyze_market("BTTS", all_probs["BTTS"], odds_map["BTTS"], feats.data_quality_score), ] ranked = sorted( [pick for pick in picks if pick.pick], key=lambda pick: pick.play_score, reverse=True, ) return ranked[0] if ranked else None def _pick_won(pick: MarketPick, actuals: dict[str, str]) -> bool: return actuals.get(pick.market) == pick.pick async def _load_match_rows(limit: int, days: int) -> list[dict[str, object]]: min_mst_utc = days * 86400000 query = text(""" SELECT m.id, m.match_name, m.score_home, m.score_away, m.mst_utc FROM matches m WHERE m.sport = 'football' AND m.score_home IS NOT NULL AND m.score_away IS NOT NULL AND m.mst_utc >= ( EXTRACT(EPOCH FROM NOW()) * 1000 - :min_mst_utc ) AND EXISTS ( SELECT 1 FROM odd_categories oc WHERE oc.match_id = m.id AND oc.name IN ('Maç Sonucu', '2,5 Alt/Üst', 'Karşılıklı Gol') ) ORDER BY m.mst_utc DESC LIMIT :limit """) async with get_session() as session: result = await session.execute( query, {"limit": limit, "min_mst_utc": min_mst_utc}, ) rows = result.mappings().all() return [dict(row) for row in rows] async def _run(limit: int, days: int) -> BacktestStats: stats = BacktestStats() predictor = get_predictor() rows = await _load_match_rows(limit, days) stats.sampled_matches = len(rows) async with get_session() as session: for row in rows: match_id = str(row["id"]) score_home = int(row["score_home"]) score_away = int(row["score_away"]) feats = await extract_features(session, match_id) if feats is None: stats.skipped_matches += 1 continue if feats.data_quality_score <= 0.0: stats.skipped_matches += 1 continue all_probs = predictor.predict_all(feats.to_model_array(), feats) stats.analyzed_matches += 1 actuals = { "MS": _actual_ms(score_home, score_away), "OU25": _actual_ou25(score_home, score_away), "BTTS": _actual_btts(score_home, score_away), } if max(all_probs["MS"], key=all_probs["MS"].get) == actuals["MS"]: stats.ms_correct += 1 if max(all_probs["OU25"], key=all_probs["OU25"].get) == actuals["OU25"]: stats.ou25_correct += 1 if max(all_probs["BTTS"], key=all_probs["BTTS"].get) == actuals["BTTS"]: stats.btts_correct += 1 best_pick = _best_pick(feats, all_probs) if best_pick is None: continue stats.main_pick_count += 1 if _pick_won(best_pick, actuals): stats.main_pick_correct += 1 if best_pick.playable: stats.playable_pick_count += 1 stats.playable_units_staked += best_pick.stake_units if _pick_won(best_pick, actuals): stats.playable_pick_correct += 1 stats.playable_units_profit += best_pick.stake_units * (best_pick.odds - 1.0) else: stats.playable_units_profit -= best_pick.stake_units return stats def _pct(numerator: int, denominator: int) -> float: if denominator <= 0: return 0.0 return round((numerator / denominator) * 100.0, 2) def _roi(profit: float, staked: float) -> float: if staked <= 0: return 0.0 return round((profit / staked) * 100.0, 2) def _print_summary(stats: BacktestStats) -> None: print("=== V2 Runtime Backtest ===") print(f"Sampled matches : {stats.sampled_matches}") print(f"Analyzed matches : {stats.analyzed_matches}") print(f"Skipped matches : {stats.skipped_matches}") print(f"MS accuracy : {_pct(stats.ms_correct, stats.analyzed_matches)}%") print(f"OU2.5 accuracy : {_pct(stats.ou25_correct, stats.analyzed_matches)}%") print(f"BTTS accuracy : {_pct(stats.btts_correct, stats.analyzed_matches)}%") print( "Main pick accuracy : " f"{_pct(stats.main_pick_correct, stats.main_pick_count)}% " f"({stats.main_pick_correct}/{stats.main_pick_count})" ) print( "Playable accuracy : " f"{_pct(stats.playable_pick_correct, stats.playable_pick_count)}% " f"({stats.playable_pick_correct}/{stats.playable_pick_count})" ) print(f"Units staked : {stats.playable_units_staked:.2f}") print(f"Units profit : {stats.playable_units_profit:.2f}") print(f"ROI : {_roi(stats.playable_units_profit, stats.playable_units_staked)}%") async def _main() -> None: args = _parse_args() try: stats = await _run(args.limit, args.days) _print_summary(stats) finally: await dispose_engine() if __name__ == "__main__": asyncio.run(_main())