gg

2026-04-21 16:53:56 +03:00
parent 1346924387
commit 2ccd6831eb
26 changed files with 430403 additions and 3 deletions
@@ -0,0 +1,505 @@
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import sys
+from collections import defaultdict
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+import psycopg2
+from psycopg2.extras import RealDictCursor
+
+
+# Directory one level above the directory that contains this script; the
+# `.env` file and the `reports/` output directory are looked up beneath it.
+AI_ENGINE_DIR = Path(__file__).resolve().parents[1]
+# Put that directory on sys.path (once) before the project import below —
+# presumably so `services` resolves when this file is run directly as a script.
+if str(AI_ENGINE_DIR) not in sys.path:
+    sys.path.insert(0, str(AI_ENGINE_DIR))
+
+from services.single_match_orchestrator import SingleMatchOrchestrator
+
+
+# Strategy keys; used for the result buckets and as the row order of the
+# strategy summary table.
+STRATEGIES = ("v25_aggressive", "v26_surprise", "v26_aggressive", "v26_main_htft")
+# HT/FT labels that get their own bucket in the v26 surprise breakdown.
+REVERSAL_LABELS = ("1/2", "2/1", "X/1", "X/2")
+
+
+@dataclass
+class MatchContext:
+    """Identity and scores of one finished match, with display helpers."""
+
+    match_id: str
+    match_date_ms: int
+    league: str
+    home_team: str
+    away_team: str
+    final_home: int
+    final_away: int
+    # Half-time goals; either side may be absent.
+    ht_home: Optional[int]
+    ht_away: Optional[int]
+
+    @property
+    def match_name(self) -> str:
+        """Fixture label in the form ``<home> vs <away>``."""
+        return "{} vs {}".format(self.home_team, self.away_team)
+
+    @property
+    def final_score(self) -> str:
+        """Full-time score in the form ``<home>-<away>``."""
+        return "{}-{}".format(self.final_home, self.final_away)
+
+    @property
+    def ht_score(self) -> str:
+        """Half-time score as ``<home>-<away>``, or ``-`` when a side is missing."""
+        halves = (self.ht_home, self.ht_away)
+        if None in halves:
+            return "-"
+        return "{}-{}".format(*halves)
+
+
+def _resolve_dsn() -> str:
+    """Return the database DSN read from ``DATABASE_URL`` in ``ai-engine/.env``.
+
+    The first ``DATABASE_URL=`` entry with a non-empty value wins. Leading and
+    trailing whitespace and one pair of matching quotes are removed, because
+    dotenv files commonly quote the value and a stray quote would corrupt the
+    DSN. Everything from ``?schema=`` onwards is dropped before returning.
+
+    Raises:
+        SystemExit: if the file is missing or holds no usable entry.
+    """
+    env_path = AI_ENGINE_DIR / ".env"
+    prefix = "DATABASE_URL="
+    if env_path.exists():
+        for raw_line in env_path.read_text(encoding="utf-8").splitlines():
+            line = raw_line.strip()
+            if not line.startswith(prefix):
+                continue
+            value = line[len(prefix):].strip()
+            # Unwrap "..." or '...' so the quotes never reach the driver.
+            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
+                value = value[1:-1].strip()
+            # NOTE(review): `?schema=` looks like an ORM-specific URL
+            # parameter that the database driver would reject — confirm.
+            value = value.split("?schema=")[0]
+            if value:
+                return value
+    raise SystemExit("DATABASE_URL not found in ai-engine/.env")
+
+
+def _fetch_matches(dsn: str, limit: int) -> list[MatchContext]:
+    """Load the most recent finished football matches that have both scores.
+
+    Args:
+        dsn: connection string passed to ``psycopg2.connect``.
+        limit: maximum number of rows, applied after ordering by ``mst_utc``
+            descending.
+
+    Returns:
+        One ``MatchContext`` per row, in query order (newest first).
+    """
+    query = """
+        SELECT
+            m.id,
+            m.mst_utc,
+            COALESCE(l.name, 'Unknown League') AS league,
+            COALESCE(ht.name, 'Home') AS home_team,
+            COALESCE(at.name, 'Away') AS away_team,
+            COALESCE(m.score_home, 0) AS score_home,
+            COALESCE(m.score_away, 0) AS score_away,
+            m.ht_score_home,
+            m.ht_score_away
+        FROM matches m
+        LEFT JOIN leagues l ON l.id = m.league_id
+        LEFT JOIN teams ht ON ht.id = m.home_team_id
+        LEFT JOIN teams at ON at.id = m.away_team_id
+        WHERE m.status = 'FT'
+          AND m.sport = 'football'
+          AND m.score_home IS NOT NULL
+          AND m.score_away IS NOT NULL
+          AND m.ht_score_home IS NOT NULL
+          AND m.ht_score_away IS NOT NULL
+        ORDER BY m.mst_utc DESC
+        LIMIT %s
+    """
+    conn = psycopg2.connect(dsn)
+    try:
+        # psycopg2's `with conn` only ends the transaction (commit/rollback);
+        # it does not close the connection, so close it explicitly.
+        with conn:
+            with conn.cursor(cursor_factory=RealDictCursor) as cur:
+                cur.execute(query, (limit,))
+                rows = cur.fetchall()
+    finally:
+        conn.close()
+    return [
+        MatchContext(
+            match_id=str(row["id"]),
+            match_date_ms=int(row["mst_utc"] or 0),
+            league=str(row["league"] or "Unknown League"),
+            home_team=str(row["home_team"] or "Home"),
+            away_team=str(row["away_team"] or "Away"),
+            final_home=int(row["score_home"] or 0),
+            final_away=int(row["score_away"] or 0),
+            ht_home=int(row["ht_score_home"]) if row.get("ht_score_home") is not None else None,
+            ht_away=int(row["ht_score_away"]) if row.get("ht_score_away") is not None else None,
+        )
+        for row in rows
+    ]
+
+
+def _safe_float(value: Any) -> float:
+    """Convert *value* to a finite float, falling back to ``0.0``.
+
+    The fallback covers values ``float()`` rejects (``None``, non-numeric
+    text), integers too large for a float, and NaN / infinity. Non-finite
+    numbers are rejected because they would otherwise flow into rounding,
+    comparisons and the JSON report, where ``NaN`` is not valid JSON.
+    """
+    import math  # function-scope import: the module does not import math
+
+    try:
+        number = float(value)
+    except (TypeError, ValueError, OverflowError):
+        return 0.0
+    return number if math.isfinite(number) else 0.0
+
+
+def _outcome_symbol(home: int, away: int) -> str:
+    """Map a score line to ``"1"`` (home ahead), ``"2"`` (away ahead) or ``"X"``."""
+    return "1" if home > away else ("2" if home < away else "X")
+
+
+def _resolve_htft(pick: str, context: MatchContext) -> Dict[str, Any]:
+    """Settle a half-time/full-time pick against the match scores.
+
+    Args:
+        pick: label such as ``"X/1"``; compared case-insensitively after
+            trimming whitespace.
+        context: match whose half-time and final scores decide the outcome.
+
+    Returns:
+        A dict with ``result`` (``"WON"``, ``"LOST"`` or ``"UNRESOLVED"``),
+        ``won`` (bool, or ``None`` when unresolved) and a ``note``.
+    """
+    if not pick or "/" not in str(pick):
+        return {"result": "UNRESOLVED", "won": None, "note": "htft_pick_invalid"}
+    if context.ht_home is None or context.ht_away is None:
+        # An unknown half-time score is not a 0-0 draw; settling against an
+        # invented score would record a false win or loss.
+        return {"result": "UNRESOLVED", "won": None, "note": "ht_score_missing"}
+    half_time = _outcome_symbol(context.ht_home, context.ht_away)
+    full_time = _outcome_symbol(context.final_home, context.final_away)
+    actual = f"{half_time}/{full_time}"
+    won = str(pick).strip().upper() == actual
+    return {"result": "WON" if won else "LOST", "won": won, "note": f"actual={actual}"}
+
+
+def _market_odds(odds: Dict[str, Any], market: str, pick: str) -> float:
+    """Look up the price for *pick* in *market* from a flat odds mapping.
+
+    Args:
+        odds: mapping with keys such as ``htft_x1`` or ``ms_h``; ``None`` or
+            empty is tolerated.
+        market: ``"HTFT"`` or ``"MS"``; any other market yields ``0.0``.
+        pick: selection label, matched case-insensitively after trimming.
+
+    Returns:
+        The price when it is greater than ``1.0``, otherwise ``0.0``.
+    """
+    mapping = {
+        "HTFT": {
+            "1/1": "htft_11",
+            "1/X": "htft_1x",
+            "1/2": "htft_12",
+            "X/1": "htft_x1",
+            "X/X": "htft_xx",
+            "X/2": "htft_x2",
+            "2/1": "htft_21",
+            "2/X": "htft_2x",
+            "2/2": "htft_22",
+        },
+        "MS": {"1": "ms_h", "X": "ms_d", "2": "ms_a"},
+    }
+    # Normalise the same way _resolve_htft compares picks, so a pick that can
+    # be settled (e.g. "x/1" or " X/1 ") can also be priced.
+    normalized_pick = str(pick).strip().upper()
+    key = mapping.get(market, {}).get(normalized_pick)
+    if not key:
+        return 0.0
+    value = _safe_float((odds or {}).get(key))
+    return value if value > 1.0 else 0.0
+
+
+def _evaluate_pick(
+    *,
+    strategy: str,
+    market: str,
+    pick: str,
+    odds: Any,
+    playable: bool,
+    confidence: Any,
+    extra: Optional[Dict[str, Any]],
+    context: MatchContext,
+) -> Dict[str, Any]:
+    """Build the report row for one strategy pick.
+
+    The market alias ``"HT/FT"`` is folded into ``"HTFT"``. Only HTFT picks
+    are settled; every other market is reported as ``UNRESOLVED``. A pick
+    counts towards ROI when it is playable, is an HTFT pick, has odds above
+    ``1.01`` and was settled as won or lost. Flat-stake profit is then
+    ``odds - 1`` for a win and ``-1`` for a loss.
+
+    Returns:
+        The row dict, with the keys of *extra* merged in last.
+    """
+    price = _safe_float(odds)
+    market = "HTFT" if market == "HT/FT" else market
+    is_htft = market == "HTFT"
+
+    if is_htft:
+        outcome = _resolve_htft(pick, context)
+    else:
+        outcome = {
+            "result": "UNRESOLVED",
+            "won": None,
+            "note": "non_htft_market",
+        }
+
+    settled = outcome["result"] in {"WON", "LOST"}
+    counted = bool(playable) and is_htft and price > 1.01 and settled
+
+    if not counted:
+        profit = 0.0
+    elif outcome["result"] == "WON":
+        profit = price - 1.0
+    else:
+        profit = -1.0
+
+    base_row = {
+        "strategy": strategy,
+        "market": market,
+        "pick": pick,
+        "odds": round(price, 2),
+        "playable": playable,
+        "confidence": round(_safe_float(confidence), 1),
+        "result": outcome["result"],
+        "counted_in_roi": counted,
+        "profit_flat": round(profit, 4),
+        "resolution_note": outcome["note"],
+    }
+    return {**base_row, **(extra or {})}
+
+
+def _extract_strategy_rows(
+    *,
+    context: MatchContext,
+    odds_data: Dict[str, Any],
+    v25: Dict[str, Any],
+    v26: Dict[str, Any],
+) -> Dict[str, Optional[Dict[str, Any]]]:
+    """Turn the v25 and v26 analysis payloads into one evaluated row per strategy.
+
+    Args:
+        context: match the picks are settled against.
+        odds_data: flat odds mapping used when a pick carries no odds itself.
+        v25: analysis payload produced in ``v25`` engine mode.
+        v26: analysis payload produced in ``v26`` engine mode.
+
+    Returns:
+        A dict keyed by every name in ``STRATEGIES``; the value is the
+        evaluated row, or ``None`` when that strategy produced no pick.
+    """
+    strategies: Dict[str, Optional[Dict[str, Any]]] = {name: None for name in STRATEGIES}
+
+    # v25 aggressive: always treated as playable, priced from the odds mapping.
+    v25_aggressive = v25.get("aggressive_pick") or {}
+    if v25_aggressive.get("pick"):
+        pick = str(v25_aggressive.get("pick"))
+        strategies["v25_aggressive"] = _evaluate_pick(
+            strategy="v25_aggressive",
+            market=str(v25_aggressive.get("market") or "HTFT"),
+            pick=pick,
+            odds=_market_odds(odds_data, "HTFT", pick),
+            playable=True,
+            confidence=v25_aggressive.get("confidence"),
+            extra={
+                "source": "v25.aggressive_pick",
+                "reversal_pick": pick,
+            },
+            context=context,
+        )
+
+    # v26 surprise: prefers `raw_pick`, and carries the hunter diagnostics.
+    v26_surprise = v26.get("surprise_pick") or {}
+    v26_hunter = v26.get("surprise_hunter") or {}
+    if v26_surprise.get("pick"):
+        pick = str(v26_surprise.get("raw_pick") or v26_surprise.get("pick"))
+        # The key may be present but None, and codes may not be strings;
+        # either would make a bare ",".join(...) raise TypeError.
+        reason_codes = v26_hunter.get("reason_codes") or []
+        strategies["v26_surprise"] = _evaluate_pick(
+            strategy="v26_surprise",
+            market=str(v26_surprise.get("market") or "HTFT"),
+            pick=pick,
+            odds=v26_surprise.get("odds") or _market_odds(odds_data, "HTFT", pick),
+            playable=bool(v26_surprise.get("playable")),
+            confidence=v26_surprise.get("calibrated_confidence", v26_surprise.get("confidence")),
+            extra={
+                "source": "v26.surprise_pick",
+                "surprise_score": round(_safe_float(v26_surprise.get("surprise_score")), 1),
+                "support_score": round(_safe_float(v26_surprise.get("support_score")), 1),
+                "reversal_pick": v26_hunter.get("reversal_pick"),
+                "reversal_prob": round(_safe_float(v26_hunter.get("reversal_prob")), 4),
+                "favorite_gap": round(_safe_float(v26_hunter.get("favorite_gap")), 3),
+                "favorite_odd": round(_safe_float(v26_hunter.get("favorite_odd")), 2),
+                "odds_band_score": round(_safe_float(v26_hunter.get("odds_band_score")), 3),
+                "odds_band_label": str(v26_hunter.get("odds_band_label") or ""),
+                "league_reversal_rate": round(_safe_float(v26_hunter.get("league_reversal_rate")), 4),
+                "league_strict_rev_rate": round(_safe_float(v26_hunter.get("league_strict_rev_rate")), 4),
+                "referee_strict_rev_rate": round(_safe_float(v26_hunter.get("referee_strict_rev_rate")), 4),
+                "reason_codes": ",".join(str(code) for code in reason_codes),
+            },
+            context=context,
+        )
+
+    # v26 aggressive: always treated as playable; own odds win over the mapping.
+    v26_aggressive = v26.get("aggressive_pick") or {}
+    if v26_aggressive.get("pick"):
+        pick = str(v26_aggressive.get("pick"))
+        strategies["v26_aggressive"] = _evaluate_pick(
+            strategy="v26_aggressive",
+            market=str(v26_aggressive.get("market") or "HTFT"),
+            pick=pick,
+            odds=v26_aggressive.get("odds") or _market_odds(odds_data, "HTFT", pick),
+            playable=True,
+            confidence=v26_aggressive.get("confidence"),
+            extra={
+                "source": "v26.aggressive_pick",
+                "reversal_pick": pick,
+            },
+            context=context,
+        )
+
+    # v26 main pick: only considered when its market is exactly "HTFT".
+    v26_main = v26.get("main_pick") or {}
+    if str(v26_main.get("market") or "") == "HTFT" and v26_main.get("pick"):
+        pick = str(v26_main.get("raw_pick") or v26_main.get("pick"))
+        strategies["v26_main_htft"] = _evaluate_pick(
+            strategy="v26_main_htft",
+            market="HTFT",
+            pick=pick,
+            odds=v26_main.get("odds") or _market_odds(odds_data, "HTFT", pick),
+            playable=bool(v26_main.get("playable")),
+            confidence=v26_main.get("calibrated_confidence", v26_main.get("confidence")),
+            extra={
+                "source": "v26.main_pick",
+                "pick_reason": v26_main.get("pick_reason"),
+                "surprise_score": round(_safe_float(v26_main.get("surprise_score")), 1),
+            },
+            context=context,
+        )
+
+    return strategies
+
+
+def _summarize_bucket(bucket: Dict[str, float]) -> Dict[str, Any]:
+    """Condense raw counters into the summary figures shown in the report.
+
+    Reads ``played``, ``won``, ``lost``, ``candidate`` and ``profit`` from
+    *bucket*. ROI and hit rate are percentages of played picks, rounded to
+    two decimals, and are ``0.0`` when nothing was played.
+    """
+    played, won, lost, candidates = (
+        int(bucket[name]) for name in ("played", "won", "lost", "candidate")
+    )
+    profit = round(bucket["profit"], 4)
+
+    def _pct_of_played(amount: float) -> float:
+        if not played:
+            return 0.0
+        return round((amount / played) * 100.0, 2)
+
+    return dict(
+        candidates=candidates,
+        played=played,
+        won=won,
+        lost=lost,
+        profit_flat=profit,
+        roi_flat_pct=_pct_of_played(profit),
+        hit_rate_pct=_pct_of_played(won),
+    )
+
+
+def _format_date(ms: int) -> str:
+    """Render an epoch timestamp in milliseconds as a UTC ``YYYY-MM-DD`` date."""
+    seconds = ms / 1000
+    moment = datetime.fromtimestamp(seconds, tz=timezone.utc)
+    return f"{moment:%Y-%m-%d}"
+
+
+def _build_markdown(report: Dict[str, Any]) -> str:
+    """Render the report dict as a Markdown document.
+
+    The output has a short header, a per-strategy summary table (rows in
+    ``STRATEGIES`` order), the v26 surprise breakdown by reversal label and a
+    per-match detail table. It ends with a trailing newline.
+    """
+    out: list[str] = [
+        "# HT/FT + Upset Backtest",
+        "",
+        f"- Sample: last {report['sample_size']} finished football matches",
+        "- Scope: only HT/FT reversal and upset-oriented picks",
+        "- ROI: flat `1 unit` per played pick",
+        f"- Generated at: {report['generated_at']}",
+        "",
+        "## Strategy Summary",
+        "",
+        "| Strategy | Candidates | Played | Won | Lost | Hit Rate | Profit | ROI |",
+        "|---|---:|---:|---:|---:|---:|---:|---:|",
+    ]
+    for name in STRATEGIES:
+        stats = report["summary"]["strategies"][name]
+        left = f"| {name} | {stats['candidates']} | {stats['played']} | {stats['won']} | "
+        right = (
+            f"{stats['lost']} | {stats['hit_rate_pct']}% | "
+            f"{stats['profit_flat']:+.2f} | {stats['roi_flat_pct']:+.2f}% |"
+        )
+        out.append(left + right)
+
+    out += [
+        "",
+        "## v26 Surprise By Reversal Type",
+        "",
+        "| Reversal | Candidates | Played | Won | Lost | Profit | ROI |",
+        "|---|---:|---:|---:|---:|---:|---:|",
+    ]
+    for label, stats in report["summary"]["v26_surprise_by_pick"].items():
+        left = f"| {label} | {stats['candidates']} | {stats['played']} | {stats['won']} | "
+        right = f"{stats['lost']} | {stats['profit_flat']:+.2f} | {stats['roi_flat_pct']:+.2f}% |"
+        out.append(left + right)
+
+    out += [
+        "",
+        "## Match Detail",
+        "",
+        "| Date | Match | HT | FT | v25 aggressive | v26 surprise | v26 aggressive | v26 main HTFT |",
+        "|---|---|---|---|---|---|---|---|",
+    ]
+    for entry in report["matches"]:
+        left = (
+            f"| {_format_date(entry['match_date_ms'])} | {entry['match_name']} | "
+            f"{entry['ht_score']} | {entry['final_score']} | "
+        )
+        right = (
+            f"{entry['v25_aggressive']} | {entry['v26_surprise']} | "
+            f"{entry['v26_aggressive']} | {entry['v26_main_htft']} |"
+        )
+        out.append(left + right)
+
+    out.append("")
+    return "\n".join(out)
+
+
+def _accumulate_pick(bucket: Dict[str, float], payload: Dict[str, Any]) -> None:
+    """Add one evaluated pick to a counters bucket (candidate/played/won/lost/profit)."""
+    bucket["candidate"] += 1
+    if not payload["counted_in_roi"]:
+        return
+    bucket["played"] += 1
+    bucket["won" if payload["result"] == "WON" else "lost"] += 1
+    bucket["profit"] += payload["profit_flat"]
+
+
+def main() -> None:
+    """Run the HT/FT + upset backtest and write JSON, CSV and Markdown reports.
+
+    Command line:
+        --limit N  number of finished matches to analyse (default 120; values
+                   below 1 are raised to 1).
+
+    Each match is analysed twice by the same orchestrator, once per engine
+    mode (``v25`` and ``v26``). Matches whose data cannot be loaded are
+    skipped and do not count towards the sample size. The reports are written
+    to ``<AI_ENGINE_DIR>/reports``, which is created when missing.
+    """
+    parser = argparse.ArgumentParser(description="HT/FT + upset focused backtest.")
+    parser.add_argument("--limit", type=int, default=120, help="Number of finished matches to analyze.")
+    args = parser.parse_args()
+
+    dsn = _resolve_dsn()
+    orchestrator = SingleMatchOrchestrator()
+    matches = _fetch_matches(dsn, max(1, args.limit))
+
+    strategy_buckets: Dict[str, Dict[str, float]] = {name: defaultdict(float) for name in STRATEGIES}
+    v26_reversal_buckets: Dict[str, Dict[str, float]] = {label: defaultdict(float) for label in REVERSAL_LABELS}
+    report_matches: list[Dict[str, Any]] = []
+    csv_rows: list[Dict[str, Any]] = []
+
+    for context in matches:
+        data = orchestrator._load_match_data(context.match_id)  # noqa: SLF001
+        if data is None:
+            continue
+
+        # The same orchestrator instance is reused; only its mode is switched.
+        orchestrator.engine_mode = "v25"
+        v25 = orchestrator.analyze_match(context.match_id) or {}
+        orchestrator.engine_mode = "v26"
+        v26 = orchestrator.analyze_match(context.match_id) or {}
+
+        extracted = _extract_strategy_rows(
+            context=context,
+            odds_data=data.odds_data or {},
+            v25=v25,
+            v26=v26,
+        )
+
+        match_row: Dict[str, Any] = {
+            "match_id": context.match_id,
+            "match_name": context.match_name,
+            "league": context.league,
+            "match_date_ms": context.match_date_ms,
+            "ht_score": context.ht_score,
+            "final_score": context.final_score,
+        }
+
+        for strategy, payload in extracted.items():
+            if not payload:
+                match_row[strategy] = "-"
+                continue
+
+            _accumulate_pick(strategy_buckets[strategy], payload)
+
+            # The surprise strategy is additionally broken down by the
+            # reversal label; labels outside REVERSAL_LABELS are ignored.
+            if strategy == "v26_surprise":
+                reversal_label = str(payload.get("reversal_pick") or "")
+                if reversal_label in v26_reversal_buckets:
+                    _accumulate_pick(v26_reversal_buckets[reversal_label], payload)
+
+            played_text = "played" if payload["counted_in_roi"] else "not played"
+            match_row[strategy] = (
+                f"{payload['pick']} ({payload['result']}, {played_text}, {payload['profit_flat']:+.2f})"
+            )
+
+            csv_rows.append(
+                {
+                    "match_id": context.match_id,
+                    "date": _format_date(context.match_date_ms),
+                    "league": context.league,
+                    "match": context.match_name,
+                    "ht_score": context.ht_score,
+                    "final_score": context.final_score,
+                    **payload,
+                }
+            )
+
+        report_matches.append(match_row)
+
+    report = {
+        "generated_at": datetime.now(timezone.utc).isoformat(),
+        "sample_size": len(report_matches),
+        "summary": {
+            "strategies": {
+                strategy: _summarize_bucket(bucket)
+                for strategy, bucket in strategy_buckets.items()
+            },
+            "v26_surprise_by_pick": {
+                label: _summarize_bucket(bucket)
+                for label, bucket in v26_reversal_buckets.items()
+            },
+        },
+        "matches": report_matches,
+    }
+
+    report_dir = AI_ENGINE_DIR / "reports"
+    # Create the output directory up front so a fresh checkout does not fail
+    # on the first write after all matches have already been analysed.
+    report_dir.mkdir(parents=True, exist_ok=True)
+    json_path = report_dir / "backtest_v26_shadow_htft_upset.json"
+    csv_path = report_dir / "backtest_v26_shadow_htft_upset.csv"
+    md_path = report_dir / "backtest_v26_shadow_htft_upset.md"
+
+    json_path.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8")
+    with csv_path.open("w", encoding="utf-8", newline="") as handle:
+        # Every key a row can carry must be listed here: DictWriter raises on
+        # unknown keys by default.
+        writer = csv.DictWriter(
+            handle,
+            fieldnames=[
+                "match_id",
+                "date",
+                "league",
+                "match",
+                "ht_score",
+                "final_score",
+                "strategy",
+                "market",
+                "pick",
+                "odds",
+                "playable",
+                "confidence",
+                "result",
+                "counted_in_roi",
+                "profit_flat",
+                "resolution_note",
+                "source",
+                "reversal_pick",
+                "reversal_prob",
+                "favorite_gap",
+                "favorite_odd",
+                "support_score",
+                "odds_band_score",
+                "odds_band_label",
+                "league_reversal_rate",
+                "league_strict_rev_rate",
+                "referee_strict_rev_rate",
+                "surprise_score",
+                "reason_codes",
+                "pick_reason",
+            ],
+        )
+        writer.writeheader()
+        writer.writerows(csv_rows)
+    md_path.write_text(_build_markdown(report), encoding="utf-8")
+
+    print(f"[OK] JSON report written to {json_path}")
+    print(f"[OK] CSV report written to {csv_path}")
+    print(f"[OK] Markdown report written to {md_path}")
+
+
+if __name__ == "__main__":
+    main()