# iddaai-be/ai-engine/scripts/backtest_v26_shadow_htft_upset.py

from __future__ import annotations

import argparse
import csv
import json
import sys
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Optional

import psycopg2
from psycopg2.extras import RealDictCursor


# Parent of the directory that contains this script. It is also the base used
# further down for locating `.env` and the `reports/` output folder.
AI_ENGINE_DIR = Path(__file__).resolve().parents[1]
# Prepend it to sys.path (once). NOTE(review): presumably this is what lets the
# `services` import below resolve when the script is run directly — confirm.
if str(AI_ENGINE_DIR) not in sys.path:
    sys.path.insert(0, str(AI_ENGINE_DIR))

# Must stay after the sys.path tweak above.
from services.single_match_orchestrator import SingleMatchOrchestrator


# Strategy identifiers. Used as bucket/report keys and iterated in this order
# when the markdown strategy summary is rendered.
STRATEGIES = ("v25_aggressive", "v26_surprise", "v26_aggressive", "v26_main_htft")
# HT/FT labels that get their own bucket in the v26 surprise breakdown; picks
# whose reversal label is not listed here are left out of that breakdown.
REVERSAL_LABELS = ("1/2", "2/1", "X/1", "X/2")


@dataclass
class MatchContext:
    """Identity and scoreline of one finished match used by the backtest."""

    # Field order doubles as the positional constructor order.
    match_id: str
    match_date_ms: int
    league: str
    home_team: str
    away_team: str
    final_home: int
    final_away: int
    # Half-time goals; None when no half-time score is available.
    ht_home: Optional[int]
    ht_away: Optional[int]

    @property
    def match_name(self) -> str:
        """Fixture label in ``<home> vs <away>`` form."""
        return "{} vs {}".format(self.home_team, self.away_team)

    @property
    def final_score(self) -> str:
        """Full-time score in ``<home>-<away>`` form."""
        return "{}-{}".format(self.final_home, self.final_away)

    @property
    def ht_score(self) -> str:
        """Half-time score in ``<home>-<away>`` form, or ``-`` if either side is unknown."""
        half_time = (self.ht_home, self.ht_away)
        if any(goals is None for goals in half_time):
            return "-"
        return "{}-{}".format(*half_time)


def _resolve_dsn() -> str:
    """Return the database DSN taken from ``DATABASE_URL`` in ``ai-engine/.env``.

    The first line that starts with ``DATABASE_URL=`` wins. Matching single or
    double quotes wrapped around the value are removed, and everything from
    ``?schema=`` onwards is cut off before the value is returned.

    Raises:
        SystemExit: if the ``.env`` file is absent or has no ``DATABASE_URL=`` line.
    """
    env_path = AI_ENGINE_DIR / ".env"
    if env_path.exists():
        for line in env_path.read_text(encoding="utf-8").splitlines():
            if not line.startswith("DATABASE_URL="):
                continue
            value = line.split("=", 1)[1].strip()
            # .env values are frequently written as KEY="value"; without this the
            # quote characters would end up inside the DSN handed to the driver.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
                value = value[1:-1].strip()
            # NOTE(review): `?schema=` looks like an ORM-specific URL parameter
            # that the driver would not accept — confirm.
            return value.split("?schema=")[0]
    raise SystemExit("DATABASE_URL not found in ai-engine/.env")


def _fetch_matches(dsn: str, limit: int) -> list[MatchContext]:
    """Load the most recent finished football matches that have HT and FT scores.

    Args:
        dsn: Connection string passed to ``psycopg2.connect``.
        limit: Maximum number of rows to fetch (newest ``mst_utc`` first).

    Returns:
        One ``MatchContext`` per row, in the order returned by the query.
    """
    query = """
        SELECT
            m.id,
            m.mst_utc,
            COALESCE(l.name, 'Unknown League') AS league,
            COALESCE(ht.name, 'Home') AS home_team,
            COALESCE(at.name, 'Away') AS away_team,
            COALESCE(m.score_home, 0) AS score_home,
            COALESCE(m.score_away, 0) AS score_away,
            m.ht_score_home,
            m.ht_score_away
        FROM matches m
        LEFT JOIN leagues l ON l.id = m.league_id
        LEFT JOIN teams ht ON ht.id = m.home_team_id
        LEFT JOIN teams at ON at.id = m.away_team_id
        WHERE m.status = 'FT'
          AND m.sport = 'football'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
          AND m.ht_score_home IS NOT NULL
          AND m.ht_score_away IS NOT NULL
        ORDER BY m.mst_utc DESC
        LIMIT %s
    """
    conn = psycopg2.connect(dsn)
    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(query, (limit,))
            rows = cur.fetchall()
    finally:
        # Using the psycopg2 connection as a context manager only commits or
        # rolls back; it leaves the connection open. Close it explicitly so the
        # read-only session is released as soon as the rows are in memory.
        conn.close()

    def _optional_int(value: Any) -> Optional[int]:
        # Keep NULL half-time scores as None instead of coercing them to 0.
        return int(value) if value is not None else None

    return [
        MatchContext(
            match_id=str(row["id"]),
            # Stored into `match_date_ms`; a NULL timestamp falls back to 0.
            match_date_ms=int(row["mst_utc"] or 0),
            league=str(row["league"] or "Unknown League"),
            home_team=str(row["home_team"] or "Home"),
            away_team=str(row["away_team"] or "Away"),
            final_home=int(row["score_home"] or 0),
            final_away=int(row["score_away"] or 0),
            ht_home=_optional_int(row.get("ht_score_home")),
            ht_away=_optional_int(row.get("ht_score_away")),
        )
        for row in rows
    ]


def _safe_float(value: Any) -> float:
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def _outcome_symbol(home: int, away: int) -> str:
    if home > away:
        return "1"
    if home < away:
        return "2"
    return "X"


def _resolve_htft(pick: str, context: MatchContext) -> Dict[str, Any]:
    """Grade an HT/FT pick against the match's half-time and full-time outcome.

    Args:
        pick: Pick label such as ``"1/2"``; compared after stripping whitespace
            and upper-casing.
        context: Match whose half-time and final scores decide the result.

    Returns:
        A dict with ``result`` (``"WON"``, ``"LOST"`` or ``"UNRESOLVED"``),
        ``won`` (``True``/``False``, or ``None`` when unresolved) and ``note``.
    """
    if not pick or "/" not in str(pick):
        return {"result": "UNRESOLVED", "won": None, "note": "htft_pick_invalid"}
    if context.ht_home is None or context.ht_away is None:
        # A missing half-time score must not be graded as a 0-0 half-time draw:
        # that would invent wins/losses for matches we cannot actually settle.
        return {"result": "UNRESOLVED", "won": None, "note": "ht_score_missing"}
    half_time = _outcome_symbol(context.ht_home, context.ht_away)
    full_time = _outcome_symbol(context.final_home, context.final_away)
    actual = f"{half_time}/{full_time}"
    won = str(pick).strip().upper() == actual
    return {"result": "WON" if won else "LOST", "won": won, "note": f"actual={actual}"}


def _market_odds(odds: Dict[str, Any], market: str, pick: str) -> float:
    """Look up the price for *pick* in *market* from a flat odds mapping.

    Args:
        odds: Mapping keyed by names such as ``htft_12`` or ``ms_h``; may be
            empty or ``None``.
        market: ``"HTFT"`` or ``"MS"``; any other market yields ``0.0``.
        pick: Selection label (e.g. ``"X/2"`` or ``"1"``). Surrounding
            whitespace and letter case are ignored.

    Returns:
        The price as a float when it is greater than ``1.0``, otherwise ``0.0``.
    """
    mapping = {
        "HTFT": {
            "1/1": "htft_11",
            "1/X": "htft_1x",
            "1/2": "htft_12",
            "X/1": "htft_x1",
            "X/X": "htft_xx",
            "X/2": "htft_x2",
            "2/1": "htft_21",
            "2/X": "htft_2x",
            "2/2": "htft_22",
        },
        "MS": {"1": "ms_h", "X": "ms_d", "2": "ms_a"},
    }
    # Normalise the pick the same way result grading does (strip + upper), so a
    # pick like "x/1" that can be graded is not silently priced at 0.0.
    normalized_pick = str(pick).strip().upper()
    key = mapping.get(market, {}).get(normalized_pick)
    if not key:
        return 0.0
    value = _safe_float((odds or {}).get(key))
    # Prices at or below 1.0 are treated as "no usable price".
    return value if value > 1.0 else 0.0


def _evaluate_pick(
    *,
    strategy: str,
    market: str,
    pick: str,
    odds: Any,
    playable: bool,
    confidence: Any,
    extra: Optional[Dict[str, Any]],
    context: MatchContext,
) -> Dict[str, Any]:
    """Build the report row for one strategy pick, including flat-stake profit.

    A pick is counted in ROI only when it is playable, on the HTFT market
    (``"HT/FT"`` is accepted as an alias), priced above 1.01 and graded WON or
    LOST. Counted wins earn ``odds - 1`` and counted losses cost ``1``; anything
    else has zero profit. Keys from *extra*, when given, are merged into the row
    last.
    """
    price = _safe_float(odds)
    market_code = "HTFT" if market == "HT/FT" else market
    on_htft = market_code == "HTFT"

    if on_htft:
        graded = _resolve_htft(pick, context)
    else:
        graded = {"result": "UNRESOLVED", "won": None, "note": "non_htft_market"}

    outcome = graded["result"]
    counted = bool(playable and on_htft and price > 1.01 and outcome in {"WON", "LOST"})

    if not counted:
        stake_return = 0.0
    elif outcome == "WON":
        stake_return = price - 1.0
    else:
        stake_return = -1.0

    row = {
        "strategy": strategy,
        "market": market_code,
        "pick": pick,
        "odds": round(price, 2),
        "playable": playable,
        "confidence": round(_safe_float(confidence), 1),
        "result": outcome,
        "counted_in_roi": counted,
        "profit_flat": round(stake_return, 4),
        "resolution_note": graded["note"],
    }
    if extra:
        row.update(extra)
    return row


def _extract_strategy_rows(
    *,
    context: MatchContext,
    odds_data: Dict[str, Any],
    v25: Dict[str, Any],
    v26: Dict[str, Any],
) -> Dict[str, Optional[Dict[str, Any]]]:
    """Turn the v25/v26 analysis payloads into one evaluated row per strategy.

    Args:
        context: Match being graded.
        odds_data: Flat odds mapping used when a pick carries no price itself.
        v25: Analysis payload produced in ``v25`` mode (may be empty).
        v26: Analysis payload produced in ``v26`` mode (may be empty).

    Returns:
        A dict keyed by every name in ``STRATEGIES``. The value is the evaluated
        row, or ``None`` when that strategy produced no pick for the match.
    """
    strategies: Dict[str, Optional[Dict[str, Any]]] = {name: None for name in STRATEGIES}

    # v25 aggressive pick: always treated as playable, priced from odds_data.
    v25_aggressive = v25.get("aggressive_pick") or {}
    if v25_aggressive.get("pick"):
        pick = str(v25_aggressive.get("pick"))
        strategies["v25_aggressive"] = _evaluate_pick(
            strategy="v25_aggressive",
            market=str(v25_aggressive.get("market") or "HTFT"),
            pick=pick,
            odds=_market_odds(odds_data, "HTFT", pick),
            playable=True,
            confidence=v25_aggressive.get("confidence"),
            extra={
                "source": "v25.aggressive_pick",
                "reversal_pick": pick,
            },
            context=context,
        )

    # v26 surprise pick, enriched with the surprise-hunter diagnostics.
    v26_surprise = v26.get("surprise_pick") or {}
    v26_hunter = v26.get("surprise_hunter") or {}
    if v26_surprise.get("pick"):
        pick = str(v26_surprise.get("raw_pick") or v26_surprise.get("pick"))
        # `reason_codes` may be present but None, or hold non-string entries;
        # either would make a bare ",".join(...) raise TypeError mid-backtest.
        reason_codes = v26_hunter.get("reason_codes") or []
        strategies["v26_surprise"] = _evaluate_pick(
            strategy="v26_surprise",
            market=str(v26_surprise.get("market") or "HTFT"),
            pick=pick,
            odds=v26_surprise.get("odds") or _market_odds(odds_data, "HTFT", pick),
            playable=bool(v26_surprise.get("playable")),
            confidence=v26_surprise.get("calibrated_confidence", v26_surprise.get("confidence")),
            extra={
                "source": "v26.surprise_pick",
                "surprise_score": round(_safe_float(v26_surprise.get("surprise_score")), 1),
                "support_score": round(_safe_float(v26_surprise.get("support_score")), 1),
                "reversal_pick": v26_hunter.get("reversal_pick"),
                "reversal_prob": round(_safe_float(v26_hunter.get("reversal_prob")), 4),
                "favorite_gap": round(_safe_float(v26_hunter.get("favorite_gap")), 3),
                "favorite_odd": round(_safe_float(v26_hunter.get("favorite_odd")), 2),
                "odds_band_score": round(_safe_float(v26_hunter.get("odds_band_score")), 3),
                "odds_band_label": str(v26_hunter.get("odds_band_label") or ""),
                "league_reversal_rate": round(_safe_float(v26_hunter.get("league_reversal_rate")), 4),
                "league_strict_rev_rate": round(_safe_float(v26_hunter.get("league_strict_rev_rate")), 4),
                "referee_strict_rev_rate": round(_safe_float(v26_hunter.get("referee_strict_rev_rate")), 4),
                "reason_codes": ",".join(str(code) for code in reason_codes),
            },
            context=context,
        )

    # v26 aggressive pick: always treated as playable.
    v26_aggressive = v26.get("aggressive_pick") or {}
    if v26_aggressive.get("pick"):
        pick = str(v26_aggressive.get("pick"))
        strategies["v26_aggressive"] = _evaluate_pick(
            strategy="v26_aggressive",
            market=str(v26_aggressive.get("market") or "HTFT"),
            pick=pick,
            odds=v26_aggressive.get("odds") or _market_odds(odds_data, "HTFT", pick),
            playable=True,
            confidence=v26_aggressive.get("confidence"),
            extra={
                "source": "v26.aggressive_pick",
                "reversal_pick": pick,
            },
            context=context,
        )

    # v26 main pick, only when it is an HT/FT selection. Both spellings are
    # accepted because pick evaluation also treats "HT/FT" as "HTFT".
    v26_main = v26.get("main_pick") or {}
    main_market = str(v26_main.get("market") or "")
    if main_market in {"HTFT", "HT/FT"} and v26_main.get("pick"):
        pick = str(v26_main.get("raw_pick") or v26_main.get("pick"))
        strategies["v26_main_htft"] = _evaluate_pick(
            strategy="v26_main_htft",
            market="HTFT",
            pick=pick,
            odds=v26_main.get("odds") or _market_odds(odds_data, "HTFT", pick),
            playable=bool(v26_main.get("playable")),
            confidence=v26_main.get("calibrated_confidence", v26_main.get("confidence")),
            extra={
                "source": "v26.main_pick",
                "pick_reason": v26_main.get("pick_reason"),
                "surprise_score": round(_safe_float(v26_main.get("surprise_score")), 1),
            },
            context=context,
        )

    return strategies


def _summarize_bucket(bucket: Dict[str, float]) -> Dict[str, Any]:
    played = int(bucket["played"])
    won = int(bucket["won"])
    lost = int(bucket["lost"])
    candidate = int(bucket["candidate"])
    profit = round(bucket["profit"], 4)
    roi = round((profit / played) * 100.0, 2) if played else 0.0
    hit = round((won / played) * 100.0, 2) if played else 0.0
    return {
        "candidates": candidate,
        "played": played,
        "won": won,
        "lost": lost,
        "profit_flat": profit,
        "roi_flat_pct": roi,
        "hit_rate_pct": hit,
    }


def _format_date(ms: int) -> str:
    return datetime.fromtimestamp(ms / 1000, tz=timezone.utc).strftime("%Y-%m-%d")


def _build_markdown(report: Dict[str, Any]) -> str:
    """Render the report dict as a markdown document.

    The document has a short header, a per-strategy summary table, a table of
    v26 surprise results per reversal type, and a per-match detail table.
    """
    summary = report["summary"]

    out: list[str] = [
        "# HT/FT + Upset Backtest",
        "",
        f"- Sample: last {report['sample_size']} finished football matches",
        "- Scope: only HT/FT reversal and upset-oriented picks",
        "- ROI: flat `1 unit` per played pick",
        f"- Generated at: {report['generated_at']}",
        "",
        "## Strategy Summary",
        "",
        "| Strategy | Candidates | Played | Won | Lost | Hit Rate | Profit | ROI |",
        "|---|---:|---:|---:|---:|---:|---:|---:|",
    ]
    # One row per strategy, in the fixed STRATEGIES order.
    for name in STRATEGIES:
        stats = summary["strategies"][name]
        out.append(
            f"| {name} | {stats['candidates']} | {stats['played']} | {stats['won']} | "
            f"{stats['lost']} | {stats['hit_rate_pct']}% | {stats['profit_flat']:+.2f} | {stats['roi_flat_pct']:+.2f}% |"
        )

    out.extend(
        [
            "",
            "## v26 Surprise By Reversal Type",
            "",
            "| Reversal | Candidates | Played | Won | Lost | Profit | ROI |",
            "|---|---:|---:|---:|---:|---:|---:|",
        ]
    )
    out.extend(
        f"| {label} | {stats['candidates']} | {stats['played']} | {stats['won']} | "
        f"{stats['lost']} | {stats['profit_flat']:+.2f} | {stats['roi_flat_pct']:+.2f}% |"
        for label, stats in summary["v26_surprise_by_pick"].items()
    )

    out.extend(
        [
            "",
            "## Match Detail",
            "",
            "| Date | Match | HT | FT | v25 aggressive | v26 surprise | v26 aggressive | v26 main HTFT |",
            "|---|---|---|---|---|---|---|---|",
        ]
    )
    out.extend(
        f"| {_format_date(entry['match_date_ms'])} | {entry['match_name']} | {entry['ht_score']} | {entry['final_score']} | "
        f"{entry['v25_aggressive']} | {entry['v26_surprise']} | {entry['v26_aggressive']} | {entry['v26_main_htft']} |"
        for entry in report["matches"]
    )

    # Trailing empty entry so the joined text ends with a newline.
    out.append("")
    return "\n".join(out)


def _tally_pick(bucket: Dict[str, float], payload: Dict[str, Any]) -> None:
    """Add one evaluated pick to a candidate/played/won/lost/profit tally."""
    bucket["candidate"] += 1
    if not payload["counted_in_roi"]:
        return
    bucket["played"] += 1
    bucket["won" if payload["result"] == "WON" else "lost"] += 1
    bucket["profit"] += payload["profit_flat"]


def main() -> None:
    """Run the HT/FT + upset backtest and write JSON, CSV and markdown reports.

    For each fetched match the orchestrator is run once in ``v25`` mode and once
    in ``v26`` mode, the resulting picks are graded, and per-strategy tallies are
    accumulated. Matches for which the orchestrator has no data are skipped.
    Reports are written to ``reports/`` under the ai-engine directory.
    """
    parser = argparse.ArgumentParser(description="HT/FT + upset focused backtest.")
    parser.add_argument("--limit", type=int, default=120, help="Number of finished matches to analyze.")
    args = parser.parse_args()

    dsn = _resolve_dsn()
    orchestrator = SingleMatchOrchestrator()
    # Clamp so a zero or negative --limit still analyzes one match.
    matches = _fetch_matches(dsn, max(1, args.limit))

    strategy_buckets: Dict[str, Dict[str, float]] = {name: defaultdict(float) for name in STRATEGIES}
    v26_reversal_buckets: Dict[str, Dict[str, float]] = {label: defaultdict(float) for label in REVERSAL_LABELS}
    report_matches: list[Dict[str, Any]] = []
    csv_rows: list[Dict[str, Any]] = []

    for context in matches:
        data = orchestrator._load_match_data(context.match_id)  # noqa: SLF001
        if data is None:
            continue

        # Same orchestrator instance, switched between engine modes per match.
        orchestrator.engine_mode = "v25"
        v25 = orchestrator.analyze_match(context.match_id) or {}
        orchestrator.engine_mode = "v26"
        v26 = orchestrator.analyze_match(context.match_id) or {}

        extracted = _extract_strategy_rows(
            context=context,
            odds_data=data.odds_data or {},
            v25=v25,
            v26=v26,
        )

        match_row: Dict[str, Any] = {
            "match_id": context.match_id,
            "match_name": context.match_name,
            "league": context.league,
            "match_date_ms": context.match_date_ms,
            "ht_score": context.ht_score,
            "final_score": context.final_score,
        }

        for strategy, payload in extracted.items():
            if not payload:
                match_row[strategy] = "-"
                continue

            _tally_pick(strategy_buckets[strategy], payload)

            # The surprise strategy is additionally broken down by reversal
            # label; labels outside REVERSAL_LABELS are not tallied there.
            if strategy == "v26_surprise":
                reversal_label = str(payload.get("reversal_pick") or "")
                if reversal_label in v26_reversal_buckets:
                    _tally_pick(v26_reversal_buckets[reversal_label], payload)

            match_row[strategy] = (
                f"{payload['pick']} ({payload['result']}, {'played' if payload['counted_in_roi'] else 'not played'}, {payload['profit_flat']:+.2f})"
            )

            csv_rows.append(
                {
                    "match_id": context.match_id,
                    "date": _format_date(context.match_date_ms),
                    "league": context.league,
                    "match": context.match_name,
                    "ht_score": context.ht_score,
                    "final_score": context.final_score,
                    **payload,
                }
            )

        report_matches.append(match_row)

    report = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "sample_size": len(report_matches),
        "summary": {
            "strategies": {
                strategy: _summarize_bucket(bucket)
                for strategy, bucket in strategy_buckets.items()
            },
            "v26_surprise_by_pick": {
                label: _summarize_bucket(bucket)
                for label, bucket in v26_reversal_buckets.items()
            },
        },
        "matches": report_matches,
    }

    report_dir = AI_ENGINE_DIR / "reports"
    # The output folder may not exist on a fresh checkout; without this the
    # whole run would fail at the very end, after all matches were analyzed.
    report_dir.mkdir(parents=True, exist_ok=True)
    json_path = report_dir / "backtest_v26_shadow_htft_upset.json"
    csv_path = report_dir / "backtest_v26_shadow_htft_upset.csv"
    md_path = report_dir / "backtest_v26_shadow_htft_upset.md"

    json_path.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8")
    with csv_path.open("w", encoding="utf-8", newline="") as handle:
        writer = csv.DictWriter(
            handle,
            fieldnames=[
                "match_id",
                "date",
                "league",
                "match",
                "ht_score",
                "final_score",
                "strategy",
                "market",
                "pick",
                "odds",
                "playable",
                "confidence",
                "result",
                "counted_in_roi",
                "profit_flat",
                "resolution_note",
                "source",
                "reversal_pick",
                "reversal_prob",
                "favorite_gap",
                "favorite_odd",
                "support_score",
                "odds_band_score",
                "odds_band_label",
                "league_reversal_rate",
                "league_strict_rev_rate",
                "referee_strict_rev_rate",
                "surprise_score",
                "reason_codes",
                "pick_reason",
            ],
        )
        writer.writeheader()
        writer.writerows(csv_rows)
    md_path.write_text(_build_markdown(report), encoding="utf-8")

    print(f"[OK] JSON report written to {json_path}")
    print(f"[OK] CSV report written to {csv_path}")
    print(f"[OK] Markdown report written to {md_path}")


if __name__ == "__main__":
    main()