This commit is contained in:
2026-04-21 16:53:56 +03:00
parent 1346924387
commit 2ccd6831eb
26 changed files with 430403 additions and 3 deletions
@@ -0,0 +1,505 @@
from __future__ import annotations
import argparse
import csv
import json
import sys
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Optional
import psycopg2
from psycopg2.extras import RealDictCursor
AI_ENGINE_DIR = Path(__file__).resolve().parents[1]
if str(AI_ENGINE_DIR) not in sys.path:
sys.path.insert(0, str(AI_ENGINE_DIR))
from services.single_match_orchestrator import SingleMatchOrchestrator
STRATEGIES = ("v25_aggressive", "v26_surprise", "v26_aggressive", "v26_main_htft")
REVERSAL_LABELS = ("1/2", "2/1", "X/1", "X/2")
@dataclass
class MatchContext:
match_id: str
match_date_ms: int
league: str
home_team: str
away_team: str
final_home: int
final_away: int
ht_home: Optional[int]
ht_away: Optional[int]
@property
def match_name(self) -> str:
return f"{self.home_team} vs {self.away_team}"
@property
def final_score(self) -> str:
return f"{self.final_home}-{self.final_away}"
@property
def ht_score(self) -> str:
if self.ht_home is None or self.ht_away is None:
return "-"
return f"{self.ht_home}-{self.ht_away}"
def _resolve_dsn() -> str:
env_path = AI_ENGINE_DIR / ".env"
if env_path.exists():
for line in env_path.read_text(encoding="utf-8").splitlines():
if line.startswith("DATABASE_URL="):
return line.split("=", 1)[1].strip().split("?schema=")[0]
raise SystemExit("DATABASE_URL not found in ai-engine/.env")
def _fetch_matches(dsn: str, limit: int) -> list[MatchContext]:
query = """
SELECT
m.id,
m.mst_utc,
COALESCE(l.name, 'Unknown League') AS league,
COALESCE(ht.name, 'Home') AS home_team,
COALESCE(at.name, 'Away') AS away_team,
COALESCE(m.score_home, 0) AS score_home,
COALESCE(m.score_away, 0) AS score_away,
m.ht_score_home,
m.ht_score_away
FROM matches m
LEFT JOIN leagues l ON l.id = m.league_id
LEFT JOIN teams ht ON ht.id = m.home_team_id
LEFT JOIN teams at ON at.id = m.away_team_id
WHERE m.status = 'FT'
AND m.sport = 'football'
AND m.score_home IS NOT NULL
AND m.score_away IS NOT NULL
AND m.ht_score_home IS NOT NULL
AND m.ht_score_away IS NOT NULL
ORDER BY m.mst_utc DESC
LIMIT %s
"""
with psycopg2.connect(dsn) as conn:
with conn.cursor(cursor_factory=RealDictCursor) as cur:
cur.execute(query, (limit,))
rows = cur.fetchall()
return [
MatchContext(
match_id=str(row["id"]),
match_date_ms=int(row["mst_utc"] or 0),
league=str(row["league"] or "Unknown League"),
home_team=str(row["home_team"] or "Home"),
away_team=str(row["away_team"] or "Away"),
final_home=int(row["score_home"] or 0),
final_away=int(row["score_away"] or 0),
ht_home=int(row["ht_score_home"]) if row.get("ht_score_home") is not None else None,
ht_away=int(row["ht_score_away"]) if row.get("ht_score_away") is not None else None,
)
for row in rows
]
def _safe_float(value: Any) -> float:
try:
return float(value)
except (TypeError, ValueError):
return 0.0
def _outcome_symbol(home: int, away: int) -> str:
if home > away:
return "1"
if home < away:
return "2"
return "X"
def _resolve_htft(pick: str, context: MatchContext) -> Dict[str, Any]:
if not pick or "/" not in str(pick):
return {"result": "UNRESOLVED", "won": None, "note": "htft_pick_invalid"}
actual = f"{_outcome_symbol(context.ht_home or 0, context.ht_away or 0)}/{_outcome_symbol(context.final_home, context.final_away)}"
won = str(pick).strip().upper() == actual
return {"result": "WON" if won else "LOST", "won": won, "note": f"actual={actual}"}
def _market_odds(odds: Dict[str, Any], market: str, pick: str) -> float:
mapping = {
"HTFT": {
"1/1": "htft_11",
"1/X": "htft_1x",
"1/2": "htft_12",
"X/1": "htft_x1",
"X/X": "htft_xx",
"X/2": "htft_x2",
"2/1": "htft_21",
"2/X": "htft_2x",
"2/2": "htft_22",
},
"MS": {"1": "ms_h", "X": "ms_d", "2": "ms_a"},
}
key = mapping.get(market, {}).get(str(pick))
if not key:
return 0.0
value = _safe_float((odds or {}).get(key))
return value if value > 1.0 else 0.0
def _evaluate_pick(
*,
strategy: str,
market: str,
pick: str,
odds: Any,
playable: bool,
confidence: Any,
extra: Optional[Dict[str, Any]],
context: MatchContext,
) -> Dict[str, Any]:
odds_value = _safe_float(odds)
if market == "HT/FT":
market = "HTFT"
resolution = _resolve_htft(pick, context) if market == "HTFT" else {
"result": "UNRESOLVED",
"won": None,
"note": "non_htft_market",
}
counted = bool(playable and market == "HTFT" and odds_value > 1.01 and resolution["result"] in {"WON", "LOST"})
profit = 0.0
if counted:
profit = (odds_value - 1.0) if resolution["result"] == "WON" else -1.0
row = {
"strategy": strategy,
"market": market,
"pick": pick,
"odds": round(odds_value, 2),
"playable": playable,
"confidence": round(_safe_float(confidence), 1),
"result": resolution["result"],
"counted_in_roi": counted,
"profit_flat": round(profit, 4),
"resolution_note": resolution["note"],
}
if extra:
row.update(extra)
return row
def _extract_strategy_rows(
*,
context: MatchContext,
odds_data: Dict[str, Any],
v25: Dict[str, Any],
v26: Dict[str, Any],
) -> Dict[str, Optional[Dict[str, Any]]]:
strategies: Dict[str, Optional[Dict[str, Any]]] = {name: None for name in STRATEGIES}
v25_aggressive = v25.get("aggressive_pick") or {}
if v25_aggressive.get("pick"):
pick = str(v25_aggressive.get("pick"))
strategies["v25_aggressive"] = _evaluate_pick(
strategy="v25_aggressive",
market=str(v25_aggressive.get("market") or "HTFT"),
pick=pick,
odds=_market_odds(odds_data, "HTFT", pick),
playable=True,
confidence=v25_aggressive.get("confidence"),
extra={
"source": "v25.aggressive_pick",
"reversal_pick": pick,
},
context=context,
)
v26_surprise = v26.get("surprise_pick") or {}
v26_hunter = v26.get("surprise_hunter") or {}
if v26_surprise.get("pick"):
pick = str(v26_surprise.get("raw_pick") or v26_surprise.get("pick"))
strategies["v26_surprise"] = _evaluate_pick(
strategy="v26_surprise",
market=str(v26_surprise.get("market") or "HTFT"),
pick=pick,
odds=v26_surprise.get("odds") or _market_odds(odds_data, "HTFT", pick),
playable=bool(v26_surprise.get("playable")),
confidence=v26_surprise.get("calibrated_confidence", v26_surprise.get("confidence")),
extra={
"source": "v26.surprise_pick",
"surprise_score": round(_safe_float(v26_surprise.get("surprise_score")), 1),
"support_score": round(_safe_float(v26_surprise.get("support_score")), 1),
"reversal_pick": v26_hunter.get("reversal_pick"),
"reversal_prob": round(_safe_float(v26_hunter.get("reversal_prob")), 4),
"favorite_gap": round(_safe_float(v26_hunter.get("favorite_gap")), 3),
"favorite_odd": round(_safe_float(v26_hunter.get("favorite_odd")), 2),
"odds_band_score": round(_safe_float(v26_hunter.get("odds_band_score")), 3),
"odds_band_label": str(v26_hunter.get("odds_band_label") or ""),
"league_reversal_rate": round(_safe_float(v26_hunter.get("league_reversal_rate")), 4),
"league_strict_rev_rate": round(_safe_float(v26_hunter.get("league_strict_rev_rate")), 4),
"referee_strict_rev_rate": round(_safe_float(v26_hunter.get("referee_strict_rev_rate")), 4),
"reason_codes": ",".join(v26_hunter.get("reason_codes", [])),
},
context=context,
)
v26_aggressive = v26.get("aggressive_pick") or {}
if v26_aggressive.get("pick"):
pick = str(v26_aggressive.get("pick"))
strategies["v26_aggressive"] = _evaluate_pick(
strategy="v26_aggressive",
market=str(v26_aggressive.get("market") or "HTFT"),
pick=pick,
odds=v26_aggressive.get("odds") or _market_odds(odds_data, "HTFT", pick),
playable=True,
confidence=v26_aggressive.get("confidence"),
extra={
"source": "v26.aggressive_pick",
"reversal_pick": pick,
},
context=context,
)
v26_main = v26.get("main_pick") or {}
if str(v26_main.get("market") or "") == "HTFT" and v26_main.get("pick"):
pick = str(v26_main.get("raw_pick") or v26_main.get("pick"))
strategies["v26_main_htft"] = _evaluate_pick(
strategy="v26_main_htft",
market="HTFT",
pick=pick,
odds=v26_main.get("odds") or _market_odds(odds_data, "HTFT", pick),
playable=bool(v26_main.get("playable")),
confidence=v26_main.get("calibrated_confidence", v26_main.get("confidence")),
extra={
"source": "v26.main_pick",
"pick_reason": v26_main.get("pick_reason"),
"surprise_score": round(_safe_float(v26_main.get("surprise_score")), 1),
},
context=context,
)
return strategies
def _summarize_bucket(bucket: Dict[str, float]) -> Dict[str, Any]:
played = int(bucket["played"])
won = int(bucket["won"])
lost = int(bucket["lost"])
candidate = int(bucket["candidate"])
profit = round(bucket["profit"], 4)
roi = round((profit / played) * 100.0, 2) if played else 0.0
hit = round((won / played) * 100.0, 2) if played else 0.0
return {
"candidates": candidate,
"played": played,
"won": won,
"lost": lost,
"profit_flat": profit,
"roi_flat_pct": roi,
"hit_rate_pct": hit,
}
def _format_date(ms: int) -> str:
return datetime.fromtimestamp(ms / 1000, tz=timezone.utc).strftime("%Y-%m-%d")
def _build_markdown(report: Dict[str, Any]) -> str:
lines: list[str] = []
lines.append("# HT/FT + Upset Backtest")
lines.append("")
lines.append(f"- Sample: last {report['sample_size']} finished football matches")
lines.append("- Scope: only HT/FT reversal and upset-oriented picks")
lines.append("- ROI: flat `1 unit` per played pick")
lines.append(f"- Generated at: {report['generated_at']}")
lines.append("")
lines.append("## Strategy Summary")
lines.append("")
lines.append("| Strategy | Candidates | Played | Won | Lost | Hit Rate | Profit | ROI |")
lines.append("|---|---:|---:|---:|---:|---:|---:|---:|")
for strategy in STRATEGIES:
payload = report["summary"]["strategies"][strategy]
lines.append(
f"| {strategy} | {payload['candidates']} | {payload['played']} | {payload['won']} | "
f"{payload['lost']} | {payload['hit_rate_pct']}% | {payload['profit_flat']:+.2f} | {payload['roi_flat_pct']:+.2f}% |"
)
lines.append("")
lines.append("## v26 Surprise By Reversal Type")
lines.append("")
lines.append("| Reversal | Candidates | Played | Won | Lost | Profit | ROI |")
lines.append("|---|---:|---:|---:|---:|---:|---:|")
for reversal, payload in report["summary"]["v26_surprise_by_pick"].items():
lines.append(
f"| {reversal} | {payload['candidates']} | {payload['played']} | {payload['won']} | "
f"{payload['lost']} | {payload['profit_flat']:+.2f} | {payload['roi_flat_pct']:+.2f}% |"
)
lines.append("")
lines.append("## Match Detail")
lines.append("")
lines.append("| Date | Match | HT | FT | v25 aggressive | v26 surprise | v26 aggressive | v26 main HTFT |")
lines.append("|---|---|---|---|---|---|---|---|")
for match in report["matches"]:
lines.append(
f"| {_format_date(match['match_date_ms'])} | {match['match_name']} | {match['ht_score']} | {match['final_score']} | "
f"{match['v25_aggressive']} | {match['v26_surprise']} | {match['v26_aggressive']} | {match['v26_main_htft']} |"
)
lines.append("")
return "\n".join(lines)
def main() -> None:
parser = argparse.ArgumentParser(description="HT/FT + upset focused backtest.")
parser.add_argument("--limit", type=int, default=120, help="Number of finished matches to analyze.")
args = parser.parse_args()
dsn = _resolve_dsn()
orchestrator = SingleMatchOrchestrator()
matches = _fetch_matches(dsn, max(1, args.limit))
strategy_buckets: Dict[str, Dict[str, float]] = {name: defaultdict(float) for name in STRATEGIES}
v26_reversal_buckets: Dict[str, Dict[str, float]] = {label: defaultdict(float) for label in REVERSAL_LABELS}
report_matches: list[Dict[str, Any]] = []
csv_rows: list[Dict[str, Any]] = []
for context in matches:
data = orchestrator._load_match_data(context.match_id) # noqa: SLF001
if data is None:
continue
orchestrator.engine_mode = "v25"
v25 = orchestrator.analyze_match(context.match_id) or {}
orchestrator.engine_mode = "v26"
v26 = orchestrator.analyze_match(context.match_id) or {}
extracted = _extract_strategy_rows(
context=context,
odds_data=data.odds_data or {},
v25=v25,
v26=v26,
)
match_row: Dict[str, Any] = {
"match_id": context.match_id,
"match_name": context.match_name,
"league": context.league,
"match_date_ms": context.match_date_ms,
"ht_score": context.ht_score,
"final_score": context.final_score,
}
for strategy, payload in extracted.items():
if payload:
strategy_buckets[strategy]["candidate"] += 1
if payload["counted_in_roi"]:
strategy_buckets[strategy]["played"] += 1
if payload["result"] == "WON":
strategy_buckets[strategy]["won"] += 1
else:
strategy_buckets[strategy]["lost"] += 1
strategy_buckets[strategy]["profit"] += payload["profit_flat"]
if strategy == "v26_surprise":
reversal_label = str(payload.get("reversal_pick") or "")
if reversal_label in v26_reversal_buckets:
v26_reversal_buckets[reversal_label]["candidate"] += 1
if payload["counted_in_roi"]:
v26_reversal_buckets[reversal_label]["played"] += 1
if payload["result"] == "WON":
v26_reversal_buckets[reversal_label]["won"] += 1
else:
v26_reversal_buckets[reversal_label]["lost"] += 1
v26_reversal_buckets[reversal_label]["profit"] += payload["profit_flat"]
summary = (
f"{payload['pick']} ({payload['result']}, {'played' if payload['counted_in_roi'] else 'not played'}, {payload['profit_flat']:+.2f})"
)
match_row[strategy] = summary
csv_rows.append(
{
"match_id": context.match_id,
"date": _format_date(context.match_date_ms),
"league": context.league,
"match": context.match_name,
"ht_score": context.ht_score,
"final_score": context.final_score,
**payload,
}
)
else:
match_row[strategy] = "-"
report_matches.append(match_row)
report = {
"generated_at": datetime.now(timezone.utc).isoformat(),
"sample_size": len(report_matches),
"summary": {
"strategies": {
strategy: _summarize_bucket(bucket)
for strategy, bucket in strategy_buckets.items()
},
"v26_surprise_by_pick": {
label: _summarize_bucket(bucket)
for label, bucket in v26_reversal_buckets.items()
},
},
"matches": report_matches,
}
report_dir = AI_ENGINE_DIR / "reports"
json_path = report_dir / "backtest_v26_shadow_htft_upset.json"
csv_path = report_dir / "backtest_v26_shadow_htft_upset.csv"
md_path = report_dir / "backtest_v26_shadow_htft_upset.md"
json_path.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8")
with csv_path.open("w", encoding="utf-8", newline="") as handle:
writer = csv.DictWriter(
handle,
fieldnames=[
"match_id",
"date",
"league",
"match",
"ht_score",
"final_score",
"strategy",
"market",
"pick",
"odds",
"playable",
"confidence",
"result",
"counted_in_roi",
"profit_flat",
"resolution_note",
"source",
"reversal_pick",
"reversal_prob",
"favorite_gap",
"favorite_odd",
"support_score",
"odds_band_score",
"odds_band_label",
"league_reversal_rate",
"league_strict_rev_rate",
"referee_strict_rev_rate",
"surprise_score",
"reason_codes",
"pick_reason",
],
)
writer.writeheader()
writer.writerows(csv_rows)
md_path.write_text(_build_markdown(report), encoding="utf-8")
print(f"[OK] JSON report written to {json_path}")
print(f"[OK] CSV report written to {csv_path}")
print(f"[OK] Markdown report written to {md_path}")
if __name__ == "__main__":
main()