gg
This commit is contained in:
@@ -0,0 +1,94 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
|
||||
AI_ENGINE_DIR = Path(__file__).resolve().parents[1]
|
||||
if str(AI_ENGINE_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(AI_ENGINE_DIR))
|
||||
|
||||
from services.single_match_orchestrator import SingleMatchOrchestrator
|
||||
|
||||
|
||||
def _resolve_dsn() -> str:
    """Return the database DSN configured in ``ai-engine/.env``.

    The first ``DATABASE_URL=`` entry wins. Optional surrounding quotes are
    removed and the value is cut at ``?schema=``, so the schema suffix (and
    anything after it) is not part of the returned DSN.

    Raises:
        SystemExit: if the ``.env`` file is missing or has no ``DATABASE_URL``.
    """
    env_path = AI_ENGINE_DIR / ".env"
    if env_path.exists():
        for raw_line in env_path.read_text(encoding="utf-8").splitlines():
            line = raw_line.strip()
            if not line.startswith("DATABASE_URL="):
                continue
            value = line.split("=", 1)[1].strip()
            # .env values are frequently quoted; without this the quote
            # character would become part of the DSN.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1].strip()
            return value.split("?schema=")[0]
    raise SystemExit("DATABASE_URL not found in ai-engine/.env")
|
||||
|
||||
|
||||
def _fetch_matches(dsn: str, limit: int = 60) -> list[str]:
    """Return the ids of the most recent finished football matches.

    Args:
        dsn: connection string passed to ``psycopg2.connect``.
        limit: maximum number of match ids to return.

    Returns:
        Match ids as strings, ordered by ``mst_utc`` descending. Only rows with
        status ``FT``, sport ``football`` and both final scores present qualify.
    """
    query = """
        SELECT m.id
        FROM matches m
        WHERE m.status = 'FT'
          AND m.sport = 'football'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
        ORDER BY m.mst_utc DESC
        LIMIT %s
    """
    conn = psycopg2.connect(dsn)
    try:
        # ``with conn`` only wraps a transaction (commit/rollback); it does not
        # close the connection, so that is done explicitly in ``finally``.
        with conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(query, (limit,))
            return [str(row["id"]) for row in cur.fetchall()]
    finally:
        conn.close()
|
||||
|
||||
|
||||
def _score_prediction(package: dict) -> dict[str, float]:
|
||||
rows = package.get("bet_summary", []) or []
|
||||
playable = [row for row in rows if row.get("playable")]
|
||||
return {
|
||||
"playable_count": float(len(playable)),
|
||||
"avg_edge": round(
|
||||
sum(float(row.get("ev_edge", 0.0)) for row in playable) / len(playable),
|
||||
4,
|
||||
)
|
||||
if playable
|
||||
else 0.0,
|
||||
"avg_confidence": round(
|
||||
sum(float(row.get("calibrated_confidence", 0.0)) for row in playable)
|
||||
/ len(playable),
|
||||
2,
|
||||
)
|
||||
if playable
|
||||
else 0.0,
|
||||
}
|
||||
|
||||
|
||||
def main() -> None:
    """Run every fetched match through the v25 and v26 engines and dump a JSON summary.

    For each match id the shared orchestrator is switched to ``v25`` and then
    ``v26``; matches for which either engine returns a falsy result are
    skipped. The collected rows are written to
    ``reports/backtest_v26_shadow.json`` under ``AI_ENGINE_DIR``.
    """
    dsn = _resolve_dsn()
    match_ids = _fetch_matches(dsn)
    orchestrator = SingleMatchOrchestrator()

    results: list[dict[str, object]] = []
    for match_id in match_ids:
        orchestrator.engine_mode = "v25"
        v25 = orchestrator.analyze_match(match_id)
        orchestrator.engine_mode = "v26"
        v26 = orchestrator.analyze_match(match_id)
        if not v25 or not v26:
            continue
        results.append(
            {
                "match_id": match_id,
                "v25": _score_prediction(v25),
                "v26": _score_prediction(v26),
                "v25_main": (v25.get("main_pick") or {}).get("pick"),
                "v26_main": (v26.get("main_pick") or {}).get("pick"),
            }
        )

    out_path = AI_ENGINE_DIR / "reports" / "backtest_v26_shadow.json"
    # A fresh checkout may not have the reports directory yet.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(results, indent=2), encoding="utf-8")
    print(f"[OK] Shadow backtest summary written to {out_path}")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,505 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
|
||||
AI_ENGINE_DIR = Path(__file__).resolve().parents[1]
|
||||
if str(AI_ENGINE_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(AI_ENGINE_DIR))
|
||||
|
||||
from services.single_match_orchestrator import SingleMatchOrchestrator
|
||||
|
||||
|
||||
# Strategy keys evaluated for every match; iterated when building the
# per-strategy buckets and the summary table.
STRATEGIES = (
    "v25_aggressive",
    "v26_surprise",
    "v26_aggressive",
    "v26_main_htft",
)
# HT/FT labels that get their own bucket in the v26 surprise breakdown.
REVERSAL_LABELS = (
    "1/2",
    "2/1",
    "X/1",
    "X/2",
)
|
||||
|
||||
|
||||
@dataclass
class MatchContext:
    """One finished match: identifiers, team names and half-time/full-time scores."""

    match_id: str
    match_date_ms: int  # kick-off timestamp; presumably epoch milliseconds - confirm
    league: str
    home_team: str
    away_team: str
    final_home: int
    final_away: int
    ht_home: Optional[int]  # None when the half-time score is unknown
    ht_away: Optional[int]

    @property
    def match_name(self) -> str:
        """Human-readable fixture label, ``"<home> vs <away>"``."""
        return "{} vs {}".format(self.home_team, self.away_team)

    @property
    def final_score(self) -> str:
        """Full-time score formatted as ``"<home>-<away>"``."""
        return "{}-{}".format(self.final_home, self.final_away)

    @property
    def ht_score(self) -> str:
        """Half-time score as ``"<home>-<away>"``, or ``"-"`` if either side is missing."""
        known = self.ht_home is not None and self.ht_away is not None
        return "{}-{}".format(self.ht_home, self.ht_away) if known else "-"
|
||||
|
||||
|
||||
def _resolve_dsn() -> str:
    """Return the database DSN configured in ``ai-engine/.env``.

    The first ``DATABASE_URL=`` entry wins. Optional surrounding quotes are
    removed and the value is cut at ``?schema=``, so the schema suffix (and
    anything after it) is not part of the returned DSN.

    Raises:
        SystemExit: if the ``.env`` file is missing or has no ``DATABASE_URL``.
    """
    env_path = AI_ENGINE_DIR / ".env"
    if env_path.exists():
        for raw_line in env_path.read_text(encoding="utf-8").splitlines():
            line = raw_line.strip()
            if not line.startswith("DATABASE_URL="):
                continue
            value = line.split("=", 1)[1].strip()
            # .env values are frequently quoted; without this the quote
            # character would become part of the DSN.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1].strip()
            return value.split("?schema=")[0]
    raise SystemExit("DATABASE_URL not found in ai-engine/.env")
|
||||
|
||||
|
||||
def _fetch_matches(dsn: str, limit: int) -> list[MatchContext]:
    """Load the most recent finished football matches that have a half-time score.

    Args:
        dsn: connection string passed to ``psycopg2.connect``.
        limit: maximum number of matches to return.

    Returns:
        ``MatchContext`` objects ordered by ``mst_utc`` descending. Missing
        league/team names fall back to ``Unknown League`` / ``Home`` / ``Away``.
    """
    query = """
        SELECT
            m.id,
            m.mst_utc,
            COALESCE(l.name, 'Unknown League') AS league,
            COALESCE(ht.name, 'Home') AS home_team,
            COALESCE(at.name, 'Away') AS away_team,
            COALESCE(m.score_home, 0) AS score_home,
            COALESCE(m.score_away, 0) AS score_away,
            m.ht_score_home,
            m.ht_score_away
        FROM matches m
        LEFT JOIN leagues l ON l.id = m.league_id
        LEFT JOIN teams ht ON ht.id = m.home_team_id
        LEFT JOIN teams at ON at.id = m.away_team_id
        WHERE m.status = 'FT'
          AND m.sport = 'football'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
          AND m.ht_score_home IS NOT NULL
          AND m.ht_score_away IS NOT NULL
        ORDER BY m.mst_utc DESC
        LIMIT %s
    """
    conn = psycopg2.connect(dsn)
    try:
        # ``with conn`` only wraps a transaction (commit/rollback); it does not
        # close the connection, so that is done explicitly in ``finally``.
        with conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(query, (limit,))
            rows = cur.fetchall()
    finally:
        conn.close()

    def _optional_int(value: Any) -> Optional[int]:
        return int(value) if value is not None else None

    return [
        MatchContext(
            match_id=str(row["id"]),
            match_date_ms=int(row["mst_utc"] or 0),
            league=str(row["league"] or "Unknown League"),
            home_team=str(row["home_team"] or "Home"),
            away_team=str(row["away_team"] or "Away"),
            final_home=int(row["score_home"] or 0),
            final_away=int(row["score_away"] or 0),
            ht_home=_optional_int(row.get("ht_score_home")),
            ht_away=_optional_int(row.get("ht_score_away")),
        )
        for row in rows
    ]
|
||||
|
||||
|
||||
def _safe_float(value: Any) -> float:
|
||||
try:
|
||||
return float(value)
|
||||
except (TypeError, ValueError):
|
||||
return 0.0
|
||||
|
||||
|
||||
def _outcome_symbol(home: int, away: int) -> str:
|
||||
if home > away:
|
||||
return "1"
|
||||
if home < away:
|
||||
return "2"
|
||||
return "X"
|
||||
|
||||
|
||||
def _resolve_htft(pick: str, context: MatchContext) -> Dict[str, Any]:
    """Grade an HT/FT pick such as ``"X/1"`` against the match's actual scores.

    Returns:
        A dict with ``result`` (``WON``, ``LOST`` or ``UNRESOLVED``), ``won``
        (bool, or ``None`` when unresolved) and ``note``. The note is
        ``actual=<ht>/<ft>`` for graded picks, ``htft_pick_invalid`` when the
        pick is empty or has no ``/``, and ``ht_score_missing`` when the
        half-time score is unknown.
    """
    if not pick or "/" not in str(pick):
        return {"result": "UNRESOLVED", "won": None, "note": "htft_pick_invalid"}
    # Without a half-time score the pick cannot be graded; treating the missing
    # score as 0-0 would record a fabricated half-time draw.
    if context.ht_home is None or context.ht_away is None:
        return {"result": "UNRESOLVED", "won": None, "note": "ht_score_missing"}
    half_time = _outcome_symbol(context.ht_home, context.ht_away)
    full_time = _outcome_symbol(context.final_home, context.final_away)
    actual = f"{half_time}/{full_time}"
    won = str(pick).strip().upper() == actual
    return {"result": "WON" if won else "LOST", "won": won, "note": f"actual={actual}"}
|
||||
|
||||
|
||||
def _market_odds(odds: Dict[str, Any], market: str, pick: str) -> float:
    """Look up the price for ``pick`` in ``market`` from a flat odds mapping.

    Args:
        odds: mapping keyed by names such as ``htft_x1`` or ``ms_h``; ``None``
            is treated as empty.
        market: ``"HTFT"`` or ``"MS"``; any other market yields 0.0.
        pick: selection label, matched after trimming and upper-casing so it
            agrees with how HT/FT picks are graded.

    Returns:
        The price as a float, or 0.0 when the selection is unknown, missing or
        not greater than 1.0.
    """
    odds_keys = {
        "HTFT": {
            "1/1": "htft_11",
            "1/X": "htft_1x",
            "1/2": "htft_12",
            "X/1": "htft_x1",
            "X/X": "htft_xx",
            "X/2": "htft_x2",
            "2/1": "htft_21",
            "2/X": "htft_2x",
            "2/2": "htft_22",
        },
        "MS": {"1": "ms_h", "X": "ms_d", "2": "ms_a"},
    }
    key = odds_keys.get(market, {}).get(str(pick).strip().upper())
    if not key:
        return 0.0
    price = _safe_float((odds or {}).get(key))
    return price if price > 1.0 else 0.0
|
||||
|
||||
|
||||
def _evaluate_pick(
    *,
    strategy: str,
    market: str,
    pick: str,
    odds: Any,
    playable: bool,
    confidence: Any,
    extra: Optional[Dict[str, Any]],
    context: MatchContext,
) -> Dict[str, Any]:
    """Grade one pick and build its report row.

    Only HT/FT picks are graded (``HT/FT`` is accepted as an alias of
    ``HTFT``); any other market is marked ``UNRESOLVED`` with the note
    ``non_htft_market``. A pick counts towards ROI when it is playable, is an
    HT/FT pick, has odds above 1.01 and was graded WON or LOST. Flat-stake
    profit is ``odds - 1`` for a win and ``-1`` for a loss.

    Returns:
        The row dict, with the entries of ``extra`` merged on top when given.
    """
    price = _safe_float(odds)
    if market == "HT/FT":
        market = "HTFT"

    if market == "HTFT":
        resolution = _resolve_htft(pick, context)
    else:
        resolution = {
            "result": "UNRESOLVED",
            "won": None,
            "note": "non_htft_market",
        }
    outcome = resolution["result"]

    counted = bool(
        playable
        and market == "HTFT"
        and price > 1.01
        and outcome in {"WON", "LOST"}
    )
    if not counted:
        profit = 0.0
    elif outcome == "WON":
        profit = price - 1.0
    else:
        profit = -1.0

    row = {
        "strategy": strategy,
        "market": market,
        "pick": pick,
        "odds": round(price, 2),
        "playable": playable,
        "confidence": round(_safe_float(confidence), 1),
        "result": outcome,
        "counted_in_roi": counted,
        "profit_flat": round(profit, 4),
        "resolution_note": resolution["note"],
    }
    if extra:
        row.update(extra)
    return row
|
||||
|
||||
|
||||
def _extract_strategy_rows(
    *,
    context: MatchContext,
    odds_data: Dict[str, Any],
    v25: Dict[str, Any],
    v26: Dict[str, Any],
) -> Dict[str, Optional[Dict[str, Any]]]:
    """Build one evaluated row per strategy from the v25 and v26 engine outputs.

    Args:
        context: the match being graded.
        odds_data: flat odds mapping used as a fallback price source.
        v25: v25 engine result; only ``aggressive_pick`` is read.
        v26: v26 engine result; ``surprise_pick``, ``surprise_hunter``,
            ``aggressive_pick`` and ``main_pick`` are read.

    Returns:
        A dict keyed by every name in ``STRATEGIES``. The value is the row
        produced by ``_evaluate_pick`` or ``None`` when the engine offered no
        pick for that strategy. ``v26_main_htft`` is only filled when the main
        pick's market is exactly ``HTFT``.
    """

    def _rounded(value: Any, digits: int) -> float:
        return round(_safe_float(value), digits)

    strategies: Dict[str, Optional[Dict[str, Any]]] = {name: None for name in STRATEGIES}

    v25_aggressive = v25.get("aggressive_pick") or {}
    if v25_aggressive.get("pick"):
        pick = str(v25_aggressive.get("pick"))
        strategies["v25_aggressive"] = _evaluate_pick(
            strategy="v25_aggressive",
            market=str(v25_aggressive.get("market") or "HTFT"),
            pick=pick,
            odds=_market_odds(odds_data, "HTFT", pick),
            playable=True,
            confidence=v25_aggressive.get("confidence"),
            extra={
                "source": "v25.aggressive_pick",
                "reversal_pick": pick,
            },
            context=context,
        )

    v26_surprise = v26.get("surprise_pick") or {}
    v26_hunter = v26.get("surprise_hunter") or {}
    if v26_surprise.get("pick"):
        pick = str(v26_surprise.get("raw_pick") or v26_surprise.get("pick"))
        # reason_codes may be absent, None or contain non-string entries;
        # normalise before joining so a single odd payload cannot abort the run.
        reason_codes = ",".join(str(code) for code in (v26_hunter.get("reason_codes") or []))
        strategies["v26_surprise"] = _evaluate_pick(
            strategy="v26_surprise",
            market=str(v26_surprise.get("market") or "HTFT"),
            pick=pick,
            odds=v26_surprise.get("odds") or _market_odds(odds_data, "HTFT", pick),
            playable=bool(v26_surprise.get("playable")),
            confidence=v26_surprise.get("calibrated_confidence", v26_surprise.get("confidence")),
            extra={
                "source": "v26.surprise_pick",
                "surprise_score": _rounded(v26_surprise.get("surprise_score"), 1),
                "support_score": _rounded(v26_surprise.get("support_score"), 1),
                "reversal_pick": v26_hunter.get("reversal_pick"),
                "reversal_prob": _rounded(v26_hunter.get("reversal_prob"), 4),
                "favorite_gap": _rounded(v26_hunter.get("favorite_gap"), 3),
                "favorite_odd": _rounded(v26_hunter.get("favorite_odd"), 2),
                "odds_band_score": _rounded(v26_hunter.get("odds_band_score"), 3),
                "odds_band_label": str(v26_hunter.get("odds_band_label") or ""),
                "league_reversal_rate": _rounded(v26_hunter.get("league_reversal_rate"), 4),
                "league_strict_rev_rate": _rounded(v26_hunter.get("league_strict_rev_rate"), 4),
                "referee_strict_rev_rate": _rounded(v26_hunter.get("referee_strict_rev_rate"), 4),
                "reason_codes": reason_codes,
            },
            context=context,
        )

    v26_aggressive = v26.get("aggressive_pick") or {}
    if v26_aggressive.get("pick"):
        pick = str(v26_aggressive.get("pick"))
        strategies["v26_aggressive"] = _evaluate_pick(
            strategy="v26_aggressive",
            market=str(v26_aggressive.get("market") or "HTFT"),
            pick=pick,
            odds=v26_aggressive.get("odds") or _market_odds(odds_data, "HTFT", pick),
            playable=True,
            confidence=v26_aggressive.get("confidence"),
            extra={
                "source": "v26.aggressive_pick",
                "reversal_pick": pick,
            },
            context=context,
        )

    v26_main = v26.get("main_pick") or {}
    if str(v26_main.get("market") or "") == "HTFT" and v26_main.get("pick"):
        pick = str(v26_main.get("raw_pick") or v26_main.get("pick"))
        strategies["v26_main_htft"] = _evaluate_pick(
            strategy="v26_main_htft",
            market="HTFT",
            pick=pick,
            odds=v26_main.get("odds") or _market_odds(odds_data, "HTFT", pick),
            playable=bool(v26_main.get("playable")),
            confidence=v26_main.get("calibrated_confidence", v26_main.get("confidence")),
            extra={
                "source": "v26.main_pick",
                "pick_reason": v26_main.get("pick_reason"),
                "surprise_score": _rounded(v26_main.get("surprise_score"), 1),
            },
            context=context,
        )

    return strategies
|
||||
|
||||
|
||||
def _summarize_bucket(bucket: Dict[str, float]) -> Dict[str, Any]:
|
||||
played = int(bucket["played"])
|
||||
won = int(bucket["won"])
|
||||
lost = int(bucket["lost"])
|
||||
candidate = int(bucket["candidate"])
|
||||
profit = round(bucket["profit"], 4)
|
||||
roi = round((profit / played) * 100.0, 2) if played else 0.0
|
||||
hit = round((won / played) * 100.0, 2) if played else 0.0
|
||||
return {
|
||||
"candidates": candidate,
|
||||
"played": played,
|
||||
"won": won,
|
||||
"lost": lost,
|
||||
"profit_flat": profit,
|
||||
"roi_flat_pct": roi,
|
||||
"hit_rate_pct": hit,
|
||||
}
|
||||
|
||||
|
||||
def _format_date(ms: int) -> str:
|
||||
return datetime.fromtimestamp(ms / 1000, tz=timezone.utc).strftime("%Y-%m-%d")
|
||||
|
||||
|
||||
def _build_markdown(report: Dict[str, Any]) -> str:
    """Render the backtest report as a Markdown document.

    The document has a header, a per-strategy summary table (rows follow
    ``STRATEGIES``), a table of v26 surprise picks by reversal label, and a
    per-match detail table. Sections are separated by blank lines and the
    text ends with a trailing newline.
    """
    out: list[str] = [
        "# HT/FT + Upset Backtest",
        "",
        f"- Sample: last {report['sample_size']} finished football matches",
        "- Scope: only HT/FT reversal and upset-oriented picks",
        "- ROI: flat `1 unit` per played pick",
        f"- Generated at: {report['generated_at']}",
        "",
        "## Strategy Summary",
        "",
        "| Strategy | Candidates | Played | Won | Lost | Hit Rate | Profit | ROI |",
        "|---|---:|---:|---:|---:|---:|---:|---:|",
    ]
    for name in STRATEGIES:
        stats = report["summary"]["strategies"][name]
        out.append(
            f"| {name} | {stats['candidates']} | {stats['played']} | {stats['won']} | "
            f"{stats['lost']} | {stats['hit_rate_pct']}% | {stats['profit_flat']:+.2f} | {stats['roi_flat_pct']:+.2f}% |"
        )

    out += [
        "",
        "## v26 Surprise By Reversal Type",
        "",
        "| Reversal | Candidates | Played | Won | Lost | Profit | ROI |",
        "|---|---:|---:|---:|---:|---:|---:|",
    ]
    for label, stats in report["summary"]["v26_surprise_by_pick"].items():
        out.append(
            f"| {label} | {stats['candidates']} | {stats['played']} | {stats['won']} | "
            f"{stats['lost']} | {stats['profit_flat']:+.2f} | {stats['roi_flat_pct']:+.2f}% |"
        )

    out += [
        "",
        "## Match Detail",
        "",
        "| Date | Match | HT | FT | v25 aggressive | v26 surprise | v26 aggressive | v26 main HTFT |",
        "|---|---|---|---|---|---|---|---|",
    ]
    for entry in report["matches"]:
        out.append(
            f"| {_format_date(entry['match_date_ms'])} | {entry['match_name']} | {entry['ht_score']} | {entry['final_score']} | "
            f"{entry['v25_aggressive']} | {entry['v26_surprise']} | {entry['v26_aggressive']} | {entry['v26_main_htft']} |"
        )

    out.append("")
    return "\n".join(out)
|
||||
|
||||
|
||||
def _tally_pick(bucket: Dict[str, float], payload: Dict[str, Any]) -> None:
    """Add one evaluated pick to a candidate/played/won/lost/profit tally."""
    bucket["candidate"] += 1
    if not payload["counted_in_roi"]:
        return
    bucket["played"] += 1
    bucket["won" if payload["result"] == "WON" else "lost"] += 1
    bucket["profit"] += payload["profit_flat"]


def main() -> None:
    """Run the HT/FT + upset backtest and write JSON, CSV and Markdown reports.

    Command line: ``--limit`` (default 120, clamped to at least 1) sets how
    many finished matches are analysed. Matches whose data the orchestrator
    cannot load are skipped. Every remaining match is analysed with the
    ``v25`` and ``v26`` engine modes, graded per strategy, and the three
    ``backtest_v26_shadow_htft_upset.*`` files are written to ``reports/``
    under ``AI_ENGINE_DIR``.
    """
    parser = argparse.ArgumentParser(description="HT/FT + upset focused backtest.")
    parser.add_argument("--limit", type=int, default=120, help="Number of finished matches to analyze.")
    args = parser.parse_args()

    dsn = _resolve_dsn()
    orchestrator = SingleMatchOrchestrator()
    matches = _fetch_matches(dsn, max(1, args.limit))

    strategy_buckets: Dict[str, Dict[str, float]] = {name: defaultdict(float) for name in STRATEGIES}
    v26_reversal_buckets: Dict[str, Dict[str, float]] = {label: defaultdict(float) for label in REVERSAL_LABELS}
    report_matches: list[Dict[str, Any]] = []
    csv_rows: list[Dict[str, Any]] = []

    for context in matches:
        data = orchestrator._load_match_data(context.match_id)  # noqa: SLF001
        if data is None:
            continue

        orchestrator.engine_mode = "v25"
        v25 = orchestrator.analyze_match(context.match_id) or {}
        orchestrator.engine_mode = "v26"
        v26 = orchestrator.analyze_match(context.match_id) or {}

        extracted = _extract_strategy_rows(
            context=context,
            odds_data=data.odds_data or {},
            v25=v25,
            v26=v26,
        )

        match_row: Dict[str, Any] = {
            "match_id": context.match_id,
            "match_name": context.match_name,
            "league": context.league,
            "match_date_ms": context.match_date_ms,
            "ht_score": context.ht_score,
            "final_score": context.final_score,
        }

        for strategy, payload in extracted.items():
            if not payload:
                match_row[strategy] = "-"
                continue

            _tally_pick(strategy_buckets[strategy], payload)

            if strategy == "v26_surprise":
                # Surprise picks are additionally broken down by reversal label;
                # labels outside REVERSAL_LABELS are not tracked.
                reversal_label = str(payload.get("reversal_pick") or "")
                if reversal_label in v26_reversal_buckets:
                    _tally_pick(v26_reversal_buckets[reversal_label], payload)

            match_row[strategy] = (
                f"{payload['pick']} ({payload['result']}, {'played' if payload['counted_in_roi'] else 'not played'}, {payload['profit_flat']:+.2f})"
            )

            csv_rows.append(
                {
                    "match_id": context.match_id,
                    "date": _format_date(context.match_date_ms),
                    "league": context.league,
                    "match": context.match_name,
                    "ht_score": context.ht_score,
                    "final_score": context.final_score,
                    **payload,
                }
            )

        report_matches.append(match_row)

    report = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "sample_size": len(report_matches),
        "summary": {
            "strategies": {
                strategy: _summarize_bucket(bucket)
                for strategy, bucket in strategy_buckets.items()
            },
            "v26_surprise_by_pick": {
                label: _summarize_bucket(bucket)
                for label, bucket in v26_reversal_buckets.items()
            },
        },
        "matches": report_matches,
    }

    report_dir = AI_ENGINE_DIR / "reports"
    # A fresh checkout may not have the reports directory yet.
    report_dir.mkdir(parents=True, exist_ok=True)
    json_path = report_dir / "backtest_v26_shadow_htft_upset.json"
    csv_path = report_dir / "backtest_v26_shadow_htft_upset.csv"
    md_path = report_dir / "backtest_v26_shadow_htft_upset.md"

    json_path.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8")

    csv_fields = [
        "match_id",
        "date",
        "league",
        "match",
        "ht_score",
        "final_score",
        "strategy",
        "market",
        "pick",
        "odds",
        "playable",
        "confidence",
        "result",
        "counted_in_roi",
        "profit_flat",
        "resolution_note",
        "source",
        "reversal_pick",
        "reversal_prob",
        "favorite_gap",
        "favorite_odd",
        "support_score",
        "odds_band_score",
        "odds_band_label",
        "league_reversal_rate",
        "league_strict_rev_rate",
        "referee_strict_rev_rate",
        "surprise_score",
        "reason_codes",
        "pick_reason",
    ]
    with csv_path.open("w", encoding="utf-8", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=csv_fields)
        writer.writeheader()
        writer.writerows(csv_rows)

    md_path.write_text(_build_markdown(report), encoding="utf-8")

    print(f"[OK] JSON report written to {json_path}")
    print(f"[OK] CSV report written to {csv_path}")
    print(f"[OK] Markdown report written to {md_path}")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,810 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, Optional
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
|
||||
AI_ENGINE_DIR = Path(__file__).resolve().parents[1]
|
||||
if str(AI_ENGINE_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(AI_ENGINE_DIR))
|
||||
|
||||
from services.single_match_orchestrator import SingleMatchOrchestrator
|
||||
from utils.top_leagues import load_top_league_ids
|
||||
|
||||
|
||||
# Market codes in a fixed order.
# NOTE(review): presumably the display order for per-market report sections;
# the consumer is outside this view - confirm.
MARKET_ORDER = [
    "MS", "DC",
    "OU15", "OU25", "OU35",
    "BTTS",
    "HT", "HT_OU05", "HT_OU15", "HTFT",
    "OE", "CARDS", "HCAP",
]
|
||||
|
||||
|
||||
@dataclass
class MatchContext:
    """One finished match: identifiers, team names, scores and card total."""

    match_id: str
    match_date_ms: int  # kick-off timestamp; presumably epoch milliseconds - confirm
    league_id: Optional[str]
    league: str
    home_team: str
    away_team: str
    final_home: int
    final_away: int
    ht_home: Optional[int]  # None when the half-time score is unknown
    ht_away: Optional[int]
    total_cards: Optional[float]  # None when no card total is available

    def _has_ht_score(self) -> bool:
        """Whether both half-time goal counts are known."""
        return self.ht_home is not None and self.ht_away is not None

    @property
    def match_name(self) -> str:
        """Human-readable fixture label, ``"<home> vs <away>"``."""
        return "{} vs {}".format(self.home_team, self.away_team)

    @property
    def final_score(self) -> str:
        """Full-time score formatted as ``"<home>-<away>"``."""
        return "{}-{}".format(self.final_home, self.final_away)

    @property
    def ht_score(self) -> Optional[str]:
        """Half-time score as ``"<home>-<away>"``, or ``None`` if unknown."""
        if self._has_ht_score():
            return "{}-{}".format(self.ht_home, self.ht_away)
        return None

    @property
    def total_goals(self) -> int:
        """Sum of both teams' full-time goals."""
        return self.final_home + self.final_away

    @property
    def total_ht_goals(self) -> Optional[int]:
        """Sum of both teams' half-time goals, or ``None`` if unknown."""
        if self._has_ht_score():
            return self.ht_home + self.ht_away
        return None
|
||||
|
||||
|
||||
def _resolve_dsn() -> str:
    """Return the database DSN configured in ``ai-engine/.env``.

    The first ``DATABASE_URL=`` entry wins. Optional surrounding quotes are
    removed and the value is cut at ``?schema=``, so the schema suffix (and
    anything after it) is not part of the returned DSN.

    Raises:
        SystemExit: if the ``.env`` file is missing or has no ``DATABASE_URL``.
    """
    env_path = AI_ENGINE_DIR / ".env"
    if env_path.exists():
        for raw_line in env_path.read_text(encoding="utf-8").splitlines():
            line = raw_line.strip()
            if not line.startswith("DATABASE_URL="):
                continue
            value = line.split("=", 1)[1].strip()
            # .env values are frequently quoted; without this the quote
            # character would become part of the DSN.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1].strip()
            return value.split("?schema=")[0]
    raise SystemExit("DATABASE_URL not found in ai-engine/.env")
|
||||
|
||||
|
||||
def _fetch_matches(
    dsn: str,
    limit: int,
    top_league_ids: Optional[list[str]] = None,
) -> list[MatchContext]:
    """Load the most recent finished football matches, optionally limited to given leagues.

    Args:
        dsn: connection string passed to ``psycopg2.connect``.
        limit: maximum number of matches to return.
        top_league_ids: when non-empty, only matches whose ``league_id`` is in
            this list are returned.

    Returns:
        ``MatchContext`` objects ordered by ``mst_utc`` descending.
        ``total_cards`` comes from a per-match sum over card events in which a
        yellow-card event counts 1, a red-card event counts 2 and any other
        card event counts 1; it is ``None`` for matches without card events.
    """
    # Literal percent signs are doubled because the query is executed with
    # parameters and therefore goes through %-style placeholder substitution.
    query = """
        SELECT
            m.id,
            m.mst_utc,
            m.league_id,
            COALESCE(l.name, 'Unknown League') AS league,
            COALESCE(ht.name, 'Home') AS home_team,
            COALESCE(at.name, 'Away') AS away_team,
            COALESCE(m.score_home, 0) AS score_home,
            COALESCE(m.score_away, 0) AS score_away,
            m.ht_score_home,
            m.ht_score_away,
            cards.total_cards
        FROM matches m
        LEFT JOIN leagues l ON l.id = m.league_id
        LEFT JOIN teams ht ON ht.id = m.home_team_id
        LEFT JOIN teams at ON at.id = m.away_team_id
        LEFT JOIN (
            SELECT
                mpe.match_id,
                SUM(
                    CASE
                        WHEN mpe.event_type::text LIKE '%%yellow_card%%' THEN 1
                        WHEN mpe.event_type::text LIKE '%%red_card%%' THEN 2
                        ELSE 1
                    END
                )::float AS total_cards
            FROM match_player_events mpe
            WHERE mpe.event_type::text LIKE '%%card%%'
            GROUP BY mpe.match_id
        ) cards ON cards.match_id = m.id
        WHERE m.status = 'FT'
          AND m.sport = 'football'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
    """
    params: list[Any] = []
    if top_league_ids:
        query += " AND m.league_id = ANY(%s)"
        params.append(top_league_ids)
    query += """
        ORDER BY m.mst_utc DESC
        LIMIT %s
    """
    params.append(limit)

    conn = psycopg2.connect(dsn)
    try:
        # ``with conn`` only wraps a transaction (commit/rollback); it does not
        # close the connection, so that is done explicitly in ``finally``.
        with conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(query, params)
            rows = cur.fetchall()
    finally:
        conn.close()

    def _optional_int(value: Any) -> Optional[int]:
        return int(value) if value is not None else None

    def _optional_float(value: Any) -> Optional[float]:
        return float(value) if value is not None else None

    return [
        MatchContext(
            match_id=str(row["id"]),
            match_date_ms=int(row["mst_utc"] or 0),
            league_id=str(row["league_id"]) if row.get("league_id") else None,
            league=str(row["league"] or "Unknown League"),
            home_team=str(row["home_team"] or "Home"),
            away_team=str(row["away_team"] or "Away"),
            final_home=int(row["score_home"] or 0),
            final_away=int(row["score_away"] or 0),
            ht_home=_optional_int(row.get("ht_score_home")),
            ht_away=_optional_int(row.get("ht_score_away")),
            total_cards=_optional_float(row.get("total_cards")),
        )
        for row in rows
    ]
|
||||
|
||||
|
||||
def _odds_band(odds: float) -> str:
|
||||
if odds < 1.5:
|
||||
return "<1.50"
|
||||
if odds < 1.8:
|
||||
return "1.50-1.79"
|
||||
if odds < 2.1:
|
||||
return "1.80-2.09"
|
||||
if odds < 2.5:
|
||||
return "2.10-2.49"
|
||||
return "2.50+"
|
||||
|
||||
|
||||
def _confidence_band(confidence: float) -> str:
|
||||
if confidence < 55.0:
|
||||
return "<55"
|
||||
if confidence < 65.0:
|
||||
return "55-64.9"
|
||||
if confidence < 75.0:
|
||||
return "65-74.9"
|
||||
return "75+"
|
||||
|
||||
|
||||
def _edge_band(edge: float) -> str:
|
||||
if edge < 0.03:
|
||||
return "<0.03"
|
||||
if edge < 0.06:
|
||||
return "0.03-0.059"
|
||||
if edge < 0.10:
|
||||
return "0.06-0.099"
|
||||
return "0.10+"
|
||||
|
||||
|
||||
def _top_n_buckets(rows: Iterable[tuple[str, float]], limit: int = 10) -> list[dict[str, Any]]:
|
||||
ranked = sorted(rows, key=lambda item: (-item[1], item[0]))
|
||||
return [
|
||||
{"label": label, "count": int(count)}
|
||||
for label, count in ranked[:limit]
|
||||
]
|
||||
|
||||
|
||||
def _summarize_v26_losses(csv_rows: list[Dict[str, Any]]) -> Dict[str, Any]:
    """Break down the lost v26 shadow bets by several dimensions.

    A row counts as a loss when its ``model`` is ``v26.shadow``, it was counted
    in ROI and its ``result`` is ``LOST``. Losses are tallied per market,
    league, ``"<market> <pick>"``, odds band, confidence band and edge band.

    Returns:
        ``lost_bets`` plus one ranked bucket list per dimension (top 20
        markets, top 15 leagues and picks, top 10 for each band).
    """
    dimensions = ("market", "league", "pick", "odds", "confidence", "edge")
    tallies: Dict[str, Dict[str, float]] = {name: defaultdict(float) for name in dimensions}
    lost_count = 0

    for row in csv_rows:
        if row.get("model") != "v26.shadow":
            continue
        if not row.get("counted_in_roi"):
            continue
        if row.get("result") != "LOST":
            continue
        lost_count += 1

        market = str(row.get("market") or "UNKNOWN")
        league = str(row.get("league") or "Unknown League")
        pick = str(row.get("pick") or "")

        tallies["market"][market] += 1
        tallies["league"][league] += 1
        tallies["pick"][f"{market} {pick}".strip()] += 1
        tallies["odds"][_odds_band(_safe_float(row.get("odds")))] += 1
        tallies["confidence"][_confidence_band(_safe_float(row.get("confidence")))] += 1
        tallies["edge"][_edge_band(_safe_float(row.get("edge")))] += 1

    return {
        "lost_bets": lost_count,
        "by_market": _top_n_buckets(tallies["market"].items(), limit=20),
        "by_league": _top_n_buckets(tallies["league"].items(), limit=15),
        "by_pick": _top_n_buckets(tallies["pick"].items(), limit=15),
        "by_odds_band": _top_n_buckets(tallies["odds"].items(), limit=10),
        "by_confidence_band": _top_n_buckets(tallies["confidence"].items(), limit=10),
        "by_edge_band": _top_n_buckets(tallies["edge"].items(), limit=10),
    }
|
||||
|
||||
|
||||
def _safe_float(value: Any) -> float:
|
||||
try:
|
||||
return float(value)
|
||||
except (TypeError, ValueError):
|
||||
return 0.0
|
||||
|
||||
|
||||
def _normalize_text(value: Any) -> str:
|
||||
text = str(value or "").strip().upper()
|
||||
return (
|
||||
text.replace("İ", "I")
|
||||
.replace("İ", "I")
|
||||
.replace("Ş", "S")
|
||||
.replace("Ğ", "G")
|
||||
.replace("Ü", "U")
|
||||
.replace("Ö", "O")
|
||||
.replace("Ç", "C")
|
||||
)
|
||||
|
||||
|
||||
def _outcome_symbol(home: int, away: int) -> str:
|
||||
if home > away:
|
||||
return "1"
|
||||
if home < away:
|
||||
return "2"
|
||||
return "X"
|
||||
|
||||
|
||||
def _resolve_pick(
    market: str,
    pick: str,
    context: MatchContext,
) -> Dict[str, Any]:
    """Settle one pick against the scores stored on ``context``.

    Supported market codes (after normalization and removal of ``/``):
    ``HTFT``, ``MS``, ``DC``, ``OU15``/``OU25``/``OU35``, the half-time
    over/under codes, ``BTTS``, ``HT``, ``OE``, ``CARDS`` (fixed 4.5 line) and
    ``HCAP`` (fixed -1.0 home line).

    Args:
        market: Market code as emitted by the model.
        pick: Pick text as emitted by the model.
        context: Match data; the attributes read here are ``ht_home``,
            ``ht_away``, ``final_home``, ``final_away``, ``total_goals``,
            ``total_ht_goals`` and ``total_cards``.

    Returns:
        A dict with ``result`` (``"WON"``, ``"LOST"`` or ``"UNRESOLVED"``),
        ``won`` (bool, or ``None`` when unresolved) and a short ``note``.
    """

    def unresolved(reason: str) -> Dict[str, Any]:
        return {"result": "UNRESOLVED", "won": None, "note": reason}

    def settled(is_win: bool, detail: str) -> Dict[str, Any]:
        return {"result": "WON" if is_win else "LOST", "won": is_win, "note": detail}

    code = _normalize_text(market).replace("/", "")
    raw_pick = str(pick or "").strip()
    wanted = _normalize_text(raw_pick)

    if not (code and wanted):
        return unresolved("pick_missing")

    if code == "HTFT":
        if context.ht_home is None or context.ht_away is None:
            return unresolved("ht_score_missing")
        if "/" not in raw_pick:
            return unresolved("htft_pick_invalid")
        first_half, full_time = raw_pick.split("/", 1)
        actual = (
            f"{_outcome_symbol(context.ht_home, context.ht_away)}"
            f"/{_outcome_symbol(context.final_home, context.final_away)}"
        )
        chosen = f"{_normalize_text(first_half)}/{_normalize_text(full_time)}"
        return settled(chosen == actual, f"actual={actual}")

    if code == "MS":
        actual = _outcome_symbol(context.final_home, context.final_away)
        # The pick may be the bare symbol or carry the market prefix.
        return settled(wanted in (actual, f"MS {actual}"), f"actual={actual}")

    if code == "DC":
        actual = _outcome_symbol(context.final_home, context.final_away)
        double_chance = {
            "1X": ("1", "X"),
            "X2": ("X", "2"),
            "12": ("1", "2"),
        }
        return settled(actual in double_chance.get(wanted, ()), f"actual={actual}")

    full_time_lines = {"OU15": 1.5, "OU25": 2.5, "OU35": 3.5}
    half_time_codes = ("HTOU05", "HTOU15", "HT_OU05", "HT_OU15")
    if code in full_time_lines or code in half_time_codes:
        if code in half_time_codes:
            if context.total_ht_goals is None:
                return unresolved("ht_score_missing")
            total = context.total_ht_goals
            line = 0.5 if "05" in code else 1.5
        else:
            total = context.total_goals
            line = full_time_lines[code]

        if "UST" in wanted or "OVER" in wanted:
            side, is_win = "OVER", total > line
        elif "ALT" in wanted or "UNDER" in wanted:
            side, is_win = "UNDER", total < line
        else:
            return unresolved("ou_side_unknown")
        return settled(is_win, f"actual_total={total} side={side} line={line}")

    if code == "BTTS":
        both_scored = context.final_home > 0 and context.final_away > 0
        if "VAR" in wanted or "YES" in wanted:
            side, is_win = "YES", both_scored
        elif "YOK" in wanted or wanted.endswith("NO"):
            side, is_win = "NO", not both_scored
        else:
            return unresolved("btts_side_unknown")
        return settled(
            is_win,
            f"actual_btts={'YES' if both_scored else 'NO'} side={side}",
        )

    if code == "HT":
        if context.ht_home is None or context.ht_away is None:
            return unresolved("ht_score_missing")
        actual = _outcome_symbol(context.ht_home, context.ht_away)
        return settled(wanted == actual, f"actual={actual}")

    if code == "OE":
        actual = "EVEN" if context.total_goals % 2 == 0 else "ODD"
        if wanted in ("CIFT", "EVEN"):
            parity = "EVEN"
        elif wanted in ("TEK", "ODD"):
            parity = "ODD"
        else:
            return unresolved("oe_pick_unknown")
        return settled(actual == parity, f"actual={actual}")

    if code == "CARDS":
        if context.total_cards is None:
            return unresolved("cards_missing")
        if "UST" in wanted or "OVER" in wanted:
            side, is_win = "OVER", context.total_cards > 4.5
        elif "ALT" in wanted or "UNDER" in wanted:
            side, is_win = "UNDER", context.total_cards < 4.5
        else:
            return unresolved("cards_side_unknown")
        return settled(
            is_win,
            f"actual_cards={context.total_cards:.1f} side={side} line=4.5",
        )

    if code == "HCAP":
        # Home side gives one goal; the adjusted score is settled like a 1X2.
        actual = _outcome_symbol(context.final_home - 1.0, float(context.final_away))
        return settled(wanted == actual, f"actual={actual} line_home=-1.0")

    return unresolved("market_not_supported")
|
||||
|
||||
|
||||
def _evaluate_row(
    market: str,
    pick: str,
    odds: Any,
    playable: bool,
    stake_units: Any,
    context: MatchContext,
) -> Dict[str, Any]:
    """Settle one bet row and compute its flat and stake-weighted profit.

    A row counts towards ROI only when it is playable, its odds exceed 1.01
    and the pick resolved to WON or LOST. Flat profit assumes a 1-unit stake;
    stake profit scales it by ``stake_units`` (or by 1.0 when that is not
    positive).

    Returns:
        A dict with ``result``, ``won``, ``resolution_note``,
        ``counted_in_roi``, ``profit_flat`` and ``profit_stake``.
    """
    outcome = _resolve_pick(market, pick, context)
    price = _safe_float(odds)
    stake = _safe_float(stake_units)

    is_settled = outcome["result"] in ("WON", "LOST")
    counted = bool(playable and price > 1.01 and is_settled)

    if counted:
        flat_profit = price - 1.0 if outcome["result"] == "WON" else -1.0
        multiplier = stake if stake > 0 else 1.0
        stake_profit = flat_profit * multiplier
    else:
        flat_profit = stake_profit = 0.0

    return {
        "result": outcome["result"],
        "won": outcome["won"],
        "resolution_note": outcome["note"],
        "counted_in_roi": counted,
        "profit_flat": round(flat_profit, 4),
        "profit_stake": round(stake_profit, 4),
    }
|
||||
|
||||
|
||||
def _summarize_bucket(bucket: Dict[str, float]) -> Dict[str, Any]:
|
||||
played = int(bucket["played"])
|
||||
won = int(bucket["won"])
|
||||
lost = int(bucket["lost"])
|
||||
unresolved = int(bucket["unresolved"])
|
||||
profit = round(bucket["profit"], 4)
|
||||
roi = round((profit / played) * 100.0, 2) if played else 0.0
|
||||
win_rate = round((won / played) * 100.0, 2) if played else 0.0
|
||||
return {
|
||||
"played": played,
|
||||
"won": won,
|
||||
"lost": lost,
|
||||
"unresolved": unresolved,
|
||||
"profit_flat": profit,
|
||||
"roi_flat_pct": roi,
|
||||
"win_rate_pct": win_rate,
|
||||
}
|
||||
|
||||
|
||||
def _format_date(ms: int) -> str:
|
||||
if ms <= 0:
|
||||
return "-"
|
||||
dt = datetime.fromtimestamp(ms / 1000, tz=timezone.utc)
|
||||
return dt.strftime("%Y-%m-%d")
|
||||
|
||||
|
||||
def _build_markdown_report(report: Dict[str, Any]) -> str:
    """Render the backtest report dict as a Markdown document.

    Sections, in order: header bullets, overall per-model summary, per-market
    summary (markets listed in ``MARKET_ORDER`` with at least one played bet),
    the v26 loss analysis when present, and a match-by-match table.

    Args:
        report: Report dict with ``sample_size``, ``generated_at``,
            ``summary`` and ``matches``; ``top_leagues_only`` is optional.

    Returns:
        The Markdown text, lines joined with ``\\n``.
    """

    def table_row(*cells: Any) -> str:
        return "| " + " | ".join(str(cell) for cell in cells) + " |"

    out: list[str] = [
        "# v25 vs v26.shadow ROI Report",
        "",
        f"- Sample: last {report['sample_size']} finished football matches",
    ]
    if report.get("top_leagues_only"):
        out.append("- Filter: top leagues only")
    out += [
        "- ROI calculation: flat `1 unit` per playable and resolvable bet",
        f"- Generated at: {report['generated_at']}",
        "",
        "## Overall Summary",
        "",
        "| Model | Played | Won | Lost | Win Rate | Profit | ROI | Main Pick ROI | Main Pick W/L |",
        "|---|---:|---:|---:|---:|---:|---:|---:|---|",
    ]

    summary = report["summary"]
    for model_name, payload in summary["models"].items():
        main = payload["main_pick"]
        overall = payload["all_playable"]
        out.append(
            table_row(
                model_name,
                overall["played"],
                overall["won"],
                overall["lost"],
                f"{overall['win_rate_pct']}%",
                f"{overall['profit_flat']:+.2f}",
                f"{overall['roi_flat_pct']:+.2f}%",
                f"{main['roi_flat_pct']:+.2f}%",
                f"{main['won']}/{main['played']}",
            )
        )

    out += [
        "",
        "## Market Summary",
        "",
        "| Model | Market | Played | Won | Lost | Profit | ROI |",
        "|---|---|---:|---:|---:|---:|---:|",
    ]
    for model_name, markets in summary["markets"].items():
        for market_name in MARKET_ORDER:
            stats = markets.get(market_name)
            if not stats or stats["played"] == 0:
                continue
            out.append(
                table_row(
                    model_name,
                    market_name,
                    stats["played"],
                    stats["won"],
                    stats["lost"],
                    f"{stats['profit_flat']:+.2f}",
                    f"{stats['roi_flat_pct']:+.2f}%",
                )
            )
    out.append("")

    loss_summary = summary.get("v26_loss_analysis", {})
    if loss_summary:
        out += [
            "## v26 Loss Analysis",
            "",
            f"- Lost bets: {loss_summary.get('lost_bets', 0)}",
            "",
            "| Bucket | Top Items |",
            "|---|---|",
        ]
        bucket_labels = (
            ("By market", "by_market"),
            ("By league", "by_league"),
            ("By pick", "by_pick"),
            ("By odds band", "by_odds_band"),
            ("By confidence band", "by_confidence_band"),
            ("By edge band", "by_edge_band"),
        )
        for label, key in bucket_labels:
            # Only the first six entries of each bucket are shown.
            top_items = (loss_summary.get(key) or [])[:6]
            rendered = ", ".join(
                f"{item['label']} ({item['count']})" for item in top_items
            )
            out.append(f"| {label} | {rendered or '-'} |")
        out.append("")

    out += [
        "## Match By Match",
        "",
        "| Date | Match | Score | v25 Main | v25 Played Picks | v25 Profit | v26 Main | v26 Played Picks | v26 Profit |",
        "|---|---|---|---|---|---:|---|---|---:|",
    ]
    for match in report["matches"]:
        v25 = match["models"]["v25"]
        v26 = match["models"]["v26.shadow"]
        out.append(
            table_row(
                _format_date(match["match_date_ms"]),
                match["match_name"],
                match["final_score"],
                v25["main_pick"]["summary"],
                v25["played_picks_summary"],
                f"{v25['profit_flat']:+.2f}",
                v26["main_pick"]["summary"],
                v26["played_picks_summary"],
                f"{v26['profit_flat']:+.2f}",
            )
        )
    out.append("")
    return "\n".join(out)
|
||||
|
||||
|
||||
def _tally_outcome(bucket: Dict[str, float], result: str, profit: float) -> None:
    """Record one settled bet in ``bucket``: bump played, won or lost, and add ``profit``."""
    bucket["played"] += 1
    bucket["won" if result == "WON" else "lost"] += 1
    bucket["profit"] += profit


def main() -> None:
    """Run the v25 vs v26.shadow ROI backtest and write the three reports.

    Command-line options: ``--limit`` (number of finished matches, at least 1
    is always requested) and ``--top-leagues-only``. Every fetched match is
    analyzed once per engine mode; each bet row and the main pick are settled
    with ``_evaluate_row`` and tallied per model and per market. The result is
    written as JSON, CSV and Markdown into the ``reports`` directory under
    ``AI_ENGINE_DIR``, which is created when missing.
    """
    parser = argparse.ArgumentParser(
        description="Detailed ROI backtest for v25 vs v26.shadow.",
    )
    parser.add_argument("--limit", type=int, default=60, help="Number of finished matches to analyze.")
    parser.add_argument(
        "--top-leagues-only",
        action="store_true",
        help="Only analyze matches whose league_id exists in top_leagues.json.",
    )
    args = parser.parse_args()

    dsn = _resolve_dsn()
    top_league_ids = sorted(load_top_league_ids()) if args.top_leagues_only else None
    matches = _fetch_matches(dsn, max(1, args.limit), top_league_ids=top_league_ids)
    orchestrator = SingleMatchOrchestrator()

    report_matches: list[Dict[str, Any]] = []
    model_aggregate: Dict[str, Dict[str, float]] = {
        "v25": defaultdict(float),
        "v26.shadow": defaultdict(float),
    }
    main_pick_aggregate: Dict[str, Dict[str, float]] = {
        "v25": defaultdict(float),
        "v26.shadow": defaultdict(float),
    }
    market_aggregate: Dict[str, Dict[str, Dict[str, float]]] = {
        "v25": defaultdict(lambda: defaultdict(float)),
        "v26.shadow": defaultdict(lambda: defaultdict(float)),
    }
    csv_rows: list[Dict[str, Any]] = []

    for context in matches:
        match_payload = {
            "match_id": context.match_id,
            "match_name": context.match_name,
            "league": context.league,
            "match_date_ms": context.match_date_ms,
            "final_score": context.final_score,
            "ht_score": context.ht_score,
            "total_cards": context.total_cards,
            "models": {},
        }

        # Report label -> engine mode set on the shared orchestrator.
        for model_name, mode in (("v25", "v25"), ("v26.shadow", "v26")):
            orchestrator.engine_mode = mode
            package = orchestrator.analyze_match(context.match_id) or {}
            rows = package.get("bet_summary") or []
            evaluated_rows: list[Dict[str, Any]] = []
            match_profit = 0.0

            for row in rows:
                market = str(row.get("market") or "")
                pick = str(row.get("pick") or "")
                evaluation = _evaluate_row(
                    market=market,
                    pick=pick,
                    odds=row.get("odds"),
                    playable=bool(row.get("playable")),
                    stake_units=row.get("stake_units"),
                    context=context,
                )
                combined = {
                    "market": market,
                    "pick": pick,
                    "playable": bool(row.get("playable")),
                    "bet_grade": row.get("bet_grade"),
                    "odds": round(_safe_float(row.get("odds")), 2),
                    "calibrated_confidence": round(_safe_float(row.get("calibrated_confidence")), 1),
                    "edge": round(_safe_float(row.get("ev_edge", row.get("edge"))), 4),
                    "stake_units": round(_safe_float(row.get("stake_units")), 2),
                    **evaluation,
                }
                evaluated_rows.append(combined)

                if combined["counted_in_roi"]:
                    _tally_outcome(
                        market_aggregate[model_name][market],
                        combined["result"],
                        combined["profit_flat"],
                    )
                    _tally_outcome(
                        model_aggregate[model_name],
                        combined["result"],
                        combined["profit_flat"],
                    )
                    match_profit += combined["profit_flat"]
                elif combined["playable"]:
                    # Playable but not counted (low odds or unresolved pick).
                    model_aggregate[model_name]["unresolved"] += 1
                    market_aggregate[model_name][market]["unresolved"] += 1

                csv_rows.append(
                    {
                        "match_id": context.match_id,
                        "date": _format_date(context.match_date_ms),
                        "league": context.league,
                        "match": context.match_name,
                        "final_score": context.final_score,
                        "ht_score": context.ht_score or "",
                        "model": model_name,
                        "market": market,
                        "pick": pick,
                        "playable": combined["playable"],
                        "bet_grade": combined["bet_grade"],
                        "odds": combined["odds"],
                        "confidence": combined["calibrated_confidence"],
                        "edge": combined["edge"],
                        "result": combined["result"],
                        "counted_in_roi": combined["counted_in_roi"],
                        "profit_flat": combined["profit_flat"],
                        "resolution_note": combined["resolution_note"],
                    }
                )

            main_pick = package.get("main_pick") or {}
            main_eval = _evaluate_row(
                market=str(main_pick.get("market") or ""),
                pick=str(main_pick.get("pick") or ""),
                odds=main_pick.get("odds"),
                playable=bool(main_pick.get("playable")),
                stake_units=main_pick.get("stake_units"),
                context=context,
            )
            main_pick_summary = {
                "market": main_pick.get("market"),
                "pick": main_pick.get("pick"),
                "playable": bool(main_pick.get("playable")),
                "odds": round(_safe_float(main_pick.get("odds")), 2),
                "confidence": round(
                    _safe_float(
                        main_pick.get("calibrated_confidence", main_pick.get("confidence"))
                    ),
                    1,
                ),
                "edge": round(_safe_float(main_pick.get("ev_edge", main_pick.get("edge"))), 4),
                **main_eval,
            }
            has_main_pick = bool(
                main_pick_summary.get("market") and main_pick_summary.get("pick")
            )

            if main_pick_summary["counted_in_roi"]:
                summary_suffix = (
                    f"{main_pick_summary['result']}, played, {main_pick_summary['profit_flat']:+.2f}"
                )
                _tally_outcome(
                    main_pick_aggregate[model_name],
                    main_pick_summary["result"],
                    main_pick_summary["profit_flat"],
                )
            else:
                summary_suffix = (
                    f"{main_pick_summary['result']}, not played" if has_main_pick else ""
                )
                if main_pick_summary["playable"]:
                    main_pick_aggregate[model_name]["unresolved"] += 1

            main_pick_summary["summary"] = (
                f"{main_pick_summary['market']} {main_pick_summary['pick']} "
                f"({summary_suffix})"
                if has_main_pick
                else "No main pick"
            )

            played_rows = [row for row in evaluated_rows if row["counted_in_roi"]]
            played_picks_summary = (
                "; ".join(
                    f"{row['market']} {row['pick']}={row['result']} ({row['profit_flat']:+.2f})"
                    for row in played_rows
                )
                if played_rows
                else "-"
            )

            match_payload["models"][model_name] = {
                "main_pick": main_pick_summary,
                "profit_flat": round(match_profit, 4),
                "played_picks_summary": played_picks_summary,
                "played_picks": played_rows,
                "all_picks": evaluated_rows,
            }

        report_matches.append(match_payload)

    summary = {
        "models": {
            model_name: {
                "all_playable": _summarize_bucket(model_aggregate[model_name]),
                "main_pick": _summarize_bucket(main_pick_aggregate[model_name]),
            }
            for model_name in ("v25", "v26.shadow")
        },
        "markets": {
            model_name: {
                market_name: _summarize_bucket(bucket)
                # Known markets first in MARKET_ORDER order, then the rest alphabetically.
                for market_name, bucket in sorted(
                    market_aggregate[model_name].items(),
                    key=lambda item: (
                        MARKET_ORDER.index(item[0]) if item[0] in MARKET_ORDER else 999,
                        item[0],
                    ),
                )
            }
            for model_name in ("v25", "v26.shadow")
        },
        "v26_loss_analysis": _summarize_v26_losses(csv_rows),
    }

    report = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "sample_size": len(report_matches),
        "top_leagues_only": bool(args.top_leagues_only),
        "summary": summary,
        "matches": report_matches,
    }

    report_dir = AI_ENGINE_DIR / "reports"
    # A fresh checkout may not have the directory yet; the writes below need it.
    report_dir.mkdir(parents=True, exist_ok=True)
    json_path = report_dir / "backtest_v26_shadow_roi_detail.json"
    csv_path = report_dir / "backtest_v26_shadow_roi_picks.csv"
    md_path = report_dir / "backtest_v26_shadow_roi_report.md"

    json_path.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8")

    with csv_path.open("w", encoding="utf-8", newline="") as handle:
        writer = csv.DictWriter(
            handle,
            fieldnames=[
                "match_id",
                "date",
                "league",
                "match",
                "final_score",
                "ht_score",
                "model",
                "market",
                "pick",
                "playable",
                "bet_grade",
                "odds",
                "confidence",
                "edge",
                "result",
                "counted_in_roi",
                "profit_flat",
                "resolution_note",
            ],
        )
        writer.writeheader()
        writer.writerows(csv_rows)

    md_path.write_text(_build_markdown_report(report), encoding="utf-8")

    print(f"[OK] JSON report written to {json_path}")
    print(f"[OK] CSV report written to {csv_path}")
    print(f"[OK] Markdown report written to {md_path}")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,93 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
# Parent of the directory that contains this script (resolved path).
AI_ENGINE_DIR = Path(__file__).resolve().parents[1]
|
||||
# Input CSV read by main().
SOURCE_CSV = AI_ENGINE_DIR / "data" / "training_data.csv"
|
||||
# Output directory for the split CSVs and dataset_meta.json.
TARGET_DIR = AI_ENGINE_DIR / "data" / "v26_shadow"
|
||||
# NOTE: runs at import time, so importing this module creates the directory.
TARGET_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def _rolling_windows(frame: pd.DataFrame) -> list[dict[str, int]]:
|
||||
ordered = frame.sort_values("mst_utc").reset_index(drop=True)
|
||||
windows: list[dict[str, int]] = []
|
||||
if ordered.empty:
|
||||
return windows
|
||||
|
||||
size = len(ordered)
|
||||
cuts = [0.55, 0.7, 0.85]
|
||||
for idx, cut in enumerate(cuts, start=1):
|
||||
end_ix = max(int(size * cut), 1)
|
||||
test_end = min(size - 1, end_ix + max(int(size * 0.10), 1))
|
||||
windows.append(
|
||||
{
|
||||
"window": idx,
|
||||
"train_end_ix": end_ix - 1,
|
||||
"test_start_ix": end_ix,
|
||||
"test_end_ix": test_end,
|
||||
"train_end_mst_utc": int(ordered.iloc[end_ix - 1]["mst_utc"]),
|
||||
"test_end_mst_utc": int(ordered.iloc[test_end]["mst_utc"]),
|
||||
}
|
||||
)
|
||||
return windows
|
||||
|
||||
|
||||
def main() -> None:
    """Split ``training_data.csv`` chronologically and write the v26 shadow dataset.

    The frame is ordered by ``mst_utc``, four derived columns are added, and
    the rows are cut at 70 % / 85 % into train, validation and holdout CSVs
    under ``TARGET_DIR``. A ``dataset_meta.json`` with row counts and the
    rolling-window description is written next to them.

    Raises:
        SystemExit: If the source CSV is missing or has no ``mst_utc`` column.
    """
    if not SOURCE_CSV.exists():
        raise SystemExit(f"Missing source CSV: {SOURCE_CSV}")

    frame = pd.read_csv(SOURCE_CSV)
    if "mst_utc" not in frame.columns:
        raise SystemExit("training_data.csv must include mst_utc")

    ordered = frame.sort_values("mst_utc").reset_index(drop=True)
    # Stand-in for optional source columns; a scalar default would break the
    # ``.fillna`` calls below when a column is absent.
    zeros = pd.Series(0.0, index=ordered.index)

    ordered["lineup_completeness"] = 1.0
    ordered["referee_available"] = (
        ordered.get("referee_experience", zeros).fillna(0) > 0
    ).astype(float)
    ordered["league_reliability"] = ordered.get("league_zero_goal_rate", zeros).fillna(0).apply(
        # 0.85 minus the zero-goal rate, clamped to [0.25, 0.95].
        lambda value: round(max(0.25, min(0.95, 0.85 - float(value))), 4)
    )
    ordered["odds_snapshot_freshness"] = 1.0

    train_end = max(int(len(ordered) * 0.70), 1)
    validation_end = max(int(len(ordered) * 0.85), train_end + 1)
    # Keep at least the last row for the holdout slice.
    validation_end = min(validation_end, len(ordered) - 1)

    train_df = ordered.iloc[:train_end].copy()
    validation_df = ordered.iloc[train_end:validation_end].copy()
    holdout_df = ordered.iloc[validation_end:].copy()

    train_df.to_csv(TARGET_DIR / "train.csv", index=False)
    validation_df.to_csv(TARGET_DIR / "validation.csv", index=False)
    holdout_df.to_csv(TARGET_DIR / "holdout.csv", index=False)

    meta = {
        "source": str(SOURCE_CSV),
        "rows": int(len(ordered)),
        "train_rows": int(len(train_df)),
        "validation_rows": int(len(validation_df)),
        "holdout_rows": int(len(holdout_df)),
        "rolling_windows": _rolling_windows(ordered),
        "derived_columns": [
            "lineup_completeness",
            "referee_available",
            "league_reliability",
            "odds_snapshot_freshness",
        ],
        "feature_policy": "prediction_time_only",
    }
    (TARGET_DIR / "dataset_meta.json").write_text(
        json.dumps(meta, indent=2),
        encoding="utf-8",
    )

    print(f"[OK] V26 dataset written to {TARGET_DIR}")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,58 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
# Parent of the directory that contains this script (resolved path).
AI_ENGINE_DIR = Path(__file__).resolve().parents[1]
|
||||
# Directory holding train.csv / validation.csv read by main().
DATA_DIR = AI_ENGINE_DIR / "data" / "v26_shadow"
|
||||
# Market profile JSON loaded by main(); its path is echoed into the report.
CONFIG_PATH = AI_ENGINE_DIR / "models" / "v26_shadow" / "market_profiles.json"
|
||||
# Destination of the JSON report written by main().
REPORT_PATH = AI_ENGINE_DIR / "reports" / "training_v26_shadow.json"
|
||||
# NOTE: runs at import time, so importing this module creates the reports directory.
REPORT_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def _market_accuracy(frame: pd.DataFrame, target_col: str) -> float:
|
||||
if target_col not in frame.columns or frame.empty:
|
||||
return 0.0
|
||||
counts = frame[target_col].value_counts(normalize=True)
|
||||
if counts.empty:
|
||||
return 0.0
|
||||
return round(float(counts.max()), 4)
|
||||
|
||||
|
||||
def main() -> None:
    """Write the v26 shadow training report.

    Reads the train and validation CSVs from ``DATA_DIR`` and the market
    profile JSON at ``CONFIG_PATH``, then writes a JSON report to
    ``REPORT_PATH`` containing the config versions, row counts and, per
    market, the share of the most frequent label in the validation set.

    Raises:
        SystemExit: If either CSV or the market profile config is missing.
    """
    train_csv = DATA_DIR / "train.csv"
    validation_csv = DATA_DIR / "validation.csv"
    if not train_csv.exists() or not validation_csv.exists():
        raise SystemExit("Run extract_training_data_v26.py first")
    if not CONFIG_PATH.exists():
        # Same friendly exit as for the missing CSVs instead of a traceback.
        raise SystemExit(f"Missing market profile config: {CONFIG_PATH}")

    train_df = pd.read_csv(train_csv)
    validation_df = pd.read_csv(validation_csv)
    config = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))

    # Report key -> label column in the validation CSV.
    label_columns = {
        "MS": "label_ms",
        "OU25": "label_ou25",
        "BTTS": "label_btts",
        "HT": "label_ht_result",
        "HTFT": "label_ht_ft",
        "CARDS": "label_cards_ou45",
    }
    report = {
        "version": config.get("version"),
        "calibration_version": config.get("calibration_version"),
        "train_rows": int(len(train_df)),
        "validation_rows": int(len(validation_df)),
        "label_priors": {
            market: _market_accuracy(validation_df, column)
            for market, column in label_columns.items()
        },
        "artifact_path": str(CONFIG_PATH),
        "notes": [
            "v26.shadow runtime currently uses artifact-based calibration and ROI gating",
            "market profile JSON remains the source of truth for runtime thresholds",
        ],
    }
    REPORT_PATH.write_text(json.dumps(report, indent=2), encoding="utf-8")
    print(f"[OK] Shadow training report written to {REPORT_PATH}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user