"""Write the v26 shadow training report.

Reads ``train.csv`` and ``validation.csv`` from ``DATA_DIR`` together with the
market profile JSON at ``CONFIG_PATH`` and writes a JSON summary (row counts
and per-market label priors) to ``REPORT_PATH``.
"""

from __future__ import annotations

import json
from pathlib import Path

import pandas as pd

AI_ENGINE_DIR = Path(__file__).resolve().parents[1]
DATA_DIR = AI_ENGINE_DIR / "data" / "v26_shadow"
CONFIG_PATH = AI_ENGINE_DIR / "models" / "v26_shadow" / "market_profiles.json"
REPORT_PATH = AI_ENGINE_DIR / "reports" / "training_v26_shadow.json"

# NOTE: runs at import time, so the reports directory exists before main()
# writes to it.
REPORT_PATH.parent.mkdir(parents=True, exist_ok=True)

# Report key -> label column in the validation CSV. Insertion order is the
# order of the keys in the written report.
_LABEL_COLUMNS = {
    "MS": "label_ms",
    "OU25": "label_ou25",
    "BTTS": "label_btts",
    "HT": "label_ht_result",
    "HTFT": "label_ht_ft",
    "CARDS": "label_cards_ou45",
}


def _market_accuracy(frame: pd.DataFrame, target_col: str) -> float:
    """Return the share of the most frequent value in ``target_col``.

    Despite the name, no predictions are involved: the result is the relative
    frequency of the majority label, rounded to 4 decimals. ``value_counts``
    skips missing values by default, so the share is taken over non-null
    rows only.

    Args:
        frame: Data frame expected to hold the label column.
        target_col: Name of the label column to summarize.

    Returns:
        The majority-label frequency in ``[0, 1]``, or ``0.0`` when the column
        is absent, the frame is empty, or the column has no non-null values.
    """
    if target_col not in frame.columns or frame.empty:
        return 0.0

    counts = frame[target_col].value_counts(normalize=True)
    if counts.empty:
        # Column exists but holds only missing values.
        return 0.0
    return round(float(counts.max()), 4)


def main() -> None:
    """Build the shadow training report and write it to ``REPORT_PATH``.

    Raises:
        SystemExit: If ``train.csv`` or ``validation.csv`` is missing from
            ``DATA_DIR``.
    """
    train_csv = DATA_DIR / "train.csv"
    validation_csv = DATA_DIR / "validation.csv"
    if not train_csv.exists() or not validation_csv.exists():
        raise SystemExit("Run extract_training_data_v26.py first")

    train_df = pd.read_csv(train_csv)
    validation_df = pd.read_csv(validation_csv)
    config = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))

    report = {
        "version": config.get("version"),
        "calibration_version": config.get("calibration_version"),
        "train_rows": int(len(train_df)),
        "validation_rows": int(len(validation_df)),
        # Priors are computed on the validation split only.
        "label_priors": {
            market: _market_accuracy(validation_df, column)
            for market, column in _LABEL_COLUMNS.items()
        },
        "artifact_path": str(CONFIG_PATH),
        "notes": [
            "v26.shadow runtime currently uses artifact-based calibration and ROI gating",
            "market profile JSON remains the source of truth for runtime thresholds",
        ],
    }

    REPORT_PATH.write_text(json.dumps(report, indent=2), encoding="utf-8")
    print(f"[OK] Shadow training report written to {REPORT_PATH}")


if __name__ == "__main__":
    main()