# iddaai-be/ai-engine/scripts/train_v26_shadow.py

from __future__ import annotations

import json
from pathlib import Path

import pandas as pd


# Directory one level above the folder that contains this script.
AI_ENGINE_DIR = Path(__file__).resolve().parents[1]
# Folder expected to hold the train.csv / validation.csv inputs read by main().
DATA_DIR = AI_ENGINE_DIR / "data" / "v26_shadow"
# JSON artifact whose version fields are copied into the report by main().
CONFIG_PATH = AI_ENGINE_DIR / "models" / "v26_shadow" / "market_profiles.json"
# Destination of the JSON report written by main().
REPORT_PATH = AI_ENGINE_DIR / "reports" / "training_v26_shadow.json"
# Runs at import time: make sure the report directory exists before any write.
REPORT_PATH.parent.mkdir(parents=True, exist_ok=True)


def _market_accuracy(frame: pd.DataFrame, target_col: str) -> float:
    """Return the share of the most frequent value in ``target_col``.

    The result is the largest normalized value count of the column, rounded
    to four decimal places.

    Args:
        frame: Data frame to inspect.
        target_col: Name of the column whose value distribution is measured.

    Returns:
        The relative frequency of the most common value, or ``0.0`` when the
        column is absent, the frame is empty, or the column yields no counts
        (``value_counts`` skips missing values by default).
    """
    if frame.empty or target_col not in frame.columns:
        return 0.0

    shares = frame[target_col].value_counts(normalize=True)
    return 0.0 if shares.empty else round(float(shares.max()), 4)


def main() -> None:
    """Build the shadow training report and write it to ``REPORT_PATH``.

    Reads ``train.csv`` and ``validation.csv`` from ``DATA_DIR`` and the JSON
    config at ``CONFIG_PATH``, then writes a JSON report containing the
    config's version fields, the row counts of both CSVs, and one
    ``_market_accuracy`` value per label column of the validation frame.

    Raises:
        SystemExit: If either CSV file is missing.
    """
    train_csv = DATA_DIR / "train.csv"
    validation_csv = DATA_DIR / "validation.csv"
    if not (train_csv.exists() and validation_csv.exists()):
        raise SystemExit("Run extract_training_data_v26.py first")

    train_df = pd.read_csv(train_csv)
    validation_df = pd.read_csv(validation_csv)
    config = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))

    # Report key -> label column looked up in the validation frame.
    # Insertion order determines the key order in the written JSON.
    label_columns = {
        "MS": "label_ms",
        "OU25": "label_ou25",
        "BTTS": "label_btts",
        "HT": "label_ht_result",
        "HTFT": "label_ht_ft",
        "CARDS": "label_cards_ou45",
    }

    report = {
        "version": config.get("version"),
        "calibration_version": config.get("calibration_version"),
        "train_rows": len(train_df),
        "validation_rows": len(validation_df),
        "label_priors": {
            market: _market_accuracy(validation_df, column)
            for market, column in label_columns.items()
        },
        "artifact_path": str(CONFIG_PATH),
        "notes": [
            "v26.shadow runtime currently uses artifact-based calibration and ROI gating",
            "market profile JSON remains the source of truth for runtime thresholds",
        ],
    }

    serialized = json.dumps(report, indent=2)
    REPORT_PATH.write_text(serialized, encoding="utf-8")
    print(f"[OK] Shadow training report written to {REPORT_PATH}")


# Generate the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()