Files
iddaai-be/ai-engine/scripts/train_v26_shadow.py
T
2026-04-21 16:53:56 +03:00

59 lines
2.1 KiB
Python

from __future__ import annotations
import json
from pathlib import Path
import pandas as pd
# Directory two levels above this file, i.e. the parent of the directory that
# contains this script.
AI_ENGINE_DIR = Path(__file__).resolve().parent.parent

# Input CSVs (train.csv / validation.csv) are read from here by main().
DATA_DIR = AI_ENGINE_DIR.joinpath("data", "v26_shadow")

# JSON artifact whose "version" / "calibration_version" fields are copied
# into the report.
CONFIG_PATH = AI_ENGINE_DIR.joinpath("models", "v26_shadow", "market_profiles.json")

# Destination of the JSON report written by main().
REPORT_PATH = AI_ENGINE_DIR.joinpath("reports", "training_v26_shadow.json")

# Import-time side effect: make sure the report directory exists so the later
# write cannot fail on a missing parent.
REPORT_PATH.parent.mkdir(parents=True, exist_ok=True)
def _market_accuracy(frame: pd.DataFrame, target_col: str) -> float:
if target_col not in frame.columns or frame.empty:
return 0.0
counts = frame[target_col].value_counts(normalize=True)
if counts.empty:
return 0.0
return round(float(counts.max()), 4)
def main() -> None:
    """Build the v26 shadow training report and write it to ``REPORT_PATH``.

    Reads ``train.csv`` and ``validation.csv`` from ``DATA_DIR`` and the market
    profile JSON from ``CONFIG_PATH``, then writes a JSON report containing the
    config version fields, the row counts of both data sets, and the
    majority-label share of each market label column in the validation set.

    Raises:
        SystemExit: If either input CSV is missing.
    """
    train_csv = DATA_DIR / "train.csv"
    validation_csv = DATA_DIR / "validation.csv"
    if not train_csv.exists() or not validation_csv.exists():
        raise SystemExit("Run extract_training_data_v26.py first")

    train_df = pd.read_csv(train_csv)
    validation_df = pd.read_csv(validation_csv)
    config = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))

    report = {
        "version": config.get("version"),
        "calibration_version": config.get("calibration_version"),
        "train_rows": int(len(train_df)),
        "validation_rows": int(len(validation_df)),
        # Priors are computed on the validation split only.
        "label_priors": {
            "MS": _market_accuracy(validation_df, "label_ms"),
            "OU25": _market_accuracy(validation_df, "label_ou25"),
            "BTTS": _market_accuracy(validation_df, "label_btts"),
            "HT": _market_accuracy(validation_df, "label_ht_result"),
            "HTFT": _market_accuracy(validation_df, "label_ht_ft"),
            "CARDS": _market_accuracy(validation_df, "label_cards_ou45"),
        },
        "artifact_path": str(CONFIG_PATH),
        "notes": [
            "v26.shadow runtime currently uses artifact-based calibration and ROI gating",
            "market profile JSON remains the source of truth for runtime thresholds",
        ],
    }

    REPORT_PATH.write_text(json.dumps(report, indent=2), encoding="utf-8")
    print(f"[OK] Shadow training report written to {REPORT_PATH}")
# Run the report generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()