"""
Train Favorite-Policy Model (v1) — leak-free MS model for the validated strategy.
================================================================================
Trains a LEAK-FREE 1X2 model (drops the result-encoding columns) and saves it
plus the feature list and policy metadata. This is the brain of the new system;
the favourite-band value policy (odds ~1.5-2.2, model_prob>implied, flat stake)
is applied on top of its probabilities at serving time.

Honest holdout: trains on the oldest (1 - --holdout-frac) share of history,
evaluates the EXACT policy on the most recent --holdout-frac slice (never seen
in training), then retrains on ALL history for the saved production artifact.

Saves to models/favorite_v1/: model.json, feature_cols.json, metadata.json

Usage: python scripts/train_favorite_model.py
"""
from __future__ import annotations
import argparse, json, os, sys, datetime
import numpy as np, pandas as pd, xgboost as xgb

# Best effort: switch stdout to UTF-8 so the non-ASCII status output printed by
# this script (e.g. the "✅" line) does not fail on consoles that use another
# encoding. Streams without ``reconfigure`` are left untouched.
if sys.stdout and hasattr(sys.stdout, "reconfigure"):
    try:
        sys.stdout.reconfigure(encoding="utf-8")
    except Exception:
        # Deliberately ignored: a stream that refuses reconfiguration must not
        # prevent the training run from starting.
        pass

# Parent of the directory that contains this script; data/ and models/ are
# resolved relative to it.
AI_DIR = os.path.dirname(
    os.path.dirname(
        os.path.abspath(__file__)
    )
)
# Input training table.
CSV = os.path.join(AI_DIR, "data", "training_data_v27.csv")
# Output directory for the saved model, feature list and metadata.
OUT = os.path.join(AI_DIR, "models", "favorite_v1")

# Identifier, timestamp and score columns: never used as model features.
META = {
    "match_id",
    "home_team_id",
    "away_team_id",
    "league_id",
    "mst_utc",
    "score_home",
    "score_away",
    "ht_score_home",
    "ht_score_away",
}

# Result-encoding leakage — never feed these to the model (train OR serve).
LEAKY = {
    "home_goals_form",
    "away_goals_form",
    "total_goals",
    "ht_total_goals",
    "squad_diff",
    "home_squad_quality",
    "away_squad_quality",
    "referee_home_bias",
    "referee_avg_goals",
}

# XGBoost booster parameters: 3-class probability output, one class per
# match result (home / draw / away).
PARAMS = {
    "objective": "multi:softprob",
    "num_class": 3,
    "max_depth": 5,
    "eta": 0.05,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "tree_method": "hist",
    "verbosity": 0,
}


def policy_eval(P, y, O, lo, hi, margin):
    """Back-test the flat-stake value policy on a set of settled matches.

    For every match the outcome with the largest edge (model probability minus
    the odds-implied probability ``1 / odds``) is selected. A 1-unit bet is
    placed on it when the edge is strictly greater than ``margin`` and the
    selected odds lie in the half-open band ``[lo, hi)``.

    Args:
        P: 2-D array-like ``(n_matches, n_outcomes)`` of model probabilities.
        y: 1-D array-like of length ``n_matches`` holding the column index of
            the outcome that actually happened.
        O: 2-D array-like of decimal odds, same shape and column order as
            ``P``. Entries that are NaN or ``<= 1.0`` are treated as "no
            price": they get a sentinel edge of ``-9.0`` so they lose the
            argmax against any priced outcome.
        lo: Inclusive lower bound on the selected odds.
        hi: Exclusive upper bound on the selected odds.
        margin: Minimum edge required to bet (strict inequality).

    Returns:
        dict with ``bets`` (int, number of bets placed), ``hit_pct`` (winning
        bets in percent, 1 decimal), ``roi_pct`` (net units per bet in
        percent, 2 decimals) and ``net_u`` (net units won, 1 decimal). The
        three ratios are plain Python floats and are ``0.0`` when no bet
        qualifies.
    """
    P = np.asarray(P, dtype=float)
    O = np.asarray(O, dtype=float)
    y = np.asarray(y)

    # Divide only where a usable price exists; this avoids the divide-by-zero
    # warnings that ``1.0 / O`` raises for placeholder odds such as 0.0.
    priced = O > 1.0
    implied = np.full(O.shape, np.nan)
    np.divide(1.0, O, out=implied, where=priced)
    edge = np.where(priced, P - implied, -9.0)

    rows = np.arange(len(y))
    pick = edge.argmax(axis=1)
    pick_edge = edge[rows, pick]
    pick_odds = O[rows, pick]

    bet = (pick_edge > margin) & (pick_odds >= lo) & (pick_odds < hi)
    win = bet & (pick == y)
    # Flat 1-unit stake: a win returns odds - 1, a loss costs the stake.
    pnl = np.where(win, pick_odds - 1.0, -1.0)[bet]

    n = int(bet.sum())
    denom = max(n, 1)  # keeps the ratios at 0.0 instead of dividing by zero
    net = float(pnl.sum())
    # Convert to built-in numbers so the dict prints and serialises cleanly
    # (NumPy scalars show up as ``np.float64(...)`` in reprs on NumPy >= 2).
    return {
        "bets": n,
        "hit_pct": round(100.0 * int(win.sum()) / denom, 1),
        "roi_pct": round(100.0 * net / denom, 2),
        "net_u": round(net, 1),
    }


def main():
    """Train the 1X2 model, score the policy on a holdout, save the artifacts.

    Steps:
      1. Load ``CSV``, sort by ``mst_utc`` and keep rows with numeric scores.
      2. Build labels (0 = home win, 1 = draw, 2 = away win), the odds matrix
         and the feature matrix. Features are every column that is not in
         ``META`` / ``LEAKY`` and does not start with ``label_``; the odds
         columns are not excluded, so they are part of the features. All
         values are coerced to numeric with missing entries set to 0.0.
      3. Train on the oldest ``1 - holdout_frac`` share, then report accuracy
         and ``policy_eval`` results on the remaining most recent rows.
      4. Retrain on all rows and write ``model.json``, ``feature_cols.json``
         and ``metadata.json`` to ``OUT``.

    Exits via ``argparse`` error handling on invalid options, and via
    ``SystemExit`` when the data set is too small to split.
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        # Keep the module docstring's line layout in --help output.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    ap.add_argument("--lo", type=float, default=1.5,
                    help="inclusive lower bound of the odds band (default: 1.5)")
    ap.add_argument("--hi", type=float, default=2.2,
                    help="exclusive upper bound of the odds band (default: 2.2)")
    ap.add_argument("--margin", type=float, default=0.0,
                    help="minimum model edge over implied probability (default: 0.0)")
    ap.add_argument("--holdout-frac", type=float, default=0.15,
                    help="share of most recent rows held out for evaluation (default: 0.15)")
    ap.add_argument("--estimators", type=int, default=300,
                    help="number of boosting rounds (default: 300)")
    args = ap.parse_args()

    # Reject values that would otherwise fail (or silently produce NaN/empty
    # results) deep inside the training / evaluation code.
    if not 0.0 < args.holdout_frac < 1.0:
        ap.error("--holdout-frac must be strictly between 0 and 1")
    if args.lo >= args.hi:
        ap.error("--lo must be smaller than --hi")
    if args.estimators < 1:
        ap.error("--estimators must be at least 1")

    print(f"Loading {CSV} ...")
    df = pd.read_csv(CSV, low_memory=False).sort_values("mst_utc").reset_index(drop=True)
    sh = pd.to_numeric(df["score_home"], errors="coerce")
    sa = pd.to_numeric(df["score_away"], errors="coerce")
    # Only matches with both scores present can be labelled.
    ok = sh.notna() & sa.notna()
    df, sh, sa = df[ok].reset_index(drop=True), sh[ok.values].values, sa[ok.values].values
    y = np.where(sh > sa, 0, np.where(sh == sa, 1, 2))
    # Missing odds become 0.0, which policy_eval treats as "no price".
    O = df[["odds_ms_h", "odds_ms_d", "odds_ms_a"]].apply(pd.to_numeric, errors="coerce").fillna(0.0).values
    feats = [c for c in df.columns if c not in META and not c.startswith("label_") and c not in LEAKY]
    X = df[feats].apply(pd.to_numeric, errors="coerce").fillna(0.0).values
    print(f"  {len(df):,} rows, {len(feats)} leak-free features")

    # ── Honest holdout (last slice, never trained on) ──
    cut = int(len(df) * (1 - args.holdout_frac))
    if cut <= 0 or cut >= len(df):
        raise SystemExit(
            f"Not enough scored rows ({len(df)}) to split with "
            f"--holdout-frac {args.holdout_frac}"
        )
    bst = xgb.train(PARAMS, xgb.DMatrix(X[:cut], label=y[:cut]), num_boost_round=args.estimators)
    Ph = bst.predict(xgb.DMatrix(X[cut:]))
    acc = float((Ph.argmax(1) == y[cut:]).mean())
    hold = policy_eval(Ph, y[cut:], O[cut:], args.lo, args.hi, args.margin)
    print(f"\nHOLDOUT (last {args.holdout_frac:.0%}, {len(df)-cut:,} matches, never seen):")
    print(f"  MS accuracy: {acc*100:.1f}%")
    print(f"  POLICY band[{args.lo},{args.hi}] margin {args.margin}: {hold}")

    # ── Production model: retrain on ALL history ──
    print("\nTraining production model on ALL history ...")
    final = xgb.train(PARAMS, xgb.DMatrix(X, label=y), num_boost_round=args.estimators)
    os.makedirs(OUT, exist_ok=True)
    final.save_model(os.path.join(OUT, "model.json"))
    # The column order saved here is the order of the columns in X.
    with open(os.path.join(OUT, "feature_cols.json"), "w", encoding="utf-8") as f:
        json.dump(feats, f, ensure_ascii=False, indent=2)
    meta = {
        "version": "favorite_v1",
        "trained_at": datetime.datetime.now().isoformat(timespec="seconds"),
        "market": "MS",
        "classes": {"0": "home(1)", "1": "draw(X)", "2": "away(2)"},
        "policy": {"odds_lo": args.lo, "odds_hi": args.hi, "margin": args.margin,
                   "stake": "flat 1u", "rule": "bet model's max value edge if picked odds in band",
                   "never": ["longshots odds>=hi", "parlays/combos"]},
        "n_train": len(df), "n_features": len(feats),
        "leaky_excluded": sorted(LEAKY),
        "holdout_eval": {"accuracy_pct": round(acc*100,1), **hold},
        "caveat": "CSV odds are a static capture, not verified closing. Forward paper-trade with real CLV before staking.",
    }
    with open(os.path.join(OUT, "metadata.json"), "w", encoding="utf-8") as f:
        json.dump(meta, f, ensure_ascii=False, indent=2)
    print(f"\n✅ Saved production model to {OUT}/")
    print(f"   model.json, feature_cols.json ({len(feats)} feats), metadata.json")
    print("\nNEXT: serving wrapper that loads this + applies the policy to upcoming")
    print("matches, logs paper-trade picks, and we measure real forward CLV/ROI.")


# Script entry point: run the training pipeline only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()