@@ -0,0 +1,154 @@
|
||||
"""
|
||||
Generate Daily Picks — the serving picker for the validated favourite policy.
|
||||
============================================================================
|
||||
Loads the saved leak-free MS model (models/favorite_v1) and applies the
|
||||
favourite-band value policy to a set of matches, emitting the day's STAKED
|
||||
picks and logging them for forward paper-trade settlement.
|
||||
|
||||
Train/serve consistency: features MUST come from the SAME extractor that built
|
||||
training_data_v27.csv. Production path = run the extractor nightly INCLUDING
|
||||
upcoming (status NS) matches, then point this script at that CSV. Demo path =
|
||||
use the tail of the training CSV as stand-in "today" matches (with the real
|
||||
result shown, since those are settled).
|
||||
|
||||
Policy: bet the MS side with the biggest model_prob - implied edge, ONLY if
|
||||
odds in [--lo,--hi) and edge>--margin. Flat 1u. No longshots, no parlays.
|
||||
Non-MS markets are NOT staked (efficient -> model error). One bet per match.
|
||||
|
||||
Usage:
|
||||
python scripts/generate_daily_picks.py --demo --n 20 # see it work now
|
||||
python scripts/generate_daily_picks.py --features today.csv # production
|
||||
python scripts/generate_daily_picks.py --settle # settle paper log
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import argparse, json, os, sys, datetime
|
||||
import numpy as np, pandas as pd, xgboost as xgb
|
||||
|
||||
# Best-effort: switch console output to UTF-8 when the stream supports it.
# Any failure is ignored on purpose; the script must still run on consoles
# that cannot be reconfigured.
_stdout = sys.stdout
if _stdout and hasattr(_stdout, "reconfigure"):
    try:
        _stdout.reconfigure(encoding="utf-8")
    except Exception:
        pass

# Project root: two directory levels above this file.
AI_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Saved model artefacts (model.json, feature_cols.json, metadata.json).
MODEL_DIR = os.path.join(AI_DIR, "models", "favorite_v1")
# Training data; also the demo source and the score source for settlement.
TRAIN_CSV = os.path.join(AI_DIR, "data", "training_data_v27.csv")
# Forward paper-trade log.
PAPER_LOG = os.path.join(AI_DIR, "data", "paper_trades.csv")

# Odds columns and their pick labels, index-aligned (home / draw / away).
MS_ODDS = [
    "odds_ms_h",
    "odds_ms_d",
    "odds_ms_a",
]
MS_PICKS = [
    "1",
    "X",
    "2",
]
|
||||
|
||||
|
||||
def load_model():
    """Load the saved booster plus its feature list and metadata.

    Reads three files from MODEL_DIR: ``model.json`` (the XGBoost booster),
    ``feature_cols.json`` and ``metadata.json``.

    Returns:
        Tuple ``(booster, feature_cols, metadata)``, where the last two are
        whatever the corresponding JSON files decode to.
    """

    def _read_json(filename):
        # All sidecar files are read as UTF-8 JSON from the model directory.
        with open(os.path.join(MODEL_DIR, filename), encoding="utf-8") as fh:
            return json.load(fh)

    booster = xgb.Booster()
    booster.load_model(os.path.join(MODEL_DIR, "model.json"))
    feature_cols = _read_json("feature_cols.json")
    metadata = _read_json("metadata.json")
    return booster, feature_cols, metadata
|
||||
|
||||
|
||||
def pick_for_rows(df: pd.DataFrame, bst, feats, lo: float, hi: float, margin: float) -> list:
    """Score every match and choose the MS side with the largest edge.

    For each row the model probabilities are compared with the implied
    probabilities ``1/odds``; the side with the biggest ``model_prob -
    implied`` is the candidate pick. It is flagged as staked only when its
    edge exceeds ``margin`` and its odds satisfy ``lo <= odds < hi``.

    Args:
        df: Matches to score. Must contain the MS_ODDS columns; feature
            columns missing from ``df`` are filled with 0.0.
        bst: Booster whose ``predict`` yields one probability per MS outcome
            (assumed shape [n, 3] in home/draw/away order -- confirm against
            the training objective).
        feats: Ordered feature column names the model expects.
        lo: Inclusive lower odds bound of the staking band.
        hi: Exclusive upper odds bound of the staking band.
        margin: Minimum edge (strictly greater than) required to stake.

    Returns:
        One dict per row, in row order, with keys ``idx`` (positional row
        index), ``pick``, ``odds``, ``model_prob``, ``edge`` and ``staked``.
        An empty ``df`` yields ``[]``.
    """
    # Nothing to score: skip model inference entirely instead of pushing a
    # zero-row matrix through DMatrix/predict and the broadcasting below.
    if len(df) == 0:
        return []

    X = df.reindex(columns=feats).apply(pd.to_numeric, errors="coerce").fillna(0.0).values
    P = bst.predict(xgb.DMatrix(X))  # [n,3] home/draw/away
    O = df[MS_ODDS].apply(pd.to_numeric, errors="coerce").fillna(0.0).values

    # Missing/unparseable odds were filled with 0.0; np.where still evaluates
    # 1/O for them, so silence the divide warning -- those cells become NaN.
    with np.errstate(divide="ignore", invalid="ignore"):
        implied = np.where(O > 1.0, 1.0 / O, np.nan)
    # Sides without usable odds get a sentinel edge so they can never win argmax
    # against a priced side, nor pass the margin test.
    edge = np.where(np.isnan(implied), -9.0, P - implied)

    out = []
    for i, k in enumerate(edge.argmax(axis=1)):
        k = int(k)
        o = float(O[i, k])
        e = float(edge[i, k])
        staked = (e > margin) and (lo <= o < hi)
        out.append({
            "idx": i,
            "pick": MS_PICKS[k],
            "odds": round(o, 2),
            "model_prob": round(float(P[i, k]), 4),
            "edge": round(e, 4),
            "staked": staked,
        })
    return out
|
||||
|
||||
|
||||
def settle():
    """Settle open paper trades using final scores from the training CSV.

    Reads PAPER_LOG, and for every bet whose ``result`` is still empty looks
    up the match in TRAIN_CSV. When both scores are present the bet is marked
    "WON" or "LOST"; otherwise it stays open. ``pnl`` is then recomputed for
    all rows (``odds - 1`` for a win, ``-1`` for a loss, NaN while open), the
    log is rewritten, and a hit-rate / ROI summary over all settled bets is
    printed.

    Returns:
        None. Prints a message and returns early when the log does not exist.
    """
    if not os.path.exists(PAPER_LOG):
        print("No paper_trades.csv yet.")
        return

    pt = pd.read_csv(PAPER_LOG)
    if "result" not in pt.columns:
        pt["result"] = np.nan
    # An all-empty column is read back as float; use object dtype so the
    # "WON"/"LOST" strings can be written into it.
    pt["result"] = pt["result"].astype(object)

    open_mask = pt["result"].isna()
    if not open_mask.any():
        # Nothing to look up: skip loading the (large) training CSV. This also
        # keeps a header-only log from breaking the settlement step.
        print("No open bets to settle.")
    else:
        # settle from training CSV scores if present, else needs DB (left as note)
        src = pd.read_csv(TRAIN_CSV, low_memory=False,
                          usecols=["match_id", "score_home", "score_away"])
        # Compare ids as strings: the two CSVs may infer different dtypes for
        # match_id, which would make every lookup miss.
        src["match_id"] = src["match_id"].astype(str)
        # First occurrence wins when a match_id appears more than once.
        src = src.drop_duplicates("match_id").set_index("match_id")
        sh = pd.to_numeric(src["score_home"], errors="coerce")
        sa = pd.to_numeric(src["score_away"], errors="coerce")
        # Only matches with BOTH scores can be settled; a missing away score
        # must not be read as an away win.
        played = sh.notna() & sa.notna()
        outcome = pd.Series(
            np.where(sh > sa, "1", np.where(sh == sa, "X", "2")),
            index=src.index,
            dtype=object,
        )[played]

        open_ids = pt.loc[open_mask, "match_id"].astype(str)
        actual = open_ids.map(outcome)  # NaN where unknown or not yet played
        resolved = actual.notna()
        picks = pt.loc[open_mask, "pick"].astype(str)
        won = picks[resolved] == actual[resolved]
        pt.loc[won.index, "result"] = np.where(won, "WON", "LOST")

    pt["pnl"] = np.where(
        pt["result"] == "WON",
        pt["odds"] - 1.0,
        np.where(pt["result"] == "LOST", -1.0, np.nan),
    )
    pt.to_csv(PAPER_LOG, index=False)

    s = pt.dropna(subset=["pnl"])
    if len(s):
        print(f"Settled {len(s)} bets: hit={100*(s['result']=='WON').mean():.1f}% "
              f"ROI={100*s['pnl'].sum()/len(s):+.2f}% net={s['pnl'].sum():+.1f}u")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: score matches, print staked picks, optionally log them.

    Modes:
        --settle          settle the paper-trade log and exit.
        --features CSV    score the given matches (production path).
        (otherwise)       demo: score the last ``--n`` rows of TRAIN_CSV and
                          show the real result next to each staked pick.

    With ``--log`` (non-demo only) staked picks are appended to PAPER_LOG.
    Matches that already have an entry in the log are skipped, so re-running
    the script for the same slate cannot stake the same match twice.
    """
    # RawDescriptionHelpFormatter keeps the module docstring's Usage block
    # readable in --help instead of reflowing it into one paragraph.
    ap = argparse.ArgumentParser(description=__doc__,
                                 formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument("--features", help="CSV of upcoming matches in training schema")
    ap.add_argument("--demo", action="store_true", help="use tail of training CSV as 'today'")
    ap.add_argument("--n", type=int, default=20)
    ap.add_argument("--lo", type=float, default=1.5)
    ap.add_argument("--hi", type=float, default=2.2)
    ap.add_argument("--margin", type=float, default=0.03)
    ap.add_argument("--settle", action="store_true")
    ap.add_argument("--log", action="store_true", help="append staked picks to paper_trades.csv")
    args = ap.parse_args()

    if args.settle:
        settle()
        return

    bst, feats, meta = load_model()
    print(f"Model {meta['version']} (trained {meta['trained_at']}, holdout "
          f"ROI {meta['holdout_eval']['roi_pct']}%) band[{args.lo},{args.hi}] margin {args.margin}\n")

    if args.features:
        df = pd.read_csv(args.features, low_memory=False)
        demo = False
    else:
        # No feature file given: fall back to the most recent training rows.
        df = (pd.read_csv(TRAIN_CSV, low_memory=False)
              .sort_values("mst_utc")
              .tail(args.n)
              .reset_index(drop=True))
        demo = True
        print("(DEMO: last matches of training CSV as stand-in for today)\n")

    picks = pick_for_rows(df, bst, feats, args.lo, args.hi, args.margin)
    staked = [p for p in picks if p["staked"]]
    print(f"{len(df)} matches scanned -> {len(staked)} STAKED MS picks\n")
    print(f" {'match_id':<28}{'pick':>5}{'odds':>7}{'model%':>8}{'edge%':>7}" + (" result" if demo else ""))
    print(" "+"-"*60)

    # One timestamp for the whole run so all picks of a slate share it.
    logged_at = datetime.datetime.now().isoformat(timespec="seconds")
    log_rows = []
    for p in staked:
        r = df.iloc[p["idx"]]  # "idx" is positional, matching pick_for_rows
        mid = str(r["match_id"])
        res = ""
        if demo:
            sh, sa = r.get("score_home"), r.get("score_away")
            # Need both scores; a missing away score is not an away win.
            if pd.notna(sh) and pd.notna(sa):
                out = "1" if sh > sa else ("X" if sh == sa else "2")
                res = " WON" if p["pick"] == out else " lost"
        print(f" {mid:<28}{p['pick']:>5}{p['odds']:>7.2f}{100*p['model_prob']:>8.1f}{100*p['edge']:>+7.1f}{res}")
        log_rows.append({"logged_at": logged_at,
                         "match_id": mid, "market": "MS", "pick": p["pick"], "odds": p["odds"],
                         "model_prob": p["model_prob"], "edge": p["edge"], "stake": 1.0,
                         "result": np.nan, "pnl": np.nan})

    if args.log and demo:
        print("\n (--log ignored in --demo; only real upcoming picks are logged)")
    elif args.log and log_rows:
        new = pd.DataFrame(log_rows)
        skipped = 0
        combined = new
        if os.path.exists(PAPER_LOG):
            old = pd.read_csv(PAPER_LOG)
            if "match_id" in old.columns:
                # Ids are compared as strings because read_csv may have
                # re-typed the stored ids (e.g. to int).
                seen = set(old["match_id"].astype(str))
                already = new["match_id"].isin(seen)
                skipped = int(already.sum())
                new = new[~already]
            combined = pd.concat([old, new], ignore_index=True)
        if len(new):
            combined.to_csv(PAPER_LOG, index=False)
            print(f"\n logged {len(new)} picks -> {PAPER_LOG}")
        if skipped:
            print(f"\n skipped {skipped} picks already present in {PAPER_LOG}")

    print("\nReminder: paper-trade only. Stake real money after weeks of forward")
    print("CLV>0 + ROI>0 (settle with --settle, check scoreboard/clv_report).")
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user