@@ -0,0 +1,113 @@
|
||||
"""
|
||||
Betting Policy — the honest, leak-free strategy the data actually supports.
|
||||
==========================================================================
|
||||
Everything else in this repo bet UNDERDOGS (odds 6-7.5) and lost (-43.7% live).
|
||||
The data says the opposite: the only positive, fold-consistent, model-driven
|
||||
signal is MILD FAVOURITES the model rates above the market price.
|
||||
|
||||
POLICY (MS / 1X2 only):
|
||||
* leak-free model (drops the result-encoding features, see LEAKY)
|
||||
* bet the model's single biggest value edge (model_prob - implied) ...
|
||||
* ONLY if the picked side's odds are in [--lo, --hi) (favourite band; --hi is exclusive)
|
||||
* ONLY if that edge > --margin
|
||||
* flat 1u stake, one bet per match, never a longshot, never a parlay.
|
||||
|
||||
Walk-forward, no leakage. Reports the policy ROI, fold consistency, drawdown,
|
||||
and the model-free baseline (blind favourite) so you can see the model's lift.
|
||||
|
||||
⚠️ HONEST CAVEAT: CSV odds are a static capture, not the verified obtainable
|
||||
closing line. A small backtest edge here is a LEAD, not a guarantee. Forward
|
||||
paper-trade with real CLV (capture_closing_odds.py) before risking money.
|
||||
|
||||
Usage: python scripts/betting_policy.py --lo 1.5 --hi 2.2 --margin 0.0 --folds 8
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import argparse, os, sys
|
||||
import numpy as np, pandas as pd, xgboost as xgb
|
||||
|
||||
# Best-effort: switch stdout to UTF-8 so the non-ASCII characters in the report
# and docstring print on consoles with a narrower default encoding. Any failure
# is ignored on purpose; the script still runs with the stream's own encoding.
if sys.stdout and hasattr(sys.stdout, "reconfigure"):
    try:
        sys.stdout.reconfigure(encoding="utf-8")
    except Exception:
        pass

# Directory two levels above this file; the training CSV lives in its data/ folder.
AI_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
CSV = os.path.join(AI_DIR, "data", "training_data_v27.csv")

# Columns that are never used as model features: identifiers, the timestamp
# used for ordering, and the match scores the target is derived from.
META = {
    "match_id",
    "home_team_id",
    "away_team_id",
    "league_id",
    "mst_utc",
    "score_home",
    "score_away",
    "ht_score_home",
    "ht_score_away",
}

# Features dropped from the model because, per the module docstring, they
# encode the match result.
LEAKY = {
    "home_goals_form",
    "away_goals_form",
    "total_goals",
    "ht_total_goals",
    "squad_diff",
    "home_squad_quality",
    "away_squad_quality",
    "referee_home_bias",
    "referee_avg_goals",
}
|
||||
|
||||
|
||||
def _max_drawdown(pnl):
    """Return the worst peak-to-trough drop (<= 0) of the cumulative P&L.

    The running peak is seeded with the 0u starting bankroll, so a sequence
    that opens with losses is measured from break-even rather than from its
    first (already negative) cumulative value.
    """
    equity = np.concatenate(([0.0], np.cumsum(pnl)))
    return float((equity - np.maximum.accumulate(equity)).min())


def main():
    """Run the walk-forward backtest of the favourite-band policy and print a report.

    Steps:
      1. Load CSV, sort by ``mst_utc`` and keep only rows with numeric scores.
      2. Build the 1X2 target (0 = home win, 1 = draw, 2 = away win) and the
         feature matrix (every column not in META / LEAKY and not ``label_*``).
      3. Split the later half of the rows into ``--folds`` consecutive test
         folds; for each fold train on all rows before it and predict the fold.
      4. Per match, pick the outcome with the largest ``model_prob - 1/odds``
         edge and bet 1u only if the edge exceeds ``--margin`` and the picked
         odds satisfy ``--lo <= odds < --hi``.
      5. Print per-fold and aggregate ROI, hit rate, max drawdown, and the
         blind-favourite baseline restricted to the same odds band.

    Takes its inputs from the command line and returns nothing.
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        # Keep the module docstring's layout instead of re-wrapping it.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    ap.add_argument("--lo", type=float, default=1.5,
                    help="lowest odds to bet, inclusive (default: %(default)s)")
    ap.add_argument("--hi", type=float, default=2.2,
                    help="highest odds to bet, exclusive (default: %(default)s)")
    ap.add_argument("--margin", type=float, default=0.0,
                    help="edge (model_prob - implied) must exceed this (default: %(default)s)")
    ap.add_argument("--folds", type=int, default=8,
                    help="number of walk-forward test folds (default: %(default)s)")
    ap.add_argument("--estimators", type=int, default=250,
                    help="boosting rounds per fold (default: %(default)s)")
    args = ap.parse_args()

    df = pd.read_csv(CSV, low_memory=False).sort_values("mst_utc").reset_index(drop=True)
    sh = pd.to_numeric(df["score_home"], errors="coerce")
    sa = pd.to_numeric(df["score_away"], errors="coerce")
    ok = sh.notna() & sa.notna()
    df, sh, sa = df[ok].reset_index(drop=True), sh[ok.values].values, sa[ok.values].values
    y = np.where(sh > sa, 0, np.where(sh == sa, 1, 2))
    # Missing / unparsable odds become 0.0, which the `> 1.0` checks below
    # treat as "no price available".
    O = (df[["odds_ms_h", "odds_ms_d", "odds_ms_a"]]
         .apply(pd.to_numeric, errors="coerce").fillna(0.0).values)
    feats = [c for c in df.columns
             if c not in META and not c.startswith("label_") and c not in LEAKY]
    X = df[feats].apply(pd.to_numeric, errors="coerce").fillna(0.0).values

    # The first half of the (time-sorted) rows is always training data; the
    # second half is cut into equal consecutive test folds.
    n = len(df)
    start = int(n * 0.5)
    bounds = np.linspace(start, n, args.folds + 1, dtype=int)
    params = {"objective": "multi:softprob", "num_class": 3, "max_depth": 5, "eta": 0.05,
              "subsample": 0.8, "colsample_bytree": 0.8, "tree_method": "hist", "verbosity": 0}

    print(f"POLICY: favourite band [{args.lo},{args.hi}] margin {args.margin} "
          f"leak-free feats={len(feats)} folds={args.folds}\n")
    all_pnl = []
    fold_rows = []
    base_pnl = []
    for fi in range(args.folds):
        te0, te1 = bounds[fi], bounds[fi + 1]
        if te1 - te0 < 50:
            continue  # fold too small to say anything
        bst = xgb.train(params, xgb.DMatrix(X[:te0], label=y[:te0]),
                        num_boost_round=args.estimators)
        P = bst.predict(xgb.DMatrix(X[te0:te1]))
        yte, Ote = y[te0:te1], O[te0:te1]
        rows = np.arange(len(yte))

        # Implied probability only where a real price exists; dividing under a
        # mask avoids divide-by-zero warnings on the 0.0 placeholders.
        implied = np.divide(1.0, Ote, out=np.full(Ote.shape, np.nan), where=Ote > 1.0)
        # Outcomes without a price get a sentinel edge so they are never picked
        # ahead of a priced outcome.
        edge = np.where(np.isnan(implied), -9.0, P - implied)
        pick = edge.argmax(1)
        pe = edge[rows, pick]
        po = Ote[rows, pick]
        bet = (pe > args.margin) & (po >= args.lo) & (po < args.hi)
        win = (pick == yte) & bet
        pnl = np.where(win, po - 1.0, -1.0)[bet]

        # model-free baseline: blind favourite in same band
        fav = Ote.argmin(1)
        fo = Ote[rows, fav]
        bmask = (fo >= args.lo) & (fo < args.hi) & (Ote > 1.0).all(1)
        bpnl = np.where(fav[bmask] == yte[bmask], fo[bmask] - 1.0, -1.0)

        roi = 100 * pnl.sum() / len(pnl) if len(pnl) else float('nan')
        broi = 100 * bpnl.sum() / len(bpnl) if len(bpnl) else float('nan')
        hit = 100 * win.sum() / max(bet.sum(), 1)
        fold_rows.append((fi, len(pnl), hit, roi, broi))
        all_pnl.extend(pnl.tolist())
        base_pnl.extend(bpnl.tolist())
        print(f" fold {fi}: policy_bets={len(pnl):>4} hit={hit:>5.1f}% "
              f"ROI={roi:>7.2f}% | baseline(blind fav) ROI={broi:>7.2f}%")

    a = np.array(all_pnl)
    b = np.array(base_pnl)
    print("\n" + "=" * 70)
    print("AGGREGATE")
    print("=" * 70)
    if len(a):
        dd = _max_drawdown(a)
        # A fold with no bets has ROI = nan and therefore never counts as positive.
        folds_pos = sum(1 for r in fold_rows if r[3] > 0)
        print(f" POLICY: bets={len(a):>5} hit={100*(a>0).mean():.1f}% "
              f"ROI={100*a.mean():+.2f}% net={a.sum():+.1f}u maxDD={dd:.1f}u "
              f"folds+={folds_pos}/{len(fold_rows)}")
    if len(b):
        print(f" BASELINE: bets={len(b):>5} hit={100*(b>0).mean():.1f}% "
              f"ROI={100*b.mean():+.2f}% (blind favourite, same band)")
    # The lift is only defined when both sides placed at least one bet.
    if len(a) and len(b):
        print(f"\n MODEL LIFT over blind favourite: "
              f"{100*a.mean()-100*b.mean():+.1f} percentage points")
    print("\nREAD: a believable system has ROI>0, folds+ near full, tolerable maxDD,")
    print("and clearly beats the blind-favourite baseline. Even then it's a LEAD —")
    print("forward paper-trade with real CLV before staking real money.")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user