gg3

2026-06-05 00:36:24 +03:00
parent b9700f9fda
commit 9e41407cb5
10 changed files with 1683 additions and 0 deletions
@@ -0,0 +1,113 @@
+"""
+Betting Policy — the honest, leak-free strategy the data actually supports.
+==========================================================================
+Everything else in this repo bet UNDERDOGS (odds 6-7.5) and lost (-43.7% live).
+The data says the opposite: the only positive, fold-consistent, model-driven
+signal is MILD FAVOURITES the model rates above the market price.
+
+POLICY (MS / 1X2 only):
+  * leak-free model (drops the result-encoding features, see LEAKY)
+  * bet the model's single biggest value edge (model_prob - implied) ...
+  * ONLY if the picked side's odds are in [--lo, --hi)  (favourite band; --hi is exclusive)
+  * ONLY if that edge > --margin
+  * flat 1u stake, one bet per match, never a longshot, never a parlay.
+
+Walk-forward, no leakage. Reports the policy ROI, fold consistency, drawdown,
+and the model-free baseline (blind favourite) so you can see the model's lift.
+
+⚠️ HONEST CAVEAT: CSV odds are a static capture, not the verified obtainable
+closing line. A small backtest edge here is a LEAD, not a guarantee. Forward
+paper-trade with real CLV (capture_closing_odds.py) before risking money.
+
+Usage: python scripts/betting_policy.py --lo 1.5 --hi 2.2 --margin 0.0 --folds 8
+"""
+from __future__ import annotations
+import argparse, os, sys
+import numpy as np, pandas as pd, xgboost as xgb
+
+if sys.stdout and hasattr(sys.stdout, "reconfigure"):  # stdout may be None or lack reconfigure()
+    try: sys.stdout.reconfigure(encoding="utf-8")  # switch console output to UTF-8
+    except Exception: pass  # best-effort: on failure keep the stream's existing encoding
+
+AI_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))  # parent of the directory holding this script
+CSV = os.path.join(AI_DIR, "data", "training_data_v27.csv")  # table loaded by main()
+META = {"match_id","home_team_id","away_team_id","league_id","mst_utc",  # columns main() never uses as model features
+        "score_home","score_away","ht_score_home","ht_score_away"}
+LEAKY = {"home_goals_form","away_goals_form","total_goals","ht_total_goals",  # also dropped from features; the module docstring calls these result-encoding
+         "squad_diff","home_squad_quality","away_squad_quality",
+         "referee_home_bias","referee_avg_goals"}
+
+
+def main():
+    """Walk-forward backtest of the favourite-band value policy; prints a report.
+
+    Command-line options:
+      --lo / --hi    odds band a pick must fall in (``lo`` inclusive, ``hi`` exclusive)
+      --margin       minimum (model_prob - implied_prob) edge required to bet
+      --folds        number of consecutive test folds cut from the later half of the rows
+      --estimators   boosting rounds used for each fold's model
+
+    Rows are sorted by ``mst_utc`` and rows without a numeric final score are
+    dropped. Each fold trains on every row that precedes it and is scored on
+    the fold itself. Nothing is returned; all results are written to stdout.
+    """
+    ap = argparse.ArgumentParser(description=__doc__)
+    ap.add_argument("--lo", type=float, default=1.5)
+    ap.add_argument("--hi", type=float, default=2.2)
+    ap.add_argument("--margin", type=float, default=0.0)
+    ap.add_argument("--folds", type=int, default=8)
+    ap.add_argument("--estimators", type=int, default=250)
+    args = ap.parse_args()
+
+    # Sorting by mst_utc is what makes the expanding-window split below walk-forward.
+    df = pd.read_csv(CSV, low_memory=False).sort_values("mst_utc").reset_index(drop=True)
+    sh = pd.to_numeric(df["score_home"], errors="coerce")
+    sa = pd.to_numeric(df["score_away"], errors="coerce")
+    # Keep only rows whose home and away scores both parse as numbers.
+    ok = sh.notna() & sa.notna()
+    df, sh, sa = df[ok].reset_index(drop=True), sh[ok.values].values, sa[ok.values].values
+    # Outcome label: 0 = home win, 1 = draw, 2 = away win (same order as the odds columns).
+    y = np.where(sh > sa, 0, np.where(sh == sa, 1, 2))
+    # Missing or unparseable odds become 0.0; the `> 1.0` checks below treat them as unpriced.
+    O = df[["odds_ms_h","odds_ms_d","odds_ms_a"]].apply(pd.to_numeric, errors="coerce").fillna(0.0).values
+    feats = [c for c in df.columns if c not in META and not c.startswith("label_") and c not in LEAKY]
+    X = df[feats].apply(pd.to_numeric, errors="coerce").fillna(0.0).values
+
+    # The first half of the rows is training-only; the second half is cut into the test folds.
+    n = len(df); start = int(n*0.5)
+    bounds = np.linspace(start, n, args.folds+1, dtype=int)
+    params = {"objective":"multi:softprob","num_class":3,"max_depth":5,"eta":0.05,
+              "subsample":0.8,"colsample_bytree":0.8,"tree_method":"hist","verbosity":0}
+
+    print(f"POLICY: favourite band [{args.lo},{args.hi}]  margin {args.margin}  "
+          f"leak-free feats={len(feats)}  folds={args.folds}\n")
+    all_pnl=[]; fold_rows=[]; base_pnl=[]
+    for fi in range(args.folds):
+        te0,te1 = bounds[fi], bounds[fi+1]
+        if te1-te0 < 50: continue  # skip folds with fewer than 50 test rows
+        # Train on everything before the fold, predict the fold.
+        bst = xgb.train(params, xgb.DMatrix(X[:te0], label=y[:te0]), num_boost_round=args.estimators)
+        P = bst.predict(xgb.DMatrix(X[te0:te1]))
+        yte, Ote = y[te0:te1], O[te0:te1]
+        # Odds <= 1.0 (including the 0.0 fill) get no implied probability; the -9.0
+        # edge keeps such an outcome from being picked over a priced one.
+        implied = np.where(Ote>1.0, 1.0/Ote, np.nan)
+        edge = np.where(np.isnan(implied), -9.0, P-implied)
+        # One candidate per match: the outcome with the largest edge, plus its edge and odds.
+        pick = edge.argmax(1); pe = edge[np.arange(len(yte)),pick]; po = Ote[np.arange(len(yte)),pick]
+        bet = (pe>args.margin) & (po>=args.lo) & (po<args.hi)
+        win = (pick==yte)&bet
+        # Flat 1u stake: a win returns odds-1, a loss costs 1.
+        pnl = np.where(win, po-1.0, -1.0)[bet]
+        # model-free baseline: blind favourite in same band
+        fav=Ote.argmin(1); fo=Ote[np.arange(len(yte)),fav]
+        bmask=(fo>=args.lo)&(fo<args.hi)&(Ote>1.0).all(1)
+        bpnl=np.where(fav[bmask]==yte[bmask], fo[bmask]-1.0, -1.0)
+        roi = 100*pnl.sum()/len(pnl) if len(pnl) else float('nan')
+        broi= 100*bpnl.sum()/len(bpnl) if len(bpnl) else float('nan')
+        hit = 100*win.sum()/max(bet.sum(),1)
+        fold_rows.append((fi, len(pnl), hit, roi, broi))
+        all_pnl.extend(pnl.tolist()); base_pnl.extend(bpnl.tolist())
+        print(f"  fold {fi}: policy_bets={len(pnl):>4}  hit={hit:>5.1f}%  "
+              f"ROI={roi:>7.2f}%   | baseline(blind fav) ROI={broi:>7.2f}%")
+
+    a=np.array(all_pnl); b=np.array(base_pnl)
+    print("\n"+"="*70)
+    print("AGGREGATE")
+    print("="*70)
+    if len(a):
+        # The starting bankroll (0) counts as the first peak; without the clamp a
+        # run that loses from the very first bet would report a drawdown of 0.
+        cum=np.cumsum(a); peak=np.maximum(np.maximum.accumulate(cum), 0.0); dd=(cum-peak).min()
+        # A fold with no bets has a NaN ROI, which never counts as positive.
+        folds_pos=sum(1 for r in fold_rows if r[3]>0)
+        print(f"  POLICY:   bets={len(a):>5}  hit={100*(a>0).mean():.1f}%  "
+              f"ROI={100*a.mean():+.2f}%  net={a.sum():+.1f}u  maxDD={dd:.1f}u  "
+              f"folds+={folds_pos}/{len(fold_rows)}")
+    if len(b):
+        print(f"  BASELINE: bets={len(b):>5}  hit={100*(b>0).mean():.1f}%  "
+              f"ROI={100*b.mean():+.2f}%  (blind favourite, same band)")
+    # Lift needs both sides; with no baseline bets b.mean() would be NaN plus a warning.
+    if len(a) and len(b):
+        print(f"\n  MODEL LIFT over blind favourite: "
+              f"{100*a.mean()-100*b.mean():+.1f} percentage points")
+    print("\nREAD: a believable system has ROI>0, folds+ near full, tolerable maxDD,")
+    print("and clearly beats the blind-favourite baseline. Even then it's a LEAD —")
+    print("forward paper-trade with real CLV before staking real money.")
+
+
+if __name__ == "__main__":
+    main()