gg3
Deploy Iddaai Backend / build-and-deploy (push) Successful in 35s

This commit is contained in:
2026-06-05 00:36:24 +03:00
parent b9700f9fda
commit 9e41407cb5
10 changed files with 1683 additions and 0 deletions
+113
View File
@@ -0,0 +1,113 @@
"""
Betting Policy — the honest, leak-free strategy the data actually supports.
==========================================================================
Everything else in this repo bet UNDERDOGS (odds 6-7.5) and lost (-43.7% live).
The data says the opposite: the only positive, fold-consistent, model-driven
signal is MILD FAVOURITES the model rates above the market price.
POLICY (MS / 1X2 only):
* leak-free model (drops the result-encoding features, see LEAKY)
* bet the model's single biggest value edge (model_prob - implied) ...
* ONLY if the picked side's odds are in [--lo, --hi) (favourite band; --hi is exclusive)
* ONLY if that edge > --margin
* flat 1u stake, one bet per match, never a longshot, never a parlay.
Walk-forward, no leakage. Reports the policy ROI, fold consistency, drawdown,
and the model-free baseline (blind favourite) so you can see the model's lift.
⚠️ HONEST CAVEAT: CSV odds are a static capture, not the verified obtainable
closing line. A small backtest edge here is a LEAD, not a guarantee. Forward
paper-trade with real CLV (capture_closing_odds.py) before risking money.
Usage: python scripts/betting_policy.py --lo 1.5 --hi 2.2 --margin 0.0 --folds 8
"""
from __future__ import annotations
import argparse, os, sys
import numpy as np, pandas as pd, xgboost as xgb
# Best-effort: switch stdout to UTF-8 so the non-ASCII characters this script
# prints (such as the em dash in the closing message) can be encoded.
# Any failure is deliberately ignored; output then uses the stream's
# existing encoding.
if sys.stdout and hasattr(sys.stdout, "reconfigure"):
    try:
        sys.stdout.reconfigure(encoding="utf-8")
    except Exception:
        pass
# Directory one level above the folder that contains this file.
AI_DIR = os.path.dirname(
    os.path.dirname(os.path.abspath(__file__))
)

# Training data read by main().
CSV = os.path.join(AI_DIR, "data", "training_data_v27.csv")

# Non-feature columns (identifiers, the sort key and the scores): never fed
# to the model.
META = {
    "match_id",
    "home_team_id",
    "away_team_id",
    "league_id",
    "mst_utc",
    "score_home",
    "score_away",
    "ht_score_home",
    "ht_score_away",
}

# Columns excluded from the feature set because they encode the result
# (see the module docstring).
LEAKY = {
    "home_goals_form",
    "away_goals_form",
    "total_goals",
    "ht_total_goals",
    "squad_diff",
    "home_squad_quality",
    "away_squad_quality",
    "referee_home_bias",
    "referee_avg_goals",
}
def _max_drawdown(pnl):
    """Return the deepest peak-to-trough drop (<= 0) of the cumulative P&L.

    The running peak is floored at 0 (the starting bankroll), so a sequence
    that opens with losses reports that loss as drawdown instead of hiding it.
    An empty sequence has no drawdown and yields 0.0.
    """
    if len(pnl) == 0:
        return 0.0
    cum = np.cumsum(pnl)
    peak = np.maximum.accumulate(np.maximum(cum, 0.0))
    return float((cum - peak).min())


def _settle_policy(probs, odds, outcome, lo, hi, margin):
    """Settle the model-driven policy on one test fold.

    For every match the side with the largest ``model_prob - 1/odds`` edge is
    picked; it is bet (flat 1u) only if that edge exceeds ``margin`` and the
    picked odds lie in ``[lo, hi)``.

    Returns ``(pnl, wins)``: per-bet profit in units and the number of winners.
    """
    rows = np.arange(len(outcome))
    # Only divide where the odds are usable; missing odds were zero-filled and
    # would otherwise trigger divide-by-zero warnings.
    valid = odds > 1.0
    implied = np.full(odds.shape, np.nan, dtype=float)
    implied[valid] = 1.0 / odds[valid]
    # Sides without usable odds get a hugely negative edge so they never win argmax
    # against a priced side.
    edge = np.where(valid, probs - implied, -9.0)
    pick = edge.argmax(axis=1)
    pick_edge = edge[rows, pick]
    pick_odds = odds[rows, pick]
    bet = (pick_edge > margin) & (pick_odds >= lo) & (pick_odds < hi)
    won = (pick == outcome) & bet
    pnl = np.where(won, pick_odds - 1.0, -1.0)[bet]
    return pnl, int(won.sum())


def _settle_baseline(odds, outcome, lo, hi):
    """Settle the model-free baseline: blindly back the lowest-odds side.

    A match is bet only when all three odds are > 1.0 and the favourite's odds
    lie in ``[lo, hi)``. Returns the per-bet profit in units.
    """
    rows = np.arange(len(outcome))
    fav = odds.argmin(axis=1)
    fav_odds = odds[rows, fav]
    mask = (fav_odds >= lo) & (fav_odds < hi) & (odds > 1.0).all(axis=1)
    return np.where(fav[mask] == outcome[mask], fav_odds[mask] - 1.0, -1.0)


def main():
    """Run the walk-forward backtest of the favourite-band policy and print a report.

    Reads the CSV at ``CSV``, keeps rows with numeric scores, and uses the
    first half of the (``mst_utc``-sorted) rows as the initial training window.
    The second half is split into ``--folds`` consecutive test folds; for each
    one an XGBoost model is trained on everything before the fold and the
    policy plus the blind-favourite baseline are settled on the fold itself.
    Per-fold and aggregate ROI, hit rate, max drawdown and model lift are
    printed to stdout. Returns ``None``.
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        # Keep the docstring's line layout in --help instead of reflowing it.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    ap.add_argument("--lo", type=float, default=1.5,
                    help="lower odds bound of the band (inclusive)")
    ap.add_argument("--hi", type=float, default=2.2,
                    help="upper odds bound of the band (exclusive)")
    ap.add_argument("--margin", type=float, default=0.0,
                    help="minimum model edge over the implied probability")
    ap.add_argument("--folds", type=int, default=8,
                    help="number of walk-forward test folds")
    ap.add_argument("--estimators", type=int, default=250,
                    help="boosting rounds per fold")
    args = ap.parse_args()
    if args.folds < 1:
        ap.error("--folds must be >= 1")

    df = pd.read_csv(CSV, low_memory=False).sort_values("mst_utc").reset_index(drop=True)

    # Keep only rows whose final score parses as numbers.
    sh = pd.to_numeric(df["score_home"], errors="coerce")
    sa = pd.to_numeric(df["score_away"], errors="coerce")
    ok = sh.notna() & sa.notna()
    df = df[ok].reset_index(drop=True)
    sh, sa = sh[ok].to_numpy(), sa[ok].to_numpy()

    # Outcome classes: 0 = home win, 1 = draw, 2 = away win (same column order
    # as the odds matrix below).
    y = np.where(sh > sa, 0, np.where(sh == sa, 1, 2))
    O = (df[["odds_ms_h", "odds_ms_d", "odds_ms_a"]]
         .apply(pd.to_numeric, errors="coerce").fillna(0.0).values)
    feats = [c for c in df.columns
             if c not in META and not c.startswith("label_") and c not in LEAKY]
    X = df[feats].apply(pd.to_numeric, errors="coerce").fillna(0.0).values

    n = len(df)
    start = int(n * 0.5)
    bounds = np.linspace(start, n, args.folds + 1, dtype=int)
    params = {"objective": "multi:softprob", "num_class": 3, "max_depth": 5, "eta": 0.05,
              "subsample": 0.8, "colsample_bytree": 0.8, "tree_method": "hist", "verbosity": 0}
    print(f"POLICY: favourite band [{args.lo},{args.hi}] margin {args.margin} "
          f"leak-free feats={len(feats)} folds={args.folds}\n")

    all_pnl = []
    base_pnl = []
    fold_rows = []
    for fi in range(args.folds):
        te0, te1 = bounds[fi], bounds[fi + 1]
        if te1 - te0 < 50:
            # Too few test matches for a meaningful fold.
            continue
        # Walk-forward: train strictly on rows before the test window.
        bst = xgb.train(params, xgb.DMatrix(X[:te0], label=y[:te0]),
                        num_boost_round=args.estimators)
        P = bst.predict(xgb.DMatrix(X[te0:te1]))
        yte, Ote = y[te0:te1], O[te0:te1]

        pnl, wins = _settle_policy(P, Ote, yte, args.lo, args.hi, args.margin)
        bpnl = _settle_baseline(Ote, yte, args.lo, args.hi)

        hit = 100 * wins / max(len(pnl), 1)
        roi = 100 * pnl.sum() / len(pnl) if len(pnl) else float('nan')
        broi = 100 * bpnl.sum() / len(bpnl) if len(bpnl) else float('nan')
        fold_rows.append((fi, len(pnl), hit, roi, broi))
        all_pnl.extend(pnl.tolist())
        base_pnl.extend(bpnl.tolist())
        print(f" fold {fi}: policy_bets={len(pnl):>4} hit={hit:>5.1f}% "
              f"ROI={roi:>7.2f}% | baseline(blind fav) ROI={broi:>7.2f}%")

    a = np.array(all_pnl)
    b = np.array(base_pnl)
    print("\n" + "=" * 70)
    print("AGGREGATE")
    print("=" * 70)
    if len(a):
        dd = _max_drawdown(a)
        # A fold with no bets has NaN ROI and therefore never counts as positive.
        folds_pos = sum(1 for r in fold_rows if r[3] > 0)
        print(f" POLICY: bets={len(a):>5} hit={100*(a>0).mean():.1f}% "
              f"ROI={100*a.mean():+.2f}% net={a.sum():+.1f}u maxDD={dd:.1f}u "
              f"folds+={folds_pos}/{len(fold_rows)}")
    if len(b):
        print(f" BASELINE: bets={len(b):>5} hit={100*(b>0).mean():.1f}% "
              f"ROI={100*b.mean():+.2f}% (blind favourite, same band)")
    # The lift is only defined when both strategies actually placed bets.
    if len(a) and len(b):
        print(f"\n MODEL LIFT over blind favourite: "
              f"{100*a.mean()-100*b.mean():+.1f} percentage points")
    print("\nREAD: a believable system has ROI>0, folds+ near full, tolerable maxDD,")
    print("and clearly beats the blind-favourite baseline. Even then it's a LEAD —")
    print("forward paper-trade with real CLV before staking real money.")
# Run the backtest only when executed as a script, not when imported.
if __name__ == "__main__":
    main()