gg3
Deploy Iddaai Backend / build-and-deploy (push) Successful in 35s

This commit is contained in:
2026-06-05 00:36:24 +03:00
parent b9700f9fda
commit 9e41407cb5
10 changed files with 1683 additions and 0 deletions
+182
View File
@@ -0,0 +1,182 @@
"""
Multi-Market Edge + Best-Bet Selector — pick the best value bet PER MATCH
========================================================================
Not "play the handed main_pick". For each match, score EVERY market the model
covers, compare model prob vs market implied, and select the single best VALUE
bet across all markets. Leak-free, walk-forward, honest.
Markets (truth derived from scores, not trusted labels):
MS(1X2), HT-result, OU0.5/1.5/2.5/3.5, HT_OU0.5/1.5, BTTS.
Outputs:
(A) per-market value ROI -> which bet types actually carry edge
(B) cross-market SELECTOR -> best value bet per match, with odds-band filter,
fold-consistency, and the model-free baseline.
⚠️ CSV odds are a static capture, not verified closing. Positive = LEAD; forward
paper-trade with real CLV before staking.
Usage: python scripts/multi_market_edge.py --folds 5 --lo 1.5 --hi 2.6 --margin 0.03
"""
from __future__ import annotations
import argparse, os, sys
import numpy as np, pandas as pd, xgboost as xgb
# Best-effort switch of stdout to UTF-8 so non-ASCII text can be printed on
# consoles whose default encoding is narrower.
if sys.stdout and hasattr(sys.stdout, "reconfigure"):
    try:
        sys.stdout.reconfigure(encoding="utf-8")
    except Exception:
        # Deliberately ignored: if the stream cannot be reconfigured the script
        # keeps running with whatever encoding stdout already has.
        pass
# Directory two levels above this script file; the data folder lives under it.
_THIS_FILE = os.path.abspath(__file__)
AI_DIR = os.path.dirname(os.path.dirname(_THIS_FILE))

# Dataset loaded by main().
CSV = os.path.join(AI_DIR, "data", "training_data_v27.csv")

# Identifier, timestamp and score columns; main() never uses them as features.
META = {
    "match_id",
    "home_team_id",
    "away_team_id",
    "league_id",
    "mst_utc",
    "score_home",
    "score_away",
    "ht_score_home",
    "ht_score_away",
}

# Columns main() drops from the feature set.
# NOTE(review): presumably these carry post-match information - confirm against
# the pipeline that builds the CSV.
LEAKY = {
    "home_goals_form",
    "away_goals_form",
    "total_goals",
    "ht_total_goals",
    "squad_diff",
    "home_squad_quality",
    "away_squad_quality",
    "referee_home_bias",
    "referee_avg_goals",
}
# market -> (kind, [odds_cols aligned to classes], [pick labels aligned to classes], truth_fn(sh,sa,hh,ha) -> class idx or None)
def ou(line):
    """Build the full-time over/under truth function for a goal line.

    The returned callable takes (sh, sa, hh, ha) and looks only at the
    full-time scores: it yields 0 (Over) when sh + sa is strictly greater than
    ``line`` and 1 (Under) otherwise.
    """
    def _truth(sh, sa, hh, ha):
        if sh + sa > line:
            return 0
        return 1
    return _truth
def htou(line):
    """Build the half-time over/under truth function for a goal line.

    The returned callable takes (sh, sa, hh, ha) and looks only at the
    half-time scores. It yields:
      * None when either half-time score is NaN (match cannot be labelled),
      * 0 (Over) when hh + ha is strictly greater than ``line``,
      * 1 (Under) otherwise.
    """
    def _truth(sh, sa, hh, ha):
        # Both sides must be known: a NaN on only one side would make the
        # comparison below False and silently label the match as "Under".
        if np.isnan(hh) or np.isnan(ha):
            return None
        return 0 if (hh + ha) > line else 1
    return _truth
def ms_truth(sh, sa, hh, ha):
    """Full-time 1X2 class from the final score: 0=home win, 1=draw, 2=away win.

    The half-time arguments are accepted for signature parity with the other
    truth functions and are not used.
    """
    if sh > sa:
        return 0
    if sh == sa:
        return 1
    return 2
def ht_truth(sh, sa, hh, ha):
    """Half-time 1X2 class: 0=home leads, 1=level, 2=away leads.

    Returns None when either half-time score is NaN, so the match is excluded
    from training and evaluation for this market. The full-time arguments are
    accepted for signature parity and are not used.
    """
    # Both sides must be known: with only ``ha`` missing, every comparison
    # below is False and the match would be mislabelled as an away lead.
    if np.isnan(hh) or np.isnan(ha):
        return None
    if hh > ha:
        return 0
    if hh == ha:
        return 1
    return 2
def btts_truth(sh, sa, hh, ha):
    """Both-teams-to-score class from the final score: 0=Yes, 1=No.

    The half-time arguments are accepted for signature parity and are not used.
    """
    both_scored = sh > 0 and sa > 0
    if both_scored:
        return 0
    return 1
# Market registry, iterated in this order by main().
# name -> (kind, odds columns, pick labels, truth function). Odds columns and
# pick labels are index-aligned with the class index the truth function returns.
MARKETS = {
    # Match result, full time and half time.
    "MS": (
        "multi",
        ["odds_ms_h", "odds_ms_d", "odds_ms_a"],
        ["1", "X", "2"],
        ms_truth,
    ),
    "HT": (
        "multi",
        ["odds_ht_ms_h", "odds_ht_ms_d", "odds_ht_ms_a"],
        ["1", "X", "2"],
        ht_truth,
    ),
    # Full-time total goals, over/under.
    "OU05": (
        "binary",
        ["odds_ou05_o", "odds_ou05_u"],
        ["Üst", "Alt"],
        ou(0.5),
    ),
    "OU15": (
        "binary",
        ["odds_ou15_o", "odds_ou15_u"],
        ["Üst", "Alt"],
        ou(1.5),
    ),
    "OU25": (
        "binary",
        ["odds_ou25_o", "odds_ou25_u"],
        ["Üst", "Alt"],
        ou(2.5),
    ),
    "OU35": (
        "binary",
        ["odds_ou35_o", "odds_ou35_u"],
        ["Üst", "Alt"],
        ou(3.5),
    ),
    # Half-time total goals, over/under.
    "HT_OU05": (
        "binary",
        ["odds_ht_ou05_o", "odds_ht_ou05_u"],
        ["Üst", "Alt"],
        htou(0.5),
    ),
    "HT_OU15": (
        "binary",
        ["odds_ht_ou15_o", "odds_ht_ou15_u"],
        ["Üst", "Alt"],
        htou(1.5),
    ),
    # Both teams to score.
    "BTTS": (
        "binary",
        ["odds_btts_y", "odds_btts_n"],
        ["Var", "Yok"],
        btts_truth,
    ),
}
# Booster settings common to both model kinds.
_PARAMS_SHARED = {
    "max_depth": 5,
    "eta": 0.05,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "tree_method": "hist",
    "verbosity": 0,
}
# 3-class model used for the "multi" markets.
PARAMS_M = {"objective": "multi:softprob", "num_class": 3, **_PARAMS_SHARED}
# Binary model used for the "binary" markets.
PARAMS_B = {"objective": "binary:logistic", **_PARAMS_SHARED}
def _fit_predict(kind, X_train, y_train, X_test, rounds):
    """Train one market model and return per-class probabilities for X_test.

    kind: "multi" trains with PARAMS_M on the class indices directly; any other
        value trains with PARAMS_B on the indicator "class 0 happened".
    y_train: integer class indices aligned with the market's odds columns.
    rounds: number of boosting rounds.
    Returns a 2-D array with one row per test match and one column per class
    (binary markets: column 0 = class 0, column 1 = its complement).
    """
    dtest = xgb.DMatrix(X_test)
    if kind == "multi":
        booster = xgb.train(PARAMS_M, xgb.DMatrix(X_train, label=y_train),
                            num_boost_round=rounds)
        return booster.predict(dtest)
    positive = (y_train == 0).astype(int)
    booster = xgb.train(PARAMS_B, xgb.DMatrix(X_train, label=positive),
                        num_boost_round=rounds)
    p_pos = booster.predict(dtest)
    return np.column_stack([p_pos, 1.0 - p_pos])


def _best_value_pick(probs, odds, truth_idx, lo, hi, margin):
    """Return the highest-edge qualifying pick of one market for one match.

    A pick qualifies when its odds are above 1.0 and inside [lo, hi) and its
    edge (model probability minus 1/odds) exceeds ``margin``. On equal edges
    the lower class index is kept.
    Returns (edge, class_idx, odds, won) or None when nothing qualifies.
    """
    best = None
    for k, (p, o) in enumerate(zip(probs, odds)):
        if o <= 1.0:
            # Missing odds were filled with 0.0 upstream; nothing to bet on.
            continue
        edge = p - 1.0 / o
        if edge > margin and lo <= o < hi and (best is None or edge > best[0]):
            best = (edge, k, o, int(truth_idx == k))
    return best


def _settle(acc, odds, won):
    """Book one flat 1-unit bet into accumulator ``acc`` and return its P&L."""
    pnl = (odds - 1.0) if won else -1.0
    acc["n"] += 1
    acc["pnl"] += pnl
    acc["win"] += won
    return pnl


def _format_line(name, d):
    """Format one report row (bets, hit rate, ROI, net units) for accumulator ``d``."""
    n = d["n"]
    roi = 100*d["pnl"]/n if n else float('nan')
    hit = 100*d["win"]/n if n else float('nan')
    return f" {name:<10} bets={n:>6} hit={hit:>5.1f}% ROI={roi:>7.2f}% net={d['pnl']:>7.1f}u"


def main():
    """Run the walk-forward multi-market evaluation and print the report.

    Steps:
      1. Load CSV, sort by ``mst_utc`` and drop rows without a full-time score.
      2. Build the feature matrix from every column that is not in META, not in
         LEAKY and not prefixed ``label_``.
      3. For every market in MARKETS whose odds columns exist, derive the true
         class per match from the scores.
      4. Keep the first half of the rows as train-only and split the rest into
         ``--folds`` consecutive test windows; for each window train every
         market model on all earlier rows and score the window.
      5. Per match, take each market's best value pick (in the odds band, edge
         above margin) for report (A), and the best of those across markets
         for the selector report (B). All bets are flat 1-unit stakes.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--folds", type=int, default=5)
    ap.add_argument("--estimators", type=int, default=150)
    ap.add_argument("--lo", type=float, default=1.5)
    ap.add_argument("--hi", type=float, default=2.6)
    ap.add_argument("--margin", type=float, default=0.03)
    args = ap.parse_args()

    df = pd.read_csv(CSV, low_memory=False).sort_values("mst_utc").reset_index(drop=True)
    sh = pd.to_numeric(df["score_home"], errors="coerce")
    sa = pd.to_numeric(df["score_away"], errors="coerce")
    ok = sh.notna() & sa.notna()
    df = df[ok].reset_index(drop=True)
    SH = sh[ok.values].values.astype(float)
    SA = sa[ok.values].values.astype(float)
    HH = pd.to_numeric(df["ht_score_home"], errors="coerce").values.astype(float)
    HA = pd.to_numeric(df["ht_score_away"], errors="coerce").values.astype(float)
    feats = [c for c in df.columns
             if c not in META and not c.startswith("label_") and c not in LEAKY]
    X = df[feats].apply(pd.to_numeric, errors="coerce").fillna(0.0).values
    N = len(df)
    print(f"{N:,} matches, {len(feats)} leak-free feats, {len(MARKETS)} markets, folds={args.folds}")

    # Precompute truth + odds per market.
    MK = {}
    for mname, (kind, ocols, picks, tfn) in MARKETS.items():
        if not all(c in df.columns for c in ocols):
            print(f" skip {mname}: missing odds cols")
            continue
        # Missing odds become 0.0 and are rejected later by the "odds > 1.0" check.
        O = df[ocols].apply(pd.to_numeric, errors="coerce").fillna(0.0).values
        # Object dtype so that None (unlabelled match) can sit next to ints.
        truth = np.array([tfn(*row) for row in zip(SH, SA, HH, HA)], dtype=object)
        MK[mname] = (kind, O, picks, truth)

    start = int(N*0.5)
    bounds = np.linspace(start, N, args.folds+1, dtype=int)

    # Accumulators.
    per_market = {m: {"n": 0, "pnl": 0.0, "win": 0} for m in MK}   # (A) best value pick within market
    sel = {"n": 0, "pnl": 0.0, "win": 0, "fold": {}}               # (B) cross-market selector
    sel_by_mkt = {m: {"n": 0, "pnl": 0.0, "win": 0} for m in MK}

    for fi in range(args.folds):
        te0, te1 = bounds[fi], bounds[fi+1]
        n_test = te1 - te0
        if n_test < 50:
            continue

        # Train each market model on [:te0], predict the test window.
        cand = {}  # market -> (P[n_test, n_picks], odds_test, truth_test)
        for m, (kind, O, picks, truth) in MK.items():
            # Mask invalid truth (e.g. HT markets with a missing HT score).
            valid_tr = np.array([v is not None for v in truth[:te0]], dtype=bool)
            if not valid_tr.any():
                # Nothing to learn from yet; leave this market out of the fold
                # rather than training on an empty matrix.
                print(f" skip {m} in fold {fi}: no labelled training rows")
                continue
            ytr = truth[:te0][valid_tr].astype(int)
            P = _fit_predict(kind, X[:te0][valid_tr], ytr, X[te0:te1], args.estimators)
            cand[m] = (P, O[te0:te1], truth[te0:te1])

        # Iterate test matches.
        for j in range(n_test):
            best = None  # (edge, market, class_idx, odds, won)
            for m, (P, Ot, Tt) in cand.items():
                t = Tt[j]
                if t is None:
                    continue
                pick = _best_value_pick(P[j], Ot[j], t, args.lo, args.hi, args.margin)
                if pick is None:
                    continue
                edge, k, o, won = pick
                # (A) the market's single best pick for this match.
                _settle(per_market[m], o, won)
                # (B) candidate for the cross-market selector; strict ">" keeps
                # the earlier market on equal edges.
                if best is None or edge > best[0]:
                    best = (edge, m, k, o, won)
            # Selector: single best value bet across all markets for this match.
            if best is not None:
                _, m, _, o, won = best
                pnl = _settle(sel, o, won)
                sel["fold"][fi] = sel["fold"].get(fi, 0.0) + pnl
                _settle(sel_by_mkt[m], o, won)
        print(f" fold {fi}: tested {n_test:,}")

    def _roi_rank(m):
        # Highest ROI first; markets without bets sort to the end.
        d = per_market[m]
        return -(100*d["pnl"]/d["n"]) if d["n"] else 99

    print("\n"+"="*70)
    print(f"(A) PER-MARKET value ROI (best value pick in band [{args.lo},{args.hi}], margin {args.margin})")
    print("="*70)
    for m in sorted(per_market, key=_roi_rank):
        print(_format_line(m, per_market[m]))

    print("\n"+"="*70)
    print("(B) CROSS-MARKET SELECTOR (best value bet per match, all markets)")
    print("="*70)
    print(_format_line("SELECTOR", sel))
    # Only folds in which the selector placed at least one bet appear here.
    folds_pos = sum(1 for v in sel["fold"].values() if v > 0)
    print(f" folds positive: {folds_pos}/{len(sel['fold'])}")
    print(" selector picks distributed across markets:")
    for m in sorted(sel_by_mkt, key=lambda x: -sel_by_mkt[x]['n']):
        if sel_by_mkt[m]["n"] > 0:
            print(" "+_format_line(m, sel_by_mkt[m]).strip())
    print("\nREAD: a market/selector is a LEAD only if ROI>0, folds consistent, n large.")
    print("Forward-validate with CLV before staking. Static CSV odds may overstate edge.")
# Script entry point: run the evaluation only when executed directly.
if __name__ == "__main__":
    main()