@@ -0,0 +1,137 @@
"""
Analyze Match v2 — the per-match multi-market value board + disciplined pick.
===========================================================================
Answers "for ONE match, show every bet type's probability + model signal +
market-vs-model value, and pick the right bet." Leak-free models.

KEY HONEST RULE (proven by multi_market_edge.py): compute & SHOW value for all
markets, but only MS (1X2) carries real, fold-consistent model edge. In OU/HT/
BTTS the market is efficient — a big model-vs-market gap there is the MODEL'S
ERROR, not value. So non-MS rows are INFO-ONLY; only an MS value bet in the
favourite band is STAKED.

Demo: trains all market models on the first 85% of history, then prints the full
board for sample matches in the unseen last 15% (with what actually happened).

Usage:
  python scripts/analyze_match_v2.py --n 6
  python scripts/analyze_match_v2.py --match <match_id>
"""
|
||||
from __future__ import annotations
|
||||
import argparse, os, sys
|
||||
import numpy as np, pandas as pd, xgboost as xgb
|
||||
|
||||
# Best-effort switch of stdout to UTF-8: the board prints non-ASCII text
# (e.g. "★BET", "—" and the Turkish pick labels). Any failure is ignored on
# purpose so the script still runs with whatever encoding the stream has.
if sys.stdout and hasattr(sys.stdout, "reconfigure"):
    try:
        sys.stdout.reconfigure(encoding="utf-8")
    except Exception:
        pass
|
||||
|
||||
# Directory two levels above this file; the training CSV lives under <AI_DIR>/data.
AI_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
CSV = os.path.join(AI_DIR, "data", "training_data_v27.csv")

# Identifier, kickoff-time and score columns: never used as model features.
META = {
    "match_id",
    "home_team_id",
    "away_team_id",
    "league_id",
    "mst_utc",
    "score_home",
    "score_away",
    "ht_score_home",
    "ht_score_away",
}

# Columns additionally excluded from the feature set as leaky.
LEAKY = {
    "home_goals_form",
    "away_goals_form",
    "total_goals",
    "ht_total_goals",
    "squad_diff",
    "home_squad_quality",
    "away_squad_quality",
    "referee_home_bias",
    "referee_avg_goals",
}

# MS favourite band that staking is allowed in: STAKE_LO <= odds < STAKE_HI.
STAKE_LO = 1.5
STAKE_HI = 2.4
# Minimum edge (model probability minus implied probability) to flag value.
STAKE_MARGIN = 0.03
|
||||
|
||||
def ou(line):
    """Build the outcome function for a full-time over/under market.

    The returned callable takes ``(sh, sa, hh, ha)`` and returns 0 when the
    full-time goal total ``sh + sa`` is strictly above ``line`` (over),
    otherwise 1 (under). The half-time arguments are accepted but ignored.
    """
    def outcome(sh, sa, hh, ha):
        total_goals = sh + sa
        if total_goals > line:
            return 0
        return 1

    return outcome
|
||||
def htou(line):
    """Build the outcome function for a half-time over/under market.

    The returned callable takes ``(sh, sa, hh, ha)`` and returns:

    * ``None`` when either half-time score is NaN (outcome unknown),
    * 0 when the half-time goal total ``hh + ha`` is strictly above ``line``,
    * 1 otherwise.

    Both half-time scores are checked: with only ``hh`` checked, a NaN ``ha``
    makes ``hh + ha`` NaN, the ``>`` comparison is False, and the match would
    be labelled "under" instead of unknown.
    """
    def outcome(sh, sa, hh, ha):
        if np.isnan(hh) or np.isnan(ha):
            return None
        return 0 if (hh + ha) > line else 1

    return outcome
|
||||
# Market registry: name -> (kind, odds columns, pick labels, outcome function).
#   kind      "multi"  -> 3-class model, one probability per pick;
#             "binary" -> model predicts the FIRST pick, the second is 1 - p.
#   odds      CSV columns holding the odds, in the same order as the picks.
#   picks     display labels, index-aligned with the odds columns.
#   outcome   f(sh, sa, hh, ha) -> index of the winning pick, or None when the
#             result cannot be determined (missing half-time score).
MARKETS = {
    "MS": (
        "multi",
        ["odds_ms_h", "odds_ms_d", "odds_ms_a"],
        ["1", "X", "2"],
        lambda sh, sa, hh, ha: 0 if sh > sa else (1 if sh == sa else 2),
    ),
    "OU25": ("binary", ["odds_ou25_o", "odds_ou25_u"], ["2.5Üst", "2.5Alt"], ou(2.5)),
    "OU15": ("binary", ["odds_ou15_o", "odds_ou15_u"], ["1.5Üst", "1.5Alt"], ou(1.5)),
    "OU35": ("binary", ["odds_ou35_o", "odds_ou35_u"], ["3.5Üst", "3.5Alt"], ou(3.5)),
    "BTTS": (
        "binary",
        ["odds_btts_y", "odds_btts_n"],
        ["KG Var", "KG Yok"],
        lambda sh, sa, hh, ha: 0 if (sh > 0 and sa > 0) else 1,
    ),
    "HT": (
        "multi",
        ["odds_ht_ms_h", "odds_ht_ms_d", "odds_ht_ms_a"],
        ["İY1", "İYX", "İY2"],
        # Both half-time scores must be known: with a NaN `ha`, `hh > ha` and
        # `hh == ha` are both False, so the match would be mislabelled as an
        # away win (class 2) instead of unknown.
        lambda sh, sa, hh, ha: (
            None
            if (np.isnan(hh) or np.isnan(ha))
            else (0 if hh > ha else (1 if hh == ha else 2))
        ),
    ),
    "HT_OU15": ("binary", ["odds_ht_ou15_o", "odds_ht_ou15_u"], ["İY1.5Üst", "İY1.5Alt"], htou(1.5)),
}
|
||||
# Only these markets are ever bet; every other market is info-only.
STAKED_MARKETS = {"MS"}

# XGBoost settings shared by both model kinds.
_XGB_COMMON = {
    "max_depth": 5,
    "eta": 0.05,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "tree_method": "hist",
    "verbosity": 0,
}
# Parameters for the 3-class ("multi") markets.
PM = {"objective": "multi:softprob", "num_class": 3, **_XGB_COMMON}
# Parameters for the two-way ("binary") markets.
PB = {"objective": "binary:logistic", **_XGB_COMMON}
|
||||
|
||||
|
||||
def _fmt_elo(value):
    """Format an Elo rating as a whole number, or "?" if missing or non-numeric."""
    try:
        rating = float(value)
    except (TypeError, ValueError):
        return "?"
    return "?" if np.isnan(rating) else f"{rating:.0f}"


def main() -> None:
    """Train one model per market on the first 85% of matches and print value boards.

    Steps:
      1. Load the CSV, sort by ``mst_utc`` and drop rows without a numeric
         full-time score.
      2. Build the feature matrix from every column that is not in ``META``,
         not in ``LEAKY`` and does not start with ``label_`` (non-numeric and
         missing values become 0.0).
      3. Select the rows to display: the ``--match`` id if given, otherwise
         ``--n`` rows spread evenly over the last 15% of matches.
      4. Train one XGBoost model per market in ``MARKETS`` on the first 85%.
      5. Print, per selected match, every pick's model probability, implied
         probability (1/odds), edge and actual result; only a pick in
         ``STAKED_MARKETS`` with edge above ``STAKE_MARGIN`` and odds inside
         ``[STAKE_LO, STAKE_HI)`` is reported as a stake.

    Exits with an error message when ``--match`` is unknown, ``--n`` is not
    positive, there are no holdout rows, or no market model could be trained.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--n", type=int, default=6, help="how many sample matches")
    ap.add_argument("--match", help="specific match_id")
    ap.add_argument("--estimators", type=int, default=250)
    args = ap.parse_args()

    df = pd.read_csv(CSV, low_memory=False).sort_values("mst_utc").reset_index(drop=True)
    score_home = pd.to_numeric(df["score_home"], errors="coerce")
    score_away = pd.to_numeric(df["score_away"], errors="coerce")
    scored = score_home.notna() & score_away.notna()
    df = df[scored].reset_index(drop=True)
    SH = score_home[scored].to_numpy(dtype=float)
    SA = score_away[scored].to_numpy(dtype=float)
    # Half-time scores may be missing; they stay NaN and the outcome
    # functions decide how to treat them.
    HH = pd.to_numeric(df["ht_score_home"], errors="coerce").to_numpy(dtype=float)
    HA = pd.to_numeric(df["ht_score_away"], errors="coerce").to_numpy(dtype=float)

    feats = [
        c for c in df.columns
        if c not in META and not c.startswith("label_") and c not in LEAKY
    ]
    X = df[feats].apply(pd.to_numeric, errors="coerce").fillna(0.0).values
    n_total = len(df)
    cut = int(n_total * 0.85)  # rows [0, cut) train, rows [cut, n_total) are holdout

    # Resolve the rows to show BEFORE training, so a bad request fails fast
    # instead of after the full training run.
    if args.match:
        wanted = str(args.match)
        rows = df.index[df["match_id"].astype(str) == wanted].tolist()
        if not rows:
            ap.error(f"match_id {wanted!r} not found among scored matches in {CSV}")
    else:
        n_hold = n_total - cut
        if n_hold <= 0:
            sys.exit("No holdout matches available: the dataset has no scored rows.")
        if args.n <= 0:
            ap.error("--n must be a positive integer")
        # Evenly spread sample; capped and de-duplicated so a request larger
        # than the holdout does not print the same match twice.
        positions = np.linspace(0, n_hold - 1, min(args.n, n_hold), dtype=int)
        rows = (cut + np.unique(positions)).tolist()

    print(f"Training {len(MARKETS)} leak-free market models on {cut:,} matches ...")

    models = {}
    for market, (kind, odds_cols, picks, truth_fn) in MARKETS.items():
        if not all(col in df.columns for col in odds_cols):
            continue  # market not present in this CSV
        truth = np.array(
            [truth_fn(*scores) for scores in zip(SH[:cut], SA[:cut], HH[:cut], HA[:cut])],
            dtype=object,
        )
        valid = np.array([t is not None for t in truth], dtype=bool)
        if not valid.any():
            continue  # nothing to learn from (e.g. no half-time scores at all)
        labels = truth[valid].astype(int)
        if kind == "multi":
            dtrain = xgb.DMatrix(X[:cut][valid], label=labels)
            booster = xgb.train(PM, dtrain, num_boost_round=args.estimators)
        else:
            # Binary models predict the probability of the FIRST pick (index 0).
            dtrain = xgb.DMatrix(X[:cut][valid], label=(labels == 0).astype(int))
            booster = xgb.train(PB, dtrain, num_boost_round=args.estimators)
        models[market] = (kind, odds_cols, picks, truth_fn, booster)

    if not models:
        sys.exit("No market model could be trained (no odds columns or no labelled training rows).")

    for gi in rows:
        r = df.iloc[gi]
        drow = xgb.DMatrix(X[gi:gi + 1])  # built once, reused by every market model
        sh_, sa_, hh_, ha_ = SH[gi], SA[gi], HH[gi], HA[gi]
        ht = "?" if (np.isnan(hh_) or np.isnan(ha_)) else f"{int(hh_)}-{int(ha_)}"
        print("\n" + "=" * 72)
        print(f"MATCH {r['match_id']} | elo H{_fmt_elo(r.get('home_overall_elo'))} vs A{_fmt_elo(r.get('away_overall_elo'))}"
              f" | ACTUAL {int(sh_)}-{int(sa_)} (HT {ht})")
        if gi < cut:
            # Only reachable via --match: the models were fitted on this row.
            print(" WARNING: this match is inside the training window — figures below are in-sample, not a holdout test.")
        print(f" {'market':<8}{'pick':<10}{'model%':>8}{'impl%':>7}{'edge':>7}{'odds':>7} flag result")
        print(" " + "-" * 64)
        best_ms = None
        for m, (kind, ocols, picks, tfn, b) in models.items():
            if kind == "multi":
                P = b.predict(drow)[0]
            else:
                p = float(b.predict(drow)[0])
                P = np.array([p, 1 - p])
            O = pd.to_numeric(r[ocols], errors="coerce").fillna(0.0).values
            truth = tfn(sh_, sa_, hh_, ha_)
            for k in range(len(picks)):
                o = O[k]
                if o <= 1.0:
                    continue  # missing or invalid odds: no implied probability
                imp = 1.0 / o
                edge = P[k] - imp
                res = "—" if truth is None else ("WON" if truth == k else "lost")
                staked = (m in STAKED_MARKETS) and edge > STAKE_MARGIN and STAKE_LO <= o < STAKE_HI
                flag = "★BET" if staked else ("val" if edge > STAKE_MARGIN else "")
                print(f" {m:<8}{picks[k]:<10}{100*P[k]:>7.1f}{100*imp:>7.1f}{100*edge:>+7.1f}{o:>7.2f} {flag:<5} {res}")
                # Keep only the stake-eligible pick with the largest edge.
                if staked and (best_ms is None or edge > best_ms[0]):
                    best_ms = (edge, m, picks[k], o, res)
        print(" " + "-" * 64)
        if best_ms:
            e, m, p, o, res = best_ms
            print(f" >>> STAKE: {m} {p} @ {o:.2f} (edge +{100*e:.1f}%, favourite band) -> {res}")
        else:
            print(" >>> NO STAKE: no MS value in favourite band. (Other markets info-only —")
            print(" their 'value' is model error in efficient markets; do NOT chase it.)")
    print("\nNOTE: only MS staked (proven edge). All markets shown for transparency.")
    print("Forward-validate with CLV before real money. Static CSV odds may overstate edge.")
|
||||
|
||||
|
||||
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user