@@ -0,0 +1,151 @@
|
||||
"""
|
||||
Match Report — calibrated outcome probabilities + loss-minimizing pick per match.
|
||||
================================================================================
|
||||
For each match, shows the model's CALIBRATED probability for every outcome
|
||||
(1X2, Double Chance, OU 1.5/2.5/3.5, BTTS, HT), next to the market's implied
|
||||
probability, and recommends:
|
||||
* EN GÜVENLİ = highest-probability outcome (most likely to hit / lowest variance)
|
||||
* EN İYİ DEĞER = least-negative-EV outcome (smartest bet given the margin)
|
||||
|
||||
Probabilities are leak-free and calibrated (ECE ~0.43%, see calibration_report).
|
||||
This is a LOSS-MINIMIZER, not a profit machine — accurate probabilities to make
|
||||
the smartest, least-losing decisions against İddaa's high margin.
|
||||
|
||||
Trains the market models on the full history (leak-free), then scores the input.
|
||||
|
||||
Usage:
|
||||
python scripts/match_report.py --features data/upcoming_features.csv
|
||||
python scripts/match_report.py --demo --n 6
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import argparse, os, sys, time
|
||||
import numpy as np, pandas as pd, xgboost as xgb
|
||||
|
||||
# The report prints non-ASCII text, so try to switch stdout to UTF-8.
# This is best-effort: any failure to reconfigure the stream is ignored.
_out = sys.stdout
if _out and hasattr(_out, "reconfigure"):
    try:
        _out.reconfigure(encoding="utf-8")
    except Exception:
        pass
|
||||
|
||||
# Directory one level above the one holding this script; it is put first on
# sys.path so the function-scope `from data.db import ...` can be resolved.
_THIS_FILE = os.path.abspath(__file__)
AI_DIR = os.path.dirname(os.path.dirname(_THIS_FILE))
sys.path.insert(0, AI_DIR)

# Training history read by main().
CSV = os.path.join(AI_DIR, "data", "training_data_v27.csv")

# Identifier / timestamp / final-score columns: never used as model features.
META = {
    "match_id",
    "home_team_id",
    "away_team_id",
    "league_id",
    "mst_utc",
    "score_home",
    "score_away",
    "ht_score_home",
    "ht_score_away",
}

# Columns additionally excluded from the feature matrix.
# NOTE(review): presumably these encode post-match information — confirm
# against the feature-extraction pipeline.
LEAKY = {
    "home_goals_form",
    "away_goals_form",
    "total_goals",
    "ht_total_goals",
    "squad_diff",
    "home_squad_quality",
    "away_squad_quality",
    "referee_home_bias",
    "referee_avg_goals",
}
|
||||
|
||||
def ou(line):
    """Build the full-time over/under labelling function for a goal line.

    The returned callable takes ``(sh, sa, hh, ha)`` and returns 0 when the
    full-time total ``sh + sa`` is strictly greater than ``line``, otherwise 1.
    The two half-time arguments are accepted but not used.
    """
    def label(sh, sa, hh, ha):
        is_over = (sh + sa) > line
        if is_over:
            return 0
        return 1

    return label
|
||||
def htou(line):
    """Build the half-time over/under labelling function for a goal line.

    The returned callable takes ``(sh, sa, hh, ha)`` and returns:

    * ``None`` when either half-time score is NaN (no usable label),
    * 0 when the half-time total ``hh + ha`` is strictly greater than ``line``,
    * 1 otherwise.

    The full-time arguments are accepted but not used.
    """
    def label(sh, sa, hh, ha):
        # Both sides must be known: with only `hh` checked, a NaN `ha` made the
        # comparison below false and the match was silently labelled "under".
        if np.isnan(hh) or np.isnan(ha):
            return None
        return 0 if (hh + ha) > line else 1

    return label
|
||||
def _ft_result(sh, sa, hh, ha):
    """Full-time result label: 0 if home scored more, 1 if level, 2 otherwise."""
    if sh > sa:
        return 0
    return 1 if sh == sa else 2


def _btts(sh, sa, hh, ha):
    """Both-teams-to-score label: 0 when both sides scored, 1 otherwise."""
    return 0 if (sh > 0 and sa > 0) else 1


def _ht_result(sh, sa, hh, ha):
    """Half-time result label, or None when a half-time score is missing.

    Both half-time scores are checked: with only `hh` checked, a NaN `ha`
    failed both comparisons and the match was labelled 2 (away ahead).
    """
    if np.isnan(hh) or np.isnan(ha):
        return None
    if hh > ha:
        return 0
    return 1 if hh == ha else 2


# market key -> (model kind, odds columns, display labels, labelling function).
# The labelling function returns the index into the odds/label lists of the
# outcome that occurred, or None when the match cannot be labelled.
MARKETS = {
    "MS": ("multi", ["odds_ms_h", "odds_ms_d", "odds_ms_a"], ["1", "X", "2"], _ft_result),
    "OU15": ("binary", ["odds_ou15_o", "odds_ou15_u"], ["1.5 Üst", "1.5 Alt"], ou(1.5)),
    "OU25": ("binary", ["odds_ou25_o", "odds_ou25_u"], ["2.5 Üst", "2.5 Alt"], ou(2.5)),
    "OU35": ("binary", ["odds_ou35_o", "odds_ou35_u"], ["3.5 Üst", "3.5 Alt"], ou(3.5)),
    "BTTS": ("binary", ["odds_btts_y", "odds_btts_n"], ["KG Var", "KG Yok"], _btts),
    "HT": ("multi", ["odds_ht_ms_h", "odds_ht_ms_d", "odds_ht_ms_a"],
           ["İY 1", "İY X", "İY 2"], _ht_result),
}
|
||||
# XGBoost parameters for the three-outcome ("multi") markets.
PM = dict(
    objective="multi:softprob",
    num_class=3,
    max_depth=5,
    eta=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    tree_method="hist",
    verbosity=0,
)

# XGBoost parameters for the two-outcome ("binary") markets.
PB = dict(
    objective="binary:logistic",
    max_depth=5,
    eta=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    tree_method="hist",
    verbosity=0,
)
|
||||
|
||||
|
||||
def team_names(ids):
    """Look up display names for team ids in the ``teams`` table (best-effort).

    Args:
        ids: iterable of team ids; each is converted with ``str`` before
            being sent to the database. ``None`` is treated as empty.

    Returns:
        dict mapping ``str(id)`` to the team name for every row found.
        An empty dict is returned when there is nothing to look up, when the
        database helpers cannot be imported, or when all three connection
        attempts fail; this function never raises for those cases.
    """
    if ids is None:
        return {}
    # De-duplicate while keeping order; skip the round-trip when empty.
    wanted = list(dict.fromkeys(str(i) for i in ids))
    if not wanted:
        return {}

    try:
        from contextlib import closing

        from data.db import get_clean_dsn
        import psycopg2
        from psycopg2.extras import RealDictCursor
    except Exception:
        # Deliberate best-effort boundary: without the DB helpers the caller
        # falls back to showing raw ids.
        return {}

    attempts = 3
    for attempt in range(attempts):
        try:
            # psycopg2's own `with connection:` only ends the transaction and
            # leaves the connection open, so closing() is used to release it.
            with closing(psycopg2.connect(get_clean_dsn())) as conn:
                with conn.cursor(cursor_factory=RealDictCursor) as cur:
                    cur.execute("SELECT id,name FROM teams WHERE id = ANY(%s)", (wanted,))
                    return {str(row["id"]): row["name"] for row in cur.fetchall()}
        except Exception:
            # Back off before retrying, but do not sleep after the last attempt.
            if attempt < attempts - 1:
                time.sleep(1)
    return {}
|
||||
|
||||
|
||||
def _train_models(X, SH, SA, HH, HA, rounds):
    """Fit one XGBoost booster per entry of MARKETS.

    Rows whose labelling function returns None are dropped for that market.
    Returns a dict: market key -> (kind, odds columns, display labels, booster).
    """
    models = {}
    for market, (kind, odds_cols, picks, label_fn) in MARKETS.items():
        truth = np.array([label_fn(*scores) for scores in zip(SH, SA, HH, HA)], dtype=object)
        valid = np.array([v is not None for v in truth], dtype=bool)
        labels = truth[valid].astype(int)
        if kind == "multi":
            params = PM
        else:
            # Binary boosters predict the probability of the outcome at index 0.
            params = PB
            labels = (labels == 0).astype(int)
        booster = xgb.train(params, xgb.DMatrix(X[valid], label=labels), num_boost_round=rounds)
        models[market] = (kind, odds_cols, picks, booster)
    return models


def _predict_markets(models, Xi):
    """Score every input row once per market.

    Returns a dict: market key -> array with one row of outcome probabilities
    per input row, ordered like that market's display labels.
    """
    dmat = xgb.DMatrix(Xi)
    probs = {}
    for market, (kind, _odds_cols, _picks, booster) in models.items():
        raw = booster.predict(dmat)
        if kind == "multi":
            probs[market] = raw
        else:
            p = raw.astype(float)
            probs[market] = np.column_stack([p, 1 - p])
    return probs


def _print_match(r, hn, an, models, probs):
    """Print the probability table and the two recommendations for one match.

    Args:
        r: the input row (pandas Series) holding the odds columns.
        hn, an: home / away display names.
        models: output of _train_models (used for odds columns and labels).
        probs: market key -> outcome probabilities for this row.
    """
    print("="*68)
    print(f"{hn} vs {an}")
    print(f" {'market':<8}{'sonuç':<10}{'model%':>8}{'piyasa%':>9}{'oran':>7}{'EV%':>8}")
    print(" "+"-"*58)
    bets = []
    for m, (_kind, oc, picks, _booster) in models.items():
        O = pd.to_numeric(r.reindex(oc), errors="coerce").fillna(0.0).values
        for pick, o, mp in zip(picks, O, probs[m]):
            o = float(o)
            mp = float(mp)
            if o > 1.0:
                # Only outcomes with usable odds can be priced and recommended.
                imp = 1/o
                ev = mp*o-1
                print(f" {m:<8}{pick:<10}{100*mp:>7.0f}%{100*imp:>8.0f}%{o:>7.2f}{100*ev:>+7.1f}")
                bets.append((m, pick, mp, o, ev))
            else:
                print(f" {m:<8}{pick:<10}{100*mp:>7.0f}%{'-':>8} {'-':>6} {'-':>7}")
    # Double Chance is derived from the MS probabilities (no odds shown;
    # the user checks the odds on Nesine).
    ms_probs = probs.get("MS")
    if ms_probs is not None:
        h, d, a = ms_probs
        print(f" {'DC':<8}{'1X':<10}{100*(h+d):>7.0f}% (türetilmiş 'en güvenli' seçenek)")
        print(f" {'DC':<8}{'X2':<10}{100*(d+a):>7.0f}%")
        print(f" {'DC':<8}{'12':<10}{100*(h+a):>7.0f}%")
    print(" "+"-"*58)
    if bets:
        safe = max(bets, key=lambda x: x[2])   # highest model probability
        value = max(bets, key=lambda x: x[4])  # highest (least-negative) EV
        print(f" >>> EN GÜVENLİ : {safe[0]} {safe[1]} (model %{100*safe[2]:.0f}, oran {safe[3]:.2f})")
        print(f" >>> EN İYİ DEĞER: {value[0]} {value[1]} (EV %{100*value[4]:+.1f}, model %{100*value[2]:.0f}, oran {value[3]:.2f})")
        if value[4] <= 0:
            print(" (EV negatif → marj yüzünden 'kâr' yok; en az kaybettiren bu. Değer yoksa PAS geç.)")


def main():
    """Train the market models and print a report for up to ``--n`` matches.

    With ``--features PATH`` the models are trained on every scored match in
    the training CSV and the first ``--n`` rows of PATH are reported.

    Without ``--features`` (demo mode) the last ``--n`` matches of the training
    CSV are reported; they are held out of training so that the probabilities
    shown are not in-sample. ``--demo`` is accepted for readability but
    demo mode is simply what happens when ``--features`` is absent.
    """
    ap = argparse.ArgumentParser(description=__doc__,
                                 formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument("--features")
    ap.add_argument("--demo", action="store_true")
    ap.add_argument("--n", type=int, default=8)
    ap.add_argument("--estimators", type=int, default=250)
    args = ap.parse_args()
    limit = max(args.n, 0)

    # Read the scoring input first so a bad --features path fails before the
    # (slow) training step instead of after it.
    external = pd.read_csv(args.features, low_memory=False) if args.features else None

    df = pd.read_csv(CSV, low_memory=False).sort_values("mst_utc").reset_index(drop=True)
    sh = pd.to_numeric(df["score_home"], errors="coerce")
    sa = pd.to_numeric(df["score_away"], errors="coerce")
    ok = sh.notna() & sa.notna()
    df = df[ok].reset_index(drop=True)
    SH = sh[ok.values].values.astype(float)
    SA = sa[ok.values].values.astype(float)
    HH = pd.to_numeric(df["ht_score_home"], errors="coerce").values.astype(float)
    HA = pd.to_numeric(df["ht_score_away"], errors="coerce").values.astype(float)
    feats = [c for c in df.columns
             if c not in META and not c.startswith("label_") and c not in LEAKY]
    X = df[feats].apply(pd.to_numeric, errors="coerce").fillna(0.0).values

    if external is not None:
        inp = external.head(limit).reset_index(drop=True)
        n_train = len(df)
    else:
        # Demo: report the most recent matches and keep them out of training.
        holdout = min(limit, len(df))
        inp = df.tail(holdout).reset_index(drop=True)
        n_train = len(df) - holdout
    if n_train == 0:
        ap.error("no matches left to train on; lower --n or check the training CSV")

    print(f"Training {len(MARKETS)} leak-free calibrated market models on {n_train:,} matches ...", flush=True)
    models = _train_models(X[:n_train], SH[:n_train], SA[:n_train],
                           HH[:n_train], HA[:n_train], args.estimators)

    if external is None:
        print("(DEMO: training CSV son maçları)\n")
    else:
        missing = [c for c in feats if c not in inp.columns]
        if missing:
            # reindex() below zero-fills these; make that visible.
            print(f"WARNING: {len(missing)} of {len(feats)} feature columns are missing from "
                  f"{args.features} and were filled with 0.0 (e.g. {', '.join(missing[:5])})",
                  file=sys.stderr)

    names = team_names(list(inp.get("home_team_id", [])) + list(inp.get("away_team_id", []))) \
        if "home_team_id" in inp.columns else {}
    Xi = inp.reindex(columns=feats).apply(pd.to_numeric, errors="coerce").fillna(0.0).values

    # One batched prediction per market instead of one DMatrix per row.
    all_probs = _predict_markets(models, Xi) if len(inp) else {}

    for i in range(len(inp)):
        r = inp.iloc[i]
        hn = names.get(str(r.get("home_team_id")), str(r.get("home_team_id", "?"))[:8])
        an = names.get(str(r.get("away_team_id")), str(r.get("away_team_id", "?"))[:8])
        _print_match(r, hn, an, models, {m: p[i] for m, p in all_probs.items()})

    print("\nNOT: olasılıklar kalibre (model %X ⇒ gerçekte ~%X). EV<0 her yerde olabilir")
    print("(İddaa marjı); amaç KAYBI MİNİMİZE etmek + en doğru maç okumasını görmek.")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user