Files
iddaai-be/ai-engine/scripts/match_report.py
T
fahricansecer 7b17aa1fee
Deploy Iddaai Backend / build-and-deploy (push) Successful in 33s
gg2
2026-06-07 15:59:41 +03:00

152 lines
7.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Match Report — calibrated outcome probabilities + loss-minimizing pick per match.
================================================================================
For each match, shows the model's CALIBRATED probability for every outcome
(1X2, Double Chance, OU 1.5/2.5/3.5, BTTS, HT), next to the market's implied
probability, and recommends:
* EN GÜVENLİ = highest-probability outcome (most likely to hit / lowest variance)
* EN İYİ DEĞER = least-negative-EV outcome (smartest bet given the margin)
Probabilities are leak-free and calibrated (ECE ~0.43%, see calibration_report).
This is a LOSS-MINIMIZER, not a profit machine — accurate probabilities to make
the smartest, least-losing decisions against İddaa's high margin.
Trains the market models on the full history (leak-free), then scores the input.
Usage:
python scripts/match_report.py --features data/upcoming_features.csv
python scripts/match_report.py --demo --n 6
"""
from __future__ import annotations
import argparse, os, sys, time
import numpy as np, pandas as pd, xgboost as xgb
# Best-effort: switch stdout to UTF-8 when the stream supports it.
# Presumably needed so the non-ASCII (Turkish) text this script prints
# survives consoles whose default encoding is not UTF-8 — TODO confirm.
_reconfigure_stdout = getattr(sys.stdout, "reconfigure", None) if sys.stdout else None
if _reconfigure_stdout is not None:
    try:
        _reconfigure_stdout(encoding="utf-8")
    except Exception:
        # Deliberately ignored: a failed re-encode must not stop the report.
        pass
# Directory two levels above this file; put first on sys.path so imports
# rooted there (e.g. ``data.db`` in team_names) can be resolved.
_THIS_FILE = os.path.abspath(__file__)
AI_DIR = os.path.dirname(os.path.dirname(_THIS_FILE))
sys.path.insert(0, AI_DIR)

# Historical matches used to fit the market models.
CSV = os.path.join(AI_DIR, "data", "training_data_v27.csv")

# Identifier / timestamp / score columns: never used as model features.
META = {
    "match_id",
    "home_team_id",
    "away_team_id",
    "league_id",
    "mst_utc",
    "score_home",
    "score_away",
    "ht_score_home",
    "ht_score_away",
}

# Columns also excluded from the feature set. The name suggests they leak
# outcome information into the inputs — NOTE(review): confirm per column.
LEAKY = {
    "home_goals_form",
    "away_goals_form",
    "total_goals",
    "ht_total_goals",
    "squad_diff",
    "home_squad_quality",
    "away_squad_quality",
    "referee_home_bias",
    "referee_avg_goals",
}
def ou(line):
    """Build a full-time over/under labeller for the goal line ``line``.

    The returned callable takes ``(sh, sa, hh, ha)`` — full-time home/away
    goals followed by half-time home/away goals — and returns 0 when
    ``sh + sa`` is strictly greater than ``line`` (over) and 1 otherwise
    (under). The half-time arguments are accepted but ignored.
    """
    def label(sh, sa, hh, ha):
        if sh + sa > line:
            return 0
        return 1

    return label
def htou(line):
    """Build a half-time over/under labeller for the goal line ``line``.

    The returned callable takes ``(sh, sa, hh, ha)`` — full-time home/away
    goals followed by half-time home/away goals — and returns:

    * ``None`` when either half-time score is NaN (label unknown),
    * ``0`` when ``hh + ha`` is strictly greater than ``line`` (over),
    * ``1`` otherwise (under).

    The full-time arguments are accepted but ignored.
    """
    def label(sh, sa, hh, ha):
        # Both sides must be known: with only ``hh`` checked, a NaN ``ha``
        # makes ``hh + ha`` NaN, the comparison False, and the row would be
        # silently labelled "under".
        if np.isnan(hh) or np.isnan(ha):
            return None
        return 0 if (hh + ha) > line else 1

    return label
# Market registry. Each value is a 4-tuple:
#   (kind, odds_columns, pick_labels, truth_fn)
# * kind          — "multi" (3-class) or "binary"; selects the XGBoost params.
# * odds_columns  — input columns holding the odds, one per pick.
# * pick_labels   — display label per pick, same order as odds_columns.
# * truth_fn      — maps (sh, sa, hh, ha) = full-time home/away goals and
#                   half-time home/away goals to the index of the pick that
#                   won, or None when the outcome cannot be determined.
MARKETS = {
    "MS": ("multi", ["odds_ms_h", "odds_ms_d", "odds_ms_a"], ["1", "X", "2"],
           lambda sh, sa, hh, ha: 0 if sh > sa else (1 if sh == sa else 2)),
    "OU15": ("binary", ["odds_ou15_o", "odds_ou15_u"], ["1.5 Üst", "1.5 Alt"], ou(1.5)),
    "OU25": ("binary", ["odds_ou25_o", "odds_ou25_u"], ["2.5 Üst", "2.5 Alt"], ou(2.5)),
    "OU35": ("binary", ["odds_ou35_o", "odds_ou35_u"], ["3.5 Üst", "3.5 Alt"], ou(3.5)),
    "BTTS": ("binary", ["odds_btts_y", "odds_btts_n"], ["KG Var", "KG Yok"],
             lambda sh, sa, hh, ha: 0 if (sh > 0 and sa > 0) else 1),
    # Half-time result. Either half-time score being NaN makes the label
    # unknown; checking only ``hh`` would turn a missing ``ha`` into
    # class 2 because every comparison against NaN is False.
    "HT": ("multi", ["odds_ht_ms_h", "odds_ht_ms_d", "odds_ht_ms_a"], ["İY 1", "İY X", "İY 2"],
           lambda sh, sa, hh, ha: None if (np.isnan(hh) or np.isnan(ha))
           else (0 if hh > ha else (1 if hh == ha else 2))),
}
# Booster settings common to every market model.
_XGB_SHARED = {
    "max_depth": 5,
    "eta": 0.05,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "tree_method": "hist",
    "verbosity": 0,
}

# XGBoost parameters for the 3-class ("multi") markets.
PM = {"objective": "multi:softprob", "num_class": 3, **_XGB_SHARED}

# XGBoost parameters for the two-outcome ("binary") markets.
PB = {"objective": "binary:logistic", **_XGB_SHARED}
def team_names(ids):
    """Look up team display names in the database, best-effort.

    Args:
        ids: iterable of team identifiers; each is converted with ``str``.

    Returns:
        Dict mapping ``str(id)`` to the team's ``name`` for every row the
        ``teams`` table returned. An empty dict is returned when there is
        nothing to look up, when the database helpers cannot be imported,
        or when all connection attempts fail. This function never raises
        for database problems — callers fall back to showing raw ids.
    """
    try:
        wanted = sorted({str(i) for i in ids})
    except TypeError:
        return {}
    if not wanted:
        # Nothing to resolve: do not open a connection at all.
        return {}

    try:
        from data.db import get_clean_dsn
        import psycopg2
        from psycopg2.extras import RealDictCursor
    except Exception:
        # DB layer unavailable (e.g. running without the project package).
        return {}

    attempts = 3
    for attempt in range(attempts):
        conn = None
        try:
            conn = psycopg2.connect(get_clean_dsn())
            # ``with conn`` only wraps the transaction; the connection itself
            # is closed in the ``finally`` below.
            with conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
                cur.execute("SELECT id,name FROM teams WHERE id = ANY(%s)", (wanted,))
                return {str(row["id"]): row["name"] for row in cur.fetchall()}
        except Exception:
            # Retry after a short pause, but do not wait after the last try.
            if attempt < attempts - 1:
                time.sleep(1)
        finally:
            if conn is not None:
                try:
                    conn.close()
                except Exception:
                    pass
    return {}
def _load_history(path):
    """Read the training CSV and keep rows with a numeric full-time score.

    Rows are sorted by ``mst_utc`` ascending, so the tail of the returned
    frame holds the rows with the largest ``mst_utc`` values.
    """
    df = pd.read_csv(path, low_memory=False).sort_values("mst_utc").reset_index(drop=True)
    sh = pd.to_numeric(df["score_home"], errors="coerce")
    sa = pd.to_numeric(df["score_away"], errors="coerce")
    return df[sh.notna() & sa.notna()].reset_index(drop=True)


def _to_matrix(frame, feats):
    """Project *frame* onto *feats* as a numeric matrix; missing or non-numeric cells become 0.0."""
    return frame.reindex(columns=feats).apply(pd.to_numeric, errors="coerce").fillna(0.0).values


def _train_models(train, feats, estimators):
    """Fit one XGBoost booster per MARKETS entry on the rows of *train*.

    Returns a dict ``market -> (kind, odds_columns, pick_labels, booster)``.
    A market whose truth function yields no usable label is skipped.
    """
    X = _to_matrix(train, feats)
    score_cols = ("score_home", "score_away", "ht_score_home", "ht_score_away")
    scores = [pd.to_numeric(train[c], errors="coerce").values.astype(float) for c in score_cols]
    models = {}
    for m, (kind, oc, picks, tfn) in MARKETS.items():
        truth = np.array([tfn(*row) for row in zip(*scores)], dtype=object)
        valid = np.array([v is not None for v in truth], dtype=bool)
        if not valid.any():
            # xgb.train cannot fit on zero rows; drop the market instead of crashing.
            print(f"WARNING: no labelled rows for market {m}; skipping", file=sys.stderr)
            continue
        labels = truth[valid].astype(int)
        if kind == "multi":
            params, y = PM, labels
        else:
            # Binary models predict P(first pick), i.e. truth index 0.
            params, y = PB, (labels == 0).astype(int)
        booster = xgb.train(params, xgb.DMatrix(X[valid], label=y), num_boost_round=estimators)
        models[m] = (kind, oc, picks, booster)
    return models


def _print_match(r, xrow, names, models):
    """Print the probability / odds / EV table and the two picks for one match.

    *r* is the input row, *xrow* its 1xF feature matrix, *names* the
    id -> team-name mapping, *models* the output of ``_train_models``.
    """
    hn = names.get(str(r.get("home_team_id")), str(r.get("home_team_id", "?"))[:8])
    an = names.get(str(r.get("away_team_id")), str(r.get("away_team_id", "?"))[:8])
    print("=" * 68)
    print(f"{hn} vs {an}")
    print(f" {'market':<8}{'sonuç':<10}{'model%':>8}{'piyasa%':>9}{'oran':>7}{'EV%':>8}")
    print(" " + "-" * 58)
    bets = []
    ms_probs = None
    dm = xgb.DMatrix(xrow)  # built once, shared by every market model
    for m, (kind, oc, picks, b) in models.items():
        if kind == "multi":
            P = b.predict(dm)[0]
        else:
            p = float(b.predict(dm)[0])
            P = np.array([p, 1 - p])
        if m == "MS":
            ms_probs = P
        # Missing / non-numeric odds become 0.0 and are shown as '-'.
        O = pd.to_numeric(r.reindex(oc), errors="coerce").fillna(0.0).values
        for k, pick in enumerate(picks):
            o = float(O[k])
            mp = float(P[k])
            if o > 1.0:
                imp = 1 / o      # market-implied probability
                ev = mp * o - 1  # expected value per unit stake
                print(f" {m:<8}{pick:<10}{100*mp:>7.0f}%{100*imp:>8.0f}%{o:>7.2f}{100*ev:>+7.1f}")
                bets.append((m, pick, mp, o, ev))
            else:
                print(f" {m:<8}{pick:<10}{100*mp:>7.0f}%{'-':>8} {'-':>6} {'-':>7}")
    # Double Chance is derived from the MS probabilities; no odds are shown.
    if ms_probs is not None:
        h, d, a = ms_probs
        print(f" {'DC':<8}{'1X':<10}{100*(h+d):>7.0f}% (türetilmiş 'en güvenli' seçenek)")
        print(f" {'DC':<8}{'X2':<10}{100*(d+a):>7.0f}%")
        print(f" {'DC':<8}{'12':<10}{100*(h+a):>7.0f}%")
    print(" " + "-" * 58)
    if bets:
        safe = max(bets, key=lambda x: x[2])   # highest model probability
        value = max(bets, key=lambda x: x[4])  # highest (least-negative) EV
        print(f" >>> EN GÜVENLİ : {safe[0]} {safe[1]} (model %{100*safe[2]:.0f}, oran {safe[3]:.2f})")
        print(f" >>> EN İYİ DEĞER: {value[0]} {value[1]} (EV %{100*value[4]:+.1f}, model %{100*value[2]:.0f}, oran {value[3]:.2f})")
        if value[4] <= 0:
            print(" (EV negatif → marj yüzünden 'kâr' yok; en az kaybettiren bu. Değer yoksa PAS geç.)")


def main():
    """Command-line entry point: train the market models and print match reports.

    Flags:
        --features PATH  CSV of matches to score. When omitted, the script
                         runs in demo mode on the last ``--n`` rows of the
                         training CSV.
        --demo           Accepted for compatibility; demo mode is selected
                         by the absence of ``--features``.
        --n INT          Maximum number of matches to print (default 8).
        --estimators INT Boosting rounds per model (default 250).

    In demo mode the rows being displayed are held out of training, so the
    probabilities shown for them are out-of-sample.

    NOTE(review): the printed text calls the models "calibrated", but no
    calibration step is visible in this function — the probabilities are the
    boosters' direct outputs. Confirm against the calibration report.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--features")
    ap.add_argument("--demo", action="store_true")
    ap.add_argument("--n", type=int, default=8)
    ap.add_argument("--estimators", type=int, default=250)
    args = ap.parse_args()

    df = _load_history(CSV)
    feats = [c for c in df.columns
             if c not in META and not c.startswith("label_") and c not in LEAKY]
    limit = max(args.n, 0)

    if args.features:
        train = df
        inp = pd.read_csv(args.features, low_memory=False).iloc[:limit]
    else:
        if limit >= len(df):
            ap.error("--n must be smaller than the number of historical matches in demo mode")
        # Hold the demo rows out of training so they are not scored in-sample.
        split = len(df) - limit
        train = df.iloc[:split]
        inp = df.iloc[split:].reset_index(drop=True)

    print(f"Training {len(MARKETS)} leak-free calibrated market models on {len(train):,} matches ...", flush=True)
    models = _train_models(train, feats, args.estimators)
    if not args.features:
        print("(DEMO: training CSV son maçları)\n")

    if "home_team_id" in inp.columns:
        names = team_names(list(inp.get("home_team_id", [])) + list(inp.get("away_team_id", [])))
    else:
        names = {}
    Xi = _to_matrix(inp, feats)
    for i in range(len(inp)):
        _print_match(inp.iloc[i], Xi[i:i + 1], names, models)

    print("\nNOT: olasılıklar kalibre (model %X ⇒ gerçekte ~%X). EV<0 her yerde olabilir")
    print("(İddaa marjı); amaç KAYBI MİNİMİZE etmek + en doğru maç okumasını görmek.")


# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()