""" Gerçek Odds Bazlı Backtest ============================ Model olasılığı vs gerçek bookmaker odds karşılaştırır. Edge varsa bahis açıldığı varsayılır, gerçek ROI hesaplanır. """ import os, sys, json import numpy as np import pandas as pd import xgboost as xgb sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data', 'training_data.csv') MODELS_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'models', 'v25') REPORT_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'reports') SKIP_COLS = { 'match_id','home_team_id','away_team_id','league_id','mst_utc', 'score_home','score_away','total_goals','ht_score_home','ht_score_away','ht_total_goals', 'label_ms','label_ou05','label_ou15','label_ou25','label_ou35','label_btts', 'label_ht_result','label_ht_ou05','label_ht_ou15','label_ht_ft', 'label_odd_even','label_yellow_cards','label_cards_ou45','label_handicap_ms', } # (model_key, n_class, pred_class, label_col, odds_col, isim) MARKETS = [ ('ms', 3, 0, 'label_ms', 'odds_ms_h', 'MS-Ev'), ('ms', 3, 1, 'label_ms', 'odds_ms_d', 'MS-Ber'), ('ms', 3, 2, 'label_ms', 'odds_ms_a', 'MS-Dep'), ('ou15', 2, 1, 'label_ou15', 'odds_ou15_o', 'OU15-Ust'), ('ou15', 2, 0, 'label_ou15', 'odds_ou15_u', 'OU15-Alt'), ('ou25', 2, 1, 'label_ou25', 'odds_ou25_o', 'OU25-Ust'), ('ou25', 2, 0, 'label_ou25', 'odds_ou25_u', 'OU25-Alt'), ('ou35', 2, 1, 'label_ou35', 'odds_ou35_o', 'OU35-Ust'), ('ou35', 2, 0, 'label_ou35', 'odds_ou35_u', 'OU35-Alt'), ('btts', 2, 1, 'label_btts', 'odds_btts_y', 'BTTS-Var'), ('btts', 2, 0, 'label_btts', 'odds_btts_n', 'BTTS-Yok'), ] MIN_ODDS = 1.10 MAX_ODDS = 10.0 def load_model(market): path = os.path.join(MODELS_DIR, f'xgb_v25_{market}.json') if not os.path.exists(path): return None b = xgb.Booster() b.load_model(path) return b def main(): print('Veri yukleniyor...') df = pd.read_csv(DATA_PATH, low_memory=False) df = df.sort_values('mst_utc') n_test = int(len(df) * 0.20) df_test = df.tail(n_test).copy().reset_index(drop=True) print(f'Test seti: {len(df_test):,} mac') feature_cols = [c for c in df.columns if c not in SKIP_COLS] X = df_test[feature_cols].fillna(0).values # Modelleri yukle loaded = {} for mkey, n_class, *_ in MARKETS: if mkey not in loaded: m = load_model(mkey) if m: loaded[mkey] = (m, n_class) print(f'Modeller: {list(loaded.keys())}') # Toplu tahmin raw_preds = {} for mkey, (model, n_class) in loaded.items(): dmat = xgb.DMatrix(pd.DataFrame(X, columns=feature_cols)) raw = model.predict(dmat) raw_preds[mkey] = raw.reshape(-1, n_class) if n_class > 2 else np.column_stack([1-raw, raw]) # Backtest all_results = [] print(f'\n{"Market":<12} {"Edge>=":>7} {"Bahis":>7} {"Hit%":>7} {"AvgOdds":>9} {"ROI/b":>8} {"Toplam":>10}') print('-' * 65) for mkey, n_class, pred_cls, label_col, odds_col, isim in MARKETS: if mkey not in raw_preds or label_col not in df_test.columns or odds_col not in df_test.columns: continue mp = raw_preds[mkey][:, pred_cls] act = pd.to_numeric(df_test[label_col], errors='coerce').values bko = pd.to_numeric(df_test[odds_col], errors='coerce').values valid = (~np.isnan(act) & ~np.isnan(bko) & (bko >= MIN_ODDS) & (bko <= MAX_ODDS)) mp, act, bko = mp[valid], act[valid].astype(int), bko[valid] implied = 1.0 / bko edge = mp - implied print(f'\n{isim}:') for min_e in [0.02, 0.03, 0.05, 0.07, 0.10]: mask = edge >= min_e n = mask.sum() if n < 20: continue won = (act[mask] == pred_cls).astype(int) roi = (bko[mask] - 1) * won - (1 - won) hit = won.mean() avg_roi = roi.mean() total = roi.sum() avg_odds = bko[mask].mean() sign = '+' if total > 0 else '' print(f' edge>={min_e:+.0%} n={n:>5,} hit={hit:.1%} odds={avg_odds:.2f} roi/b={avg_roi:+.3f} toplam={sign}{total:.1f}') all_results.append({'market': isim, 'min_edge': min_e, 'n': n, 'hit': round(hit, 4), 'avg_odds': round(avg_odds, 3), 'avg_roi': round(avg_roi, 4), 'total_roi': round(total, 2)}) # En iyi winners = sorted([r for r in all_results if r['total_roi'] > 0], key=lambda x: x['avg_roi'], reverse=True) print(f'\n{"="*65}') print('KAZANCLI KOMBINASYONLAR (total_roi > 0):') print(f'{"="*65}') for r in winners[:20]: print(f' {r["market"]:<12} edge>={r["min_edge"]:+.0%} | n={r["n"]:>5,} | ' f'hit={r["hit"]:.0%} | roi/b={r["avg_roi"]:+.3f} | toplam={r["total_roi"]:+.1f}') os.makedirs(REPORT_DIR, exist_ok=True) with open(os.path.join(REPORT_DIR, 'backtest_real_odds.json'), 'w') as f: json.dump(all_results, f, indent=2) print(f'\nRapor kaydedildi.') if __name__ == '__main__': main()