137 lines
5.2 KiB
Python
137 lines
5.2 KiB
Python
"""
|
||
Gerçek Odds Bazlı Backtest
|
||
============================
|
||
Model olasılığı vs gerçek bookmaker odds karşılaştırır.
|
||
Edge varsa bahis açıldığı varsayılır, gerçek ROI hesaplanır.
|
||
"""
|
||
|
||
import os, sys, json
|
||
import numpy as np
|
||
import pandas as pd
|
||
import xgboost as xgb
|
||
|
||
# Project root: the parent of the directory that contains this file.
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Put the project root first on the import path.
sys.path.insert(0, _PROJECT_ROOT)

# Input data, trained model files and report output locations.
DATA_PATH = os.path.join(_PROJECT_ROOT, 'data', 'training_data.csv')
MODELS_DIR = os.path.join(_PROJECT_ROOT, 'models', 'v25')
REPORT_DIR = os.path.join(_PROJECT_ROOT, 'reports')

# Columns excluded from the model feature list built in main().
SKIP_COLS = {
    'match_id', 'home_team_id', 'away_team_id', 'league_id', 'mst_utc',
    'score_home', 'score_away', 'total_goals',
    'ht_score_home', 'ht_score_away', 'ht_total_goals',
    'label_ms', 'label_ou05', 'label_ou15', 'label_ou25', 'label_ou35',
    'label_btts',
    'label_ht_result', 'label_ht_ou05', 'label_ht_ou15', 'label_ht_ft',
    'label_odd_even', 'label_yellow_cards', 'label_cards_ou45',
    'label_handicap_ms',
}

# One row per selection that can be backed:
# (model_key, n_class, pred_class, label_col, odds_col, display_name)
MARKETS = [
    ('ms',   3, 0, 'label_ms',   'odds_ms_h',   'MS-Ev'),
    ('ms',   3, 1, 'label_ms',   'odds_ms_d',   'MS-Ber'),
    ('ms',   3, 2, 'label_ms',   'odds_ms_a',   'MS-Dep'),
    ('ou15', 2, 1, 'label_ou15', 'odds_ou15_o', 'OU15-Ust'),
    ('ou15', 2, 0, 'label_ou15', 'odds_ou15_u', 'OU15-Alt'),
    ('ou25', 2, 1, 'label_ou25', 'odds_ou25_o', 'OU25-Ust'),
    ('ou25', 2, 0, 'label_ou25', 'odds_ou25_u', 'OU25-Alt'),
    ('ou35', 2, 1, 'label_ou35', 'odds_ou35_o', 'OU35-Ust'),
    ('ou35', 2, 0, 'label_ou35', 'odds_ou35_u', 'OU35-Alt'),
    ('btts', 2, 1, 'label_btts', 'odds_btts_y', 'BTTS-Var'),
    ('btts', 2, 0, 'label_btts', 'odds_btts_n', 'BTTS-Yok'),
]

# Only rows whose bookmaker odds lie in [MIN_ODDS, MAX_ODDS] are backtested.
MIN_ODDS = 1.10
MAX_ODDS = 10.0
|
||
|
||
|
||
def load_model(market):
    """Load the saved XGBoost booster for *market*.

    The model file is looked up as ``xgb_v25_<market>.json`` inside
    ``MODELS_DIR``.

    Returns:
        The loaded ``xgb.Booster``, or ``None`` when the file does not exist.
    """
    model_file = os.path.join(MODELS_DIR, f'xgb_v25_{market}.json')
    if os.path.exists(model_file):
        booster = xgb.Booster()
        booster.load_model(model_file)
        return booster
    return None
|
||
|
||
|
||
def _predict_all(loaded, X, feature_cols):
    """Return ``{model_key: probability matrix}`` for every loaded model.

    Args:
        loaded: mapping ``model_key -> (booster, n_class)``.
        X: feature values of the test rows.
        feature_cols: column names matching the columns of ``X``.

    Returns:
        A dict whose values have one column per class. For ``n_class > 2``
        the raw prediction is reshaped to ``(-1, n_class)``; otherwise the
        raw output is used as the class-1 column and ``1 - raw`` as class 0.
    """
    if not loaded:
        return {}
    # The feature matrix is the same for every model, so build it only once.
    dmat = xgb.DMatrix(pd.DataFrame(X, columns=feature_cols))
    preds = {}
    for mkey, (model, n_class) in loaded.items():
        raw = model.predict(dmat)
        if n_class > 2:
            preds[mkey] = raw.reshape(-1, n_class)
        else:
            preds[mkey] = np.column_stack([1 - raw, raw])
    return preds


def _backtest_selection(mp, act, bko, pred_cls, isim):
    """Backtest one selection at several minimum-edge thresholds.

    Args:
        mp: model probability of ``pred_cls`` for each test row.
        act: actual label per row (NaN where missing).
        bko: bookmaker odds per row (NaN where missing).
        pred_cls: the class that is being backed.
        isim: display name of the selection, used in output and results.

    Returns:
        A list of JSON-serializable result dicts, one per threshold that
        produced at least 20 bets.
    """
    # Keep rows with a known label and odds inside [MIN_ODDS, MAX_ODDS].
    valid = (~np.isnan(act) & ~np.isnan(bko) &
             (bko >= MIN_ODDS) & (bko <= MAX_ODDS))
    mp, act, bko = mp[valid], act[valid].astype(int), bko[valid]
    implied = 1.0 / bko
    edge = mp - implied

    rows = []
    print(f'\n{isim}:')
    for min_e in (0.02, 0.03, 0.05, 0.07, 0.10):
        mask = edge >= min_e
        # Built-in int: numpy integers cannot be serialized by json.dump.
        n = int(mask.sum())
        if n < 20:
            continue
        won = (act[mask] == pred_cls).astype(int)
        # One unit staked per bet: win pays odds - 1, loss costs the stake.
        roi = (bko[mask] - 1) * won - (1 - won)
        hit = won.mean()
        avg_roi = roi.mean()
        total = roi.sum()
        avg_odds = bko[mask].mean()
        sign = '+' if total > 0 else ''
        print(f' edge>={min_e:+.0%} n={n:>5,} hit={hit:.1%} odds={avg_odds:.2f} roi/b={avg_roi:+.3f} toplam={sign}{total:.1f}')
        rows.append({
            'market': isim,
            'min_edge': min_e,
            'n': n,
            'hit': float(round(hit, 4)),
            'avg_odds': float(round(avg_odds, 3)),
            'avg_roi': float(round(avg_roi, 4)),
            'total_roi': float(round(total, 2)),
        })
    return rows


def main():
    """Run the real-odds backtest and write the JSON report.

    Steps:
      1. Read ``DATA_PATH``, sort by ``mst_utc`` and keep the last 20% of
         rows as the test set.
      2. Load one booster per distinct model key in ``MARKETS`` (missing
         model files are skipped) and predict class probabilities.
      3. For every market row whose model, label column and odds column are
         available, backtest at each edge threshold.
      4. Print the profitable combinations (top 20 by average ROI per bet)
         and dump all results to ``REPORT_DIR/backtest_real_odds.json``.
    """
    print('Veri yukleniyor...')
    df = pd.read_csv(DATA_PATH, low_memory=False)
    df = df.sort_values('mst_utc')
    n_test = int(len(df) * 0.20)
    df_test = df.tail(n_test).copy().reset_index(drop=True)
    print(f'Test seti: {len(df_test):,} mac')

    feature_cols = [c for c in df.columns if c not in SKIP_COLS]
    X = df_test[feature_cols].fillna(0).values

    # Load each distinct model once.
    loaded = {}
    for mkey, n_class, *_ in MARKETS:
        if mkey in loaded:
            continue
        m = load_model(mkey)
        if m is not None:
            loaded[mkey] = (m, n_class)
    print(f'Modeller: {list(loaded.keys())}')

    # Batch prediction for all loaded models.
    raw_preds = _predict_all(loaded, X, feature_cols)

    # Backtest
    all_results = []
    print(f'\n{"Market":<12} {"Edge>=":>7} {"Bahis":>7} {"Hit%":>7} {"AvgOdds":>9} {"ROI/b":>8} {"Toplam":>10}')
    print('-' * 65)

    for mkey, _n_class, pred_cls, label_col, odds_col, isim in MARKETS:
        if mkey not in raw_preds or label_col not in df_test.columns or odds_col not in df_test.columns:
            continue
        mp = raw_preds[mkey][:, pred_cls]
        act = pd.to_numeric(df_test[label_col], errors='coerce').values
        bko = pd.to_numeric(df_test[odds_col], errors='coerce').values
        all_results.extend(_backtest_selection(mp, act, bko, pred_cls, isim))

    # Best combinations: positive total ROI, ranked by average ROI per bet.
    winners = sorted((r for r in all_results if r['total_roi'] > 0),
                     key=lambda x: x['avg_roi'], reverse=True)
    print(f'\n{"="*65}')
    print('KAZANCLI KOMBINASYONLAR (total_roi > 0):')
    print(f'{"="*65}')
    for r in winners[:20]:
        print(f' {r["market"]:<12} edge>={r["min_edge"]:+.0%} | n={r["n"]:>5,} | '
              f'hit={r["hit"]:.0%} | roi/b={r["avg_roi"]:+.3f} | toplam={r["total_roi"]:+.1f}')

    os.makedirs(REPORT_DIR, exist_ok=True)
    report_path = os.path.join(REPORT_DIR, 'backtest_real_odds.json')
    with open(report_path, 'w', encoding='utf-8') as f:
        json.dump(all_results, f, indent=2)
    print('\nRapor kaydedildi.')
|
||
|
||
|
||
# Run the backtest only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|