main
Deploy Iddaai Backend / build-and-deploy (push) Successful in 37s

This commit is contained in:
2026-05-17 02:17:22 +03:00
parent 17ace9bd12
commit 94c7a4481a
53 changed files with 29602 additions and 7832 deletions
+352
View File
@@ -0,0 +1,352 @@
"""
Inconsistency-Based Backtest
============================
Measures the inconsistency between the per-market models and computes what
the ROI would have been if bets had been placed only on consistent matches.

Logic:
- For each match, detect contradictions between the markets
- Filter out the inconsistent matches
- Compute hit rate and ROI on the consistent matches

Usage:
    python scripts/backtest_consistency.py
"""
import os, sys, json
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import accuracy_score
# Project root = the directory two levels above this script. It is put first
# on sys.path and used as the base for the data and model locations.
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, _PROJECT_ROOT)

# CSV that main() reads and splits chronologically.
DATA_PATH = os.path.join(_PROJECT_ROOT, 'data', 'training_data.csv')
# Directory that load_model() searches for xgb_v25_<market>.json files.
MODELS_DIR = os.path.join(_PROJECT_ROOT, 'models', 'v25')

# Columns that main() excludes when it builds the feature list: by their
# names, identifiers/timestamp, final and half-time scores, and label_* targets.
SKIP_COLS = {
    'match_id',
    'home_team_id',
    'away_team_id',
    'league_id',
    'mst_utc',
    'score_home',
    'score_away',
    'total_goals',
    'ht_score_home',
    'ht_score_away',
    'ht_total_goals',
    'label_ms',
    'label_ou05',
    'label_ou15',
    'label_ou25',
    'label_ou35',
    'label_btts',
    'label_ht_result',
    'label_ht_ou05',
    'label_ht_ou15',
    'label_ht_ft',
    'label_odd_even',
    'label_yellow_cards',
    'label_cards_ou45',
    'label_handicap_ms',
}
def load_model(market: str):
    """
    Load the stored XGBoost booster for one market.

    Args:
        market: Market key used in the file name ``xgb_v25_<market>.json``
            inside MODELS_DIR.

    Returns:
        The loaded ``xgb.Booster``, or None when the model file does not exist.
    """
    model_file = os.path.join(MODELS_DIR, f'xgb_v25_{market}.json')
    if os.path.exists(model_file):
        booster = xgb.Booster()
        booster.load_model(model_file)
        return booster
    return None
def predict_proba(model, X: np.ndarray, feature_cols: list, n_class: int):
    """
    Run a booster on a feature matrix and return one probability column per class.

    Args:
        model: Object exposing ``predict(DMatrix)`` (an ``xgb.Booster`` here).
        X: Feature values, one row per match, columns ordered as feature_cols.
        feature_cols: Column names attached to X before building the DMatrix.
        n_class: Number of classes the model predicts.

    Returns:
        For n_class > 2 the raw prediction reshaped to (-1, n_class).
        Otherwise a two-column array ``[1 - raw, raw]``; this presumably
        treats the raw output as the positive-class probability.
    """
    frame = pd.DataFrame(X, columns=feature_cols)
    scores = model.predict(xgb.DMatrix(frame))
    if n_class <= 2:
        return np.column_stack([1 - scores, scores])
    return scores.reshape(-1, n_class)
def consistency_score(probs: dict) -> tuple[float, list]:
    """
    Score how strongly one match's per-market probabilities contradict each other.

    Rules, evaluated in this order:
      1. Over/under ladder: P(over 2.5) must not exceed P(over 1.5) and
         P(over 3.5) must not exceed P(over 2.5). A 0.05 tolerance is allowed
         before a violation is flagged; the gap is counted twice.
      2. Half-time over 0.5 above 70% while a half-time draw is above 50%.
      3. Half-time ladder: P(HT over 1.5) must not exceed P(HT over 0.5),
         same tolerance and double weight as rule 1.
      4. A dominant side (home or away win above 65%) together with
         both-teams-to-score above 65% (counted at half weight).
      5. Both-teams-to-score above 65% while over 2.5 is below 45%.
      6. Over 3.5 above 45% while both-teams-to-score is below 40%.

    Args:
        probs: Maps probability names to values. Keys read here are
            'ou15_over', 'ou25_over', 'ou35_over', 'ht_ou05_over',
            'ht_ou15_over', 'ht_draw', 'ms_home', 'ms_away' and 'btts_yes'.
            A rule is skipped when one of its probabilities is missing, so
            a market without a model cannot produce a conflict by itself.
            (Missing 'ms_home' / 'ms_away' fall back to 0.33, which is below
            the 0.65 dominance threshold.)

    Returns:
        (score, conflicts): ``score`` is 0.0 when no rule fires and is capped
        at 1.0; it is the accumulated conflict divided by 0.3 per fired rule.
        ``conflicts`` holds one human-readable message per fired rule.
    """
    conflicts = []
    total_weight = 0
    total_conflict = 0

    def known(*values):
        # True only when every probability a rule needs was supplied.
        return all(v is not None for v in values)

    ou15_over = probs.get('ou15_over')
    ou25_over = probs.get('ou25_over')
    ou35_over = probs.get('ou35_over')

    # Rule 1 - over/under ladder: a higher goal line can never be more likely
    # than a lower one (ou35 <= ou25 <= ou15).
    if known(ou15_over, ou25_over) and ou25_over > ou15_over + 0.05:
        gap = ou25_over - ou15_over
        conflicts.append(f'OU25>{ou25_over:.0%} > OU15>{ou15_over:.0%} (imkansız)')
        total_conflict += gap * 2
        total_weight += 1
    if known(ou25_over, ou35_over) and ou35_over > ou25_over + 0.05:
        gap = ou35_over - ou25_over
        conflicts.append(f'OU35>{ou35_over:.0%} > OU25>{ou25_over:.0%} (imkansız)')
        total_conflict += gap * 2
        total_weight += 1

    ht_ou05_over = probs.get('ht_ou05_over')
    ht_draw_prob = probs.get('ht_draw')

    # Rule 2 - a first-half goal is expected, yet a half-time draw is also
    # the favoured half-time result.
    if known(ht_ou05_over, ht_draw_prob) and ht_ou05_over > 0.70 and ht_draw_prob > 0.50:
        conflict = min(ht_ou05_over - 0.5, ht_draw_prob - 0.4)
        conflicts.append(f'HT_OU05>{ht_ou05_over:.0%} ama HT_Draw>{ht_draw_prob:.0%}')
        total_conflict += conflict
        total_weight += 1

    ht_ou15_over = probs.get('ht_ou15_over')

    # Rule 3 - half-time ladder, same reasoning as rule 1.
    if known(ht_ou05_over, ht_ou15_over) and ht_ou15_over > ht_ou05_over + 0.05:
        gap = ht_ou15_over - ht_ou05_over
        conflicts.append(f'HT_OU15>{ht_ou15_over:.0%} > HT_OU05>{ht_ou05_over:.0%} (imkansız)')
        total_conflict += gap * 2
        total_weight += 1

    ms_home = probs.get('ms_home', 0.33)
    ms_away = probs.get('ms_away', 0.33)
    btts_yes = probs.get('btts_yes')

    # Rule 4 - one side is a strong favourite but both teams are expected to
    # score: suspicious rather than impossible, hence the 0.5 weight.
    dominant = max(ms_home, ms_away)
    if known(btts_yes) and dominant > 0.65 and btts_yes > 0.65:
        conflict = (dominant - 0.5) * (btts_yes - 0.5)
        conflicts.append(f'MS dominant>{dominant:.0%} ama BTTS_Yes>{btts_yes:.0%}')
        total_conflict += conflict * 0.5
        total_weight += 1

    # Rule 5 - both teams scoring implies at least two goals, so a low
    # over-2.5 probability next to a high BTTS probability is flagged.
    if known(btts_yes, ou25_over) and btts_yes > 0.65 and ou25_over < 0.45:
        conflict = btts_yes - ou25_over
        conflicts.append(f'BTTS_Yes>{btts_yes:.0%} ama OU25>{ou25_over:.0%} düşük')
        total_conflict += conflict
        total_weight += 1

    # Rule 6 - four or more goals expected while only one team is expected
    # to score.
    if known(ou35_over, btts_yes) and ou35_over > 0.45 and btts_yes < 0.40:
        conflict = (ou35_over - 0.35) * (0.5 - btts_yes)
        conflicts.append(f'OU35>{ou35_over:.0%} ama BTTS_Yes<{btts_yes:.0%}')
        total_conflict += conflict
        total_weight += 1

    # Normalise by 0.3 per fired rule; the 0.1 floor avoids dividing by zero
    # when nothing fired.
    score = min(1.0, total_conflict / max(total_weight * 0.3, 0.1))
    return score, conflicts
def main():
    """
    Run the consistency backtest on the chronologically latest 20% of matches.

    Steps:
      1. Read DATA_PATH, sort by 'mst_utc' and keep the last 20% of rows.
      2. Load every available v25 model and predict on those rows; missing
         feature values are filled with 0.
      3. For each match compute the consistency score and, per market, the
         argmax prediction, whether it was correct, and a unit-stake return.
      4. Print hit rate / ROI per consistency threshold, the most frequent
         conflict messages and the score distribution.
      5. Write a JSON summary to <project root>/reports/backtest_consistency.json,
         creating the directory when needed.

    NOTE(review): the odds used for ROI are 1 / (the model's own predicted
    probability), not bookmaker prices, so the ROI columns presumably reflect
    model calibration rather than realised betting profit - confirm before
    drawing conclusions from them.
    """
    from collections import Counter

    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    markets = ['ms', 'ou15', 'ou25', 'ou35', 'btts']
    thresholds = [0.0, 0.1, 0.2, 0.3, 0.5]
    # model key -> {probability name passed to consistency_score: class column}
    prob_columns = {
        'ms': {'ms_home': 0, 'ms_draw': 1, 'ms_away': 2},
        'ou15': {'ou15_over': 1},
        'ou25': {'ou25_over': 1},
        'ou35': {'ou35_over': 1},
        'btts': {'btts_yes': 1},
        'ht_result': {'ht_home': 0, 'ht_draw': 1, 'ht_away': 2},
        'ht_ou05': {'ht_ou05_over': 1},
        'ht_ou15': {'ht_ou15_over': 1},
    }

    def label_of(row, market):
        # A label can be NaN; int(NaN) raises, so missing values are mapped
        # to the -1 sentinel that the scoring loop skips.
        value = row.get(f'label_{market}', -1)
        return -1 if pd.isna(value) else int(value)

    print('Loading data...')
    df = pd.read_csv(DATA_PATH, low_memory=False)

    # Last 20% = test set (chronological).
    df = df.sort_values('mst_utc')
    n_test = int(len(df) * 0.20)
    df_test = df.tail(n_test).copy()
    print(f'Test seti: {len(df_test):,} maç')

    feature_cols = [c for c in df.columns if c not in SKIP_COLS]

    # Load the models; markets whose model file is missing are dropped.
    print('Modeller yükleniyor...')
    models = {
        'ms': (load_model('ms'), 3),
        'ou15': (load_model('ou15'), 2),
        'ou25': (load_model('ou25'), 2),
        'ou35': (load_model('ou35'), 2),
        'btts': (load_model('btts'), 2),
        'ht_result': (load_model('ht_result'), 3),
        'ht_ou05': (load_model('ht_ou05'), 2),
        'ht_ou15': (load_model('ht_ou15'), 2),
    }
    models = {k: v for k, v in models.items() if v[0] is not None}
    print(f'Yüklenen model: {list(models.keys())}')

    X = df_test[feature_cols].fillna(0).values

    # Predict every market once for the whole test set.
    print('Tahminler yapılıyor...')
    preds = {
        mkey: predict_proba(model, X, feature_cols, n_class)
        for mkey, (model, n_class) in models.items()
    }

    # Only the label columns are needed per row, so iterate a narrow frame
    # instead of materialising every full-width row.
    label_cols = [f'label_{m}' for m in markets if f'label_{m}' in df_test.columns]
    label_rows = df_test[label_cols]

    # Consistency score and per-market outcome for every match.
    results = []
    for i, (_, row) in enumerate(label_rows.iterrows()):
        # Collect the probabilities that consistency_score reads.
        probs = {}
        for mkey, columns in prob_columns.items():
            if mkey not in preds:
                continue
            for prob_name, class_idx in columns.items():
                probs[prob_name] = preds[mkey][i][class_idx]
        c_score, conflicts = consistency_score(probs)

        # Actual outcomes (-1 = unknown).
        actual = {m: label_of(row, m) for m in markets}

        # Prediction and correctness for every market.
        market_results = {}
        for mkt in markets:
            if mkt not in preds or actual[mkt] < 0:
                continue
            pred_class = int(np.argmax(preds[mkt][i]))
            correct = int(pred_class == actual[mkt])
            # Odds implied by the model's own probability (odds = 1 / prob);
            # very small probabilities are capped at odds of 10.
            pred_prob = float(preds[mkt][i][pred_class])
            implied_odds = 1 / pred_prob if pred_prob > 0.01 else 10.0
            # Unit stake: win (odds - 1) when correct, lose 1 otherwise.
            roi = (implied_odds - 1) * correct - (1 - correct)
            market_results[mkt] = {
                'pred': pred_class,
                'actual': actual[mkt],
                'correct': correct,
                'prob': pred_prob,
                'roi': roi,
            }
        results.append({
            'idx': i,
            'consistency_score': c_score,
            'conflicts': conflicts,
            'probs': probs,
            'market_results': market_results,
        })

    # NaN (not None) marks a missing market so the columns stay numeric even
    # when a market has no result at all.
    df_results = pd.DataFrame([{
        'consistency_score': r['consistency_score'],
        'n_conflicts': len(r['conflicts']),
        **{f'{m}_correct': r['market_results'].get(m, {}).get('correct', np.nan)
           for m in markets},
        **{f'{m}_roi': r['market_results'].get(m, {}).get('roi', np.nan)
           for m in markets},
    } for r in results])

    # -- Analysis ----------------------------------------------------------
    print(f'\n{"="*70}')
    print('TUTARSIZLIK ANALİZİ')
    print(f'{"="*70}')
    for t in thresholds:
        mask = df_results['consistency_score'] <= t
        n = int(mask.sum())
        if n < 50:
            # Too few matches for a meaningful table.
            continue
        print(f'\n[Tutarsızlık <= {t:.1f}] → {n:,} maç ({n/len(df_results)*100:.0f}%)')
        print(f' {"Market":<8} {"HitRate":>8} {"ROI/bahis":>10} {"Toplam ROI":>12}')
        print(f' {"-"*42}')
        for m in markets:
            col_c = f'{m}_correct'
            col_r = f'{m}_roi'
            if col_c not in df_results.columns:
                continue
            sub = df_results.loc[mask, col_c].dropna()
            roi_sub = df_results.loc[mask, col_r].dropna()
            if len(sub) < 20:
                continue
            hit = sub.mean()
            avg_roi = roi_sub.mean()
            total_roi = roi_sub.sum()
            print(f' {m:<8} {hit:>7.1%} {avg_roi:>+9.3f} {total_roi:>+11.1f}')

    # Breakdown by conflict message.
    print(f'\n{"="*70}')
    print('EN SIK ÇELIŞKILER')
    print(f'{"="*70}')
    all_conflicts = [c for r in results for c in r['conflicts']]
    for conflict, cnt in Counter(all_conflicts).most_common(10):
        print(f' {cnt:>5}x {conflict}')

    # Distribution of the consistency score.
    print(f'\n{"="*70}')
    print('TUTARSIZLIK DAĞILIMI')
    print(f'{"="*70}')
    for label, lo, hi in [
        ('Tamamen tutarlı', 0.0, 0.05),
        ('Çok tutarlı', 0.05, 0.15),
        ('Orta', 0.15, 0.30),
        ('Tutarsız', 0.30, 0.50),
        ('Çok tutarsız', 0.50, 1.01),
    ]:
        mask = (df_results['consistency_score'] >= lo) & (df_results['consistency_score'] < hi)
        n = int(mask.sum())
        ou25_hit = df_results.loc[mask, 'ou25_correct'].mean()
        ms_hit = df_results.loc[mask, 'ms_correct'].mean()
        print(f' {label:<20} {n:>6,} maç ({n/len(df_results)*100:>4.0f}%) | '
              f'MS={ms_hit:.0%} OU25={ou25_hit:.0%}')

    # Save the report.
    report = {
        'total_test': len(df_results),
        'thresholds': {},
    }
    for t in thresholds:
        mask = df_results['consistency_score'] <= t
        n = int(mask.sum())
        entry = {
            'n_matches': n,
            'pct': round(float(n / len(df_results) * 100), 1),
            'markets': {},
        }
        report['thresholds'][str(t)] = entry
        for m in markets:
            col_c = f'{m}_correct'
            col_r = f'{m}_roi'
            if col_c not in df_results.columns:
                continue
            sub_c = df_results.loc[mask, col_c].dropna()
            sub_r = df_results.loc[mask, col_r].dropna()
            if len(sub_c) > 0:
                entry['markets'][m] = {
                    'hit_rate': round(float(sub_c.mean()), 4),
                    'avg_roi': round(float(sub_r.mean()), 4),
                    'total_roi': round(float(sub_r.sum()), 2),
                }

    out_path = os.path.join(project_root, 'reports', 'backtest_consistency.json')
    # Create reports/ first so a missing directory cannot discard the whole run.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2)
    print(f'\nRapor: {out_path}')
# Run the backtest only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()