Files
iddaai-be/ai-engine/scripts/backtest_niche.py
T
2026-04-22 02:17:02 +03:00

216 lines
7.5 KiB
Python

"""
V27 FINAL BACKTEST — Conservative Flat Bet
Only the strongest validated edges. No Kelly compounding.
"""
import pandas as pd, numpy as np
# Load the V27 training table and keep only rows with usable 1X2 odds.
df = pd.read_csv('data/training_data_v27.csv', low_memory=False)

# Identifier / name columns stay as-is; every other column is coerced to a
# number, with unparseable cells becoming NaN.
text_cols = {'match_id', 'league_name', 'home_team', 'away_team'}
for col in df.columns:
    if col in text_cols:
        continue
    df[col] = pd.to_numeric(df[col], errors='coerce')

# A row is usable only if all three match-result odds are present and > 1.01.
odds_cols = ['odds_ms_h', 'odds_ms_d', 'odds_ms_a']
df = df.dropna(subset=odds_cols)
df = df[df[odds_cols].gt(1.01).all(axis=1)]

n = df.shape[0]
# Walk-forward evaluation with an expanding training window.
# The rows are cut into `folds` equal slices in file order. Slices 2..folds-1
# are each used once as the test window, with every earlier row as training
# data (i.e. train on the first 40%, 60%, 80% and test on the next slice; the
# last slice also absorbs the remainder rows).
# NOTE(review): this is only a genuine walk-forward if the CSV rows are in
# chronological order — nothing in this script sorts them; confirm upstream.
folds = 5
fold_size = n // folds
all_results = []  # one dict per placed bet, consumed by the overall report

OUTCOMES = ['H', 'D', 'A']  # label_ms / cls index -> bet-type tag
ODDS_COLS = ['odds_ms_h', 'odds_ms_d', 'odds_ms_a']  # same index order
BET_COLS = ['home_days_rest', 'away_days_rest', 'label_ms'] + ODDS_COLS

print("="*65)
print(" V27 WALK-FORWARD FLAT-BET BACKTEST")
print("="*65)

for fold in range(2, folds):  # start from fold 2 so we have enough training data
    train_end = fold * fold_size
    test_start = train_end
    test_end = (fold + 1) * fold_size if fold < folds - 1 else n
    train_df = df.iloc[:train_end]
    test_df = df.iloc[test_start:test_end]
    print(f"\n --- Fold {fold}: train={len(train_df)}, test={len(test_df)} ---")

    # Discover rest-day edges on the training rows: home side rested MORE than
    # `hr` days while the away side rested FEWER than `ar` days.
    strategies = []
    for hr in [5, 7, 10, 14]:
        for ar in [3, 4, 5]:
            # The row subset does not depend on the outcome, so build it once.
            sub = train_df[(train_df.home_days_rest > hr)
                           & (train_df.away_days_rest < ar)]
            if len(sub) < 50:
                continue  # too few samples to trust the hit rate
            for cls, col in [(0, 'odds_ms_h'), (2, 'odds_ms_a')]:
                rate = (sub.label_ms == cls).mean()
                avg_odds = sub[col].mean()
                # NOTE(review): hit-rate * mean-odds is only a proxy for the
                # realised return per unit staked; the two differ whenever
                # odds and outcome are correlated. Consider validating it.
                ev = rate * avg_odds
                if ev > 1.02:  # only strong edges (>2% edge)
                    strategies.append((hr, ar, cls, rate, avg_odds, ev, len(sub)))

    if not strategies:
        print(" No strong edges found in training data")
        continue

    # Apply the three highest-EV rules to the unseen test window.
    strategies.sort(key=lambda x: x[5], reverse=True)
    best = strategies[:3]  # top 3 only
    fold_bets = 0
    fold_wins = 0
    fold_pnl = 0
    stake = 10  # flat 10 units

    # Only the six columns the loop reads are iterated.
    for row in test_df[BET_COLS].itertuples(index=False):
        if pd.isna(row.home_days_rest) or pd.isna(row.away_days_rest):
            continue
        # The selected rules are frequently nested (e.g. home>5 and home>7 for
        # the same outcome). Staking once per matching rule would put several
        # stakes on the same outcome of one match, which is not a flat bet and
        # skews bet count / ROI — so each outcome is backed at most once.
        backed = set()
        for hr, ar, cls, est_p, _, _, _ in best:
            if cls in backed:
                continue
            if row.home_days_rest <= hr or row.away_days_rest >= ar:
                continue
            odds_val = getattr(row, ODDS_COLS[cls])
            if pd.isna(odds_val) or odds_val < 1.50 or odds_val > 5.0:
                continue
            # Additional filter: only bet when odds give reasonable EV
            if est_p * odds_val < 1.0:
                continue
            won = (row.label_ms == cls)
            pnl = stake * (odds_val - 1) if won else -stake
            backed.add(cls)
            fold_bets += 1
            if won:
                fold_wins += 1
            fold_pnl += pnl
            all_results.append({'fold': fold, 'won': won, 'pnl': pnl,
                                'odds': odds_val, 'stake': stake,
                                'cls': OUTCOMES[cls]})

    if fold_bets > 0:
        roi = fold_pnl / (fold_bets * stake) * 100
        best_summary = [(h, a, OUTCOMES[c], f'EV={e:.3f}')
                        for h, a, c, _, _, e, _ in best]
        print(f" Best strategies: {best_summary}")
        print(f" Bets: {fold_bets}, Wins: {fold_wins} ({fold_wins/fold_bets*100:.1f}%), "
              f"ROI: {roi:+.1f}%, PnL: {fold_pnl:+.0f}")
    else:
        # Previously a fold with strategies but no qualifying bets was silent.
        print(" No qualifying bets in test window")
# Aggregate report over every bet recorded in `all_results`.
print("\n" + "="*65)
print(" OVERALL RESULTS")
print("="*65)

if not all_results:
    print(" No bets placed!")
else:
    def _pnl_of(bets):
        """Sum the profit/loss of the given bet records."""
        return sum(r['pnl'] for r in bets)

    def _staked_of(bets):
        """Sum the stakes of the given bet records."""
        return sum(r['stake'] for r in bets)

    total = len(all_results)
    wins = sum(1 for r in all_results if r['won'])
    total_pnl = _pnl_of(all_results)
    total_staked = _staked_of(all_results)
    roi = total_pnl / total_staked * 100
    mean_odds = np.mean([r['odds'] for r in all_results])

    print(f" Total bets: {total}")
    print(f" Wins: {wins} ({wins/total*100:.1f}%)")
    print(f" Total staked: {total_staked:.0f}")
    print(f" PnL: {total_pnl:+.0f}")
    print(f" ROI: {roi:+.1f}%")
    print(f" Avg odds: {mean_odds:.2f}")

    # Break-down per bet type (home / away); draws are never backed above.
    print("\n --- By Bet Type ---")
    for cls in ['H', 'A']:
        cb = [r for r in all_results if r['cls'] == cls]
        if not cb:
            continue
        cw = sum(1 for r in cb if r['won'])
        cp = _pnl_of(cb)
        cs = _staked_of(cb)
        print(f" {cls}: {len(cb)} bets, hit={cw/len(cb)*100:.1f}%, ROI={cp/cs*100:+.1f}%")

    # Coarse equity curve: roughly 15 evenly spaced checkpoints, then the end.
    print("\n --- Cumulative PnL ---")
    step = max(1, total // 15)
    for j in range(0, total, step):
        cum = _pnl_of(all_results[:j+1])
        print(f" After bet {j+1:4d}: PnL={cum:+.0f}")
    cum = _pnl_of(all_results)
    print(f" After bet {total:4d}: PnL={cum:+.0f} (FINAL)")
# ── Combined strategy: rest-day rules filtered by the fundamentals model ──
# A bet is placed only when (a) a rest-day rule fires, (b) the odds are in
# [1.50, 5.00] and (c) the averaged model probability beats the margin-free
# implied probability by at least 3 percentage points.
print("\n" + "="*65)
print(" COMBINED: Rest Rules + Fundamentals Model")
print("="*65)
import json
import pickle
from pathlib import Path

MODELS_DIR = Path("models/v27")
# Context manager so the feature-list file handle is closed deterministically.
with open(MODELS_DIR / "v27_feature_cols.json", encoding="utf-8") as f:
    feat_cols = json.load(f)

ms_models = {}
for name in ['xgb', 'lgb', 'cb']:
    p = MODELS_DIR / f"v27_ms_{name}.pkl"
    if p.exists():
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only acceptable for artifacts produced locally by a trusted
        # training run — never point MODELS_DIR at untrusted data.
        with open(p, 'rb') as f:
            ms_models[name] = pickle.load(f)

# When no model artifact exists the whole section is skipped silently.
if ms_models:
    # Hold-out = last 20% of rows.
    # NOTE(review): this is only out-of-sample if the pickled models were
    # trained on the first 80% — cannot be verified from this script.
    test_df = df.iloc[int(n*0.8):].copy()
    X_test = test_df[feat_cols].values

    # Get model predictions (each backend has its own predict API).
    preds = []
    for name, m in ms_models.items():
        if name == 'xgb':
            import xgboost as xgb  # lazy: only needed if an xgb model exists
            dm = xgb.DMatrix(X_test, feature_names=feat_cols)
            preds.append(m.predict(dm))
        elif name == 'lgb':
            preds.append(m.predict(X_test))
        elif name == 'cb':
            preds.append(m.predict_proba(X_test))
    # Simple average ensemble; expected shape (rows, 3) assuming every model
    # emits H/D/A class probabilities — TODO confirm for the lgb artifact.
    model_probs = np.mean(preds, axis=0)

    # Bookmaker implied probabilities with the overround normalised away.
    margin = 1/test_df.odds_ms_h.values + 1/test_df.odds_ms_d.values + 1/test_df.odds_ms_a.values
    impl = np.column_stack([
        (1/test_df.odds_ms_h.values)/margin,
        (1/test_df.odds_ms_d.values)/margin,
        (1/test_df.odds_ms_a.values)/margin,
    ])

    # (min home rest exclusive, max away rest exclusive)
    rest_rules = [(14, 5), (10, 5), (7, 5), (5, 5)]
    combo_stake = 10
    min_edge = 0.03

    home_rest = test_df.home_days_rest.to_numpy()
    away_rest = test_df.away_days_rest.to_numpy()
    labels = test_df.label_ms.to_numpy()
    # Column 0 = home odds, 1 = draw odds, 2 = away odds (same order as impl).
    odds = np.column_stack([test_df.odds_ms_h.to_numpy(),
                            test_df.odds_ms_d.to_numpy(),
                            test_df.odds_ms_a.to_numpy()])

    combo_bets = 0
    combo_wins = 0
    combo_pnl = 0
    for j, (h_rest, a_rest, label) in enumerate(zip(home_rest, away_rest, labels)):
        if pd.isna(h_rest) or pd.isna(a_rest):
            continue
        # The rules are nested (a match with home rest > 14 also satisfies
        # > 10, > 7 and > 5) and nothing below depends on WHICH rule fired.
        # Looping over the rules therefore used to count the identical bet up
        # to four times; a match now qualifies once if ANY rule fires.
        if not any(h_rest > hr and a_rest < ar for hr, ar in rest_rules):
            continue
        for cls in (0, 2):  # back home or away only, never the draw
            odds_val = odds[j, cls]
            if pd.isna(odds_val) or odds_val < 1.50 or odds_val > 5.0:
                continue
            # DOUBLE FILTER: rest rule + model agrees (model_prob > implied)
            # by at least `min_edge`. Written as "not >=" so a NaN model
            # probability is rejected instead of slipping through.
            edge = model_probs[j, cls] - impl[j, cls]
            if not edge >= min_edge:
                continue
            won = (label == cls)
            pnl = combo_stake * (odds_val - 1) if won else -combo_stake
            combo_bets += 1
            if won:
                combo_wins += 1
            combo_pnl += pnl

    if combo_bets > 0:
        roi = combo_pnl / (combo_bets * combo_stake) * 100
        print(f" Bets: {combo_bets}")
        print(f" Wins: {combo_wins} ({combo_wins/combo_bets*100:.1f}%)")
        print(f" PnL: {combo_pnl:+.0f}")
        print(f" ROI: {roi:+.1f}%")
    else:
        print(" No combined bets triggered")