# Listing metadata: 216 lines · 7.5 KiB · Python
"""
V27 FINAL BACKTEST — Conservative Flat Bet

Only the strongest validated edges. No Kelly compounding.
"""

import numpy as np
import pandas as pd

# Identifier/text columns that are left untouched by the numeric coercion.
TEXT_COLS = ('match_id', 'league_name', 'home_team', 'away_team')
# Home / draw / away odds columns; all three must be present and usable.
ODDS_COLS = ['odds_ms_h', 'odds_ms_d', 'odds_ms_a']

df = pd.read_csv('data/training_data_v27.csv', low_memory=False)

for col in df.columns:
    if col in TEXT_COLS:
        continue
    # Cells that cannot be parsed become NaN instead of raising.
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Keep only rows with all three odds present and each strictly above 1.01.
df = df.dropna(subset=ODDS_COLS)
has_valid_odds = (
    (df['odds_ms_h'] > 1.01)
    & (df['odds_ms_d'] > 1.01)
    & (df['odds_ms_a'] > 1.01)
)
df = df[has_valid_odds]

n = len(df)

# Walk-forward setup: the rows are cut into 5 equal slices in file order.
# The backtest loop trains on the first `fold` slices and tests on the next
# one, starting at fold 2 (i.e. train on 40% / 60% / 80% of the rows).
# NOTE(review): assumes the CSV rows are in chronological order — confirm.
folds = 5
fold_size = n // folds
all_results = []

print("="*65)
print(" V27 WALK-FORWARD FLAT-BET BACKTEST")
print("="*65)

# label_ms class index -> printed outcome code / odds column.
OUTCOME_CODES = ['H', 'D', 'A']
OUTCOME_ODDS_COLS = ['odds_ms_h', 'odds_ms_d', 'odds_ms_a']
# Only these columns are needed to simulate bets on the test window.
BET_COLS = ['home_days_rest', 'away_days_rest', 'label_ms'] + OUTCOME_ODDS_COLS

stake = 10  # flat 10 units per bet

for fold in range(2, folds):  # start from fold 2 so we have enough training data
    train_end = fold * fold_size
    test_start = train_end
    # The last fold absorbs the remainder rows left over by the integer division.
    test_end = (fold + 1) * fold_size if fold < folds - 1 else n

    train_df = df.iloc[:train_end]
    test_df = df.iloc[test_start:test_end]

    print(f"\n --- Fold {fold}: train={len(train_df)}, test={len(test_df)} ---")

    # Discover REST edges from training data: for every (home rest > hr,
    # away rest < ar) rule, estimate EV as hit rate * mean odds per outcome.
    strategies = []
    for hr in [5, 7, 10, 14]:
        for ar in [3, 4, 5]:
            # The rule mask does not depend on the outcome, so build it once.
            idx = (train_df.home_days_rest > hr) & (train_df.away_days_rest < ar)
            sub = train_df[idx]
            if len(sub) < 50:
                continue  # too few training matches to trust the estimate
            for cls, col in [(0, 'odds_ms_h'), (2, 'odds_ms_a')]:
                rate = (sub.label_ms == cls).mean()
                avg_odds = sub[col].mean()
                ev = rate * avg_odds
                if ev > 1.02:  # only strong edges (>2% edge)
                    strategies.append((hr, ar, cls, rate, avg_odds, ev, len(sub)))

    if not strategies:
        print(" No strong edges found in training data")
        continue

    # Apply best strategies to test (top 3 by training EV).
    strategies.sort(key=lambda x: x[5], reverse=True)
    best = strategies[:3]

    fold_bets = 0
    fold_wins = 0
    fold_pnl = 0

    # Rows without rest information can never satisfy a rule; drop them once
    # instead of re-checking inside the strategy loop.
    bettable = test_df[BET_COLS].dropna(subset=['home_days_rest', 'away_days_rest'])

    for row in bettable.itertuples(index=False):
        # Outcomes already backed on this match. The selected rules often
        # overlap (e.g. "home rest > 5" and "home rest > 7" on the same
        # outcome); without this guard one match/outcome would be staked and
        # logged several times, which is not a flat bet.
        backed = set()
        for hr, ar, cls, est_p, _, _, _ in best:
            if cls in backed:
                continue
            if row.home_days_rest <= hr or row.away_days_rest >= ar:
                continue
            odds_val = getattr(row, OUTCOME_ODDS_COLS[cls])
            if pd.isna(odds_val) or odds_val < 1.50 or odds_val > 5.0:
                continue
            # Additional filter: only bet when odds give reasonable EV
            if est_p * odds_val < 1.0:
                continue

            backed.add(cls)
            won = bool(row.label_ms == cls)
            pnl = stake * (odds_val - 1) if won else -stake
            fold_bets += 1
            if won:
                fold_wins += 1
            fold_pnl += pnl
            all_results.append({'fold': fold, 'won': won, 'pnl': pnl,
                                'odds': odds_val, 'stake': stake,
                                'cls': OUTCOME_CODES[cls]})

    if fold_bets > 0:
        roi = fold_pnl / (fold_bets * stake) * 100
        best_summary = [(h, a, OUTCOME_CODES[c], f'EV={e:.3f}')
                        for h, a, c, _, _, e, _ in best]
        print(f" Best strategies: {best_summary}")
        print(f" Bets: {fold_bets}, Wins: {fold_wins} ({fold_wins/fold_bets*100:.1f}%), "
              f"ROI: {roi:+.1f}%, PnL: {fold_pnl:+.0f}")
    else:
        print(" No bets triggered in this fold")

# Overall: aggregate every bet recorded in all_results across folds.
print("\n" + "="*65)
print(" OVERALL RESULTS")
print("="*65)
if all_results:
    total = len(all_results)
    wins = sum(1 for r in all_results if r['won'])
    total_pnl = sum(r['pnl'] for r in all_results)
    total_staked = sum(r['stake'] for r in all_results)
    roi = total_pnl / total_staked * 100

    print(f" Total bets: {total}")
    print(f" Wins: {wins} ({wins/total*100:.1f}%)")
    print(f" Total staked: {total_staked:.0f}")
    print(f" PnL: {total_pnl:+.0f}")
    print(f" ROI: {roi:+.1f}%")
    print(f" Avg odds: {np.mean([r['odds'] for r in all_results]):.2f}")

    # By class
    print("\n --- By Bet Type ---")
    for cls in ['H', 'A']:
        cb = [r for r in all_results if r['cls'] == cls]
        if cb:
            cw = sum(1 for r in cb if r['won'])
            cp = sum(r['pnl'] for r in cb)
            cs = sum(r['stake'] for r in cb)
            print(f" {cls}: {len(cb)} bets, hit={cw/len(cb)*100:.1f}%, ROI={cp/cs*100:+.1f}%")

    # Cumulative PnL curve: roughly 15 evenly spaced checkpoints plus the
    # final bet. One running total replaces re-summing a growing prefix at
    # every checkpoint, and the final bet is printed exactly once even when
    # it coincides with a sampled checkpoint.
    print("\n --- Cumulative PnL ---")
    step = max(1, total // 15)
    running = 0
    for bet_no, r in enumerate(all_results, start=1):
        running += r['pnl']
        if bet_no == total:
            print(f" After bet {total:4d}: PnL={running:+.0f} (FINAL)")
        elif (bet_no - 1) % step == 0:
            print(f" After bet {bet_no:4d}: PnL={running:+.0f}")
else:
    print(" No bets placed!")

# ── Now combine with MODEL for smarter filtering ──
# A bet is placed only when a rest rule fires AND the averaged model
# probability beats the margin-free implied probability by at least 0.03.
print("\n" + "="*65)
print(" COMBINED: Rest Rules + Fundamentals Model")
print("="*65)

import json
import pickle
from pathlib import Path

MODELS_DIR = Path("models/v27")

# Close the file deterministically instead of leaking the handle.
with open(MODELS_DIR / "v27_feature_cols.json", encoding="utf-8") as f:
    feat_cols = json.load(f)

ms_models = {}
for name in ['xgb', 'lgb', 'cb']:
    p = MODELS_DIR / f"v27_ms_{name}.pkl"
    if p.exists():
        # NOTE(security): unpickling executes arbitrary code; only load model
        # files that were produced by a trusted training run.
        with open(p, 'rb') as f:
            ms_models[name] = pickle.load(f)

if ms_models:
    # Evaluate on the last 20% of rows.
    # NOTE(review): presumably the models were not trained on these rows —
    # confirm against the training script, otherwise the ROI is optimistic.
    test_df = df.iloc[int(n*0.8):].copy()
    X_test = test_df[feat_cols].values

    # Get model predictions; each entry is expected to be an (n, 3) array.
    preds = []
    for name, m in ms_models.items():
        if name == 'xgb':
            import xgboost as xgb  # imported lazily: only needed if the model exists
            dm = xgb.DMatrix(X_test, feature_names=feat_cols)
            preds.append(m.predict(dm))
        elif name == 'lgb':
            preds.append(m.predict(X_test))
        elif name == 'cb':
            preds.append(m.predict_proba(X_test))
    model_probs = np.mean(preds, axis=0)  # (n, 3)

    # Implied probabilities with the bookmaker margin normalised away.
    inv_h = 1 / test_df.odds_ms_h.values
    inv_d = 1 / test_df.odds_ms_d.values
    inv_a = 1 / test_df.odds_ms_a.values
    margin = inv_h + inv_d + inv_a
    impl = np.column_stack([inv_h / margin, inv_d / margin, inv_a / margin])

    # Rest rules as (home rest must exceed, away rest must be below). They
    # are nested, so a match is flagged once if ANY rule fires; counting each
    # matching rule separately would stake the same bet up to four times.
    REST_RULES = [(14, 5), (10, 5), (7, 5), (5, 5)]
    COMBO_STAKE = 10

    home_rest = test_df.home_days_rest.values
    away_rest = test_df.away_days_rest.values
    rest_ok = np.zeros(len(test_df), dtype=bool)
    for hr, ar in REST_RULES:
        # Comparisons against NaN are False, so rows without rest data never qualify.
        rest_ok |= (home_rest > hr) & (away_rest < ar)

    labels = test_df.label_ms.values
    odds_by_cls = {0: test_df.odds_ms_h.values, 2: test_df.odds_ms_a.values}

    combo_bets = 0
    combo_wins = 0
    combo_pnl = 0

    for cls in [0, 2]:
        odds_val = odds_by_cls[cls]
        # DOUBLE FILTER: rest rule + model agrees (model_prob > implied) by a
        # margin of at least 0.03. A NaN model probability fails `>=` and is
        # therefore skipped rather than bet on.
        edge = model_probs[:, cls] - impl[:, cls]
        bet = rest_ok & (odds_val >= 1.50) & (odds_val <= 5.0) & (edge >= 0.03)
        won = bet & (labels == cls)
        lost = bet & ~won

        combo_bets += int(bet.sum())
        combo_wins += int(won.sum())
        combo_pnl += float((COMBO_STAKE * (odds_val[won] - 1)).sum())
        combo_pnl -= COMBO_STAKE * int(lost.sum())

    if combo_bets > 0:
        roi = combo_pnl / (combo_bets * COMBO_STAKE) * 100
        print(f" Bets: {combo_bets}")
        print(f" Wins: {combo_wins} ({combo_wins/combo_bets*100:.1f}%)")
        print(f" PnL: {combo_pnl:+.0f}")
        print(f" ROI: {roi:+.1f}%")
    else:
        print(" No combined bets triggered")
else:
    print(" No v27 models found — combined backtest skipped")