"""
V27 FINAL BACKTEST — Conservative Flat Bet

Only the strongest validated edges. No Kelly compounding.

The script runs two experiments on the same cleaned match table:

1. A walk-forward flat-stake backtest of "rest advantage" rules that are
   discovered on the training part of each fold and applied to the next fold.
2. A combined filter that only bets a rest rule when the averaged model
   probability also exceeds the bookmaker's margin-free implied probability.
"""
import itertools
import json
import pickle
from pathlib import Path

import numpy as np
import pandas as pd

DATA_PATH = 'data/training_data_v27.csv'
MODELS_DIR = Path("models/v27")

TEXT_COLS = ['match_id', 'league_name', 'home_team', 'away_team']
ODDS_COLS = ['odds_ms_h', 'odds_ms_d', 'odds_ms_a']  # indexed by class 0/1/2
CLS_NAMES = ['H', 'D', 'A']

FOLDS = 5
STAKE = 10            # flat 10 units per bet
MIN_ODDS = 1.50
MAX_ODDS = 5.0
MIN_SAMPLES = 50      # minimum training matches needed to trust a rule
MIN_EV = 1.02         # only strong edges (>2% edge)
TOP_RULES = 3
MIN_MODEL_EDGE = 0.03
COMBO_RULES = [(14, 5), (10, 5), (7, 5), (5, 5)]  # (home rest >, away rest <)


def clean_data(df):
    """Return a numeric, bettable copy of the raw match table.

    Every column except the identifier/text columns is coerced to numeric
    (unparseable values become NaN). Rows are kept only when all three
    match odds are present and above 1.01 and the result label is known.
    """
    df = df.copy()
    for c in df.columns:
        if c not in TEXT_COLS:
            df[c] = pd.to_numeric(df[c], errors='coerce')
    # A match without a result cannot be settled; previously such rows were
    # silently scored as losing bets and diluted the training hit-rates.
    df = df.dropna(subset=ODDS_COLS + ['label_ms'])
    df = df[(df.odds_ms_h > 1.01) & (df.odds_ms_d > 1.01) & (df.odds_ms_a > 1.01)]
    return df


def load_data(path=DATA_PATH):
    """Read the training CSV from *path* and clean it with clean_data()."""
    return clean_data(pd.read_csv(path, low_memory=False))


def discover_rest_edges(train_df, min_samples=MIN_SAMPLES, min_ev=MIN_EV):
    """Find rest-advantage rules with positive expected value in *train_df*.

    A rule is "home rested more than ``hr`` days and away rested fewer than
    ``ar`` days, bet outcome ``cls``" (0 = home, 2 = away). Its EV estimate
    is hit-rate times average odds on the matching training rows.

    Returns a list of ``(hr, ar, cls, rate, avg_odds, ev, n_rows)`` tuples
    for every rule with at least *min_samples* rows and ``ev > min_ev``.
    """
    strategies = []
    for hr in [5, 7, 10, 14]:
        for ar in [3, 4, 5]:
            idx = (train_df.home_days_rest > hr) & (train_df.away_days_rest < ar)
            sub = train_df[idx]
            if len(sub) < min_samples:
                continue
            for cls, col in [(0, 'odds_ms_h'), (2, 'odds_ms_a')]:
                rate = (sub.label_ms == cls).mean()
                avg_odds = sub[col].mean()
                ev = rate * avg_odds
                if ev > min_ev:
                    strategies.append((hr, ar, cls, rate, avg_odds, ev, len(sub)))
    return strategies


def simulate_flat_bets(test_df, rules, fold, stake=STAKE):
    """Apply *rules* to *test_df* with a flat stake and return the bets.

    *rules* are tuples as produced by discover_rest_edges(). Each match gets
    at most ONE bet per outcome even when several rules fire for it: the
    rules are nested thresholds, so without this guard the same wager was
    placed (and counted) once per overlapping rule.

    Returns a list of dicts with keys ``fold, won, pnl, odds, stake, cls``
    in row order.
    """
    bets = []
    needed = test_df[['home_days_rest', 'away_days_rest', 'label_ms'] + ODDS_COLS]
    for home, away, label, *odds in needed.itertuples(index=False, name=None):
        if pd.isna(home) or pd.isna(away):
            continue
        placed = set()  # outcomes already bet on for this match
        for hr, ar, cls, est_p, *_ in rules:
            if cls in placed:
                continue
            if home <= hr or away >= ar:
                continue
            odds_val = odds[cls]
            if pd.isna(odds_val) or odds_val < MIN_ODDS or odds_val > MAX_ODDS:
                continue
            # Additional filter: only bet when odds give reasonable EV
            if est_p * odds_val < 1.0:
                continue
            placed.add(cls)
            won = bool(label == cls)
            pnl = stake * (odds_val - 1) if won else -stake
            bets.append({'fold': fold, 'won': won, 'pnl': pnl,
                         'odds': odds_val, 'stake': stake,
                         'cls': CLS_NAMES[cls]})
    return bets


def run_walk_forward(df, folds=FOLDS):
    """Run the expanding-window backtest and return all placed bets.

    The table is cut into *folds* equal slices by row position. Folds 2..
    ``folds - 1`` are tested in turn, each trained on every row before it
    (so the training share is 40%, 60%, 80% for five folds).
    """
    # NOTE(review): this assumes rows are already in chronological order;
    # nothing here sorts them — confirm against the data export.
    n = len(df)
    fold_size = n // folds
    all_results = []

    print("="*65)
    print(" V27 WALK-FORWARD FLAT-BET BACKTEST")
    print("="*65)

    for fold in range(2, folds):  # start from fold 2 so we have enough training data
        train_end = fold * fold_size
        test_end = (fold + 1) * fold_size if fold < folds - 1 else n
        train_df = df.iloc[:train_end]
        test_df = df.iloc[train_end:test_end]

        print(f"\n --- Fold {fold}: train={len(train_df)}, test={len(test_df)} ---")

        strategies = discover_rest_edges(train_df)
        if not strategies:
            print(" No strong edges found in training data")
            continue

        strategies.sort(key=lambda s: s[5], reverse=True)
        best = strategies[:TOP_RULES]

        fold_results = simulate_flat_bets(test_df, best, fold)
        all_results.extend(fold_results)

        if fold_results:
            fold_bets = len(fold_results)
            fold_wins = sum(1 for r in fold_results if r['won'])
            fold_pnl = sum(r['pnl'] for r in fold_results)
            roi = fold_pnl / (fold_bets * STAKE) * 100
            labels = [(h, a, CLS_NAMES[c], f'EV={e:.3f}') for h, a, c, _, _, e, _ in best]
            print(f" Best strategies: {labels}")
            print(f" Bets: {fold_bets}, Wins: {fold_wins} ({fold_wins/fold_bets*100:.1f}%), "
                  f"ROI: {roi:+.1f}%, PnL: {fold_pnl:+.0f}")
    return all_results


def report_overall(all_results):
    """Print totals, a per-outcome breakdown and a sampled cumulative PnL curve."""
    print("\n" + "="*65)
    print(" OVERALL RESULTS")
    print("="*65)

    if not all_results:
        print(" No bets placed!")
        return

    total = len(all_results)
    wins = sum(1 for r in all_results if r['won'])
    total_pnl = sum(r['pnl'] for r in all_results)
    total_staked = sum(r['stake'] for r in all_results)
    roi = total_pnl / total_staked * 100

    print(f" Total bets: {total}")
    print(f" Wins: {wins} ({wins/total*100:.1f}%)")
    print(f" Total staked: {total_staked:.0f}")
    print(f" PnL: {total_pnl:+.0f}")
    print(f" ROI: {roi:+.1f}%")
    print(f" Avg odds: {np.mean([r['odds'] for r in all_results]):.2f}")

    # By class
    print("\n --- By Bet Type ---")
    for cls in ['H', 'A']:
        cb = [r for r in all_results if r['cls'] == cls]
        if cb:
            cw = sum(1 for r in cb if r['won'])
            cp = sum(r['pnl'] for r in cb)
            cs = sum(r['stake'] for r in cb)
            print(f" {cls}: {len(cb)} bets, hit={cw/len(cb)*100:.1f}%, ROI={cp/cs*100:+.1f}%")

    # Cumulative PnL curve: one running-sum pass instead of re-summing the
    # prefix at every checkpoint.
    print("\n --- Cumulative PnL ---")
    running = list(itertools.accumulate(r['pnl'] for r in all_results))
    step = max(1, total // 15)
    for j in range(0, total, step):
        print(f" After bet {j+1:4d}: PnL={running[j]:+.0f}")
    print(f" After bet {total:4d}: PnL={running[-1]:+.0f} (FINAL)")


def load_ms_models(models_dir=MODELS_DIR):
    """Load the feature-column list and whichever match-result models exist.

    Returns ``(feat_cols, models)`` where *models* maps ``'xgb'``/``'lgb'``/
    ``'cb'`` to the unpickled object. If the feature-column file is missing,
    returns ``(None, {})`` instead of raising.
    """
    models_dir = Path(models_dir)
    feat_path = models_dir / "v27_feature_cols.json"
    if not feat_path.exists():
        return None, {}
    with open(feat_path) as f:
        feat_cols = json.load(f)

    ms_models = {}
    for name in ['xgb', 'lgb', 'cb']:
        p = models_dir / f"v27_ms_{name}.pkl"
        if p.exists():
            # SECURITY: pickle executes arbitrary code on load — only point
            # this at model files produced by this project.
            with open(p, 'rb') as f:
                ms_models[name] = pickle.load(f)
    return feat_cols, ms_models


def ensemble_probs(ms_models, X_test, feat_cols):
    """Average the per-model class probabilities for *X_test*.

    Each model is called through the API its key implies (xgboost Booster
    via DMatrix, ``predict`` for lgb, ``predict_proba`` for cb).
    """
    preds = []
    for name, m in ms_models.items():
        if name == 'xgb':
            import xgboost as xgb  # optional dependency, only needed here
            dm = xgb.DMatrix(X_test, feature_names=feat_cols)
            preds.append(m.predict(dm))
        elif name == 'lgb':
            preds.append(m.predict(X_test))
        elif name == 'cb':
            preds.append(m.predict_proba(X_test))
    return np.mean(preds, axis=0)  # (n, 3)


def implied_probs(odds_h, odds_d, odds_a):
    """Return margin-free implied probabilities as an ``(n, 3)`` array.

    The inverse odds are divided by their row sum (the bookmaker overround)
    so each row sums to 1.
    """
    inv = np.column_stack([
        1 / np.asarray(odds_h, dtype=float),
        1 / np.asarray(odds_d, dtype=float),
        1 / np.asarray(odds_a, dtype=float),
    ])
    return inv / inv.sum(axis=1, keepdims=True)


def simulate_combo_bets(test_df, model_probs, impl, stake=STAKE):
    """Bet home/away when a rest rule fires AND the model sees an edge.

    A match qualifies when any rule in ``COMBO_RULES`` matches its rest
    days; a bet on outcome ``cls`` is then placed once if the odds are in
    range and ``model_prob - implied_prob >= MIN_MODEL_EDGE``. Testing the
    rules with a single OR mask (instead of looping bets per rule) avoids
    staking the same wager up to four times on well-rested home sides.

    Returns ``(n_bets, n_wins, pnl)``.
    """
    model_probs = np.asarray(model_probs, dtype=float)
    impl = np.asarray(impl, dtype=float)
    home = test_df.home_days_rest.to_numpy(dtype=float)
    away = test_df.away_days_rest.to_numpy(dtype=float)
    label = test_df.label_ms.to_numpy()

    # NaN rest days compare False, so such rows never qualify.
    rest_ok = np.zeros(len(test_df), dtype=bool)
    for hr, ar in COMBO_RULES:
        rest_ok |= (home > hr) & (away < ar)

    combo_bets = 0
    combo_wins = 0
    combo_pnl = 0.0
    for cls in [0, 2]:
        odds = test_df[ODDS_COLS[cls]].to_numpy(dtype=float)
        # DOUBLE FILTER: rest rule + model agrees (model_prob > implied)
        edge = model_probs[:, cls] - impl[:, cls]
        mask = rest_ok & (odds >= MIN_ODDS) & (odds <= MAX_ODDS) & (edge >= MIN_MODEL_EDGE)
        won = label[mask] == cls
        pnl = np.where(won, stake * (odds[mask] - 1), -stake)
        combo_bets += int(mask.sum())
        combo_wins += int(won.sum())
        combo_pnl += float(pnl.sum())
    return combo_bets, combo_wins, combo_pnl


def run_combined(df, models_dir=MODELS_DIR):
    """Backtest rest rules filtered by model agreement on the last 20% of rows."""
    # ── Now combine with MODEL for smarter filtering ──
    print("\n" + "="*65)
    print(" COMBINED: Rest Rules + Fundamentals Model")
    print("="*65)

    feat_cols, ms_models = load_ms_models(models_dir)
    if not ms_models:
        print(" No model artifacts found; skipping combined backtest")
        return

    n = len(df)
    test_df = df.iloc[int(n*0.8):].copy()
    X_test = test_df[feat_cols].values

    model_probs = ensemble_probs(ms_models, X_test, feat_cols)
    impl = implied_probs(test_df.odds_ms_h.values,
                         test_df.odds_ms_d.values,
                         test_df.odds_ms_a.values)

    combo_bets, combo_wins, combo_pnl = simulate_combo_bets(test_df, model_probs, impl)

    if combo_bets > 0:
        roi = combo_pnl / (combo_bets * STAKE) * 100
        print(f" Bets: {combo_bets}")
        print(f" Wins: {combo_wins} ({combo_wins/combo_bets*100:.1f}%)")
        print(f" PnL: {combo_pnl:+.0f}")
        print(f" ROI: {roi:+.1f}%")
    else:
        print(" No combined bets triggered")


def main(data_path=DATA_PATH, models_dir=MODELS_DIR):
    """Run the walk-forward backtest, print the summary, then the combined test."""
    df = load_data(data_path)
    all_results = run_walk_forward(df)
    report_overall(all_results)
    run_combined(df, models_dir)


if __name__ == "__main__":
    main()