gg

2026-04-22 02:17:02 +03:00
parent 2ccd6831eb
commit df428ed1e8
19 changed files with 6436 additions and 9 deletions
@@ -0,0 +1,215 @@
+"""
+V27 FINAL BACKTEST — Conservative Flat Bet
+Only the strongest validated edges. No Kelly compounding.
+"""
+import pandas as pd, numpy as np
+
+df = pd.read_csv('data/training_data_v27.csv', low_memory=False)
+for c in df.columns:
+    if c not in ['match_id','league_name','home_team','away_team']:
+        df[c] = pd.to_numeric(df[c], errors='coerce')
+df = df.dropna(subset=['odds_ms_h','odds_ms_d','odds_ms_a'])
+df = df[(df.odds_ms_h>1.01)&(df.odds_ms_d>1.01)&(df.odds_ms_a>1.01)]
+
+n = len(df)
+# 5-fold walk-forward: train on 60%, validate patterns, test on remaining
+folds = 5
+fold_size = n // folds
+all_results = []
+
+print("="*65)
+print("  V27 WALK-FORWARD FLAT-BET BACKTEST")
+print("="*65)
+
+for fold in range(2, folds):  # start from fold 2 so we have enough training data
+    train_end = fold * fold_size
+    test_start = train_end
+    test_end = (fold+1)*fold_size if fold < folds-1 else n
+
+    train_df = df.iloc[:train_end]
+    test_df = df.iloc[test_start:test_end]
+
+    print(f"\n  --- Fold {fold}: train={len(train_df)}, test={len(test_df)} ---")
+
+    # Discover REST edges from training data
+    strategies = []
+
+    for hr in [5, 7, 10, 14]:
+        for ar in [3, 4, 5]:
+            for cls, col in [(0,'odds_ms_h'), (2,'odds_ms_a')]:
+                idx = (train_df.home_days_rest > hr) & (train_df.away_days_rest < ar)
+                sub = train_df[idx]
+                if len(sub) < 50:
+                    continue
+                rate = (sub.label_ms == cls).mean()
+                avg_odds = sub[col].mean()
+                ev = rate * avg_odds
+                if ev > 1.02:  # only strong edges (>2% edge)
+                    strategies.append((hr, ar, cls, rate, avg_odds, ev, len(sub)))
+
+    if not strategies:
+        print("    No strong edges found in training data")
+        continue
+
+    # Apply best strategies to test
+    strategies.sort(key=lambda x: x[5], reverse=True)
+    best = strategies[:3]  # top 3 only
+
+    fold_bets = 0
+    fold_wins = 0
+    fold_pnl = 0
+    stake = 10  # flat 10 units
+
+    for _, row in test_df.iterrows():
+        for hr, ar, cls, est_p, _, _, _ in best:
+            if pd.isna(row.home_days_rest) or pd.isna(row.away_days_rest):
+                continue
+            if row.home_days_rest <= hr or row.away_days_rest >= ar:
+                continue
+            odds_col = ['odds_ms_h','odds_ms_d','odds_ms_a'][cls]
+            odds_val = row[odds_col]
+            if pd.isna(odds_val) or odds_val < 1.50 or odds_val > 5.0:
+                continue
+            # Additional filter: only bet when odds give reasonable EV
+            if est_p * odds_val < 1.0:
+                continue
+
+            won = (row.label_ms == cls)
+            pnl = stake * (odds_val - 1) if won else -stake
+            fold_bets += 1
+            if won:
+                fold_wins += 1
+            fold_pnl += pnl
+            all_results.append({'fold': fold, 'won': won, 'pnl': pnl,
+                                'odds': odds_val, 'stake': stake,
+                                'cls': ['H','D','A'][cls]})
+
+    if fold_bets > 0:
+        roi = fold_pnl / (fold_bets * stake) * 100
+        print(f"    Best strategies: {[(h,a,['H','D','A'][c],f'EV={e:.3f}') for h,a,c,_,_,e,_ in best]}")
+        print(f"    Bets: {fold_bets}, Wins: {fold_wins} ({fold_wins/fold_bets*100:.1f}%), "
+              f"ROI: {roi:+.1f}%, PnL: {fold_pnl:+.0f}")
+
+# Overall
+print("\n" + "="*65)
+print("  OVERALL RESULTS")
+print("="*65)
+if all_results:
+    total = len(all_results)
+    wins = sum(1 for r in all_results if r['won'])
+    total_pnl = sum(r['pnl'] for r in all_results)
+    total_staked = sum(r['stake'] for r in all_results)
+    roi = total_pnl / total_staked * 100
+
+    print(f"  Total bets:   {total}")
+    print(f"  Wins:         {wins} ({wins/total*100:.1f}%)")
+    print(f"  Total staked: {total_staked:.0f}")
+    print(f"  PnL:          {total_pnl:+.0f}")
+    print(f"  ROI:          {roi:+.1f}%")
+    print(f"  Avg odds:     {np.mean([r['odds'] for r in all_results]):.2f}")
+
+    # By class
+    print("\n  --- By Bet Type ---")
+    for cls in ['H','A']:
+        cb = [r for r in all_results if r['cls'] == cls]
+        if cb:
+            cw = sum(1 for r in cb if r['won'])
+            cp = sum(r['pnl'] for r in cb)
+            cs = sum(r['stake'] for r in cb)
+            print(f"    {cls}: {len(cb)} bets, hit={cw/len(cb)*100:.1f}%, ROI={cp/cs*100:+.1f}%")
+
+    # Cumulative PnL curve
+    print("\n  --- Cumulative PnL ---")
+    cum = 0
+    step = max(1, total // 15)
+    for j in range(0, total, step):
+        cum = sum(r['pnl'] for r in all_results[:j+1])
+        print(f"    After bet {j+1:4d}: PnL={cum:+.0f}")
+    cum = sum(r['pnl'] for r in all_results)
+    print(f"    After bet {total:4d}: PnL={cum:+.0f} (FINAL)")
+else:
+    print("  No bets placed!")
+
+# ── Now combine with MODEL for smarter filtering ──
+print("\n" + "="*65)
+print("  COMBINED: Rest Rules + Fundamentals Model")
+print("="*65)
+
+import pickle, json
+from pathlib import Path
+MODELS_DIR = Path("models/v27")
+
+feat_cols = json.load(open(MODELS_DIR / "v27_feature_cols.json"))
+ms_models = {}
+for name in ['xgb','lgb','cb']:
+    p = MODELS_DIR / f"v27_ms_{name}.pkl"
+    if p.exists():
+        with open(p,'rb') as f:
+            ms_models[name] = pickle.load(f)
+
+if ms_models:
+    test_df = df.iloc[int(n*0.8):].copy()
+    X_test = test_df[feat_cols].values
+
+    # Get model predictions
+    preds = []
+    for name, m in ms_models.items():
+        if name == 'xgb':
+            import xgboost as xgb
+            dm = xgb.DMatrix(X_test, feature_names=feat_cols)
+            preds.append(m.predict(dm))
+        elif name == 'lgb':
+            preds.append(m.predict(X_test))
+        elif name == 'cb':
+            preds.append(m.predict_proba(X_test))
+    model_probs = np.mean(preds, axis=0)  # (n, 3)
+
+    # Now apply rest rules + model agreement
+    margin = 1/test_df.odds_ms_h.values + 1/test_df.odds_ms_d.values + 1/test_df.odds_ms_a.values
+    impl = np.column_stack([
+        (1/test_df.odds_ms_h.values)/margin,
+        (1/test_df.odds_ms_d.values)/margin,
+        (1/test_df.odds_ms_a.values)/margin,
+    ])
+
+    combo_bets = 0
+    combo_wins = 0
+    combo_pnl = 0
+
+    for j in range(len(test_df)):
+        row = test_df.iloc[j]
+        for hr, ar in [(14,5),(10,5),(7,5),(5,5)]:
+            if pd.isna(row.home_days_rest) or pd.isna(row.away_days_rest):
+                continue
+            if row.home_days_rest <= hr or row.away_days_rest >= ar:
+                continue
+            for cls in [0, 2]:
+                odds_val = [row.odds_ms_h, row.odds_ms_d, row.odds_ms_a][cls]
+                if pd.isna(odds_val) or odds_val < 1.50 or odds_val > 5.0:
+                    continue
+
+                model_p = model_probs[j, cls]
+                impl_p = impl[j, cls]
+
+                # DOUBLE FILTER: rest rule + model agrees (model_prob > implied)
+                if model_p <= impl_p:
+                    continue  # model disagrees, skip
+                edge = model_p - impl_p
+                if edge < 0.03:
+                    continue  # too small
+
+                won = (row.label_ms == cls)
+                pnl = 10 * (odds_val - 1) if won else -10
+                combo_bets += 1
+                if won:
+                    combo_wins += 1
+                combo_pnl += pnl
+
+    if combo_bets > 0:
+        roi = combo_pnl / (combo_bets * 10) * 100
+        print(f"  Bets:   {combo_bets}")
+        print(f"  Wins:   {combo_wins} ({combo_wins/combo_bets*100:.1f}%)")
+        print(f"  PnL:    {combo_pnl:+.0f}")
+        print(f"  ROI:    {roi:+.1f}%")
+    else:
+        print("  No combined bets triggered")
@@ -0,0 +1,312 @@
+"""
+V28 — CONDITIONAL FREQUENCY ENGINE
+====================================
+User's strategy automated at scale:
+
+For every match (e.g. Beşiktaş vs Konya):
+  1. Look at Beşiktaş's HOME history when their MS1 odds were in the same band (e.g. 1.30-1.40)
+     → What % of those matches ended OU 1.5 over? OU 2.5 over? MS1?
+  2. Look at Konya's AWAY history when their MS2 odds were in the same band (e.g. 2.00-2.20)
+     → Same questions
+  3. COMBINE both signals:
+     → If BOTH teams historically produce >80% OU1.5 over at these odds → BET OU1.5 over
+     → This is the user's exact Excel strategy, now running on 104K matches
+
+CRITICAL: Only uses PAST matches for each prediction (no future leakage)
+"""
+import pandas as pd
+import numpy as np
+from collections import defaultdict
+import warnings
+warnings.filterwarnings('ignore')
+
+# ─── Load Data ───
+print("Loading data...")
+df = pd.read_csv('data/training_data_v27.csv', low_memory=False)
+KEEP_STR = ['match_id', 'league_name', 'home_team', 'away_team',
+            'home_team_id', 'away_team_id', 'league_id', 'mst_utc']
+for c in df.columns:
+    if c not in KEEP_STR:
+        df[c] = pd.to_numeric(df[c], errors='coerce')
+
+# Ensure chronological order (by match_id or date)
+if 'mst_utc' in df.columns:
+    df['mst_utc'] = pd.to_datetime(df['mst_utc'], errors='coerce')
+    df = df.sort_values('mst_utc').reset_index(drop=True)
+
+# Filter: need valid odds + scores
+df = df.dropna(subset=['odds_ms_h', 'odds_ms_a', 'score_home', 'score_away',
+                        'home_team_id', 'away_team_id', 'label_ms'])
+
+# Compute actual goal labels
+df['total_goals'] = df['score_home'] + df['score_away']
+df['ou15_actual'] = (df['total_goals'] > 1.5).astype(int)
+df['ou25_actual'] = (df['total_goals'] > 2.5).astype(int)
+df['ou35_actual'] = (df['total_goals'] > 3.5).astype(int)
+df['btts_actual'] = ((df['score_home'] > 0) & (df['score_away'] > 0)).astype(int)
+df['ms_result'] = df['label_ms'].astype(int)  # 0=H, 1=D, 2=A
+
+N = len(df)
+print(f"Total matches: {N}")
+print(f"Unique home teams: {df.home_team_id.nunique()}")
+print(f"Unique away teams: {df.away_team_id.nunique()}")
+
+# ─── Odds Band Helper ───
+def get_odds_band(odds, band_width=0.10):
+    """Round odds to nearest band. E.g. 1.35 → (1.30, 1.40)"""
+    lower = round(np.floor(odds / band_width) * band_width, 2)
+    upper = round(lower + band_width, 2)
+    return (lower, upper)
+
+def get_odds_band_wide(odds):
+    """Wider band for less common teams. E.g. 1.35 → (1.20, 1.50)"""
+    if odds < 1.50:
+        return (1.01, 1.50)
+    elif odds < 2.00:
+        return (1.50, 2.00)
+    elif odds < 2.50:
+        return (2.00, 2.50)
+    elif odds < 3.00:
+        return (2.50, 3.00)
+    elif odds < 4.00:
+        return (3.00, 4.00)
+    elif odds < 6.00:
+        return (4.00, 6.00)
+    else:
+        return (6.00, 20.00)
+
+# ─── Build Conditional Frequency Lookup (Expanding Window) ───
+# Single pass over the matches in positional order. For each match the
+# features are read from the two ledgers FIRST and the match is appended to
+# the ledgers AFTERWARDS, so a match never contributes to its own features.
+print("\nBuilding conditional frequency features (expanding window)...")
+
+# We'll compute features for each match using only past data
+MIN_MATCHES = 5  # minimum historical matches to generate a signal
+
+# Pre-allocate feature arrays
+# The column order of feat_names defines the numeric indices used below
+# (0-5 home rates/count, 6-11 away rates/count, 12-15 combined rates,
+# 16-18 goals scored, 19-20 goals conceded).
+feat_names = [
+    'home_ou15_rate_at_band', 'home_ou25_rate_at_band', 'home_ou35_rate_at_band',
+    'home_btts_rate_at_band', 'home_win_rate_at_band', 'home_n_at_band',
+    'away_ou15_rate_at_band', 'away_ou25_rate_at_band', 'away_ou35_rate_at_band',
+    'away_btts_rate_at_band', 'away_win_rate_at_band', 'away_n_at_band',
+    'combined_ou15', 'combined_ou25', 'combined_ou35', 'combined_btts',
+    'home_goals_at_band', 'away_goals_at_band', 'combined_goals_at_band',
+    'home_conceded_at_band', 'away_conceded_at_band',
+]
+features = np.full((N, len(feat_names)), np.nan)
+
+# Historical ledger: team_id → list of
+# (raw odds, ou15, ou25, ou35, btts, won flag, goals_scored, goals_conceded).
+# The raw price is stored (not the band) and filtered against the band at lookup time.
+home_history = defaultdict(list)  # team performances when playing HOME
+away_history = defaultdict(list)  # team performances when playing AWAY
+
+for i in range(N):
+    row = df.iloc[i]
+    ht_id = row.home_team_id
+    at_id = row.away_team_id
+    h_odds = row.odds_ms_h
+    a_odds = row.odds_ms_a
+
+    # Guard only: rows without home/away odds were already removed by the dropna above
+    if pd.isna(h_odds) or pd.isna(a_odds):
+        continue
+
+    h_band = get_odds_band_wide(h_odds)
+    a_band = get_odds_band_wide(a_odds)
+
+    # ── Look up HOME team's historical performance at this odds band ──
+    # Half-open match: lower edge inclusive, upper edge exclusive
+    h_hist = [x for x in home_history[ht_id] if h_band[0] <= x[0] < h_band[1]]
+    if len(h_hist) >= MIN_MATCHES:
+        features[i, 0] = np.mean([x[1] for x in h_hist])  # ou15 rate
+        features[i, 1] = np.mean([x[2] for x in h_hist])  # ou25 rate
+        features[i, 2] = np.mean([x[3] for x in h_hist])  # ou35 rate
+        features[i, 3] = np.mean([x[4] for x in h_hist])  # btts rate
+        features[i, 4] = np.mean([x[5] for x in h_hist])  # win rate (home win = 1 if ms==0)
+        features[i, 5] = len(h_hist)
+        features[i, 16] = np.mean([x[6] for x in h_hist])  # avg goals scored
+        features[i, 19] = np.mean([x[7] for x in h_hist])  # avg goals conceded
+
+    # ── Look up AWAY team's historical performance at this odds band ──
+    a_hist = [x for x in away_history[at_id] if a_band[0] <= x[0] < a_band[1]]
+    if len(a_hist) >= MIN_MATCHES:
+        features[i, 6] = np.mean([x[1] for x in a_hist])   # ou15 rate
+        features[i, 7] = np.mean([x[2] for x in a_hist])   # ou25 rate
+        features[i, 8] = np.mean([x[3] for x in a_hist])   # ou35 rate
+        features[i, 9] = np.mean([x[4] for x in a_hist])   # btts rate
+        features[i, 10] = np.mean([x[5] for x in a_hist])  # away win rate
+        features[i, 11] = len(a_hist)
+        features[i, 17] = np.mean([x[6] for x in a_hist])  # avg goals scored (away)
+        features[i, 20] = np.mean([x[7] for x in a_hist])  # avg goals conceded (away)
+
+    # ── Combined signals ──
+    # Only set when BOTH sides have enough history; otherwise they stay NaN
+    if not np.isnan(features[i, 0]) and not np.isnan(features[i, 6]):
+        features[i, 12] = (features[i, 0] + features[i, 6]) / 2   # combined ou15
+        features[i, 13] = (features[i, 1] + features[i, 7]) / 2   # combined ou25
+        features[i, 14] = (features[i, 2] + features[i, 8]) / 2   # combined ou35
+        features[i, 15] = (features[i, 3] + features[i, 9]) / 2   # combined btts
+        features[i, 18] = features[i, 16] + features[i, 17]       # combined goals
+
+    # ── Add THIS match to history (for future lookups) ──
+    ou15 = int(row.total_goals > 1.5)
+    ou25 = int(row.total_goals > 2.5)
+    ou35 = int(row.total_goals > 3.5)
+    btts = int(row.score_home > 0 and row.score_away > 0)
+    h_won = int(row.label_ms == 0)
+    a_won = int(row.label_ms == 2)
+
+    # Scored/conceded are stored from each team's own point of view
+    home_history[ht_id].append((h_odds, ou15, ou25, ou35, btts, h_won,
+                                 row.score_home, row.score_away))
+    away_history[at_id].append((a_odds, ou15, ou25, ou35, btts, a_won,
+                                 row.score_away, row.score_home))
+
+    if (i+1) % 20000 == 0:
+        valid = np.sum(~np.isnan(features[:i+1, 12]))
+        print(f"  Processed {i+1}/{N} matches, {valid} with combined signals")
+
+# Count valid features
+valid_mask = ~np.isnan(features[:, 12])
+print(f"\nMatches with combined conditional signals: {valid_mask.sum()} / {N}")
+
+# ─── BACKTEST: Walk-Forward ───
+print("\n" + "="*70)
+print("  CONDITIONAL FREQUENCY BACKTEST")
+print("="*70)
+
+# Only test on last 20% of data (to avoid early sparse data)
+test_start = int(N * 0.7)
+test_idx = range(test_start, N)
+test_valid = [i for i in test_idx if valid_mask[i]]
+print(f"Test window: matches {test_start}-{N} ({len(test_valid)} with signals)")
+
+# Strategy: bet on OU1.5 over when combined_ou15 > threshold
+markets = [
+    ('OU 1.5 Over', 'combined_ou15', 12, 'ou15_actual', 'odds_ou15_o'),
+    ('OU 2.5 Over', 'combined_ou25', 13, 'ou25_actual', 'odds_ou25_o'),
+    ('OU 3.5 Over', 'combined_ou35', 14, 'ou35_actual', 'odds_ou35_o'),
+    ('BTTS Yes',    'combined_btts', 15, 'btts_actual', 'odds_btts_y'),
+]
+
+for market_name, feat_key, feat_idx, label_col, odds_col in markets:
+    print(f"\n  ── {market_name} ──")
+
+    if odds_col not in df.columns:
+        print(f"    No odds column '{odds_col}', skipping")
+        continue
+
+    for threshold in [0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90]:
+        bets = 0
+        wins = 0
+        pnl = 0.0
+
+        for i in test_valid:
+            signal = features[i, feat_idx]
+            if np.isnan(signal) or signal < threshold:
+                continue
+            odds_val = df.iloc[i][odds_col]
+            if pd.isna(odds_val) or odds_val < 1.05:
+                continue
+            actual = df.iloc[i][label_col]
+            if pd.isna(actual):
+                continue
+
+            bets += 1
+            if actual == 1:
+                wins += 1
+                pnl += odds_val - 1
+            else:
+                pnl -= 1
+
+        if bets >= 20:
+            roi = pnl / bets * 100
+            hit = wins / bets * 100
+            ev = (wins/bets) * (pnl/wins + 1) if wins > 0 else 0
+            marker = " *** PROFITABLE ***" if roi > 0 else ""
+            print(f"    Threshold>{threshold:.2f}: {bets:5d} bets, "
+                  f"hit={hit:.1f}%, ROI={roi:+.1f}%{marker}")
+
+# Also test MS (1X2) market
+print(f"\n  ── Maç Sonucu (1X2) ──")
+# Home win when home_win_rate_at_band > X AND away team loses often at that band
+for threshold in [0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80]:
+    bets = wins = 0
+    pnl = 0.0
+    for i in test_valid:
+        h_wr = features[i, 4]   # home win rate at band
+        a_lr = 1 - features[i, 10] if not np.isnan(features[i, 10]) else np.nan  # away loss rate
+        if np.isnan(h_wr) or np.isnan(a_lr):
+            continue
+        combined = (h_wr + a_lr) / 2
+        if combined < threshold:
+            continue
+        odds_val = df.iloc[i].odds_ms_h
+        if pd.isna(odds_val) or odds_val < 1.10 or odds_val > 5.0:
+            continue
+        bets += 1
+        if df.iloc[i].label_ms == 0:
+            wins += 1
+            pnl += odds_val - 1
+        else:
+            pnl -= 1
+    if bets >= 20:
+        roi = pnl / bets * 100
+        hit = wins / bets * 100
+        marker = " *** PROFITABLE ***" if roi > 0 else ""
+        print(f"    Home win comb>{threshold:.2f}: {bets:5d} bets, "
+              f"hit={hit:.1f}%, ROI={roi:+.1f}%{marker}")
+
+# ─── DEEP DIVE: Best performing niches ───
+print("\n" + "="*70)
+print("  DEEP DIVE: Combined OU15 + Odds Value Filter")
+print("="*70)
+
+# The user's strategy: high confidence + the odds must pay enough
+for threshold in [0.75, 0.80, 0.85, 0.90]:
+    for min_odds in [1.10, 1.20, 1.30, 1.40]:
+        bets = wins = 0
+        pnl = 0.0
+        for i in test_valid:
+            signal = features[i, 12]  # combined ou15
+            if np.isnan(signal) or signal < threshold:
+                continue
+            odds_val = df.iloc[i].get('odds_ou15_o', np.nan) if 'odds_ou15_o' in df.columns else np.nan
+            if pd.isna(odds_val) or odds_val < min_odds:
+                continue
+            actual = df.iloc[i].ou15_actual
+
+            bets += 1
+            if actual == 1:
+                wins += 1
+                pnl += odds_val - 1
+            else:
+                pnl -= 1
+
+        if bets >= 30:
+            roi = pnl / bets * 100
+            hit = wins / bets * 100
+            if roi > -5:  # show near-profitable too
+                marker = " *** PROFITABLE ***" if roi > 0 else ""
+                print(f"  OU15 sig>{threshold:.2f} odds>{min_odds}: "
+                      f"{bets:5d} bets, hit={hit:.1f}%, ROI={roi:+.1f}%{marker}")
+
+# ─── Additional: Goal expectation accuracy ───
+print("\n" + "="*70)
+print("  GOAL PREDICTION ACCURACY")
+print("="*70)
+valid_goals = [i for i in test_valid if not np.isnan(features[i, 18])]
+if valid_goals:
+    pred_goals = [features[i, 18] for i in valid_goals]
+    actual_goals = [df.iloc[i].total_goals for i in valid_goals]
+    from sklearn.metrics import mean_absolute_error
+    mae = mean_absolute_error(actual_goals, pred_goals)
+    corr = np.corrcoef(pred_goals, actual_goals)[0, 1]
+    print(f"  Combined goal prediction MAE: {mae:.3f}")
+    print(f"  Correlation: {corr:.4f}")
+    print(f"  Avg predicted: {np.mean(pred_goals):.2f}, Avg actual: {np.mean(actual_goals):.2f}")
+
+    # Bucket analysis
+    print("\n  Goal prediction buckets:")
+    for low, high in [(0, 1.5), (1.5, 2.0), (2.0, 2.5), (2.5, 3.0), (3.0, 3.5), (3.5, 5.0)]:
+        bucket = [i for i, pg in zip(valid_goals, pred_goals) if low <= pg < high]
+        if len(bucket) >= 20:
+            avg_actual = np.mean([df.iloc[i].total_goals for i in bucket])
+            ou25_rate = np.mean([df.iloc[i].ou25_actual for i in bucket])
+            print(f"    Predicted {low:.1f}-{high:.1f}: n={len(bucket)}, "
+                  f"actual_avg={avg_actual:.2f}, OU25%={ou25_rate*100:.1f}%")
+
+print("\nDone!")
@@ -1071,13 +1071,13 @@ class FeatureExtractor:
        
        for mst, poss, sot, total_shots, corners, team_goals in rows:
            if poss and poss > 0:
-                poss_sum += poss
+                poss_sum += float(poss)
                poss_count += 1
-            sot_sum += sot or 0
-            shots_sum += total_shots or 0
-            corners_sum += corners or 0
+            sot_sum += float(sot or 0)
+            shots_sum += float(total_shots or 0)
+            corners_sum += float(corners or 0)
            
-            goals_scored += team_goals or 0
+            goals_scored += float(team_goals or 0)
        
        return {
            "possession": (poss_sum / poss_count / 100) if poss_count > 0 else 0.50,
@@ -0,0 +1,305 @@
+"""
+V27 Training Data Extraction - Value Sniper
+Extends V25 to ALL matches with odds (~104K).
+Adds rolling window, league quality, time, H2H, strength features.
+Usage: python3 scripts/extract_training_data_v27.py
+"""
+import os, sys, csv, time
+from collections import defaultdict
+
+AI_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, AI_DIR)
+
+from scripts.extract_training_data import (
+    BatchDataLoader as V25Loader,
+    FeatureExtractor as V25Extractor,
+    FEATURE_COLS as V25_COLS,
+    get_conn,
+)
+from features.rolling_features import (
+    calc_rolling_features, calc_league_quality,
+    calc_time_features, calc_advanced_h2h, calc_strength_diff,
+)
+
+OUTPUT = os.path.join(AI_DIR, "data", "training_data_v27.csv")
+os.makedirs(os.path.dirname(OUTPUT), exist_ok=True)
+
+V27_NEW = [
+    "home_rolling5_goals","home_rolling5_conceded",
+    "home_rolling10_goals","home_rolling10_conceded",
+    "home_rolling20_goals","home_rolling20_conceded",
+    "away_rolling5_goals","away_rolling5_conceded",
+    "away_rolling10_goals","away_rolling10_conceded",
+    "home_rolling5_cs","away_rolling5_cs",
+    "home_venue_goals","home_venue_conceded",
+    "away_venue_goals","away_venue_conceded",
+    "home_goal_trend","away_goal_trend",
+    "league_home_win_rate","league_draw_rate",
+    "league_btts_rate","league_ou25_rate",
+    "league_reliability_score",
+    "home_days_rest","away_days_rest",
+    "match_month","is_season_start","is_season_end",
+    "h2h_home_goals_avg","h2h_away_goals_avg",
+    "h2h_recent_trend","h2h_venue_advantage",
+    "attack_vs_defense_home","attack_vs_defense_away",
+    "xg_diff","form_momentum_interaction",
+    "elo_form_consistency","upset_x_elo_gap",
+]
+ALL_COLS = V25_COLS + V27_NEW
+
+
+class V27Loader(V25Loader):
+    """Load ALL matches with odds, not just top leagues."""
+    def __init__(self, conn):
+        super().__init__(conn, [])
+        self.league_matches_cache = {}
+
+    def _load_matches(self):
+        self.cur.execute("""
+            SELECT m.id, m.home_team_id, m.away_team_id,
+                   m.score_home, m.score_away,
+                   m.ht_score_home, m.ht_score_away,
+                   m.mst_utc, m.league_id,
+                   ht.name, at.name, l.name
+            FROM matches m
+            JOIN teams ht ON m.home_team_id = ht.id
+            JOIN teams at ON m.away_team_id = at.id
+            JOIN leagues l ON m.league_id = l.id
+            WHERE m.status='FT' AND m.score_home IS NOT NULL
+              AND m.sport='football'
+              AND EXISTS(SELECT 1 FROM odd_categories oc WHERE oc.match_id=m.id)
+            ORDER BY m.mst_utc ASC
+        """)
+        self.matches = self.cur.fetchall()
+
+    def _load_odds(self):
+        self.cur.execute("""
+            SELECT oc.match_id, oc.name, os.name, os.odd_value
+            FROM odd_selections os
+            JOIN odd_categories oc ON os.odd_category_db_id=oc.db_id
+            JOIN matches m ON oc.match_id=m.id
+            WHERE m.status='FT' AND m.sport='football'
+        """)
+        for mid, cat, sel, val in self.cur.fetchall():
+            try:
+                v = float(val) if val else 0
+                if v <= 0 or not cat or not sel: continue
+                if mid not in self.odds_cache: self.odds_cache[mid] = {}
+                c = cat.lower().strip()
+                s = sel.lower().strip()
+                o = self.odds_cache[mid]
+                if c == 'maç sonucu':
+                    if sel=='1': o['ms_h']=v
+                    elif sel in('0','X'): o['ms_d']=v
+                    elif sel=='2': o['ms_a']=v
+                elif c == '1. yarı sonucu':
+                    if sel=='1': o['ht_ms_h']=v
+                    elif sel in('0','X'): o['ht_ms_d']=v
+                    elif sel=='2': o['ht_ms_a']=v
+                elif c == 'karşılıklı gol':
+                    if 'var' in s: o['btts_y']=v
+                    elif 'yok' in s: o['btts_n']=v
+                elif c == '2,5 alt/üst':
+                    if 'alt' in s: o['ou25_u']=v
+                    elif 'üst' in s: o['ou25_o']=v
+                elif c == '1,5 alt/üst':
+                    if 'alt' in s: o['ou15_u']=v
+                    elif 'üst' in s: o['ou15_o']=v
+                elif c == '3,5 alt/üst':
+                    if 'alt' in s: o['ou35_u']=v
+                    elif 'üst' in s: o['ou35_o']=v
+                elif c == '0,5 alt/üst':
+                    if 'alt' in s: o['ou05_u']=v
+                    elif 'üst' in s: o['ou05_o']=v
+                elif c == '1. yarı 0,5 alt/üst':
+                    if 'alt' in s: o['ht_ou05_u']=v
+                    elif 'üst' in s: o['ht_ou05_o']=v
+                elif c == '1. yarı 1,5 alt/üst':
+                    if 'alt' in s: o['ht_ou15_u']=v
+                    elif 'üst' in s: o['ht_ou15_o']=v
+            except (ValueError, TypeError): pass
+
+    def _load_league_stats(self):
+        self.cur.execute("""
+            SELECT league_id,
+                   AVG(score_home+score_away), AVG(CASE WHEN score_home=0 AND score_away=0 THEN 1.0 ELSE 0.0 END),
+                   COUNT(*)
+            FROM matches WHERE status='FT' AND score_home IS NOT NULL AND sport='football'
+            GROUP BY league_id
+        """)
+        for lid, ag, zr, cnt in self.cur.fetchall():
+            self.league_stats_cache[lid] = {
+                "avg_goals": float(ag) if ag else 2.5,
+                "zero_rate": float(zr) if zr else 0.07,
+                "match_count": cnt
+            }
+
+    def _load_squad_data(self):
+        self.cur.execute("""
+            SELECT mpp.match_id, mpp.team_id,
+                   COUNT(*) FILTER(WHERE mpp.is_starting=true),
+                   COUNT(*),
+                   COUNT(*) FILTER(WHERE mpp.is_starting=true
+                       AND LOWER(COALESCE(mpp.position::TEXT,''))~'(forward|fwd|forvet|striker)')
+            FROM match_player_participation mpp
+            JOIN matches m ON mpp.match_id=m.id
+            WHERE m.status='FT' AND m.sport='football'
+            GROUP BY mpp.match_id, mpp.team_id
+        """)
+        part = {}
+        for mid,tid,st,tot,fwd in self.cur.fetchall():
+            part[(mid,tid)]={'starting_count':st or 0,'total_squad':tot or 0,'fwd_count':fwd or 0}
+
+        self.cur.execute("""
+            SELECT mpe.match_id, mpe.team_id,
+                   COUNT(*) FILTER(WHERE mpe.event_type='goal' AND COALESCE(mpe.event_subtype,'') NOT ILIKE '%%penaltı kaçırma%%'),
+                   COUNT(DISTINCT mpe.assist_player_id) FILTER(WHERE mpe.event_type='goal' AND mpe.assist_player_id IS NOT NULL),
+                   COUNT(DISTINCT mpe.player_id) FILTER(WHERE mpe.event_type='goal' AND COALESCE(mpe.event_subtype,'') NOT ILIKE '%%penaltı kaçırma%%')
+            FROM match_player_events mpe
+            JOIN matches m ON mpe.match_id=m.id
+            WHERE m.status='FT' AND m.sport='football'
+            GROUP BY mpe.match_id, mpe.team_id
+        """)
+        evts = {}
+        for mid,tid,g,a,sc in self.cur.fetchall():
+            evts[(mid,tid)]={'goals':g or 0,'assists':a or 0,'unique_scorers':sc or 0}
+
+        self.cur.execute("""
+            SELECT mpe.team_id, mpe.player_id, COUNT(*)
+            FROM match_player_events mpe JOIN matches m ON mpe.match_id=m.id
+            WHERE m.status='FT' AND m.sport='football' AND mpe.event_type='goal'
+              AND COALESCE(mpe.event_subtype,'') NOT ILIKE '%%penaltı kaçırma%%'
+            GROUP BY mpe.team_id, mpe.player_id HAVING COUNT(*)>=3
+        """)
+        kp_by_team = defaultdict(set)
+        for tid,pid,_ in self.cur.fetchall(): kp_by_team[tid].add(pid)
+
+        self.cur.execute("""
+            SELECT mpp.match_id, mpp.team_id, mpp.player_id
+            FROM match_player_participation mpp JOIN matches m ON mpp.match_id=m.id
+            WHERE mpp.is_starting=true AND m.status='FT' AND m.sport='football'
+        """)
+        starters = defaultdict(list)
+        for mid,tid,pid in self.cur.fetchall(): starters[(mid,tid)].append(pid)
+
+        for key in set(part)|set(evts):
+            mid,tid = key
+            p = part.get(key,{'starting_count':0,'total_squad':0,'fwd_count':0})
+            e = evts.get(key,{'goals':0,'assists':0,'unique_scorers':0})
+            s = starters.get(key,[])
+            kp_in = sum(1 for x in s if x in kp_by_team.get(tid,set()))
+            kp_tot = len(kp_by_team.get(tid,set()))
+            kp_miss = max(0, kp_tot - kp_in)
+            sq = p['starting_count']*0.3 + e['goals']*2.0 + e['assists']*1.0 + kp_in*3.0 + p['fwd_count']*1.5
+            mi = min(kp_miss/max(kp_tot,1), 1.0)
+            self.squad_cache[key] = {'squad_quality':sq,'key_players':kp_in,'missing_impact':mi,'goals_form':e['goals']}
+
+    def _load_cards_data(self):
+        self.cur.execute("""
+            SELECT mpe.match_id,
+                   SUM(CASE WHEN mpe.event_type::text LIKE '%%yellow_card%%' THEN 1
+                            WHEN mpe.event_type::text LIKE '%%red_card%%' THEN 2 ELSE 1 END)
+            FROM match_player_events mpe JOIN matches m ON mpe.match_id=m.id
+            WHERE m.status='FT' AND m.sport='football' AND mpe.event_type::text LIKE '%%card%%'
+            GROUP BY mpe.match_id
+        """)
+        for mid, cw in self.cur.fetchall():
+            self.cards_cache[mid] = float(cw) if cw else 0.0
+
+    def load_league_matches(self):
+        for m in self.matches:
+            lid = m[8]
+            if lid not in self.league_matches_cache:
+                self.league_matches_cache[lid] = []
+            self.league_matches_cache[lid].append((m[7],None,m[3],m[4],None))
+
+
+class V27Extractor(V25Extractor):
+    """Adds V27 features on top of V25."""
+    def _extract_one(self, mid, hid, aid, sh, sa, hth, hta, mst, lid,
+                     hn, an, ln):
+        row = super()._extract_one(mid,hid,aid,sh,sa,hth,hta,mst,lid,hn,an,ln)
+        if not row: return None
+
+        hm = self.loader.team_matches.get(hid,[])
+        am = self.loader.team_matches.get(aid,[])
+
+        hr = calc_rolling_features(hm, mst, True)
+        ar = calc_rolling_features(am, mst, False)
+        for pfx,r in [("home",hr),("away",ar)]:
+            row[f"{pfx}_rolling5_goals"]=r["rolling5_goals_avg"]
+            row[f"{pfx}_rolling5_conceded"]=r["rolling5_conceded_avg"]
+            row[f"{pfx}_rolling10_goals"]=r["rolling10_goals_avg"]
+            row[f"{pfx}_rolling10_conceded"]=r["rolling10_conceded_avg"]
+            row[f"{pfx}_rolling20_goals"]=r["rolling20_goals_avg"]
+            row[f"{pfx}_rolling20_conceded"]=r["rolling20_conceded_avg"]
+            row[f"{pfx}_rolling5_cs"]=r["rolling5_clean_sheets"]
+            row[f"{pfx}_venue_goals"]=r["venue_goals_avg"]
+            row[f"{pfx}_venue_conceded"]=r["venue_conceded_avg"]
+            row[f"{pfx}_goal_trend"]=r["goal_trend"]
+
+        lb = [x for x in self.loader.league_matches_cache.get(lid,[]) if x[0]<mst]
+        lq = calc_league_quality(lb)
+        for k,v in lq.items(): row[k]=v
+
+        ht = calc_time_features(hm, mst)
+        at = calc_time_features(am, mst)
+        row["home_days_rest"]=ht["days_rest"]
+        row["away_days_rest"]=at["days_rest"]
+        row["match_month"]=ht["match_month"]
+        row["is_season_start"]=ht["is_season_start"]
+        row["is_season_end"]=ht["is_season_end"]
+
+        h2h = calc_advanced_h2h(hm, hid, aid, mst)
+        for k,v in h2h.items(): row[k]=v
+
+        sd = calc_strength_diff(
+            {"goals_avg":row.get("home_goals_avg",1.3),"conceded_avg":row.get("home_conceded_avg",1.2),"scoring_rate":row.get("home_scoring_rate",0.75)},
+            {"goals_avg":row.get("away_goals_avg",1.3),"conceded_avg":row.get("away_conceded_avg",1.2),"scoring_rate":row.get("away_scoring_rate",0.75)},
+            self.elo_ratings[hid], self.elo_ratings[aid],
+            row.get("home_momentum_score",0.5), row.get("away_momentum_score",0.5),
+            row.get("upset_potential",0.0),
+        )
+        row.update(sd)
+        return row
+
+
+def main():
+    print("🚀 V27 Value Sniper — Training Data Extraction")
+    print("="*60)
+    t0 = time.time()
+    conn = get_conn()
+
+    print("\n📦 Loading ALL odds-bearing matches...")
+    loader = V27Loader(conn)
+    loader.load_all()
+    loader.load_league_matches()
+    print(f"  Matches: {len(loader.matches)}")
+    print(f"  Leagues: {len(loader.league_stats_cache)}")
+    print(f"  Odds: {len(loader.odds_cache)}")
+
+    ext = V27Extractor(conn, loader)
+    rows = ext.extract_all()
+    if not rows:
+        print("❌ No data!"); return
+
+    print(f"\n💾 Writing {len(rows)} rows...")
+    with open(OUTPUT,"w",newline="",encoding="utf-8") as f:
+        w = csv.DictWriter(f, fieldnames=ALL_COLS, extrasaction='ignore')
+        w.writeheader(); w.writerows(rows)
+
+    n = len(rows)
+    wo = sum(1 for r in rows if r.get("odds_ms_h",0)>0)
+    md = defaultdict(int)
+    for r in rows: md[r["label_ms"]]+=1
+    print(f"\n📊 Summary:")
+    print(f"   Rows: {n}")
+    print(f"   With odds: {wo} ({wo/n*100:.1f}%)")
+    print(f"   Features: {len(ALL_COLS)} ({len(V25_COLS)} V25 + {len(V27_NEW)} new)")
+    print(f"   MS: H={md[0]/n*100:.1f}% D={md[1]/n*100:.1f}% A={md[2]/n*100:.1f}%")
+    print(f"   Time: {(time.time()-t0)/60:.1f}min")
+    print(f"\n✅ Done! → {OUTPUT}")
+    conn.close()
+
+if __name__=="__main__":
+    main()
@@ -0,0 +1,317 @@
+"""
+Strategy Generator — automates your Excel logic on top of the database.
+
+Logic:
+1. When home team X plays at home inside odds band Y → its OU1.5/OU2.5/BTTS rates
+2. When away team Z plays away inside odds band W → its OU1.5/OU2.5/BTTS rates
+3. If both are high → GENERATE A STRATEGY
+
+Output: for every match, which bet is playable, why, and its historical hit rate
+"""
+import psycopg2
+import pandas as pd
+import numpy as np
+from collections import defaultdict
+from datetime import datetime
+
+# DB connection. Every setting can be overridden with the standard PG*
+# environment variables; the literals are only the legacy defaults.
+# NOTE(review): a real-looking password is committed below — rotate it and
+# remove the fallback once PGPASSWORD is provisioned everywhere.
+import os
+
+conn = psycopg2.connect(
+    host=os.environ.get("PGHOST", "localhost"),
+    port=int(os.environ.get("PGPORT", "15432")),
+    dbname=os.environ.get("PGDATABASE", "boilerplate_db"),
+    user=os.environ.get("PGUSER", "suggestbet"),
+    password=os.environ.get("PGPASSWORD", "SuGGesT2026SecuRe"),
+)
+
+print("=" * 70)
+print("  STRATEGY GENERATOR — Veritabanından Strateji Üretimi")
+print("=" * 70)
+
+# 1. Fetch every finished match (status 'FT', home score present) together
+#    with team and league names, oldest first. Odds are loaded separately below.
+query = """
+SELECT 
+    m.id as match_id,
+    m.home_team_id,
+    m.away_team_id,
+    m.league_id,
+    m.score_home,
+    m.score_away,
+    m.mst_utc,
+    ht.name as home_team,
+    at.name as away_team,
+    l.name as league_name
+FROM matches m
+JOIN teams ht ON m.home_team_id = ht.id
+JOIN teams at ON m.away_team_id = at.id
+JOIN leagues l ON m.league_id = l.id
+WHERE m.status = 'FT' 
+AND m.score_home IS NOT NULL
+ORDER BY m.mst_utc ASC
+"""
+df = pd.read_sql(query, conn)
+print(f"\nToplam biten maç: {len(df):,}")
+
+# 2. Fetch all odds rows for the five market names listed in the IN clause
+#    (used later as match result, over/under 1.5 / 2.5 / 3.5 and BTTS).
+odds_query = """
+SELECT 
+    oc.match_id,
+    oc.name as market,
+    os.name as selection,
+    CAST(os.odd_value AS DECIMAL) as odds
+FROM odd_categories oc
+JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
+WHERE oc.name IN (
+    'Maç Sonucu', 
+    '2,5 Alt/Üst', 
+    '1,5 Alt/Üst',
+    '3,5 Alt/Üst',
+    'Karşılıklı Gol'
+)
+"""
+odds_df = pd.read_sql(odds_query, conn)
+print(f"Toplam oran kaydı: {len(odds_df):,}")
+
+# Direct (slow) lookup: scans odds_df for one match / market / selection.
+def get_odds(match_id, market, selection):
+    """Return the first matching odds value as a float, or None if absent."""
+    same_match = odds_df.match_id == match_id
+    same_market = odds_df.market == market
+    same_selection = odds_df.selection == selection
+    hits = odds_df.loc[same_match & same_market & same_selection, 'odds']
+    if len(hits) == 0:
+        return None
+    return float(hits.iloc[0])
+
+# Faster path: build a dict keyed by (match_id, market, selection) once.
+# Zipping the columns avoids DataFrame.iterrows(), which creates a Series per
+# row; for duplicate keys the last row still wins, as before.
+print("Oran lookup oluşturuluyor...")
+odds_lookup = {
+    (mid, market, sel): float(odd)
+    for mid, market, sel, odd in zip(
+        odds_df['match_id'], odds_df['market'], odds_df['selection'], odds_df['odds']
+    )
+}
+
+def get_o(mid, market, sel):
+    """Return the odds for (match id, market, selection), or None if unknown."""
+    return odds_lookup.get((mid, market, sel))
+
+# 3. Attach the odds to every match: (target column, market, selection).
+print("Maçlara oranlar ekleniyor...")
+_odds_column_specs = [
+    ('odds_ms_h', 'Maç Sonucu', '1'),
+    ('odds_ms_a', 'Maç Sonucu', '2'),
+    ('odds_ms_d', 'Maç Sonucu', '0'),
+    ('odds_ou25_o', '2,5 Alt/Üst', 'Üst'),
+    ('odds_ou25_u', '2,5 Alt/Üst', 'Alt'),
+    ('odds_ou15_o', '1,5 Alt/Üst', 'Üst'),
+    ('odds_ou15_u', '1,5 Alt/Üst', 'Alt'),
+    ('odds_ou35_o', '3,5 Alt/Üst', 'Üst'),
+    ('odds_ou35_u', '3,5 Alt/Üst', 'Alt'),
+    ('odds_btts_y', 'Karşılıklı Gol', 'Var'),
+    ('odds_btts_n', 'Karşılıklı Gol', 'Yok'),
+]
+for _column, _market, _selection in _odds_column_specs:
+    # Defaults bind the current market/selection to this lambda.
+    df[_column] = df.match_id.map(
+        lambda mid, _m=_market, _s=_selection: get_o(mid, _m, _s)
+    )
+
+# Derive the outcome flags from the final score
+df['total_goals'] = df.score_home + df.score_away
+for _flag, _line in (('ou15', 1), ('ou25', 2), ('ou35', 3)):
+    df[_flag] = (df.total_goals > _line).astype(int)
+_both_scored = (df.score_home > 0) & (df.score_away > 0)
+df['btts'] = _both_scored.astype(int)
+
+print(f"Oranı olan maç sayısı: {df.odds_ms_h.notna().sum():,}")
+
+# 4. ODDS BAND helper
+def odds_band(odds):
+    """Map decimal odds to a band label; return None for missing odds.
+
+    Each band includes its lower edge and excludes its upper edge; anything
+    at or above 6.00 falls into '6.00+'.
+    """
+    if pd.isna(odds):
+        return None
+    upper_edges = (
+        (1.30, '1.00-1.30'),
+        (1.50, '1.30-1.50'),
+        (1.80, '1.50-1.80'),
+        (2.20, '1.80-2.20'),
+        (2.80, '2.20-2.80'),
+        (4.00, '2.80-4.00'),
+        (6.00, '4.00-6.00'),
+    )
+    for upper, label in upper_edges:
+        if odds < upper:
+            return label
+    return '6.00+'
+
+# 5. STRATEGY: expanding window — predict using only past data.
+# Every match updates the per-team history, but bets are only recorded for
+# rows at or after `test_start`. The history append sits at the end of the
+# loop body, after the bet decision for the same row.
+print("\n" + "=" * 70)
+print("  STRATEJİ BACKTEST — Expanding Window")
+print("=" * 70)
+
+# Home history: {team_id: {odds_band: [(ou15, ou25, ou35, btts, home_win), ...]}}
+# Away history has the same shape; its last field is "away team lost".
+home_history = defaultdict(lambda: defaultdict(list))
+away_history = defaultdict(lambda: defaultdict(list))
+
+MIN_MATCHES = 8  # minimum number of past matches required in a band
+TEST_PCT = 0.30  # the last 30% of rows form the test window
+N = len(df)
+test_start = int(N * (1 - TEST_PCT))
+
+# One list of bet records per market.
+# NOTE(review): 'btts_no' and 'ou15_under' are never appended to in this loop.
+results = {
+    'ou15_over': [], 'ou25_over': [], 'ou35_over': [],
+    'btts_yes': [], 'btts_no': [],
+    'ou25_under': [], 'ou15_under': [],
+    'ms_home': []
+}
+
+for i in range(N):
+    row = df.iloc[i]
+    h_odds = row.odds_ms_h
+    a_odds = row.odds_ms_a
+    
+    # Matches without both 1X2 odds are neither bet on nor added to history.
+    if pd.isna(h_odds) or pd.isna(a_odds):
+        continue
+    
+    h_band = odds_band(h_odds)
+    a_band = odds_band(a_odds)
+    
+    # TEST: only place bets inside the test window
+    if i >= test_start:
+        h_hist = home_history[row.home_team_id][h_band]
+        a_hist = away_history[row.away_team_id][a_band]
+        
+        if len(h_hist) >= MIN_MATCHES and len(a_hist) >= MIN_MATCHES:
+            # How has the home team done in this odds band?
+            h_ou15 = np.mean([x[0] for x in h_hist])
+            h_ou25 = np.mean([x[1] for x in h_hist])
+            h_ou35 = np.mean([x[2] for x in h_hist])
+            h_btts = np.mean([x[3] for x in h_hist])
+            h_win  = np.mean([x[4] for x in h_hist])
+            
+            # How has the away team done in this odds band?
+            a_ou15 = np.mean([x[0] for x in a_hist])
+            a_ou25 = np.mean([x[1] for x in a_hist])
+            a_ou35 = np.mean([x[2] for x in a_hist])
+            a_btts = np.mean([x[3] for x in a_hist])
+            a_loss = np.mean([x[4] for x in a_hist])  # away-team loss rate
+            
+            # COMBINED SIGNAL: plain average of the home and away rates
+            sig_ou15 = (h_ou15 + a_ou15) / 2
+            sig_ou25 = (h_ou25 + a_ou25) / 2
+            sig_ou35 = (h_ou35 + a_ou35) / 2
+            sig_btts = (h_btts + a_btts) / 2
+            sig_hw = (h_win + a_loss) / 2  # home win + away loss
+            
+            # Fields shared by every bet record of this match.
+            base = {
+                'match': f"{row.home_team} vs {row.away_team}",
+                'league': row.league_name,
+                'home_team': row.home_team,
+                'away_team': row.away_team,
+                'h_band': h_band,
+                'a_band': a_band,
+                'h_n': len(h_hist),
+                'a_n': len(a_hist),
+            }
+            
+            # OU 1.5 OVER
+            if sig_ou15 >= 0.85 and row.odds_ou15_o and row.odds_ou15_o > 1.01:
+                results['ou15_over'].append({
+                    **base, 'signal': sig_ou15, 'odds': row.odds_ou15_o,
+                    'won': row.ou15 == 1, 'actual_goals': row.total_goals,
+                    'h_sig': h_ou15, 'a_sig': a_ou15
+                })
+            
+            # OU 2.5 OVER
+            if sig_ou25 >= 0.70 and row.odds_ou25_o and row.odds_ou25_o > 1.10:
+                results['ou25_over'].append({
+                    **base, 'signal': sig_ou25, 'odds': row.odds_ou25_o,
+                    'won': row.ou25 == 1, 'actual_goals': row.total_goals,
+                    'h_sig': h_ou25, 'a_sig': a_ou25
+                })
+            
+            # OU 3.5 OVER
+            if sig_ou35 >= 0.60 and row.odds_ou35_o and row.odds_ou35_o > 1.20:
+                results['ou35_over'].append({
+                    **base, 'signal': sig_ou35, 'odds': row.odds_ou35_o,
+                    'won': row.ou35 == 1, 'actual_goals': row.total_goals,
+                    'h_sig': h_ou35, 'a_sig': a_ou35
+                })
+            
+            # BTTS YES
+            if sig_btts >= 0.70 and row.odds_btts_y and row.odds_btts_y > 1.10:
+                results['btts_yes'].append({
+                    **base, 'signal': sig_btts, 'odds': row.odds_btts_y,
+                    'won': row.btts == 1, 'actual_goals': row.total_goals,
+                    'h_sig': h_btts, 'a_sig': a_btts
+                })
+            
+            # OU 2.5 UNDER (low goal expectation); signals are stored inverted
+            if sig_ou25 <= 0.30 and row.odds_ou25_u and row.odds_ou25_u > 1.10:
+                results['ou25_under'].append({
+                    **base, 'signal': 1-sig_ou25, 'odds': row.odds_ou25_u,
+                    'won': row.ou25 == 0, 'actual_goals': row.total_goals,
+                    'h_sig': 1-h_ou25, 'a_sig': 1-a_ou25
+                })
+            
+            # MS HOME WIN (home team wins)
+            if sig_hw >= 0.75 and row.odds_ms_h and 1.10 < row.odds_ms_h < 3.50:
+                results['ms_home'].append({
+                    **base, 'signal': sig_hw, 'odds': row.odds_ms_h,
+                    'won': row.score_home > row.score_away,
+                    'actual_goals': row.total_goals,
+                    'h_sig': h_win, 'a_sig': a_loss
+                })
+    
+    # Update history (always, train and test rows alike)
+    home_history[row.home_team_id][h_band].append((
+        row.ou15, row.ou25, row.ou35, row.btts,
+        int(row.score_home > row.score_away)
+    ))
+    away_history[row.away_team_id][a_band].append((
+        row.ou15, row.ou25, row.ou35, row.btts,
+        int(row.score_away < row.score_home)  # away team lost
+    ))
+
+# 6. PRINT THE RESULTS
+def _summarize_bets(frame):
+    """Summarise a bets frame under flat 1-unit stakes.
+
+    Returns ``(total, wins, hit_pct, pnl, roi_pct, avg_odds)``. ``won`` is
+    cast to bool first so that ``~`` stays a logical NOT even if the column
+    was inferred as object dtype.
+    """
+    won = frame.won.astype(bool)
+    total = len(frame)
+    wins = won.sum()
+    # A win pays (odds - 1) units, a loss costs exactly one unit.
+    pnl = (won * (frame.odds - 1) - (~won) * 1).sum()
+    return total, wins, wins / total * 100, pnl, pnl / total * 100, frame.odds.mean()
+
+
+print(f"\nTest bölümü: son {TEST_PCT*100:.0f}% ({N - test_start:,} maç)")
+print(f"Minimum geçmiş: {MIN_MATCHES} maç\n")
+
+for market_name, bets in results.items():
+    if not bets:
+        print(f"\n  {market_name}: sinyal yok")
+        continue
+    
+    bdf = pd.DataFrame(bets)
+    total, wins, hit, pnl, roi, avg_odds = _summarize_bets(bdf)
+    
+    print(f"\n{'='*60}")
+    print(f"  {market_name.upper()}")
+    print(f"{'='*60}")
+    print(f"  Toplam bahis: {total}")
+    print(f"  Kazanan: {wins} ({hit:.1f}%)")
+    print(f"  Ortalama odds: {avg_odds:.2f}")
+    print(f"  PnL: {pnl:+.1f} birim")
+    print(f"  ROI: {roi:+.1f}%")
+    
+    # Performance at increasingly strict signal thresholds
+    print(f"\n  Sinyal eşik analizi:")
+    for threshold in [0.70, 0.75, 0.80, 0.85, 0.90, 0.95]:
+        sub = bdf[bdf.signal >= threshold]
+        if len(sub) < 5: continue
+        n_sub, _, sub_hit, _, r, _ = _summarize_bets(sub)
+        # Profit marker, or "break-even" marker when the loss is under 3%.
+        star = ' ✅ PROFIT' if r > 0 else (' ⚖️  BE' if r > -3 else '')
+        print(f"    ≥{threshold:.2f}: {n_sub:5d} bahis, hit={sub_hit:.1f}%, ROI={r:+.1f}%{star}")
+    
+    # Up to 5 winning examples with the strongest signal
+    if wins > 0:
+        best = bdf[bdf.won.astype(bool)].nlargest(min(5, int(wins)), 'signal')
+        print(f"\n  Örnek kazanan bahisler:")
+        for _, b in best.iterrows():
+            print(f"    {b.home_team} vs {b.away_team} ({b.league})")
+            print(f"      Ev {b.h_band} ({b.h_sig:.0%}) + Dep {b.a_band} ({b.a_sig:.0%}) → sinyal={b.signal:.0%}, odds={b.odds:.2f}, gol={b.actual_goals:.0f}")
+
+# 7. SUMMARY TABLE
+print("\n\n" + "=" * 70)
+print("  ÖZET TABLO")
+print("=" * 70)
+print(f"{'Market':<15} {'Bahis':>6} {'Hit':>7} {'ROI':>8} {'Avg Odds':>9}")
+print("-" * 50)
+for market_name, bets in results.items():
+    if not bets: continue
+    total, wins, hit, pnl, roi, avg_odds = _summarize_bets(pd.DataFrame(bets))
+    print(f"{market_name:<15} {total:>6} {hit:>6.1f}% {roi:>+7.1f}% {avg_odds:>8.2f}")
+
+conn.close()
+print("\n✅ Tamamlandı!")
@@ -0,0 +1,480 @@
+"""
+V27 Value Sniper — PRO Training Script
+========================================
+KEY INSIGHT: Train model WITHOUT odds to get independent probability.
+Then compare with market odds to find genuine value edges.
+
+Strategy:
+  Stage A: "Fundamentals Model" — odds-free, learns from ELO/form/rolling/H2H
+  Stage B: "Value Model" — uses fundamentals + odds disagreement as features
+  Stage C: Multi-market — 1X2, O/U 2.5, BTTS
+  Stage D: Walk-forward backtest with Kelly sizing
+"""
+import os, sys, json, pickle, time, warnings
+import numpy as np
+import pandas as pd
+from pathlib import Path
+from sklearn.metrics import accuracy_score, log_loss
+from sklearn.isotonic import IsotonicRegression
+
+warnings.filterwarnings("ignore")
+
+# Paths are resolved relative to the directory two levels above this file.
+AI_DIR = Path(__file__).resolve().parent.parent
+DATA_CSV = AI_DIR / "data" / "training_data_v27.csv"
+MODELS_DIR = AI_DIR / "models" / "v27"
+# Import-time side effect: make sure the output directory exists.
+MODELS_DIR.mkdir(parents=True, exist_ok=True)
+
+# ── Leakage & category definitions ──
+# Columns that get_clean_features() always excludes from the feature set.
+# NOTE(review): presumably post-match or unreliable fields — confirm why the
+# squad/referee columns are treated as leakage.
+LEAKAGE_COLS = [
+    "total_goals", "goal_diff", "ht_total_goals", "ht_goal_diff",
+    "score_home", "score_away", "ht_score_home", "ht_score_away",
+    "home_goals_form", "away_goals_form",
+    "home_squad_quality", "away_squad_quality", "squad_diff",
+    "home_key_players", "away_key_players",
+    "home_missing_impact", "away_missing_impact",
+    "referee_home_bias", "referee_avg_goals", "referee_cards_total",
+    "referee_avg_yellow", "referee_avg_red", "referee_penalty_rate",
+    "referee_over25_rate", "referee_experience", "referee_matches",
+]
+# Kept for compatibility; label columns are discovered per DataFrame by
+# get_label_cols(), and nothing in this module fills this list.
+LABEL_COLS = []
+META_COLS = ["match_id", "league_name", "home_team", "away_team"]
+# Name prefixes that mark a column as odds-derived.
+ODDS_COLS_PATTERNS = ["odds_", "implied_"]
+
+
+def get_odds_cols(df):
+    """Return the columns of ``df`` whose name starts with an odds prefix."""
+    prefixes = tuple(ODDS_COLS_PATTERNS)
+    return [col for col in df.columns if col.startswith(prefixes)]
+
+
+def get_label_cols(df):
+    """Return, in column order, the names in ``df`` that start with ``label_``."""
+    selected = []
+    for name in df.columns:
+        if name.startswith("label_"):
+            selected.append(name)
+    return selected
+
+
+def get_clean_features(df):
+    """Pick the pure-fundamentals feature columns of ``df``.
+
+    Odds, label, leakage, meta and ``*_id`` columns are dropped; a remaining
+    column is kept only when more than 30% of its values parse as numbers.
+    """
+    blocked = set(LEAKAGE_COLS).union(
+        META_COLS, get_odds_cols(df), get_label_cols(df)
+    )
+    min_numeric = len(df) * 0.3
+    kept = []
+    for col in df.columns:
+        if col in blocked:
+            continue
+        # Identifier columns carry no signal (match_id is already in META_COLS).
+        if col != "match_id" and col.endswith("_id"):
+            continue
+        parsed = pd.to_numeric(df[col], errors="coerce")
+        if parsed.notna().sum() > min_numeric:
+            kept.append(col)
+    return kept
+
+
+def load_data():
+    """Load the training CSV and prepare the odds columns.
+
+    Rows without all three 1X2 odds, or with any of them at or below 1.01,
+    are dropped. Adds ``implied_h`` / ``implied_d`` / ``implied_a``: the
+    inverse odds normalised to sum to 1.
+
+    Returns:
+        The filtered DataFrame, in the row order of the CSV.
+    """
+    print(f"Loading {DATA_CSV}...")
+    df = pd.read_csv(DATA_CSV, low_memory=False)
+    print(f"  Raw: {len(df)} rows")
+
+    # Ensure odds exist for value comparison
+    for c in ["odds_ms_h","odds_ms_d","odds_ms_a"]:
+        df[c] = pd.to_numeric(df[c], errors="coerce")
+    df = df.dropna(subset=["odds_ms_h","odds_ms_d","odds_ms_a"])
+    df = df[(df.odds_ms_h>1.01)&(df.odds_ms_d>1.01)&(df.odds_ms_a>1.01)]
+
+    # OU25 odds: backtest_value() reads the short `_o`/`_u` names, so coerce
+    # those as well as the long spellings handled originally.
+    for c in ["odds_ou25_o","odds_ou25_u","odds_ou25_over","odds_ou25_under"]:
+        if c in df.columns:
+            df[c] = pd.to_numeric(df[c], errors="coerce")
+
+    # Implied probabilities: divide each inverse odd by their sum so the
+    # three values add up to 1.
+    overround = 1/df.odds_ms_h + 1/df.odds_ms_d + 1/df.odds_ms_a
+    df["implied_h"] = (1/df.odds_ms_h)/overround
+    df["implied_d"] = (1/df.odds_ms_d)/overround
+    df["implied_a"] = (1/df.odds_ms_a)/overround
+
+    print(f"  After filter: {len(df)} rows")
+    return df
+
+
+def temporal_split(df, val_ratio=0.15, test_ratio=0.10):
+    """Cut ``df`` by row order into (train, validation, test) copies.
+
+    The last ``test_ratio`` share of rows is the test set and the
+    ``val_ratio`` share before it the validation set. No sorting is done
+    here, so the caller's row order decides what counts as "later".
+    """
+    n_rows = len(df)
+    train_stop = int(n_rows * (1 - val_ratio - test_ratio))
+    val_stop = int(n_rows * (1 - test_ratio))
+    train_part = df.iloc[:train_stop].copy()
+    val_part = df.iloc[train_stop:val_stop].copy()
+    test_part = df.iloc[val_stop:].copy()
+    return train_part, val_part, test_part
+
+
+# ═══════════════════════════════════════════════════════════════════
+#  STAGE A: Fundamentals-Only Model (NO ODDS)
+# ═══════════════════════════════════════════════════════════════════
+def train_fundamentals_model(X_tr, y_tr, X_va, y_va, feat_cols, market="ms"):
+    """Train the odds-free ensemble members for one market.
+
+    Each of XGBoost, LightGBM and CatBoost is trained when importable, with
+    early stopping on the validation set. A missing library is reported and
+    skipped instead of being swallowed silently.
+
+    Args:
+        X_tr, y_tr: training features and labels.
+        X_va, y_va: validation features and labels (early stopping, accuracy).
+        feat_cols: feature names, passed to XGBoost's DMatrix.
+        market: "ms" trains a 3-class model; any other value a binary one.
+
+    Returns:
+        Dict of fitted models under the keys "xgb", "lgb", "cb"; it may be
+        empty when none of the libraries is installed.
+    """
+    models = {}
+    n_class = 3 if market == "ms" else 2
+
+    # XGBoost
+    try:
+        import xgboost as xgb
+        print(f"  [XGB] Training {market.upper()}...")
+        dtrain = xgb.DMatrix(X_tr, label=y_tr, feature_names=feat_cols)
+        dval = xgb.DMatrix(X_va, label=y_va, feature_names=feat_cols)
+        params = {
+            "objective": "multi:softprob" if n_class==3 else "binary:logistic",
+            "eval_metric": "mlogloss" if n_class==3 else "logloss",
+            "max_depth": 6, "learning_rate": 0.02, "subsample": 0.75,
+            "colsample_bytree": 0.75, "min_child_weight": 10,
+            "reg_alpha": 0.5, "reg_lambda": 2.0,
+            "verbosity": 0, "tree_method": "hist",
+        }
+        if n_class == 3:
+            params["num_class"] = 3
+        m = xgb.train(params, dtrain, num_boost_round=2000,
+                      evals=[(dval,"val")], early_stopping_rounds=80,
+                      verbose_eval=False)
+        p = m.predict(dval)
+        if n_class == 2:
+            # Binary objective yields P(class 1); expand to two columns.
+            p = np.column_stack([1-p, p])
+        acc = accuracy_score(y_va, p.argmax(1))
+        print(f"    acc={acc:.4f}")
+        models["xgb"] = m
+    except ImportError:
+        print("  [XGB] xgboost not installed — skipped")
+
+    # LightGBM
+    try:
+        import lightgbm as lgb
+        print(f"  [LGB] Training {market.upper()}...")
+        ds_tr = lgb.Dataset(X_tr, label=y_tr)
+        ds_va = lgb.Dataset(X_va, label=y_va, reference=ds_tr)
+        par = {
+            "objective": "multiclass" if n_class==3 else "binary",
+            "metric": "multi_logloss" if n_class==3 else "binary_logloss",
+            "num_leaves": 48, "learning_rate": 0.02,
+            "feature_fraction": 0.7, "bagging_fraction": 0.7,
+            "bagging_freq": 1, "min_child_samples": 30,
+            "lambda_l1": 0.5, "lambda_l2": 2.0, "verbose": -1,
+        }
+        if n_class == 3:
+            par["num_class"] = 3
+        m = lgb.train(par, ds_tr, 2000, valid_sets=[ds_va],
+                      callbacks=[lgb.early_stopping(80, verbose=False)])
+        p = m.predict(X_va)
+        if n_class == 2:
+            p = np.column_stack([1-p, p])
+        acc = accuracy_score(y_va, p.argmax(1))
+        print(f"    acc={acc:.4f}")
+        models["lgb"] = m
+    except ImportError:
+        print("  [LGB] lightgbm not installed — skipped")
+
+    # CatBoost
+    try:
+        from catboost import CatBoostClassifier
+        print(f"  [CB] Training {market.upper()}...")
+        m = CatBoostClassifier(
+            iterations=2000, learning_rate=0.02, depth=6,
+            l2_leaf_reg=5, loss_function="MultiClass" if n_class==3 else "Logloss",
+            early_stopping_rounds=80, verbose=0, task_type="CPU",
+            **({"classes_count": 3} if n_class==3 else {}),
+        )
+        m.fit(X_tr, y_tr, eval_set=(X_va, y_va))
+        p = m.predict_proba(X_va)
+        acc = accuracy_score(y_va, p.argmax(1))
+        print(f"    acc={acc:.4f}")
+        models["cb"] = m
+    except ImportError:
+        print("  [CB] catboost not installed — skipped")
+
+    return models
+
+
+def ensemble_predict(models, X, feat_cols, n_class=3):
+    """Average the class probabilities of every model in ``models``.
+
+    Args:
+        models: dict keyed by "xgb", "lgb" and/or "cb", as returned by
+            train_fundamentals_model().
+        X: feature matrix to score.
+        feat_cols: feature names, used only for the XGBoost DMatrix.
+        n_class: 2 expands a 1-D positive-class output into two columns.
+
+    Returns:
+        Array of the unweighted mean probabilities across the models.
+
+    Raises:
+        RuntimeError: if ``models`` is empty.
+        ValueError: if a key is not one of "xgb", "lgb", "cb". Previously
+            such a key reused the preceding model's output or crashed with
+            an unbound local.
+    """
+    if not models:
+        raise RuntimeError("No models!")
+    preds = []
+    for name, m in models.items():
+        if name == "xgb":
+            import xgboost as xgb
+            dm = xgb.DMatrix(X, feature_names=feat_cols)
+            p = m.predict(dm)
+            if n_class == 2 and p.ndim == 1:
+                p = np.column_stack([1-p, p])
+        elif name == "lgb":
+            p = m.predict(X)
+            if n_class == 2 and p.ndim == 1:
+                p = np.column_stack([1-p, p])
+        elif name == "cb":
+            p = m.predict_proba(X)
+        else:
+            raise ValueError(f"Unknown model key: {name!r}")
+        preds.append(np.array(p))
+    return np.mean(preds, axis=0)
+
+
+# ═══════════════════════════════════════════════════════════════════
+#  STAGE B: Walk-Forward Backtest with Kelly
+# ═══════════════════════════════════════════════════════════════════
+def kelly_fraction(model_prob, odds, fraction=0.25):
+    """Return the bankroll share to stake under fractional Kelly.
+
+    Full Kelly is ``(p*odds - 1) / (odds - 1)``; it is scaled by ``fraction``
+    and capped at 10% of the bankroll. Bets without a positive edge, or with
+    odds at or below 1, get 0.0.
+    """
+    if odds <= 1:
+        return 0.0
+    expected_gain = model_prob * odds - 1
+    if expected_gain <= 0:
+        return 0.0
+    full_kelly = expected_gain / (odds - 1)
+    scaled = fraction * full_kelly
+    return max(0, min(scaled, 0.10))  # cap at 10% bankroll
+
+
+def backtest_value(models, df_test, feat_cols, market="ms",
+                   min_edge=0.05, min_odds=1.40, max_odds=4.50,
+                   use_kelly=True):
+    """Simulate value betting on ``df_test`` with the given ensemble.
+
+    A bet is placed on a class when the model probability exceeds the
+    normalised implied probability by at least ``min_edge`` and the odds lie
+    within ``[min_odds, max_odds]``. Rows are processed in order from a
+    bankroll of 1000; rows without a label are skipped rather than booked
+    as losing bets.
+
+    Args:
+        models: ensemble dict accepted by ensemble_predict().
+        df_test: rows to bet on; needs the label and odds columns of ``market``.
+        feat_cols: feature columns fed to the models.
+        market: "ms" (1X2) or "ou25"; anything else returns ``{}``.
+        min_edge, min_odds, max_odds: bet filters.
+        use_kelly: fractional-Kelly stakes if True, flat 10-unit stakes if False.
+
+    Returns:
+        Dict with "bets", "total", "wins", "pnl" and "bankroll_curve", or
+        ``{}`` when the market is unsupported or its label column is missing.
+    """
+    X = df_test[feat_cols].values
+    n_class = 3 if market == "ms" else 2
+    probs = ensemble_predict(models, X, feat_cols, n_class)
+
+    if market == "ms":
+        y = df_test["label_ms"].values
+        odds_arr = df_test[["odds_ms_h","odds_ms_d","odds_ms_a"]].values
+        implied = df_test[["implied_h","implied_d","implied_a"]].values
+        class_names = ["Home","Draw","Away"]
+    elif market == "ou25":
+        if "label_ou25" not in df_test.columns:
+            return {}
+        y = df_test["label_ou25"].values
+        # Missing O/U odds (or a missing column) are replaced by 1.85.
+        o_over = pd.to_numeric(df_test["odds_ou25_o"], errors="coerce").fillna(1.85).values if "odds_ou25_o" in df_test.columns else np.full(len(df_test), 1.85)
+        o_under = pd.to_numeric(df_test["odds_ou25_u"], errors="coerce").fillna(1.85).values if "odds_ou25_u" in df_test.columns else np.full(len(df_test), 1.85)
+        # Column order follows the class index: 0 = Under, 1 = Over.
+        odds_arr = np.column_stack([o_under, o_over])
+        m = 1/odds_arr
+        implied = m / m.sum(axis=1, keepdims=True)
+        class_names = ["Under","Over"]
+    else:
+        return {}
+
+    results = {"bets": [], "total": 0, "wins": 0, "pnl": 0.0, "bankroll_curve": [1000.0]}
+    bankroll = 1000.0
+    # A NaN label never equals a class index, so such rows used to be
+    # recorded as lost bets; exclude them from the simulation.
+    has_label = ~pd.isna(y)
+
+    for i in range(len(y)):
+        if not has_label[i]:
+            continue
+        for cls in range(n_class):
+            edge = probs[i, cls] - implied[i, cls]
+            odds_val = odds_arr[i, cls]
+
+            # FILTERS
+            if edge < min_edge:
+                continue
+            if odds_val < min_odds or odds_val > max_odds:
+                continue
+            # Don't bet on heavy favorites with tiny edge
+            if implied[i, cls] > 0.65 and edge < 0.08:
+                continue
+
+            # Sizing
+            if use_kelly:
+                frac = kelly_fraction(probs[i, cls], odds_val, fraction=0.15)
+                stake = bankroll * frac
+            else:
+                stake = 10.0  # flat
+
+            # Stakes below one unit are not worth placing.
+            if stake < 1:
+                continue
+
+            won = (y[i] == cls)
+            pnl = stake * (odds_val - 1) if won else -stake
+            bankroll += pnl
+
+            results["bets"].append({
+                "edge": float(edge), "odds": float(odds_val),
+                "model_p": float(probs[i,cls]), "implied_p": float(implied[i,cls]),
+                "won": bool(won), "pnl": float(pnl), "stake": float(stake),
+                "class": class_names[cls],
+            })
+            results["bankroll_curve"].append(bankroll)
+            results["total"] += 1
+            if won:
+                results["wins"] += 1
+            results["pnl"] = bankroll - 1000.0
+
+    return results
+
+
+def print_backtest(results, label=""):
+    """Print hit rate, ROI, PnL, max drawdown and a per-class breakdown.
+
+    Args:
+        results: dict produced by backtest_value(); an empty dict or one with
+            ``total == 0`` prints a "No bets placed" line.
+        label: heading printed above the figures.
+
+    Max drawdown is the largest percentage drop from the running peak of the
+    bankroll curve. It was previously measured against the global peak,
+    including points before that peak, so a steadily rising curve showed a
+    large drawdown.
+    """
+    total = results.get("total", 0)
+    if total == 0:
+        print(f"  {label}: No bets placed")
+        return
+    wins = results["wins"]
+    pnl = results["pnl"]
+    hit = wins/total*100
+    # ROI relative to the total amount staked.
+    roi = pnl / sum(b["stake"] for b in results["bets"]) * 100
+    curve = results["bankroll_curve"]
+
+    dd = 0.0
+    running_peak = curve[0]
+    for value in curve:
+        running_peak = max(running_peak, value)
+        if running_peak > 0:
+            dd = min(dd, (value - running_peak) / running_peak * 100)
+
+    # Per-class breakdown
+    by_class = {}
+    for b in results["bets"]:
+        cls = b["class"]
+        if cls not in by_class:
+            by_class[cls] = {"n": 0, "w": 0, "pnl": 0}
+        by_class[cls]["n"] += 1
+        if b["won"]:
+            by_class[cls]["w"] += 1
+        by_class[cls]["pnl"] += b["pnl"]
+
+    print(f"\n  {label}")
+    print(f"    Bets: {total}  |  Hit: {hit:.1f}%  |  ROI: {roi:+.1f}%")
+    print(f"    PnL: {pnl:+.0f}  |  Final: {curve[-1]:.0f}  |  MaxDD: {dd:.1f}%")
+    for cls, d in sorted(by_class.items()):
+        # NOTE(review): this is mean PnL per bet times 100, not a true
+        # percentage when stakes vary — confirm the intended metric.
+        r = d["pnl"]/d["n"]*100 if d["n"] > 0 else 0
+        print(f"      {cls:6s}: {d['n']:4d} bets, "
+              f"hit={d['w']/d['n']*100:.1f}%, avg_pnl={r:+.1f}%")
+
+
+# ═══════════════════════════════════════════════════════════════════
+#  MAIN
+# ═══════════════════════════════════════════════════════════════════
+def main():
+    """Train the odds-free V27 models, backtest them and save the artifacts.
+
+    Steps: load and clean the data, train the 1X2 ensemble (and an O/U 2.5
+    ensemble when ``label_ou25`` exists), run value backtests on the last
+    temporal slice, then write the models, feature importance, metadata and
+    feature list to ``MODELS_DIR``.
+
+    Missing feature values are imputed with medians from the training slice
+    only, so validation/test rows no longer influence the imputation.
+    """
+    print("=" * 65)
+    print("  V27 VALUE SNIPER — PRO TRAINING (Odds-Free Fundamentals)")
+    print("=" * 65)
+    t0 = time.time()
+
+    df = load_data()
+    clean_feats = get_clean_features(df)
+    print(f"  Clean features (no odds): {len(clean_feats)}")
+
+    # Numerify
+    for c in clean_feats:
+        df[c] = pd.to_numeric(df[c], errors="coerce")
+    # Impute from the training slice (same split as below). A column with no
+    # training observations falls back to the full-sample median, so the
+    # result is NaN-free exactly as before.
+    train_slice, _, _ = temporal_split(df)
+    fill_values = train_slice[clean_feats].median()
+    fill_values = fill_values.fillna(df[clean_feats].median())
+    del train_slice
+    df[clean_feats] = df[clean_feats].fillna(fill_values)
+
+    # Remove constant columns
+    clean_feats = [c for c in clean_feats if df[c].nunique() > 1]
+    print(f"  After removing constants: {len(clean_feats)}")
+
+    # Split
+    tr, va, te = temporal_split(df)
+    print(f"  Train: {len(tr)}, Val: {len(va)}, Test: {len(te)}")
+    print(f"  Target: H={tr.label_ms.eq(0).mean():.1%}, "
+          f"D={tr.label_ms.eq(1).mean():.1%}, A={tr.label_ms.eq(2).mean():.1%}")
+
+    X_tr = tr[clean_feats].values
+    y_tr = tr["label_ms"].values
+    X_va = va[clean_feats].values
+    y_va = va["label_ms"].values
+
+    # ── STAGE A: Train fundamentals model (1X2) ──
+    print("\n" + "─"*65)
+    print("  STAGE A: Fundamentals-Only 1X2 Model")
+    print("─"*65)
+    ms_models = train_fundamentals_model(X_tr, y_tr, X_va, y_va, clean_feats, "ms")
+
+    val_probs = ensemble_predict(ms_models, X_va, clean_feats, 3)
+    val_acc = accuracy_score(y_va, val_probs.argmax(1))
+    val_ll = log_loss(y_va, val_probs)
+    print(f"\n  Ensemble Val: acc={val_acc:.4f}, logloss={val_ll:.4f}")
+
+    # Compare with odds baseline (always pick the market favourite)
+    odds_pred = va[["implied_h","implied_d","implied_a"]].values.argmax(1)
+    odds_acc = accuracy_score(y_va, odds_pred)
+    print(f"  Odds baseline: acc={odds_acc:.4f}")
+    print(f"  Model vs Odds: {val_acc - odds_acc:+.4f}")
+
+    # ── STAGE A.2: O/U 2.5 Model ──
+    ou_models = None
+    if "label_ou25" in tr.columns:
+        print("\n" + "─"*65)
+        print("  STAGE A.2: Fundamentals-Only O/U 2.5 Model")
+        print("─"*65)
+        y_tr_ou = tr["label_ou25"].values
+        y_va_ou = va["label_ou25"].values
+        # Rows without an O/U label are left out of training and validation.
+        mask_tr = ~np.isnan(y_tr_ou)
+        mask_va = ~np.isnan(y_va_ou)
+        if mask_tr.sum() > 1000:
+            ou_models = train_fundamentals_model(
+                X_tr[mask_tr], y_tr_ou[mask_tr].astype(int),
+                X_va[mask_va], y_va_ou[mask_va].astype(int),
+                clean_feats, "ou25")
+
+    # ── STAGE B: Backtest ──
+    print("\n" + "─"*65)
+    print("  STAGE B: Walk-Forward Backtest (Test Set)")
+    print("─"*65)
+
+    # Try multiple edge thresholds
+    # NOTE(review): the best configuration is picked by ROI on the test slice
+    # itself, so the ROI reported for it is optimistic.
+    best_roi = -999
+    best_cfg = {}
+    for min_edge in [0.03, 0.05, 0.07, 0.10, 0.12, 0.15]:
+        for min_odds in [1.35, 1.50, 1.70]:
+            r = backtest_value(ms_models, te, clean_feats, "ms",
+                               min_edge=min_edge, min_odds=min_odds,
+                               max_odds=5.0, use_kelly=True)
+            # Ignore configurations with too few bets to be meaningful.
+            if r.get("total", 0) >= 20:
+                invested = sum(b["stake"] for b in r["bets"])
+                roi = r["pnl"] / invested * 100 if invested > 0 else -100
+                if roi > best_roi:
+                    best_roi = roi
+                    best_cfg = {"edge": min_edge, "min_odds": min_odds, "result": r}
+
+    if best_cfg:
+        cfg = best_cfg
+        print(f"\n  Best 1X2 Config: edge>{cfg['edge']}, odds>{cfg['min_odds']}")
+        print_backtest(cfg["result"], "1X2 VALUE")
+
+    # Flat bet comparison
+    print("\n  --- Flat Bet Comparison ---")
+    for edge in [0.05, 0.07, 0.10]:
+        r = backtest_value(ms_models, te, clean_feats, "ms",
+                           min_edge=edge, min_odds=1.50, max_odds=4.5,
+                           use_kelly=False)
+        if r.get("total", 0) > 0:
+            inv = r["total"] * 10  # flat stake is 10 units per bet
+            roi = r["pnl"]/inv*100
+            print(f"    Edge>{edge:.2f}: {r['total']} bets, "
+                  f"hit={r['wins']/r['total']*100:.1f}%, ROI={roi:+.1f}%")
+
+    # OU25 backtest
+    if ou_models:
+        print("\n  --- O/U 2.5 Backtest ---")
+        for edge in [0.05, 0.07, 0.10]:
+            r = backtest_value(ou_models, te, clean_feats, "ou25",
+                               min_edge=edge, min_odds=1.50, max_odds=3.0,
+                               use_kelly=True)
+            if r.get("total", 0) > 0:
+                print_backtest(r, f"OU25 edge>{edge}")
+
+    # ── Feature importance ──
+    if "lgb" in ms_models:
+        imp = ms_models["lgb"].feature_importance(importance_type="gain")
+        imp_df = pd.DataFrame({"feature": clean_feats, "importance": imp}
+                              ).sort_values("importance", ascending=False)
+        print("\n  TOP 15 FEATURES (no odds!):")
+        for _, r in imp_df.head(15).iterrows():
+            print(f"    {r['feature']:40s} {r['importance']:.0f}")
+        imp_df.to_csv(MODELS_DIR / "v27_feature_importance.csv", index=False)
+
+    # ── Save ──
+    print("\n" + "─"*65)
+    print("  SAVING MODELS")
+    print("─"*65)
+    for name, m in ms_models.items():
+        p = MODELS_DIR / f"v27_ms_{name}.pkl"
+        with open(p, "wb") as f:
+            pickle.dump(m, f)
+        print(f"  ✓ {p.name}")
+
+    if ou_models:
+        for name, m in ou_models.items():
+            p = MODELS_DIR / f"v27_ou25_{name}.pkl"
+            with open(p, "wb") as f:
+                pickle.dump(m, f)
+            print(f"  ✓ {p.name}")
+
+    meta = {
+        "version": "v27-pro", "trained_at": time.strftime("%Y-%m-%d %H:%M:%S"),
+        "approach": "odds-free fundamentals + value edge detection",
+        "feature_count": len(clean_feats),
+        "total_samples": len(df),
+        "val_acc": round(val_acc, 4), "val_ll": round(val_ll, 4),
+        # The full backtest result is left out of the metadata file.
+        "best_config": {k: v for k, v in best_cfg.items() if k != "result"} if best_cfg else {},
+        "markets": ["ms"] + (["ou25"] if ou_models else []),
+    }
+    with open(MODELS_DIR / "v27_metadata.json", "w") as f:
+        json.dump(meta, f, indent=2, default=str)
+    with open(MODELS_DIR / "v27_feature_cols.json", "w") as f:
+        json.dump(clean_feats, f, indent=2)
+    print(f"  ✓ metadata + feature_cols")
+
+    print(f"\n  Total time: {(time.time()-t0)/60:.1f} min")
+    print("  DONE!")
+
+
+if __name__ == "__main__":
+    main()