""" VQWEN v3 Stress Test (Time Series Validation) ============================================= Trains on OLDER data, Tests on NEWER data (Simulating Real Future). """ import os import sys import json import time import pickle import psycopg2 import pandas as pd import numpy as np import lightgbm as lgb AI_DIR = os.path.dirname(os.path.abspath(__file__)) ROOT_DIR = os.path.dirname(AI_DIR) sys.path.insert(0, ROOT_DIR) def get_clean_dsn() -> str: return "postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db" def run_stress_test(): print("🧪 VQWEN v3 STRESS TEST (Time-Series Validation)") print("="*60) dsn = get_clean_dsn() conn = psycopg2.connect(dsn) cur = conn.cursor() # ─── 1. VERİ ÇEKME (En yeniden eskiye doğru) ─── # İlk baştakiler en yeni maçlar (Test Set), sonrakiler eski maçlar (Train Set) query = """ WITH match_data AS ( SELECT m.id, m.home_team_id, m.away_team_id, m.score_home, m.score_away, m.mst_utc, COALESCE(maf.home_elo, 1500) as home_elo, COALESCE(maf.away_elo, 1500) as away_elo, -- Contextual Goals COALESCE((SELECT AVG(m2.score_home) FROM matches m2 WHERE m2.home_team_id = m.home_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc), 1.2) as h_home_goals, COALESCE((SELECT AVG(m2.score_away) FROM matches m2 WHERE m2.away_team_id = m.away_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc), 1.2) as a_away_goals, -- Rest Days COALESCE(EXTRACT(EPOCH FROM (to_timestamp(m.mst_utc/1000) - (SELECT MAX(to_timestamp(m2.mst_utc/1000)) FROM matches m2 WHERE m2.home_team_id = m.home_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc)) / 86400), 7) as h_rest, COALESCE(EXTRACT(EPOCH FROM (to_timestamp(m.mst_utc/1000) - (SELECT MAX(to_timestamp(m2.mst_utc/1000)) FROM matches m2 WHERE m2.away_team_id = m.away_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc)) / 86400), 7) as a_rest, -- Squad COALESCE((SELECT COUNT(*) FROM match_player_participation mp WHERE mp.match_id = m.id AND mp.team_id = m.home_team_id AND mp.is_starting = true), 11) as h_xi, COALESCE((SELECT COUNT(*) FROM match_player_participation mp WHERE mp.match_id = m.id AND mp.team_id = m.away_team_id AND mp.is_starting = true), 11) as a_xi, -- Odds (SELECT os.odd_value FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = '1' LIMIT 1) as oh, (SELECT os.odd_value FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = 'X' LIMIT 1) as od, (SELECT os.odd_value FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = '2' LIMIT 1) as oa FROM matches m LEFT JOIN football_ai_features maf ON maf.match_id = m.id WHERE m.status = 'FT' AND m.score_home IS NOT NULL AND m.sport = 'football' AND EXISTS (SELECT 1 FROM odd_categories oc WHERE oc.match_id = m.id) ORDER BY m.mst_utc DESC LIMIT 150000 ) SELECT md.*, -- H2H Win Rate for Home Team COALESCE(( SELECT COUNT(*) FILTER (WHERE m2.score_home > m2.score_away)::float / NULLIF(COUNT(*), 0) FROM matches m2 WHERE m2.home_team_id = md.home_team_id AND m2.away_team_id = md.away_team_id AND m2.status = 'FT' AND m2.mst_utc < md.mst_utc ), 0.5) as h2h_h_win_rate, -- Form Points (Last 5) COALESCE((SELECT SUM(pts) FROM (SELECT CASE WHEN m2.score_home > m2.score_away THEN 3 WHEN m2.score_home = m2.score_away THEN 1 ELSE 0 END as pts FROM matches m2 WHERE m2.home_team_id = md.home_team_id AND m2.status = 'FT' AND m2.mst_utc < md.mst_utc ORDER BY m2.mst_utc DESC LIMIT 5) sub), 0) as h_form_pts, COALESCE((SELECT SUM(pts) FROM (SELECT CASE WHEN m2.score_away > m2.score_home THEN 3 WHEN m2.score_away = m2.score_home THEN 1 ELSE 0 END as pts FROM matches m2 WHERE m2.away_team_id = md.away_team_id AND m2.status = 'FT' AND m2.mst_utc < md.mst_utc ORDER BY m2.mst_utc DESC LIMIT 5) sub), 0) as a_form_pts FROM match_data md """ print("📊 Veri çekiliyor (Time-Series)...") start = time.time() cur.execute(query) rows = cur.fetchall() print(f"✅ {len(rows)} maç çekildi ({time.time()-start:.1f}s)") df = pd.DataFrame(rows, columns=[ 'id', 'h_id', 'a_id', 'sh', 'sa', 'utc', 'h_elo', 'a_elo', 'h_home_goals', 'a_away_goals', 'h_rest', 'a_rest', 'h_xi', 'a_xi', 'oh', 'od', 'oa', 'h2h_h_wr', 'h_form_pts', 'a_form_pts' ]) # Temizlik for col in df.columns[2:]: df[col] = pd.to_numeric(df[col], errors='coerce') df = df.fillna(df.median(numeric_only=True)) df = df[(df['oh'] > 1.0) & (df['oa'] > 1.0)] # Özellikler df['elo_diff'] = df['h_elo'] - df['a_elo'] def fatigue(rest): if rest < 3: return 0.85 if rest < 5: return 0.95 return 1.0 df['h_fat'] = df['h_rest'].apply(fatigue) df['a_fat'] = df['a_rest'].apply(fatigue) df['h_xg'] = df['h_home_goals'] * df['h_fat'] df['a_xg'] = df['a_away_goals'] * df['a_fat'] df['total_xg'] = df['h_xg'] + df['a_xg'] df['rest_diff'] = df['h_rest'] - df['a_rest'] df['pow_diff'] = (df['h_elo']/100)*df['h_fat'] - (df['a_elo']/100)*df['a_fat'] df['form_diff'] = df['h_form_pts'] - df['a_form_pts'] margin = (1/df['oh']) + (1/df['od']) + (1/df['oa']) df['imp_h'] = (1/df['oh']) / margin df['imp_d'] = (1/df['od']) / margin df['imp_a'] = (1/df['oa']) / margin df['t_ms'] = df.apply(lambda r: 0 if r['sh']>r['sa'] else (2 if r['sh'] 2.5).astype(int) df['t_btts'] = ((df['sh'] > 0) & (df['sa'] > 0)).astype(int) feats = ['elo_diff', 'h_xg', 'a_xg', 'total_xg', 'pow_diff', 'rest_diff', 'h_fat', 'a_fat', 'imp_h', 'imp_d', 'imp_a', 'h_xi', 'a_xi', 'h2h_h_wr', 'form_diff'] # ─── 2. ZAMAN BAZLI BÖLME (Time-Series Split) ─── # DataFrame zaten en yeniden eskiye (DESC) sıralı. # İlk %30'luk kısım (en yeniler) TEST SET olacak. # Geri kalan %70 (daha eskiler) TRAIN SET olacak. split_point = int(len(df) * 0.30) # Test Set: En yeni maçlar (Model bunları "Gelecek" olarak görecek) test_set = df.iloc[:split_point].copy() # Train Set: Daha eski maçlar (Model bunlardan "Öğrenecek") train_set = df.iloc[split_point:].copy() print(f"\n📅 SPLIT INFO:") print(f" Train Set (Eski): {len(train_set)} maç") print(f" Test Set (YENİ/GELECEK): {len(test_set)} maç") if len(train_set) < 1000: print("❌ Yetersiz eğitim verisi.") return # ─── 3. EĞİTİM (Sadece Geçmişle) ─── print("\n🤖 Geçmiş verilerle model eğitiliyor...") model_ms = lgb.train({'objective': 'multiclass', 'num_class': 3, 'verbose': -1, 'num_leaves': 63}, lgb.Dataset(train_set[feats], train_set['t_ms']), num_boost_round=500) model_ou = lgb.train({'objective': 'binary', 'verbose': -1}, lgb.Dataset(train_set[feats], train_set['t_ou']), num_boost_round=500) model_btts = lgb.train({'objective': 'binary', 'verbose': -1}, lgb.Dataset(train_set[feats], train_set['t_btts']), num_boost_round=500) print("✅ Model eğitimi tamamlandı. Şimdi Gelecek (Test Set) tahmin ediliyor...") # ─── 4. TEST (Geleceği Tahmin) ─── # Value Betting Stratejisi results = {'ms': {'bet': 0, 'won': 0, 'profit': 0}, 'ou25': {'bet': 0, 'won': 0, 'profit': 0}, 'btts': {'bet': 0, 'won': 0, 'profit': 0}} for idx, row in test_set.iterrows(): oh = row['oh'] od = row['od'] oa = row['oa'] f = pd.DataFrame([row[feats]]) # MS Tahminleri ms_probs = model_ms.predict(f)[0] for pick, prob, odd in zip(['1', 'X', '2'], ms_probs, [oh, od, oa]): if odd <= 1.0: continue edge = prob - (1/odd) # Value Check: Modelin olasılığı piyasa olasılığından %5 yüksekse oyna if edge > 0.05 and prob > 0.45: results['ms']['bet'] += 1 h, a = row['sh'], row['sa'] w = (pick=='1' and h>a) or (pick=='X' and h==a) or (pick=='2' and a>h) if w: results['ms']['won'] += 1; results['ms']['profit'] += (odd - 1.0) else: results['ms']['profit'] -= 1.0 break # OU2.5 p_over = float(model_ou.predict(f)[0]) if p_over > 0.55: # Threshold results['ou25']['bet'] += 1 if (row['sh'] + row['sa']) > 2.5: results['ou25']['won'] += 1; results['ou25']['profit'] += 0.85 else: results['ou25']['profit'] -= 1.0 # BTTS p_btts = float(model_btts.predict(f)[0]) if p_btts > 0.55: results['btts']['bet'] += 1 if row['sh'] > 0 and row['sa'] > 0: results['btts']['won'] += 1; results['btts']['profit'] += 0.85 else: results['btts']['profit'] -= 1.0 # ─── 5. SONUÇLAR ─── print("\n" + "="*60) print("📊 STRESS TEST SONUÇLARI (GELECEK TAHMİNİ)") print("="*60) for mkt in ['ms', 'ou25', 'btts']: r = results[mkt] wr = (r['won'] / r['bet'] * 100) if r['bet'] > 0 else 0 print(f"{mkt.upper():<10} Oyn: {r['bet']:<5} Kaz: {r['won']:<5} WR: {wr:.1f}% Kâr: {r['profit']:+.2f}") total = sum(r['profit'] for r in results.values()) print(f"\n💰 TOPLAM GELECEK KÂRI: {total:+.2f} Units") if total > 0: print("🟢 MODEL GÜVENİLİR! (Geleceği öngörebiliyor)") else: print("🔴 MODEL ZAYIF! (Sadece ezber yapmış olabilir)") cur.close() conn.close() if __name__ == "__main__": run_stress_test()