This commit is contained in:
@@ -0,0 +1,216 @@
|
||||
"""
|
||||
VQWEN v3 Stress Test (Time Series Validation)
|
||||
=============================================
|
||||
Trains on OLDER data, Tests on NEWER data (Simulating Real Future).
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import pickle
|
||||
import psycopg2
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import lightgbm as lgb
|
||||
|
||||
# Absolute directory that contains this script.
AI_DIR = os.path.split(os.path.abspath(__file__))[0]
# Parent of the script directory, treated as the project root.
ROOT_DIR = os.path.split(AI_DIR)[0]
# Put the project root first on the import path so project packages resolve
# when this file is run directly.
sys.path[:0] = [ROOT_DIR]
|
||||
|
||||
def get_clean_dsn() -> str:
    """Return the PostgreSQL connection string used by the stress test.

    The ``VQWEN_DB_DSN`` environment variable, when set to a non-blank value,
    takes precedence so credentials can be supplied without editing source.
    Otherwise the historical built-in DSN is returned, which keeps existing
    invocations working unchanged.

    Returns:
        A ``postgresql://`` URL suitable for ``psycopg2.connect``.
    """
    override = os.environ.get("VQWEN_DB_DSN", "").strip()
    if override:
        return override
    # NOTE(review): this fallback embeds a password in version control.
    # Rotate the credential and drop the literal once every caller provides
    # VQWEN_DB_DSN.
    return "postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db"
|
||||
|
||||
# Finished football matches that have odds, newest first, with pre-match
# context (Elo, scoring averages, rest days, line-up size, 1X2 odds, H2H, form).
_MATCH_QUERY = """
    WITH match_data AS (
        SELECT
            m.id, m.home_team_id, m.away_team_id, m.score_home, m.score_away, m.mst_utc,
            COALESCE(maf.home_elo, 1500) as home_elo,
            COALESCE(maf.away_elo, 1500) as away_elo,
            -- Contextual Goals
            COALESCE((SELECT AVG(m2.score_home) FROM matches m2 WHERE m2.home_team_id = m.home_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc), 1.2) as h_home_goals,
            COALESCE((SELECT AVG(m2.score_away) FROM matches m2 WHERE m2.away_team_id = m.away_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc), 1.2) as a_away_goals,
            -- Rest Days
            COALESCE(EXTRACT(EPOCH FROM (to_timestamp(m.mst_utc/1000) - (SELECT MAX(to_timestamp(m2.mst_utc/1000)) FROM matches m2 WHERE m2.home_team_id = m.home_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc)) / 86400), 7) as h_rest,
            COALESCE(EXTRACT(EPOCH FROM (to_timestamp(m.mst_utc/1000) - (SELECT MAX(to_timestamp(m2.mst_utc/1000)) FROM matches m2 WHERE m2.away_team_id = m.away_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc)) / 86400), 7) as a_rest,
            -- Squad
            COALESCE((SELECT COUNT(*) FROM match_player_participation mp WHERE mp.match_id = m.id AND mp.team_id = m.home_team_id AND mp.is_starting = true), 11) as h_xi,
            COALESCE((SELECT COUNT(*) FROM match_player_participation mp WHERE mp.match_id = m.id AND mp.team_id = m.away_team_id AND mp.is_starting = true), 11) as a_xi,
            -- Odds
            (SELECT os.odd_value FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = '1' LIMIT 1) as oh,
            (SELECT os.odd_value FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = 'X' LIMIT 1) as od,
            (SELECT os.odd_value FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = '2' LIMIT 1) as oa
        FROM matches m
        LEFT JOIN football_ai_features maf ON maf.match_id = m.id
        WHERE m.status = 'FT' AND m.score_home IS NOT NULL AND m.sport = 'football'
        AND EXISTS (SELECT 1 FROM odd_categories oc WHERE oc.match_id = m.id)
        ORDER BY m.mst_utc DESC
        LIMIT 150000
    )
    SELECT
        md.*,
        -- H2H Win Rate for Home Team
        COALESCE((
            SELECT COUNT(*) FILTER (WHERE m2.score_home > m2.score_away)::float / NULLIF(COUNT(*), 0)
            FROM matches m2
            WHERE m2.home_team_id = md.home_team_id AND m2.away_team_id = md.away_team_id AND m2.status = 'FT' AND m2.mst_utc < md.mst_utc
        ), 0.5) as h2h_h_win_rate,

        -- Form Points (Last 5)
        COALESCE((SELECT SUM(pts) FROM (SELECT CASE WHEN m2.score_home > m2.score_away THEN 3 WHEN m2.score_home = m2.score_away THEN 1 ELSE 0 END as pts FROM matches m2 WHERE m2.home_team_id = md.home_team_id AND m2.status = 'FT' AND m2.mst_utc < md.mst_utc ORDER BY m2.mst_utc DESC LIMIT 5) sub), 0) as h_form_pts,
        COALESCE((SELECT SUM(pts) FROM (SELECT CASE WHEN m2.score_away > m2.score_home THEN 3 WHEN m2.score_away = m2.score_home THEN 1 ELSE 0 END as pts FROM matches m2 WHERE m2.away_team_id = md.away_team_id AND m2.status = 'FT' AND m2.mst_utc < md.mst_utc ORDER BY m2.mst_utc DESC LIMIT 5) sub), 0) as a_form_pts

    FROM match_data md
"""

# Column labels for the query result, in SELECT order.
_RAW_COLUMNS = [
    'id', 'h_id', 'a_id', 'sh', 'sa', 'utc', 'h_elo', 'a_elo',
    'h_home_goals', 'a_away_goals', 'h_rest', 'a_rest', 'h_xi', 'a_xi',
    'oh', 'od', 'oa',
    'h2h_h_wr', 'h_form_pts', 'a_form_pts',
]

# Model inputs derived from the raw columns.
_FEATURES = [
    'elo_diff', 'h_xg', 'a_xg', 'total_xg', 'pow_diff', 'rest_diff',
    'h_fat', 'a_fat', 'imp_h', 'imp_d', 'imp_a',
    'h_xi', 'a_xi', 'h2h_h_wr', 'form_diff',
]

# Columns that must hold real observations: without the final score there is
# no label, without the kickoff time no time split, and without genuine 1X2
# odds a simulated bet has no price.
_REQUIRED = ['sh', 'sa', 'utc', 'oh', 'od', 'oa']


def _clean_matches(rows):
    """Build a numeric, validated frame ordered newest match first."""
    df = pd.DataFrame(rows, columns=_RAW_COLUMNS)
    # Everything after the three id columns is coerced to numeric;
    # unparseable values become NaN.
    for col in _RAW_COLUMNS[3:]:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    fetched = len(df)
    df = df.dropna(subset=_REQUIRED)
    # Decimal odds at or below 1.0 are invalid and would corrupt the implied
    # probabilities, so all three prices are checked.
    df = df[(df['oh'] > 1.0) & (df['od'] > 1.0) & (df['oa'] > 1.0)]
    dropped = fetched - len(df)
    if dropped:
        print(f"🧹 {dropped} maç eksik/geçersiz skor veya oran nedeniyle çıkarıldı")
    # The outer SELECT carries no ORDER BY, so the ordering the time split
    # depends on is enforced here rather than assumed.
    df = df.sort_values('utc', ascending=False, kind='mergesort')
    return df.reset_index(drop=True)


def _fatigue_factor(rest_days):
    """Map rest days to a multiplier: <3 days 0.85, <5 days 0.95, else 1.0."""
    rest = np.asarray(rest_days, dtype=float)
    return np.select([rest < 3, rest < 5], [0.85, 0.95], default=1.0)


def _add_features_and_targets(df):
    """Return a copy of ``df`` with model features and the three label columns."""
    df = df.copy()
    df['elo_diff'] = df['h_elo'] - df['a_elo']
    df['h_fat'] = _fatigue_factor(df['h_rest'])
    df['a_fat'] = _fatigue_factor(df['a_rest'])

    df['h_xg'] = df['h_home_goals'] * df['h_fat']
    df['a_xg'] = df['a_away_goals'] * df['a_fat']
    df['total_xg'] = df['h_xg'] + df['a_xg']
    df['rest_diff'] = df['h_rest'] - df['a_rest']
    df['pow_diff'] = (df['h_elo'] / 100) * df['h_fat'] - (df['a_elo'] / 100) * df['a_fat']
    df['form_diff'] = df['h_form_pts'] - df['a_form_pts']

    # Normalise inverse odds by their sum so the three implied
    # probabilities add up to 1.
    overround = (1 / df['oh']) + (1 / df['od']) + (1 / df['oa'])
    df['imp_h'] = (1 / df['oh']) / overround
    df['imp_d'] = (1 / df['od']) / overround
    df['imp_a'] = (1 / df['oa']) / overround

    # Labels: t_ms 0 = home win, 1 = draw, 2 = away win.
    df['t_ms'] = np.select([df['sh'] > df['sa'], df['sh'] < df['sa']], [0, 2], default=1)
    df['t_ou'] = ((df['sh'] + df['sa']) > 2.5).astype(int)
    df['t_btts'] = ((df['sh'] > 0) & (df['sa'] > 0)).astype(int)
    return df


def _settle_match_result(probs, odds, labels):
    """Simulate 1X2 value bets, at most one unit-stake bet per match.

    Picks are examined in the order home, draw, away; the first one whose
    model probability exceeds 0.45 and beats the raw implied probability
    (1/odd) by more than 0.05 is played at the quoted odds.
    """
    tally = {'bet': 0, 'won': 0, 'profit': 0.0}
    for row_probs, row_odds, label in zip(probs, odds, labels):
        for pick, (prob, odd) in enumerate(zip(row_probs, row_odds)):
            if odd <= 1.0:
                continue
            edge = prob - (1 / odd)
            if edge > 0.05 and prob > 0.45:
                tally['bet'] += 1
                # Pick index and label share the 0/1/2 = home/draw/away encoding.
                if label == pick:
                    tally['won'] += 1
                    tally['profit'] += odd - 1.0
                else:
                    tally['profit'] -= 1.0
                break
    return tally


def _settle_flat_market(probs, outcomes, threshold=0.55, net_win=0.85):
    """Simulate unit-stake bets on a binary market at a fixed payout.

    A bet is placed whenever the predicted probability exceeds ``threshold``.
    No market odds are loaded for these markets, so each win is credited a
    flat ``net_win`` units and each loss costs one unit.
    """
    placed = np.asarray(probs, dtype=float) > threshold
    bets = int(placed.sum())
    wins = int((placed & (np.asarray(outcomes) == 1)).sum())
    return {'bet': bets, 'won': wins, 'profit': wins * net_win - (bets - wins)}


def run_stress_test():
    """Run the out-of-time backtest and print a profit report.

    Steps:
      1. Fetch finished football matches with odds from PostgreSQL.
      2. Drop rows lacking scores, kickoff time or valid 1X2 odds, and order
         the rest newest first.
      3. Hold out the newest 30% as the test set; train three LightGBM models
         (match result, over 2.5 goals, both teams to score) on the older 70%.
      4. Simulate unit-stake betting on the test set and print bets, wins,
         win rate and profit per market plus the overall total.

    Returns:
        None. Returns early, after printing a message, when fewer than 1000
        training rows are available.

    Raises:
        psycopg2.Error: if connecting or querying fails; the connection is
            closed before the exception propagates.
    """
    print("🧪 VQWEN v3 STRESS TEST (Time-Series Validation)")
    print("="*60)

    # ─── 1. FETCH DATA ───
    # The connection is only needed for the fetch, so it is released
    # immediately, including on errors and before any early return below.
    conn = psycopg2.connect(get_clean_dsn())
    try:
        with conn.cursor() as cur:
            print("📊 Veri çekiliyor (Time-Series)...")
            start = time.time()
            cur.execute(_MATCH_QUERY)
            rows = cur.fetchall()
    finally:
        conn.close()
    print(f"✅ {len(rows)} maç çekildi ({time.time()-start:.1f}s)")

    df = _clean_matches(rows)

    # ─── 2. TIME-BASED SPLIT ───
    # The frame is ordered newest first: the leading 30% is the test set
    # (the "future"), the remaining older 70% is the training set.
    split_point = int(len(df) * 0.30)
    train_size = len(df) - split_point

    print("\n📅 SPLIT INFO:")
    print(f" Train Set (Eski): {train_size} maç")
    print(f" Test Set (YENİ/GELECEK): {split_point} maç")

    if train_size < 1000:
        print("❌ Yetersiz eğitim verisi.")
        return

    # Fill remaining gaps with medians from the training slice only, so no
    # statistic of the held-out matches leaks into the inputs.
    df = df.fillna(df.iloc[split_point:].median(numeric_only=True))
    df = _add_features_and_targets(df)

    test_set = df.iloc[:split_point]
    train_set = df.iloc[split_point:]
    x_train = train_set[_FEATURES]
    x_test = test_set[_FEATURES]

    # ─── 3. TRAINING (past data only) ───
    print("\n🤖 Geçmiş verilerle model eğitiliyor...")
    model_ms = lgb.train({'objective': 'multiclass', 'num_class': 3, 'verbose': -1, 'num_leaves': 63},
                         lgb.Dataset(x_train, train_set['t_ms']), num_boost_round=500)

    model_ou = lgb.train({'objective': 'binary', 'verbose': -1},
                         lgb.Dataset(x_train, train_set['t_ou']), num_boost_round=500)

    model_btts = lgb.train({'objective': 'binary', 'verbose': -1},
                           lgb.Dataset(x_train, train_set['t_btts']), num_boost_round=500)
    print("✅ Model eğitimi tamamlandı. Şimdi Gelecek (Test Set) tahmin ediliyor...")

    # ─── 4. TEST (value-betting simulation on the held-out matches) ───
    # One batch prediction per model replaces a predict call per match.
    results = {
        'ms': _settle_match_result(
            model_ms.predict(x_test),
            test_set[['oh', 'od', 'oa']].to_numpy(),
            test_set['t_ms'].to_numpy(),
        ),
        'ou25': _settle_flat_market(model_ou.predict(x_test), test_set['t_ou'].to_numpy()),
        'btts': _settle_flat_market(model_btts.predict(x_test), test_set['t_btts'].to_numpy()),
    }

    # ─── 5. RESULTS ───
    print("\n" + "="*60)
    print("📊 STRESS TEST SONUÇLARI (GELECEK TAHMİNİ)")
    print("="*60)
    for mkt in ['ms', 'ou25', 'btts']:
        r = results[mkt]
        wr = (r['won'] / r['bet'] * 100) if r['bet'] > 0 else 0
        print(f"{mkt.upper():<10} Oyn: {r['bet']:<5} Kaz: {r['won']:<5} WR: {wr:.1f}% Kâr: {r['profit']:+.2f}")

    total = sum(r['profit'] for r in results.values())
    print(f"\n💰 TOPLAM GELECEK KÂRI: {total:+.2f} Units")
    if total > 0:
        print("🟢 MODEL GÜVENİLİR! (Geleceği öngörebiliyor)")
    else:
        print("🔴 MODEL ZAYIF! (Sadece ezber yapmış olabilir)")
|
||||
|
||||
# Script entry point: run the stress test only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run_stress_test()
|
||||
Reference in New Issue
Block a user