Files
fahricansecer 2f0b85a0c7
Deploy Iddaai Backend / build-and-deploy (push) Failing after 18s
first (part 2: other directories)
2026-04-16 15:11:25 +03:00

217 lines
10 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
VQWEN v3 Stress Test (Time Series Validation)
=============================================
Trains on OLDER data, Tests on NEWER data (Simulating Real Future).
"""
import os
import sys
import json
import time
import pickle
import psycopg2
import pandas as pd
import numpy as np
import lightgbm as lgb
# Absolute path of the directory that contains this script.
AI_DIR = os.path.split(os.path.abspath(__file__))[0]
# Parent directory of AI_DIR.
ROOT_DIR = os.path.split(AI_DIR)[0]
# Put ROOT_DIR at the front of the module search path so imports are
# resolved against it before any other entry.
sys.path[:0] = [ROOT_DIR]
def get_clean_dsn() -> str:
    """Return the PostgreSQL DSN used by the stress test.

    The DSN is read from the ``VQWEN_DB_DSN`` environment variable when it is
    set to a non-empty value. Otherwise the historical hard-coded local DSN is
    returned, so existing invocations keep working unchanged.

    Returns:
        A ``postgresql://`` connection string suitable for ``psycopg2.connect``.
    """
    # NOTE(review): the fallback below embeds a username and password in
    # source control. Prefer supplying VQWEN_DB_DSN and rotating this
    # credential; the literal is kept only for backward compatibility.
    default_dsn = "postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db"
    # `or` (rather than a .get default) also treats an empty variable as unset.
    return os.environ.get("VQWEN_DB_DSN") or default_dsn
# Feature query. The CTE selects the newest finished football matches that
# have odds (ORDER BY mst_utc DESC LIMIT 150000); the outer SELECT adds
# head-to-head and last-5 form aggregates computed from earlier matches only.
_STRESS_TEST_QUERY = """
WITH match_data AS (
SELECT
m.id, m.home_team_id, m.away_team_id, m.score_home, m.score_away, m.mst_utc,
COALESCE(maf.home_elo, 1500) as home_elo,
COALESCE(maf.away_elo, 1500) as away_elo,
-- Contextual Goals
COALESCE((SELECT AVG(m2.score_home) FROM matches m2 WHERE m2.home_team_id = m.home_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc), 1.2) as h_home_goals,
COALESCE((SELECT AVG(m2.score_away) FROM matches m2 WHERE m2.away_team_id = m.away_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc), 1.2) as a_away_goals,
-- Rest Days
COALESCE(EXTRACT(EPOCH FROM (to_timestamp(m.mst_utc/1000) - (SELECT MAX(to_timestamp(m2.mst_utc/1000)) FROM matches m2 WHERE m2.home_team_id = m.home_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc)) / 86400), 7) as h_rest,
COALESCE(EXTRACT(EPOCH FROM (to_timestamp(m.mst_utc/1000) - (SELECT MAX(to_timestamp(m2.mst_utc/1000)) FROM matches m2 WHERE m2.away_team_id = m.away_team_id AND m2.status = 'FT' AND m2.mst_utc < m.mst_utc)) / 86400), 7) as a_rest,
-- Squad
COALESCE((SELECT COUNT(*) FROM match_player_participation mp WHERE mp.match_id = m.id AND mp.team_id = m.home_team_id AND mp.is_starting = true), 11) as h_xi,
COALESCE((SELECT COUNT(*) FROM match_player_participation mp WHERE mp.match_id = m.id AND mp.team_id = m.away_team_id AND mp.is_starting = true), 11) as a_xi,
-- Odds
(SELECT os.odd_value FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = '1' LIMIT 1) as oh,
(SELECT os.odd_value FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = 'X' LIMIT 1) as od,
(SELECT os.odd_value FROM odd_categories oc JOIN odd_selections os ON os.odd_category_db_id = oc.db_id WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = '2' LIMIT 1) as oa
FROM matches m
LEFT JOIN football_ai_features maf ON maf.match_id = m.id
WHERE m.status = 'FT' AND m.score_home IS NOT NULL AND m.sport = 'football'
AND EXISTS (SELECT 1 FROM odd_categories oc WHERE oc.match_id = m.id)
ORDER BY m.mst_utc DESC
LIMIT 150000
)
SELECT
md.*,
-- H2H Win Rate for Home Team
COALESCE((
SELECT COUNT(*) FILTER (WHERE m2.score_home > m2.score_away)::float / NULLIF(COUNT(*), 0)
FROM matches m2
WHERE m2.home_team_id = md.home_team_id AND m2.away_team_id = md.away_team_id AND m2.status = 'FT' AND m2.mst_utc < md.mst_utc
), 0.5) as h2h_h_win_rate,
-- Form Points (Last 5)
COALESCE((SELECT SUM(pts) FROM (SELECT CASE WHEN m2.score_home > m2.score_away THEN 3 WHEN m2.score_home = m2.score_away THEN 1 ELSE 0 END as pts FROM matches m2 WHERE m2.home_team_id = md.home_team_id AND m2.status = 'FT' AND m2.mst_utc < md.mst_utc ORDER BY m2.mst_utc DESC LIMIT 5) sub), 0) as h_form_pts,
COALESCE((SELECT SUM(pts) FROM (SELECT CASE WHEN m2.score_away > m2.score_home THEN 3 WHEN m2.score_away = m2.score_home THEN 1 ELSE 0 END as pts FROM matches m2 WHERE m2.away_team_id = md.away_team_id AND m2.status = 'FT' AND m2.mst_utc < md.mst_utc ORDER BY m2.mst_utc DESC LIMIT 5) sub), 0) as a_form_pts
FROM match_data md
"""

# Local column names, in the same order as the columns the query returns.
_RAW_COLUMNS = [
    'id', 'h_id', 'a_id', 'sh', 'sa', 'utc', 'h_elo', 'a_elo',
    'h_home_goals', 'a_away_goals', 'h_rest', 'a_rest', 'h_xi', 'a_xi',
    'oh', 'od', 'oa',
    'h2h_h_wr', 'h_form_pts', 'a_form_pts',
]

# Model input features, shared by all three models.
_FEATURE_COLUMNS = [
    'elo_diff', 'h_xg', 'a_xg', 'total_xg', 'pow_diff', 'rest_diff',
    'h_fat', 'a_fat', 'imp_h', 'imp_d', 'imp_a',
    'h_xi', 'a_xi', 'h2h_h_wr', 'form_diff',
]


def _fetch_match_rows(dsn: str) -> list:
    """Run the feature query and return every row, always closing the DB handles."""
    conn = psycopg2.connect(dsn)
    try:
        cur = conn.cursor()
        try:
            print("📊 Veri çekiliyor (Time-Series)...")
            start = time.time()
            cur.execute(_STRESS_TEST_QUERY)
            rows = cur.fetchall()
            print(f"{len(rows)} maç çekildi ({time.time()-start:.1f}s)")
            return rows
        finally:
            cur.close()
    finally:
        # Explicit close: the connection is not needed after fetchall(), and
        # this also covers error paths and the early-exit path of the caller.
        conn.close()


def _fatigue_factor(rest_days: pd.Series) -> np.ndarray:
    """Map rest days to a multiplier: <3 days -> 0.85, <5 days -> 0.95, else 1.0."""
    # np.select picks the first matching condition, mirroring an if/elif chain.
    return np.select([rest_days < 3, rest_days < 5], [0.85, 0.95], default=1.0)


def _build_dataset(rows: list) -> pd.DataFrame:
    """Clean the raw rows, derive features and targets, and order newest-first."""
    df = pd.DataFrame(rows, columns=_RAW_COLUMNS)

    # Cleaning: coerce everything except the first two columns ('id', 'h_id')
    # to numeric; unparsable values become NaN and are median-filled.
    for col in df.columns[2:]:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    # NOTE(review): the median is computed over the whole frame, i.e. before
    # the train/test split, so fill values are influenced by test-period rows.
    df = df.fillna(df.median(numeric_only=True))
    df = df[(df['oh'] > 1.0) & (df['oa'] > 1.0)]

    # The caller splits by position and needs newest-first order. Only the CTE
    # inside the query has an ORDER BY; the outer SELECT does not, so the
    # order of the fetched rows is not guaranteed. Sort explicitly (stable).
    df = df.sort_values('utc', ascending=False, kind='mergesort').reset_index(drop=True)

    # Features
    df['elo_diff'] = df['h_elo'] - df['a_elo']
    df['h_fat'] = _fatigue_factor(df['h_rest'])
    df['a_fat'] = _fatigue_factor(df['a_rest'])
    df['h_xg'] = df['h_home_goals'] * df['h_fat']
    df['a_xg'] = df['a_away_goals'] * df['a_fat']
    df['total_xg'] = df['h_xg'] + df['a_xg']
    df['rest_diff'] = df['h_rest'] - df['a_rest']
    df['pow_diff'] = (df['h_elo']/100)*df['h_fat'] - (df['a_elo']/100)*df['a_fat']
    df['form_diff'] = df['h_form_pts'] - df['a_form_pts']

    # Implied probabilities: inverse odds normalised by their sum (the overround).
    margin = (1/df['oh']) + (1/df['od']) + (1/df['oa'])
    df['imp_h'] = (1/df['oh']) / margin
    df['imp_d'] = (1/df['od']) / margin
    df['imp_a'] = (1/df['oa']) / margin

    # Targets. t_ms: 0 = home win, 1 = draw, 2 = away win.
    df['t_ms'] = np.where(df['sh'] > df['sa'], 0, np.where(df['sh'] < df['sa'], 2, 1))
    df['t_ou'] = ((df['sh'] + df['sa']) > 2.5).astype(int)
    df['t_btts'] = ((df['sh'] > 0) & (df['sa'] > 0)).astype(int)
    return df


def _settle(tally: dict, won: bool, payout: float) -> None:
    """Record one unit-stake bet: add `payout` on a win, subtract 1.0 on a loss."""
    tally['bet'] += 1
    if won:
        tally['won'] += 1
        tally['profit'] += payout
    else:
        tally['profit'] -= 1.0


def _simulate_bets(test_set: pd.DataFrame, model_ms, model_ou, model_btts) -> dict:
    """Replay the betting strategy over the test set and return per-market tallies."""
    results = {'ms': {'bet': 0, 'won': 0, 'profit': 0}, 'ou25': {'bet': 0, 'won': 0, 'profit': 0}, 'btts': {'bet': 0, 'won': 0, 'profit': 0}}

    # Predict once per model for the whole test set instead of once per row.
    features = test_set[_FEATURE_COLUMNS]
    ms_probs = model_ms.predict(features)      # one row of 3 class probabilities per match
    over_probs = model_ou.predict(features)
    btts_probs = model_btts.predict(features)

    odds = test_set[['oh', 'od', 'oa']].to_numpy(dtype=float)
    home_goals = test_set['sh'].to_numpy(dtype=float)
    away_goals = test_set['sa'].to_numpy(dtype=float)

    for probs, match_odds, h, a, p_over, p_btts in zip(
            ms_probs, odds, home_goals, away_goals, over_probs, btts_probs):
        # Match result: at most one bet per match, on the first pick (in
        # 1, X, 2 order) that clears both thresholds.
        for pick, prob, odd in zip(('1', 'X', '2'), probs, match_odds):
            if odd <= 1.0:
                continue
            edge = prob - (1/odd)
            # Value check: bet when the model probability exceeds the raw
            # inverse-odds probability by more than 0.05 and is above 0.45.
            if edge > 0.05 and prob > 0.45:
                won = (pick == '1' and h > a) or (pick == 'X' and h == a) or (pick == '2' and a > h)
                _settle(results['ms'], won, odd - 1.0)
                break

        # Over 2.5 goals and both-teams-to-score: bet above a 0.55 threshold.
        # NOTE(review): wins are paid a flat 0.85 rather than real market odds.
        if float(p_over) > 0.55:
            _settle(results['ou25'], (h + a) > 2.5, 0.85)
        if float(p_btts) > 0.55:
            _settle(results['btts'], h > 0 and a > 0, 0.85)
    return results


def _print_report(results: dict) -> None:
    """Print per-market bets, wins, win rate and profit, plus the overall verdict."""
    print("\n" + "="*60)
    print("📊 STRESS TEST SONUÇLARI (GELECEK TAHMİNİ)")
    print("="*60)
    for mkt in ['ms', 'ou25', 'btts']:
        r = results[mkt]
        wr = (r['won'] / r['bet'] * 100) if r['bet'] > 0 else 0
        print(f"{mkt.upper():<10} Oyn: {r['bet']:<5} Kaz: {r['won']:<5} WR: {wr:.1f}% Kâr: {r['profit']:+.2f}")
    total = sum(r['profit'] for r in results.values())
    print(f"\n💰 TOPLAM GELECEK KÂRI: {total:+.2f} Units")
    if total > 0:
        print("🟢 MODEL GÜVENİLİR! (Geleceği öngörebiliyor)")
    else:
        print("🔴 MODEL ZAYIF! (Sadece ezber yapmış olabilir)")


def run_stress_test():
    """Run the time-series stress test end to end and print the results.

    Steps:
      1. Fetch match rows from PostgreSQL (DSN from ``get_clean_dsn``).
      2. Clean the data, build features/targets and order newest-first.
      3. Use the newest 30% of matches as the test set and the older 70% as
         the training set.
      4. Train three LightGBM models (match result, over 2.5, BTTS) on the
         training set only.
      5. Simulate unit-stake bets on the test set and print a report.

    Returns:
        None. Exits early, after printing a message, when fewer than 1000
        training rows are available.
    """
    print("🧪 VQWEN v3 STRESS TEST (Time-Series Validation)")
    print("="*60)

    # ─── 1. FETCH DATA ───
    rows = _fetch_match_rows(get_clean_dsn())
    df = _build_dataset(rows)

    # ─── 2. TIME-BASED SPLIT ───
    # df is ordered newest-first: the first 30% (most recent matches) is the
    # test set, the remaining older 70% is the training set.
    split_point = int(len(df) * 0.30)
    test_set = df.iloc[:split_point].copy()
    train_set = df.iloc[split_point:].copy()
    print(f"\n📅 SPLIT INFO:")
    print(f" Train Set (Eski): {len(train_set)} maç")
    print(f" Test Set (YENİ/GELECEK): {len(test_set)} maç")
    if len(train_set) < 1000:
        print("❌ Yetersiz eğitim verisi.")
        return

    # ─── 3. TRAINING (past data only) ───
    print("\n🤖 Geçmiş verilerle model eğitiliyor...")
    train_x = train_set[_FEATURE_COLUMNS]
    model_ms = lgb.train({'objective': 'multiclass', 'num_class': 3, 'verbose': -1, 'num_leaves': 63},
                         lgb.Dataset(train_x, train_set['t_ms']), num_boost_round=500)
    model_ou = lgb.train({'objective': 'binary', 'verbose': -1},
                         lgb.Dataset(train_x, train_set['t_ou']), num_boost_round=500)
    model_btts = lgb.train({'objective': 'binary', 'verbose': -1},
                           lgb.Dataset(train_x, train_set['t_btts']), num_boost_round=500)
    print("✅ Model eğitimi tamamlandı. Şimdi Gelecek (Test Set) tahmin ediliyor...")

    # ─── 4. TEST (predict the held-out newer matches) ───
    results = _simulate_bets(test_set, model_ms, model_ou, model_btts)

    # ─── 5. RESULTS ───
    _print_report(results)
# Run the stress test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run_stress_test()