Files
fahricansecer 2f0b85a0c7
Deploy Iddaai Backend / build-and-deploy (push) Failing after 18s
first (part 2: other directories)
2026-04-16 15:11:25 +03:00

138 lines
6.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
VQWEN Model Training Script (Optimized)
========================================
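Fast, efficient training of the MS (match result), OU2.5 and BTTS LightGBM models on up to 200k finished football matches (180k+ available) with rich features.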
"""
import os
import sys
import json
import time
import pickle
import psycopg2
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import lightgbm as lgb
# Absolute directory that contains this script.
AI_DIR = os.path.split(os.path.abspath(__file__))[0]
# Parent of AI_DIR; treated as the project root by the rest of this file.
ROOT_DIR = os.path.split(AI_DIR)[0]
# Search ROOT_DIR first when resolving imports.
sys.path.insert(0, ROOT_DIR)
def get_clean_dsn() -> str:
    """Return the PostgreSQL connection URI used to load training data.

    The ``VQWEN_DB_DSN`` environment variable takes precedence when it holds a
    non-blank value, so credentials can be injected by the deployment instead
    of being read from source control. When it is unset (or blank) the
    historical hard-coded DSN is returned, so existing invocations keep
    working unchanged.

    Returns:
        The connection URI with surrounding whitespace removed.
    """
    # NOTE(review): this fallback keeps a password in the repository. Rotate it
    # and drop the default once VQWEN_DB_DSN is configured everywhere.
    default_dsn = "postgresql://suggestbet:SuGGesT2026SecuRe@localhost:15432/boilerplate_db"
    override = os.environ.get("VQWEN_DB_DSN", "").strip()
    return override or default_dsn
def _fetch_training_rows(dsn: str) -> list:
    """Run the feature-extraction query and return every result row.

    The cursor and the connection are released as soon as the rows are
    fetched, and also when the query raises.
    """
    # Rolling features are computed over an ordered, limited inner subquery.
    # Putting LIMIT directly after AVG(...) would only limit the single
    # aggregated row and silently average the team's entire history.
    # Every rolling window is restricted to matches played strictly before
    # the current one (m2.mst_utc < m.mst_utc) so a row never sees its own or
    # later results.
    # NOTE(review): the football_team_stats columns are joined on the same
    # match id, so they look like in-match statistics; confirm they are
    # available at prediction time before relying on h_sot / a_sot.
    query = """
        SELECT
            m.id, m.home_team_id, m.away_team_id, m.score_home, m.score_away,
            -- Odds
            (SELECT os.odd_value
               FROM odd_categories oc
               JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
              WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = '1'
              LIMIT 1) AS odds_h,
            (SELECT os.odd_value
               FROM odd_categories oc
               JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
              WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = 'X'
              LIMIT 1) AS odds_d,
            (SELECT os.odd_value
               FROM odd_categories oc
               JOIN odd_selections os ON os.odd_category_db_id = oc.db_id
              WHERE oc.match_id = m.id AND oc.name ILIKE 'Maç Sonucu' AND os.name = '2'
              LIMIT 1) AS odds_a,
            -- Form (Last 5)
            COALESCE((
                SELECT AVG(recent.pts) FROM (
                    SELECT CASE WHEN m2.score_home > m2.score_away THEN 3
                                WHEN m2.score_home = m2.score_away THEN 1
                                ELSE 0 END AS pts
                      FROM matches m2
                     WHERE m2.home_team_id = m.home_team_id
                       AND m2.status = 'FT'
                       AND m2.mst_utc < m.mst_utc
                     ORDER BY m2.mst_utc DESC
                     LIMIT 5
                ) recent
            ), 0) AS home_form,
            COALESCE((
                SELECT AVG(recent.pts) FROM (
                    SELECT CASE WHEN m2.score_away > m2.score_home THEN 3
                                WHEN m2.score_away = m2.score_home THEN 1
                                ELSE 0 END AS pts
                      FROM matches m2
                     WHERE m2.away_team_id = m.away_team_id
                       AND m2.status = 'FT'
                       AND m2.mst_utc < m.mst_utc
                     ORDER BY m2.mst_utc DESC
                     LIMIT 5
                ) recent
            ), 0) AS away_form,
            -- Goal Averages (Last 10, venue specific)
            COALESCE((
                SELECT AVG(recent.goals) FROM (
                    SELECT m2.score_home AS goals
                      FROM matches m2
                     WHERE m2.home_team_id = m.home_team_id
                       AND m2.status = 'FT'
                       AND m2.mst_utc < m.mst_utc
                     ORDER BY m2.mst_utc DESC
                     LIMIT 10
                ) recent
            ), 1.2) AS h_avg_scored,
            COALESCE((
                SELECT AVG(recent.goals) FROM (
                    SELECT m2.score_away AS goals
                      FROM matches m2
                     WHERE m2.home_team_id = m.home_team_id
                       AND m2.status = 'FT'
                       AND m2.mst_utc < m.mst_utc
                     ORDER BY m2.mst_utc DESC
                     LIMIT 10
                ) recent
            ), 1.2) AS h_avg_conceded,
            COALESCE((
                SELECT AVG(recent.goals) FROM (
                    SELECT m2.score_away AS goals
                      FROM matches m2
                     WHERE m2.away_team_id = m.away_team_id
                       AND m2.status = 'FT'
                       AND m2.mst_utc < m.mst_utc
                     ORDER BY m2.mst_utc DESC
                     LIMIT 10
                ) recent
            ), 1.2) AS a_avg_scored,
            COALESCE((
                SELECT AVG(recent.goals) FROM (
                    SELECT m2.score_home AS goals
                      FROM matches m2
                     WHERE m2.away_team_id = m.away_team_id
                       AND m2.status = 'FT'
                       AND m2.mst_utc < m.mst_utc
                     ORDER BY m2.mst_utc DESC
                     LIMIT 10
                ) recent
            ), 1.2) AS a_avg_conceded,
            -- Team Stats
            COALESCE(ts_home.possession_percentage, 50) AS h_poss,
            COALESCE(ts_home.shots_on_target, 4) AS h_sot,
            COALESCE(ts_home.corners, 5) AS h_corners,
            COALESCE(ts_away.possession_percentage, 50) AS a_poss,
            COALESCE(ts_away.shots_on_target, 3) AS a_sot,
            COALESCE(ts_away.corners, 4) AS a_corners
        FROM matches m
        LEFT JOIN football_team_stats ts_home
               ON ts_home.match_id = m.id AND ts_home.team_id = m.home_team_id
        LEFT JOIN football_team_stats ts_away
               ON ts_away.match_id = m.id AND ts_away.team_id = m.away_team_id
        WHERE m.status = 'FT'
          AND m.score_home IS NOT NULL
          AND m.score_away IS NOT NULL
          AND m.sport = 'football'
          AND EXISTS (SELECT 1 FROM odd_categories oc WHERE oc.match_id = m.id)
        ORDER BY m.mst_utc DESC
        LIMIT 200000
    """
    conn = psycopg2.connect(dsn)
    try:
        # The cursor context manager closes the cursor; the connection itself
        # has to be closed explicitly.
        with conn.cursor() as cur:
            cur.execute(query)
            return cur.fetchall()
    finally:
        conn.close()


def _prepare_frame(rows) -> pd.DataFrame:
    """Turn raw query rows into a numeric frame with features and targets."""
    columns = [
        'id', 'h_id', 'a_id', 'sh', 'sa', 'oh', 'od', 'oa',
        'h_form', 'a_form', 'h_sc', 'h_co', 'a_sc', 'a_co',
        'h_poss', 'h_sot', 'h_corn', 'a_poss', 'a_sot', 'a_corn',
    ]
    df = pd.DataFrame(rows, columns=columns)

    # Scores are coerced as well: they drive every target column.
    for col in columns[3:]:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Non-positive odds would turn 1/odds into inf; treat them as missing so
    # they are imputed like any other gap.
    odds_cols = ['oh', 'od', 'oa']
    df[odds_cols] = df[odds_cols].where(df[odds_cols] > 0)

    # A row without both final scores has no label; imputing a median score
    # would fabricate a target, so such rows are dropped instead.
    df = df.dropna(subset=['sh', 'sa'])
    df = df.fillna(df.median(numeric_only=True))

    # --- Feature engineering ---
    df['h_xg'] = (df['h_sc'] + df['a_co']) / 2
    df['a_xg'] = (df['a_sc'] + df['h_co']) / 2
    df['total_xg'] = df['h_xg'] + df['a_xg']
    df['h_pow'] = (df['h_form']*10) + (df['h_sc']*5) - (df['h_co']*5) + (df['h_sot']*2)
    df['a_pow'] = (df['a_form']*10) + (df['a_sc']*5) - (df['a_co']*5) + (df['a_sot']*2)
    df['pow_diff'] = df['h_pow'] - df['a_pow']

    # Implied probabilities: inverse odds normalised by their sum, which
    # removes the bookmaker margin.
    inv_h, inv_d, inv_a = 1/df['oh'], 1/df['od'], 1/df['oa']
    margin = inv_h + inv_d + inv_a
    df['imp_h'] = inv_h / margin
    df['imp_d'] = inv_d / margin
    df['imp_a'] = inv_a / margin

    # --- Targets ---
    # t_ms: 0 = home win, 1 = draw, 2 = away win.
    df['t_ms'] = np.select([df['sh'] > df['sa'], df['sh'] < df['sa']], [0, 2], default=1)
    df['t_ou'] = ((df['sh'] + df['sa']) > 2.5).astype(int)
    df['t_btts'] = ((df['sh'] > 0) & (df['sa'] > 0)).astype(int)
    return df


def _train_models(df: pd.DataFrame) -> list:
    """Fit the three LightGBM boosters and return them as (name, model) pairs."""
    feats_ms = ['h_form', 'a_form', 'h_xg', 'a_xg', 'pow_diff', 'imp_h', 'imp_d', 'imp_a', 'h_sot', 'a_sot']
    X_ms, y_ms = df[feats_ms], df['t_ms']
    # NOTE(review): this is a random split over time-ordered matches; a
    # chronological hold-out may give a more honest validation loss.
    X_tr, X_te, y_tr, y_te = train_test_split(X_ms, y_ms, test_size=0.15, random_state=42)
    print("🤖 MS Modeli eğitiliyor...")
    model_ms = lgb.train(
        {'objective': 'multiclass', 'num_class': 3, 'metric': 'multi_logloss', 'verbose': -1, 'num_leaves': 63},
        lgb.Dataset(X_tr, y_tr),
        num_boost_round=1000,
        valid_sets=[lgb.Dataset(X_te, y_te)],
        callbacks=[lgb.early_stopping(50)],
    )

    # The two binary models are fitted on the full frame with a fixed number
    # of rounds and no validation set.
    binary_params = {'objective': 'binary', 'metric': 'binary_logloss', 'verbose': -1}

    feats_ou = ['h_xg', 'a_xg', 'total_xg', 'h_sot', 'a_sot']
    print("🤖 OU2.5 Modeli...")
    model_ou = lgb.train(dict(binary_params), lgb.Dataset(df[feats_ou], df['t_ou']), num_boost_round=500)

    feats_btts = ['h_xg', 'a_xg', 'h_sc', 'a_sc']
    print("🤖 BTTS Modeli...")
    model_btts = lgb.train(dict(binary_params), lgb.Dataset(df[feats_btts], df['t_btts']), num_boost_round=500)

    return [('ms', model_ms), ('ou25', model_ou), ('btts', model_btts)]


def train_vqwen():
    """Train the VQWEN match-result, over/under 2.5 and BTTS models.

    Steps: fetch up to 200k finished football matches with odds from
    PostgreSQL, build the feature/target frame, fit three LightGBM boosters
    and pickle them to ``<ROOT_DIR>/models/vqwen/vqwen_{ms,ou25,btts}.pkl``.

    The rolling form and goal features are true "last N before kick-off"
    windows. Any inference-side feature code must compute them the same way,
    and previously saved models should be retrained.

    Raises:
        ValueError: if the query yields no usable training rows.
    """
    print("🧠 VQWEN MODEL EĞİTİMİ (OPTIMIZED)")
    print("="*60)
    dsn = get_clean_dsn()

    # --- 1. Fetch data ---
    print("📊 Veritabanından özellikler çekiliyor (Limit 200k)...")
    start = time.time()
    rows = _fetch_training_rows(dsn)
    print(f"{len(rows)} maç çekildi ({time.time()-start:.1f}s)")

    # --- 2. Feature engineering ---
    df = _prepare_frame(rows)
    if df.empty:
        # Fail with a clear message instead of an opaque error from the
        # split or from LightGBM.
        raise ValueError("No training rows available: the match query returned no usable data")

    # --- 3. Models ---
    models = _train_models(df)

    # --- 4. Save ---
    mdir = os.path.join(ROOT_DIR, 'models', 'vqwen')
    os.makedirs(mdir, exist_ok=True)
    for nm, md in models:
        p = os.path.join(mdir, f'vqwen_{nm}.pkl')
        with open(p, 'wb') as f:
            pickle.dump(md, f)
        print(f"{p} kaydedildi.")
    print("\n🎉 VQWEN EĞİTİMİ BİTTİ!")
# Run the training pipeline only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    train_vqwen()