167 lines
5.0 KiB
Plaintext
167 lines
5.0 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": ["# Training Data Extraction — Google Colab\n", "SSH tunnel ile sunucuya bağlanır, DB'den 270K+ maç çeker, Drive'a kaydeder.\n"]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# 1. Install the required packages\n",
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"# paramiko is capped below 4.0: sshtunnel 0.4.0 still references paramiko.DSSKey,\n",
"# which paramiko 4.0 removed, so an uncapped install breaks SSHTunnelForwarder.\n",
"%pip install -q sshtunnel 'paramiko<4' psycopg2-binary pandas numpy\n",
"print('Paketler hazır')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# 2. Mount Google Drive and make sure the project folder exists\n",
"import os\n",
"from google.colab import drive\n",
"\n",
"# Every artifact produced by this notebook is stored under this Drive folder.\n",
"DRIVE_DIR = '/content/drive/MyDrive/iddaai'\n",
"\n",
"drive.mount('/content/drive')\n",
"os.makedirs(DRIVE_DIR, exist_ok=True)\n",
"print('Drive hazır:', DRIVE_DIR)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# 3. Write the SSH private key to disk\n",
"# On your Mac, run `cat ~/.ssh/id_ed25519` in a terminal and paste the output below\n",
"# (including the BEGIN and END lines).\n",
"# WARNING: the pasted key is stored inside the notebook; clear it before saving or sharing.\n",
"import os\n",
"\n",
"SSH_PRIVATE_KEY = \"\"\"-----BEGIN OPENSSH PRIVATE KEY-----\n",
"BURAYA_KEY_ICERIGINI_YAPISTIR\n",
"-----END OPENSSH PRIVATE KEY-----\"\"\"\n",
"\n",
"# Fail fast when the placeholder was never replaced; otherwise the problem only\n",
"# surfaces later, when the tunnel cell tries to load the key.\n",
"if 'BURAYA_KEY_ICERIGINI_YAPISTIR' in SSH_PRIVATE_KEY:\n",
"    raise ValueError('SSH_PRIVATE_KEY içinde hâlâ placeholder var; gerçek key içeriğini yapıştır.')\n",
"\n",
"key_path = '/root/.ssh/id_ed25519'\n",
"os.makedirs(os.path.dirname(key_path), mode=0o700, exist_ok=True)\n",
"\n",
"# Create the file with mode 0600 from the start so the key is never readable by\n",
"# other users, not even for the moment between writing it and chmod.\n",
"key_fd = os.open(key_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)\n",
"with os.fdopen(key_fd, 'w') as key_file:\n",
"    key_file.write(SSH_PRIVATE_KEY.strip() + '\\n')\n",
"# os.open leaves the mode of an already existing file untouched, so tighten it explicitly.\n",
"os.chmod(key_path, 0o600)\n",
"print('SSH key hazır')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# 4. Open the SSH tunnel and verify the DB connection\n",
"import os\n",
"from contextlib import closing\n",
"from getpass import getpass\n",
"\n",
"import psycopg2\n",
"from sshtunnel import SSHTunnelForwarder\n",
"\n",
"# Credentials must not live in the notebook: read the password from the\n",
"# environment or prompt for it.\n",
"# NOTE: this password used to be hardcoded here, so it should be rotated on the server.\n",
"DB_PASSWORD = os.environ.get('IDDAAI_DB_PASSWORD') or getpass('iddaai_user DB şifresi: ')\n",
"\n",
"# Stop a tunnel left over from an earlier run of this cell; otherwise local port\n",
"# 15432 is still bound and start() fails.\n",
"if isinstance(globals().get('tunnel'), SSHTunnelForwarder) and tunnel.is_active:\n",
"    tunnel.stop()\n",
"\n",
"tunnel = SSHTunnelForwarder(\n",
"    ('95.70.252.214', 2222),\n",
"    ssh_username='haruncan',\n",
"    ssh_pkey=key_path,\n",
"    remote_bind_address=('localhost', 5432),\n",
"    local_bind_address=('localhost', 15432),\n",
")\n",
"tunnel.start()\n",
"print(f'Tunnel açık: localhost:{tunnel.local_bind_port}')\n",
"\n",
"try:\n",
"    # closing() releases the connection even when the query raises.\n",
"    with closing(psycopg2.connect(\n",
"        host='localhost',\n",
"        port=tunnel.local_bind_port,\n",
"        dbname='iddaai_db',\n",
"        user='iddaai_user',\n",
"        password=DB_PASSWORD,\n",
"    )) as conn:\n",
"        with conn.cursor() as cur:\n",
"            cur.execute(\"SELECT COUNT(*) FROM matches WHERE status='FT' AND score_home IS NOT NULL\")\n",
"            print(f'DB bağlantısı OK — FT maç sayısı: {cur.fetchone()[0]:,}')\n",
"except Exception:\n",
"    # Without a working DB connection the tunnel is useless to the later cells;\n",
"    # close it so that a re-run of this cell starts from a clean state.\n",
"    tunnel.stop()\n",
"    raise\n",
"\n",
"# Remember the password only after it has been proven to work, so later cells\n",
"# can reuse it without prompting again.\n",
"os.environ['IDDAAI_DB_PASSWORD'] = DB_PASSWORD"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# 5. Make sure extract_training_data.py is available on Drive\n",
"# The ai-engine folder has to be copied to Drive beforehand if it is not there yet.\n",
"import os\n",
"\n",
"REPO_DIR = f'{DRIVE_DIR}/ai-engine'\n",
"SCRIPT = f'{REPO_DIR}/scripts/extract_training_data.py'\n",
"\n",
"if not os.path.exists(SCRIPT):\n",
"    print('Script bulunamadı — ai-engine klasörünü Drive a yükle:')\n",
"    # The backslash is written as an explicit escape; a bare backslash before a\n",
"    # space is an invalid escape sequence in a normal string literal.\n",
"    print(' Yerel makinede: cp -r /Users/piton/Documents/GitHub/iddaai/iddaai-be/ai-engine ~/Google\\\\ Drive/MyDrive/iddaai/')\n",
"    # Stop here so that Run All does not continue into the extraction cell,\n",
"    # which cannot work without the script.\n",
"    raise FileNotFoundError(SCRIPT)\n",
"\n",
"print('Script hazır:', SCRIPT)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# 6. Run the extraction\n",
"import importlib.util\n",
"import os\n",
"import sys\n",
"from getpass import getpass\n",
"from urllib.parse import quote\n",
"\n",
"sys.path.insert(0, REPO_DIR)\n",
"\n",
"# Point the script at the database through the tunnel. The password comes from\n",
"# the environment or a prompt, never from the notebook itself.\n",
"DB_PASSWORD = os.environ.get('IDDAAI_DB_PASSWORD') or getpass('iddaai_user DB şifresi: ')\n",
"# Percent-encode only the characters that would break the URL ('@', ':', '/', ...);\n",
"# RFC 3986 sub-delimiters such as '!' stay as they are.\n",
"# NOTE(review): assumes the script hands DATABASE_URL to a URL-aware driver that\n",
"# decodes percent-escapes — confirm against extract_training_data.py.\n",
"encoded_password = quote(DB_PASSWORD, safe=\"!$&'()*+,;=\")\n",
"os.environ['DATABASE_URL'] = f'postgresql://iddaai_user:{encoded_password}@localhost:15432/iddaai_db'\n",
"\n",
"# Save the output CSV on Drive\n",
"OUTPUT_CSV = f'{DRIVE_DIR}/training_data_full.csv'\n",
"\n",
"# Load the script as a module and run its main().\n",
"# importlib.util has no load_from_spec; module_from_spec is the function that\n",
"# creates the module object for a spec.\n",
"spec = importlib.util.spec_from_file_location('extract', SCRIPT)\n",
"mod = importlib.util.module_from_spec(spec)\n",
"spec.loader.exec_module(mod)\n",
"\n",
"# Redirect the script's output paths to Drive\n",
"mod.OUTPUT_CSV = OUTPUT_CSV\n",
"mod.TOP_LEAGUES_PATH = f'{DRIVE_DIR}/qualified_leagues.json'\n",
"\n",
"mod.main()\n",
"print(f'\\nKaydedildi: {OUTPUT_CSV}')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# 7. Close the tunnel\n",
"tunnel.stop()\n",
"print('Tunnel kapatıldı')\n",
"\n",
"# Report the size of the CSV and how many columns it has\n",
"csv_bytes = os.path.getsize(OUTPUT_CSV)\n",
"size_mb = csv_bytes / 1024 / 1024\n",
"\n",
"import pandas as pd\n",
"\n",
"# Five rows are enough to inspect the header\n",
"csv_head = pd.read_csv(OUTPUT_CSV, nrows=5)\n",
"column_count = len(csv_head.columns)\n",
"\n",
"print(f'CSV: {size_mb:.1f} MB')\n",
"print(f'Kolonlar: {column_count}')"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
|
||
"language_info": {"name": "python", "version": "3.10.0"}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 4
|
||
}
|