Files
iddaai-be/ai-engine/scripts/extract_training_data_colab.ipynb
fahricansecer 94c7a4481a
Deploy Iddaai Backend / build-and-deploy (push) Successful in 37s
main
2026-05-17 02:17:22 +03:00

167 lines
5.0 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": ["# Training Data Extraction — Google Colab\n", "SSH tunnel ile sunucuya bağlanır, DB'den 270K+ maç çeker, Drive'a kaydeder.\n"]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 1. Install the packages this notebook needs.\n",
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install -q sshtunnel psycopg2-binary pandas numpy\n",
"print('Paketler hazır')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 2. Mount Google Drive and make sure the project folder exists.\n",
"# DRIVE_DIR is reused by the later cells as the root for inputs and outputs.\n",
"from google.colab import drive\n",
"\n",
"drive.mount('/content/drive')\n",
"\n",
"import os\n",
"\n",
"DRIVE_DIR = '/content/drive/MyDrive/iddaai'\n",
"os.makedirs(name=DRIVE_DIR, exist_ok=True)\n",
"\n",
"print('Drive hazır:', DRIVE_DIR)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 3. Provide the SSH private key used to open the tunnel.\n",
"# On your Mac, run this in a terminal and copy the output:\n",
"#   cat ~/.ssh/id_ed25519\n",
"# Paste it below (including the BEGIN and END lines).\n",
"# WARNING: clear the pasted key from this cell before saving, sharing or committing the notebook.\n",
"\n",
"SSH_PRIVATE_KEY = \"\"\"-----BEGIN OPENSSH PRIVATE KEY-----\n",
"BURAYA_KEY_ICERIGINI_YAPISTIR\n",
"-----END OPENSSH PRIVATE KEY-----\"\"\"\n",
"\n",
"# Fail early with a clear message instead of an opaque SSH error in a later cell.\n",
"if 'BURAYA_KEY_ICERIGINI_YAPISTIR' in SSH_PRIVATE_KEY:\n",
"    raise ValueError('SSH_PRIVATE_KEY doldurulmamış: önce özel anahtarı yapıştır')\n",
"\n",
"# Write the key to disk. The file is created with mode 0o600 from the start so\n",
"# there is no window in which the private key is readable by other users.\n",
"key_path = '/root/.ssh/id_ed25519'\n",
"os.makedirs('/root/.ssh', mode=0o700, exist_ok=True)\n",
"key_fd = os.open(key_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)\n",
"with os.fdopen(key_fd, 'w') as f:\n",
"    f.write(SSH_PRIVATE_KEY.strip() + '\\n')\n",
"# The mode passed to os.open only applies to newly created files; enforce it\n",
"# as well when the file already existed with looser permissions.\n",
"os.chmod(key_path, 0o600)\n",
"print('SSH key hazır')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 4. Open the SSH tunnel and verify the database connection.\n",
"import getpass\n",
"from sshtunnel import SSHTunnelForwarder\n",
"import psycopg2\n",
"\n",
"# The DB password must not live in the notebook: take it from the environment\n",
"# or prompt for it interactively.\n",
"# NOTE(review): an earlier revision of this notebook had the password hard-coded\n",
"# and committed; that credential should be rotated.\n",
"DB_PASSWORD = os.environ.get('IDDAAI_DB_PASSWORD') or getpass.getpass('iddaai_user DB parolası: ')\n",
"# Keep it in the process environment so it only has to be typed once per session.\n",
"os.environ['IDDAAI_DB_PASSWORD'] = DB_PASSWORD\n",
"\n",
"tunnel = SSHTunnelForwarder(\n",
"    ('95.70.252.214', 2222),\n",
"    ssh_username='haruncan',\n",
"    ssh_pkey=key_path,\n",
"    remote_bind_address=('localhost', 5432),\n",
"    local_bind_address=('localhost', 15432),\n",
")\n",
"tunnel.start()\n",
"print(f'Tunnel açık: localhost:{tunnel.local_bind_port}')\n",
"\n",
"try:\n",
"    conn = psycopg2.connect(\n",
"        host='localhost',\n",
"        # Use the port the tunnel actually bound instead of repeating the literal.\n",
"        port=tunnel.local_bind_port,\n",
"        dbname='iddaai_db',\n",
"        user='iddaai_user',\n",
"        password=DB_PASSWORD,\n",
"    )\n",
"    try:\n",
"        with conn.cursor() as cur:\n",
"            cur.execute(\"SELECT COUNT(*) FROM matches WHERE status='FT' AND score_home IS NOT NULL\")\n",
"            print(f'DB bağlantısı OK — FT maç sayısı: {cur.fetchone()[0]:,}')\n",
"    finally:\n",
"        # Always release the test connection, even if the query fails.\n",
"        conn.close()\n",
"except Exception:\n",
"    # Do not leave a dangling tunnel behind when the connectivity check fails.\n",
"    tunnel.stop()\n",
"    raise"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 5. Locate extract_training_data.py inside the ai-engine copy on Drive.\n",
"# If it is missing, the ai-engine folder has to be uploaded to Drive first;\n",
"# this cell only prints the instructions, it does not copy anything itself.\n",
"import subprocess\n",
"\n",
"REPO_DIR = f'{DRIVE_DIR}/ai-engine'\n",
"SCRIPT = f'{REPO_DIR}/scripts/extract_training_data.py'\n",
"\n",
"if not os.path.exists(SCRIPT):\n",
"    print('Script bulunamadı — ai-engine klasörünü Drive a yükle:')\n",
"    # Raw string: the backslash is the shell escape for the space in \"Google Drive\"\n",
"    # and must be printed literally.\n",
"    print(r' Yerel makinede: cp -r /Users/piton/Documents/GitHub/iddaai/iddaai-be/ai-engine ~/Google\\ Drive/MyDrive/iddaai/')\n",
"else:\n",
"    print('Script hazır:', SCRIPT)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 6. Run the extraction script against the database reachable through the tunnel.\n",
"import sys, os\n",
"import getpass\n",
"import importlib.util\n",
"from urllib.parse import quote\n",
"\n",
"sys.path.insert(0, REPO_DIR)\n",
"\n",
"# Build the DB URL for the tunnel endpoint. The password is taken from the\n",
"# environment or prompted for, never stored in the notebook.\n",
"db_password = os.environ.get('IDDAAI_DB_PASSWORD') or getpass.getpass('iddaai_user DB parolası: ')\n",
"# Percent-encode characters that would otherwise be parsed as URL delimiters\n",
"# (such as '@', ':' or '/'); the listed punctuation is left untouched.\n",
"db_password_url = quote(db_password, safe='!$&*+,;=()')\n",
"os.environ['DATABASE_URL'] = f'postgresql://iddaai_user:{db_password_url}@localhost:15432/iddaai_db'\n",
"\n",
"# Write the output CSV to Drive.\n",
"OUTPUT_CSV = f'{DRIVE_DIR}/training_data_full.csv'\n",
"\n",
"# Load the script as a module from its file path.\n",
"# module_from_spec is the importlib API that creates a module object from a spec.\n",
"spec = importlib.util.spec_from_file_location('extract', SCRIPT)\n",
"mod = importlib.util.module_from_spec(spec)\n",
"spec.loader.exec_module(mod)\n",
"\n",
"# Redirect the script's paths to Drive before running it.\n",
"# NOTE(review): assumes main() reads OUTPUT_CSV / TOP_LEAGUES_PATH from module globals — confirm.\n",
"mod.OUTPUT_CSV = OUTPUT_CSV\n",
"mod.TOP_LEAGUES_PATH = f'{DRIVE_DIR}/qualified_leagues.json'\n",
"\n",
"mod.main()\n",
"print(f'\\nKaydedildi: {OUTPUT_CSV}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 7. Shut the SSH tunnel down, then sanity-check the exported CSV.\n",
"tunnel.stop()\n",
"print('Tunnel kapatıldı')\n",
"\n",
"import pandas as pd\n",
"\n",
"# File size on disk, converted from bytes to MB.\n",
"csv_bytes = os.path.getsize(OUTPUT_CSV)\n",
"# Only the first rows are read; they are enough to count the columns.\n",
"preview = pd.read_csv(OUTPUT_CSV, nrows=5)\n",
"column_count = len(preview.columns)\n",
"\n",
"print(f'CSV: {csv_bytes / 1024 / 1024:.1f} MB')\n",
"print(f'Kolonlar: {column_count}')"
]
}
],
"metadata": {
"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
"language_info": {"name": "python", "version": "3.10.0"}
},
"nbformat": 4,
"nbformat_minor": 4
}