{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Training Data Extraction — Google Colab\n",
    "SSH tunnel ile sunucuya bağlanır, DB'den 270K+ maç çeker, Drive'a kaydeder.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. Required packages (%pip installs into the running kernel's environment)\n",
    "%pip install -q sshtunnel psycopg2-binary pandas numpy\n",
    "print('Paketler hazır')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2. Mount Google Drive and make sure the output folder exists\n",
    "import os\n",
    "\n",
    "from google.colab import drive\n",
    "\n",
    "drive.mount('/content/drive')\n",
    "DRIVE_DIR = '/content/drive/MyDrive/iddaai'\n",
    "os.makedirs(DRIVE_DIR, exist_ok=True)\n",
    "print('Drive hazır:', DRIVE_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3. SSH private key upload\n",
    "# On your Mac, run this in a terminal and copy the output:\n",
    "#   cat ~/.ssh/id_ed25519\n",
    "# Paste it below (including the BEGIN and END lines).\n",
    "# WARNING: clear this cell again before saving or sharing the notebook,\n",
    "# otherwise the private key is stored inside the .ipynb file.\n",
    "\n",
    "SSH_PRIVATE_KEY = \"\"\"-----BEGIN OPENSSH PRIVATE KEY-----\n",
    "BURAYA_KEY_ICERIGINI_YAPISTIR\n",
    "-----END OPENSSH PRIVATE KEY-----\"\"\"\n",
    "\n",
    "# Fail fast instead of writing the placeholder to disk and getting an\n",
    "# obscure authentication error later when the tunnel starts\n",
    "if 'BURAYA_KEY_ICERIGINI_YAPISTIR' in SSH_PRIVATE_KEY:\n",
    "    raise ValueError('SSH_PRIVATE_KEY hâlâ yer tutucu içeriyor; gerçek anahtarı yapıştırın.')\n",
    "\n",
    "# Write the key to disk with owner-only permissions (required by SSH)\n",
    "key_path = '/root/.ssh/id_ed25519'\n",
    "os.makedirs('/root/.ssh', exist_ok=True)\n",
    "with open(key_path, 'w') as f:\n",
    "    f.write(SSH_PRIVATE_KEY.strip() + '\\n')\n",
    "os.chmod(key_path, 0o600)\n",
    "print('SSH key hazır')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4. Open the SSH tunnel and test the DB connection\n",
    "from getpass import getpass\n",
    "\n",
    "from sshtunnel import SSHTunnelForwarder\n",
    "import psycopg2\n",
    "\n",
    "# Connection settings shared with the extraction cell below\n",
    "DB_NAME = 'iddaai_db'\n",
    "DB_USER = 'iddaai_user'\n",
    "LOCAL_DB_PORT = 15432\n",
    "# Never hardcode the password: read it from the environment or prompt for it\n",
    "DB_PASSWORD = os.environ.get('IDDAAI_DB_PASSWORD') or getpass('DB şifresi: ')\n",
    "\n",
    "tunnel = SSHTunnelForwarder(\n",
    "    ('95.70.252.214', 2222),\n",
    "    ssh_username='haruncan',\n",
    "    ssh_pkey=key_path,\n",
    "    remote_bind_address=('localhost', 5432),\n",
    "    local_bind_address=('localhost', LOCAL_DB_PORT),\n",
    ")\n",
    "tunnel.start()\n",
    "print(f'Tunnel açık: localhost:{tunnel.local_bind_port}')\n",
    "\n",
    "try:\n",
    "    conn = psycopg2.connect(\n",
    "        host='localhost',\n",
    "        port=tunnel.local_bind_port,\n",
    "        dbname=DB_NAME,\n",
    "        user=DB_USER,\n",
    "        password=DB_PASSWORD,\n",
    "    )\n",
    "    try:\n",
    "        cur = conn.cursor()\n",
    "        cur.execute(\"SELECT COUNT(*) FROM matches WHERE status='FT' AND score_home IS NOT NULL\")\n",
    "        print(f'DB bağlantısı OK — FT maç sayısı: {cur.fetchone()[0]:,}')\n",
    "    finally:\n",
    "        # Always release the test connection, even if the query fails\n",
    "        conn.close()\n",
    "except Exception:\n",
    "    # Free the local port so this cell can be re-run after fixing the problem\n",
    "    tunnel.stop()\n",
    "    raise"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5. Locate extract_training_data.py on Drive\n",
    "# If it is missing, copy the repo to Drive first\n",
    "import subprocess\n",
    "\n",
    "REPO_DIR = f'{DRIVE_DIR}/ai-engine'\n",
    "SCRIPT = f'{REPO_DIR}/scripts/extract_training_data.py'\n",
    "\n",
    "if not os.path.exists(SCRIPT):\n",
    "    print('Script bulunamadı — ai-engine klasörünü Drive a yükle:')\n",
    "    # The doubled backslash prints the shell-escaped space in \"Google\\ Drive\"\n",
    "    print(' Yerel makinede: cp -r /Users/piton/Documents/GitHub/iddaai/iddaai-be/ai-engine ~/Google\\\\ Drive/MyDrive/iddaai/')\n",
    "else:\n",
    "    print('Script hazır:', SCRIPT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 6. Run the extraction\n",
    "import importlib.util\n",
    "import os\n",
    "import sys\n",
    "from urllib.parse import quote\n",
    "\n",
    "# Make the repo importable; the guard keeps re-runs from adding duplicates\n",
    "if REPO_DIR not in sys.path:\n",
    "    sys.path.insert(0, REPO_DIR)\n",
    "\n",
    "# Point DATABASE_URL at the tunnel; percent-encode the password so\n",
    "# characters such as '@', '/' or ':' cannot break the URL\n",
    "os.environ['DATABASE_URL'] = (\n",
    "    f'postgresql://{DB_USER}:{quote(DB_PASSWORD, safe=\"\")}'\n",
    "    f'@localhost:{tunnel.local_bind_port}/{DB_NAME}'\n",
    ")\n",
    "\n",
    "# Save the output CSV to Drive\n",
    "OUTPUT_CSV = f'{DRIVE_DIR}/training_data_full.csv'\n",
    "\n",
    "# Load the script as a module so its globals can be overridden before main().\n",
    "# module_from_spec is the importlib API for this; load_from_spec does not exist.\n",
    "spec = importlib.util.spec_from_file_location('extract', SCRIPT)\n",
    "mod = importlib.util.module_from_spec(spec)\n",
    "spec.loader.exec_module(mod)\n",
    "\n",
    "# Override the script's output locations\n",
    "mod.OUTPUT_CSV = OUTPUT_CSV\n",
    "mod.TOP_LEAGUES_PATH = f'{DRIVE_DIR}/qualified_leagues.json'\n",
    "\n",
    "mod.main()\n",
    "print(f'\\nKaydedildi: {OUTPUT_CSV}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 7. Close the tunnel\n",
    "import pandas as pd\n",
    "\n",
    "tunnel.stop()\n",
    "print('Tunnel kapatıldı')\n",
    "\n",
    "# Check the output file's size and column count (only 5 rows are read)\n",
    "size_mb = os.path.getsize(OUTPUT_CSV) / 1024 / 1024\n",
    "df = pd.read_csv(OUTPUT_CSV, nrows=5)\n",
    "print(f'CSV: {size_mb:.1f} MB')\n",
    "print(f'Kolonlar: {len(df.columns)}')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
  "language_info": {"name": "python", "version": "3.10.0"}
 },
 "nbformat": 4,
 "nbformat_minor": 4
}