From 659110c806e22597f248b54bff5f527196090e97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fahri=20Can=20Se=C3=A7er?= Date: Mon, 25 May 2026 22:29:05 +0300 Subject: [PATCH] Update handoff doc + add backtest checkpoint/resume --- ai-engine/scripts/diagnostic_backtest.py | 68 +++++++++++++++++++++++- mds/SESSION_HANDOFF.md | 46 +++++++++++++++- 2 files changed, 111 insertions(+), 3 deletions(-) diff --git a/ai-engine/scripts/diagnostic_backtest.py b/ai-engine/scripts/diagnostic_backtest.py index 1d9685d..12f8eb7 100644 --- a/ai-engine/scripts/diagnostic_backtest.py +++ b/ai-engine/scripts/diagnostic_backtest.py @@ -579,6 +579,52 @@ def write_text_summary(rows: List[Dict], agg: Dict, diag: Dict, # ── Main loop ───────────────────────────────────────────────────────── +def _checkpoint_paths(args) -> Tuple[str, str]: + """Stable checkpoint paths derived from the run's date window so a + re-run with the same args picks up the same checkpoint.""" + key = f"{args.start or 'd' + str(args.days)}_{args.end or 'now'}_{args.max_matches}" + key = key.replace("-", "").replace(":", "") + ckpt_csv = os.path.join(REPORTS_DIR, f"_checkpoint_{key}.csv") + ckpt_state = os.path.join(REPORTS_DIR, f"_checkpoint_{key}.state") + return ckpt_csv, ckpt_state + + +def _load_checkpoint(args) -> Tuple[List[Dict], set]: + """Read partial CSV + processed-IDs set if a previous run was interrupted.""" + ckpt_csv, _ = _checkpoint_paths(args) + if not os.path.exists(ckpt_csv): + return [], set() + import csv + rows: List[Dict] = [] + seen: set = set() + try: + with open(ckpt_csv, "r", encoding="utf-8", newline="") as f: + reader = csv.DictReader(f) + for row in reader: + rows.append(row) + seen.add(str(row.get("match_id") or "")) + except Exception as e: + print(f" checkpoint read failed ({e}); starting fresh") + return [], set() + return rows, seen + + +def _flush_checkpoint(args, rows: List[Dict]) -> None: + """Atomic-ish overwrite of the partial CSV. 
Cheap enough at every 100 rows.""" + if not rows: + return + ckpt_csv, _ = _checkpoint_paths(args) + import csv + tmp = ckpt_csv + ".tmp" + fields = list(rows[0].keys()) + with open(tmp, "w", encoding="utf-8", newline="") as f: + w = csv.DictWriter(f, fieldnames=fields) + w.writeheader() + for r in rows: + w.writerow(r) + os.replace(tmp, ckpt_csv) + + def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--days", type=int, default=14, @@ -588,6 +634,10 @@ def main(): parser.add_argument("--start", help="Start date YYYY-MM-DD (overrides --days)") parser.add_argument("--end", help="End date YYYY-MM-DD") parser.add_argument("--progress-interval", type=int, default=50) + parser.add_argument("--checkpoint-every", type=int, default=100, + help="Flush partial CSV every N matches (default 100)") + parser.add_argument("--no-resume", action="store_true", + help="Ignore any prior checkpoint and start fresh") args = parser.parse_args() print("=" * 70) @@ -614,12 +664,20 @@ def main(): print("No matches to process. 
Exiting.") return + # ── Resume from prior checkpoint if available ── rows: List[Dict[str, Any]] = [] + seen_ids: set = set() + if not args.no_resume: + rows, seen_ids = _load_checkpoint(args) + if rows: + print(f" Resuming from checkpoint: {len(rows)} matches already done") errors: List[Tuple[str, str]] = [] t0 = time.time() for i, m in enumerate(matches, start=1): mid = str(m["match_id"]) + if mid in seen_ids: + continue try: pkg = orch.analyze_match(mid) if pkg is None: @@ -627,20 +685,26 @@ def main(): row = capture_bet_row(m, pkg) rows.append(row) except KeyboardInterrupt: - print("\nInterrupted, writing partial results...") + print("\nInterrupted, flushing checkpoint...") + _flush_checkpoint(args, rows) break except Exception as e: errors.append((mid, str(e))) if len(errors) <= 5: traceback.print_exc() + # ── Periodic checkpoint flush so a crash doesn't lose everything ── + if i % args.checkpoint_every == 0: + _flush_checkpoint(args, rows) + if i % args.progress_interval == 0: elapsed = time.time() - t0 rate = i / elapsed eta = (n - i) / rate if rate else 0 playable_so_far = sum(1 for r in rows if r["playable"]) print(f" [{i}/{n}] rate={rate:.1f}/s eta={eta/60:.1f}min " - f"playable={playable_so_far} errors={len(errors)}") + f"playable={playable_so_far} errors={len(errors)} " + f"(checkpoint at every {args.checkpoint_every})") print(f"\nProcessed {len(rows)} rows in {(time.time()-t0):.1f}s " f"({len(errors)} errors)") diff --git a/mds/SESSION_HANDOFF.md b/mds/SESSION_HANDOFF.md index 0b8a3a1..8f3f008 100644 --- a/mds/SESSION_HANDOFF.md +++ b/mds/SESSION_HANDOFF.md @@ -1,10 +1,54 @@ # SESSION HANDOFF — iddaai sistem durumu -**Son güncelleme**: 2026-05-25 ~20:30 +**Son güncelleme**: 2026-05-25 ~23:00 (Windows'tan Mac'e geçiş öncesi) **Hedef**: Başka makinede / yeni Claude session'ında bu doc tek başına okunup işin nerede kaldığı anlaşılabilmeli. 
--- +## 🚨 EN SON DURUM (Mac'e geçmeden önce oku) + +### Validation backtest ÖLDÜ +- Pencere: 2026-05-01 → 2026-05-14, 1500 maç +- **1200/1500'de SSH tunnel düşünce process sessizce öldü** +- **CSV kayıp** — script eski versiyondu, sadece sonda yazıyordu +- Sebep: localhost:5432 erişimi kayboldu, psycopg2 connection error + +### Script DÜZELTİLDİ (Mac'te kullanılabilir) +`scripts/diagnostic_backtest.py` artık **crash-safe**: +- `--checkpoint-every 100` → her 100 maçta partial CSV diske yazılır +- Crash sonrası tekrar koşulunca **otomatik kaldığı yerden devam eder** +- Checkpoint dosyası: `reports/_checkpoint_<key>.csv` (`<key>` = start_end_max-matches, tireler çıkarılmış; örn. `_checkpoint_20260501_20260514_1500.csv`) +- `--no-resume` flag fresh başlamak için + +### Git push BEKLİYOR +- 36 dosya **commit edildi (local)** — bkz "Bu seansta yapılan KOD değişiklikleri" +- Push **auth hatası** verdi (gitea credentials cached değil) +- **User Mac'te push yapacak** (Gitea Personal Access Token gerekli, repo write yetkisi) + +### Mac'te yapılacaklar (öncelik sırasıyla) +1. Repo'yu clone et veya OneDrive'dan kopyala (eğer Mac OneDrive senkronize ediyorsa) +2. `git push origin main` ile pending commit'i remote'a yolla +3. SSH tunnel kur (Pi @ 95.70.252.214, port 2222) → DB için tunnel localhost:5432 +4. Yeni Claude session'ı başlat, bu dosyayı oku, devam et +5.
Backtest'i tekrar koştur (önceki koşu eski versiyonla yapılmıştı, script şimdi crash-safe) + ```bash + cd ai-engine + export DATABASE_URL='postgresql://iddaai_user:IddaA1_S4crET!@localhost:5432/iddaai_db?schema=public' + export PYTHONIOENCODING=utf-8 + python scripts/diagnostic_backtest.py --start 2026-05-01 --end 2026-05-14 --max-matches 1500 + # ölürse, aynı komutu tekrar koş — checkpoint'ten devam eder + ``` + +### Mevcut sağlam veri +Validation kayıp ama elimizde **in-sample backtest** ve **grid search** çıktıları var: +- `reports/diagnostic_backtest_20260525_035649.{csv,json,txt}` — 1000 maç, May 11-24 +- `reports/filter_optimization_patch.json` — grid search winners +- Bu data ile in-sample analiz tamamlandı, validation eksik + +--- + +--- + ## 🎯 Üst-seviye hedef Sistem **maç başı-1 saat** kullanıcı tetiklemesiyle çalışacak. Bahis uzmanı seviyesinde: