Update handoff doc + add backtest checkpoint/resume
Deploy Iddaai Backend / build-and-deploy (push) Successful in 4m32s
Deploy Iddaai Backend / build-and-deploy (push) Successful in 4m32s
This commit is contained in:
@@ -579,6 +579,52 @@ def write_text_summary(rows: List[Dict], agg: Dict, diag: Dict,
|
||||
|
||||
|
||||
# ── Main loop ─────────────────────────────────────────────────────────
|
||||
def _checkpoint_paths(args) -> Tuple[str, str]:
|
||||
"""Stable checkpoint paths derived from the run's date window so a
|
||||
re-run with the same args picks up the same checkpoint."""
|
||||
key = f"{args.start or 'd' + str(args.days)}_{args.end or 'now'}_{args.max_matches}"
|
||||
key = key.replace("-", "").replace(":", "")
|
||||
ckpt_csv = os.path.join(REPORTS_DIR, f"_checkpoint_{key}.csv")
|
||||
ckpt_state = os.path.join(REPORTS_DIR, f"_checkpoint_{key}.state")
|
||||
return ckpt_csv, ckpt_state
|
||||
|
||||
|
||||
def _load_checkpoint(args) -> Tuple[List[Dict], set]:
|
||||
"""Read partial CSV + processed-IDs set if a previous run was interrupted."""
|
||||
ckpt_csv, _ = _checkpoint_paths(args)
|
||||
if not os.path.exists(ckpt_csv):
|
||||
return [], set()
|
||||
import csv
|
||||
rows: List[Dict] = []
|
||||
seen: set = set()
|
||||
try:
|
||||
with open(ckpt_csv, "r", encoding="utf-8", newline="") as f:
|
||||
reader = csv.DictReader(f)
|
||||
for row in reader:
|
||||
rows.append(row)
|
||||
seen.add(str(row.get("match_id") or ""))
|
||||
except Exception as e:
|
||||
print(f" checkpoint read failed ({e}); starting fresh")
|
||||
return [], set()
|
||||
return rows, seen
|
||||
|
||||
|
||||
def _flush_checkpoint(args, rows: List[Dict]) -> None:
|
||||
"""Atomic-ish overwrite of the partial CSV. Cheap enough at every 100 rows."""
|
||||
if not rows:
|
||||
return
|
||||
ckpt_csv, _ = _checkpoint_paths(args)
|
||||
import csv
|
||||
tmp = ckpt_csv + ".tmp"
|
||||
fields = list(rows[0].keys())
|
||||
with open(tmp, "w", encoding="utf-8", newline="") as f:
|
||||
w = csv.DictWriter(f, fieldnames=fields)
|
||||
w.writeheader()
|
||||
for r in rows:
|
||||
w.writerow(r)
|
||||
os.replace(tmp, ckpt_csv)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--days", type=int, default=14,
|
||||
@@ -588,6 +634,10 @@ def main():
|
||||
parser.add_argument("--start", help="Start date YYYY-MM-DD (overrides --days)")
|
||||
parser.add_argument("--end", help="End date YYYY-MM-DD")
|
||||
parser.add_argument("--progress-interval", type=int, default=50)
|
||||
parser.add_argument("--checkpoint-every", type=int, default=100,
|
||||
help="Flush partial CSV every N matches (default 100)")
|
||||
parser.add_argument("--no-resume", action="store_true",
|
||||
help="Ignore any prior checkpoint and start fresh")
|
||||
args = parser.parse_args()
|
||||
|
||||
print("=" * 70)
|
||||
@@ -614,12 +664,20 @@ def main():
|
||||
print("No matches to process. Exiting.")
|
||||
return
|
||||
|
||||
# ── Resume from prior checkpoint if available ──
|
||||
rows: List[Dict[str, Any]] = []
|
||||
seen_ids: set = set()
|
||||
if not args.no_resume:
|
||||
rows, seen_ids = _load_checkpoint(args)
|
||||
if rows:
|
||||
print(f" Resuming from checkpoint: {len(rows)} matches already done")
|
||||
errors: List[Tuple[str, str]] = []
|
||||
t0 = time.time()
|
||||
|
||||
for i, m in enumerate(matches, start=1):
|
||||
mid = str(m["match_id"])
|
||||
if mid in seen_ids:
|
||||
continue
|
||||
try:
|
||||
pkg = orch.analyze_match(mid)
|
||||
if pkg is None:
|
||||
@@ -627,20 +685,26 @@ def main():
|
||||
row = capture_bet_row(m, pkg)
|
||||
rows.append(row)
|
||||
except KeyboardInterrupt:
|
||||
print("\nInterrupted, writing partial results...")
|
||||
print("\nInterrupted, flushing checkpoint...")
|
||||
_flush_checkpoint(args, rows)
|
||||
break
|
||||
except Exception as e:
|
||||
errors.append((mid, str(e)))
|
||||
if len(errors) <= 5:
|
||||
traceback.print_exc()
|
||||
|
||||
# ── Periodic checkpoint flush so a crash doesn't lose everything ──
|
||||
if i % args.checkpoint_every == 0:
|
||||
_flush_checkpoint(args, rows)
|
||||
|
||||
if i % args.progress_interval == 0:
|
||||
elapsed = time.time() - t0
|
||||
rate = i / elapsed
|
||||
eta = (n - i) / rate if rate else 0
|
||||
playable_so_far = sum(1 for r in rows if r["playable"])
|
||||
print(f" [{i}/{n}] rate={rate:.1f}/s eta={eta/60:.1f}min "
|
||||
f"playable={playable_so_far} errors={len(errors)}")
|
||||
f"playable={playable_so_far} errors={len(errors)} "
|
||||
f"(checkpoint at every {args.checkpoint_every})")
|
||||
|
||||
print(f"\nProcessed {len(rows)} rows in {(time.time()-t0):.1f}s "
|
||||
f"({len(errors)} errors)")
|
||||
|
||||
Reference in New Issue
Block a user