From 162343203916169b35fd5480a0800954e52cd138 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fahri=20Can=20Se=C3=A7er?=
Date: Sun, 26 Apr 2026 02:27:51 +0300
Subject: [PATCH] fix: watchdog force-kill with SIGKILL fallback when process.exit is blocked

---
Review notes (ignored by git am):
- The SIGKILL fallback timer is now armed BEFORE process.exit(1); code placed
  after a successful process.exit() never runs.
- The index line's post-image hash was not regenerated after this revision.

 src/scripts/run-feeder.ts | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/scripts/run-feeder.ts b/src/scripts/run-feeder.ts
index fd5e6ed..9fbcf00 100755
--- a/src/scripts/run-feeder.ts
+++ b/src/scripts/run-feeder.ts
@@ -12,7 +12,7 @@ import { FeederService } from "../modules/feeder/feeder.service";
 import { Logger } from "@nestjs/common";
 
 const WATCHDOG_INTERVAL_MS = 60_000; // Check every 1 minute
-const WATCHDOG_TIMEOUT_MS = 5 * 60_000; // Kill if no activity for 5 minutes
+const WATCHDOG_TIMEOUT_MS = 3 * 60_000; // Kill if no activity for 3 minutes
 
 async function bootstrap() {
   process.env.FEEDER_MODE = "historical";
@@ -31,14 +31,27 @@ async function bootstrap() {
   const feederService = app.get(FeederService);
 
   // ── Watchdog Timer ──────────────────────────────────────────
-  // If the feeder hangs on an API call for 5+ minutes, force-exit
+  // If the feeder hangs on an API call for 3+ minutes, force-kill
   // so PM2 can restart and resume from where it left off in DB.
+  // NOTE: process.exit(1) does not return when it succeeds, so the
+  // SIGKILL fallback timer is armed before calling it. The timer can
+  // only fire if process.exit() returns without ending the process
+  // (e.g. it was wrapped by a shutdown hook) - TODO confirm the cause.
   const watchdog = setInterval(() => {
     const idleMs = Date.now() - feederService.lastActivityAt;
     if (idleMs > WATCHDOG_TIMEOUT_MS) {
       logger.error(
-        `🐕 WATCHDOG: No activity for ${Math.round(idleMs / 1000)}s. Force-exiting for PM2 restart...`,
+        `🐕 WATCHDOG: No activity for ${Math.round(idleMs / 1000)}s. Force-killing for PM2 restart...`,
       );
+
+      // Arm the SIGKILL fallback first: process.exit() does not return
+      // when it succeeds, so a timer scheduled after it would never be
+      // created. unref() stops this timer from keeping the loop alive.
+      setTimeout(() => {
+        logger.error("🐕 WATCHDOG: process.exit blocked. Sending SIGKILL...");
+        process.kill(process.pid, "SIGKILL");
+      }, 2_000).unref();
+
       process.exit(1);
     }
   }, WATCHDOG_INTERVAL_MS);