fix: watchdog force-kill with SIGKILL fallback when process.exit is blocked

This commit is contained in:
2026-04-26 02:27:51 +03:00
parent 4c7930e9d2
commit 1623432039
+19 -3
View File
@@ -12,7 +12,7 @@ import { FeederService } from "../modules/feeder/feeder.service";
import { Logger } from "@nestjs/common";
const WATCHDOG_INTERVAL_MS = 60_000; // Check every 1 minute
const WATCHDOG_TIMEOUT_MS = 5 * 60_000; // Kill if no activity for 5 minutes
const WATCHDOG_TIMEOUT_MS = 3 * 60_000; // Kill if no activity for 3 minutes
async function bootstrap() {
process.env.FEEDER_MODE = "historical";
@@ -31,15 +31,31 @@ async function bootstrap() {
const feederService = app.get(FeederService);
// ── Watchdog Timer ──────────────────────────────────────────
// If the feeder hangs on an API call for 5+ minutes, force-exit
// If the feeder hangs on an API call for 3+ minutes, force-kill
// so PM2 can restart and resume from where it left off in DB.
// NOTE: process.exit(1) alone can be blocked by open handles
// (DB connections, HTTP sockets). We use process.kill(SIGKILL)
// as an unconditional fallback.
// Watchdog: every WATCHDOG_INTERVAL_MS, measure how long it has been since
// feederService.lastActivityAt and terminate the process once that idle
// time exceeds WATCHDOG_TIMEOUT_MS.
const watchdog = setInterval(() => {
// Time elapsed since the feeder's last recorded activity
// (presumably lastActivityAt is an epoch-millisecond timestamp — verify in FeederService).
const idleMs = Date.now() - feederService.lastActivityAt;
if (idleMs > WATCHDOG_TIMEOUT_MS) {
logger.error(
`🐕 WATCHDOG: No activity for ${Math.round(idleMs / 1000)}s. Force-exiting for PM2 restart...`,
`🐕 WATCHDOG: No activity for ${Math.round(idleMs / 1000)}s. Force-killing for PM2 restart...`,
);
// Try a normal exit with a non-zero status first.
// NOTE(review): process.exit() normally does not return, so the code after
// this try/catch only runs when the call returns or throws (for example if
// process.exit has been wrapped by a library) — confirm that this is the
// situation the fallback is meant to cover.
try {
process.exit(1);
} catch {
// Deliberately ignored: the SIGKILL fallback below takes over.
}
// If the process is still alive after process.exit(1), schedule an
// unconditional SIGKILL of our own PID 2 seconds from now. The timer is
// unref()'d so it does not, by itself, keep the event loop running.
setTimeout(() => {
logger.error("🐕 WATCHDOG: process.exit blocked. Sending SIGKILL...");
process.kill(process.pid, "SIGKILL");
}, 2_000).unref();
}
}, WATCHDOG_INTERVAL_MS);