fix: watchdog force-kill with SIGKILL fallback when process.exit is blocked

This commit is contained in:
2026-04-26 02:27:51 +03:00
parent 4c7930e9d2
commit 1623432039
+20 -4
View File
@@ -12,7 +12,7 @@ import { FeederService } from "../modules/feeder/feeder.service";
import { Logger } from "@nestjs/common"; import { Logger } from "@nestjs/common";
const WATCHDOG_INTERVAL_MS = 60_000; // Check every 1 minute const WATCHDOG_INTERVAL_MS = 60_000; // Check every 1 minute
const WATCHDOG_TIMEOUT_MS = 5 * 60_000; // Kill if no activity for 5 minutes const WATCHDOG_TIMEOUT_MS = 3 * 60_000; // Kill if no activity for 3 minutes
async function bootstrap() { async function bootstrap() {
process.env.FEEDER_MODE = "historical"; process.env.FEEDER_MODE = "historical";
@@ -31,15 +31,31 @@ async function bootstrap() {
const feederService = app.get(FeederService); const feederService = app.get(FeederService);
// ── Watchdog Timer ────────────────────────────────────────── // ── Watchdog Timer ──────────────────────────────────────────
// If the feeder hangs on an API call for 5+ minutes, force-exit // If the feeder hangs on an API call for 3+ minutes, force-kill
// so PM2 can restart and resume from where it left off in DB. // so PM2 can restart and resume from where it left off in DB.
// NOTE: process.exit(1) alone can be blocked by open handles
// (DB connections, HTTP sockets). We use process.kill(SIGKILL)
// as an unconditional fallback.
const watchdog = setInterval(() => { const watchdog = setInterval(() => {
const idleMs = Date.now() - feederService.lastActivityAt; const idleMs = Date.now() - feederService.lastActivityAt;
if (idleMs > WATCHDOG_TIMEOUT_MS) { if (idleMs > WATCHDOG_TIMEOUT_MS) {
logger.error( logger.error(
`🐕 WATCHDOG: No activity for ${Math.round(idleMs / 1000)}s. Force-exiting for PM2 restart...`, `🐕 WATCHDOG: No activity for ${Math.round(idleMs / 1000)}s. Force-killing for PM2 restart...`,
); );
process.exit(1);
// Try graceful exit first
try {
process.exit(1);
} catch {
// Ignored: the SIGKILL fallback below handles this case
}
// If process.exit didn't work (blocked by open handles),
// schedule an unconditional SIGKILL after 2 seconds
setTimeout(() => {
logger.error("🐕 WATCHDOG: process.exit blocked. Sending SIGKILL...");
process.kill(process.pid, "SIGKILL");
}, 2_000).unref();
} }
}, WATCHDOG_INTERVAL_MS); }, WATCHDOG_INTERVAL_MS);