fix: watchdog force-kill with SIGKILL fallback when process.exit is blocked
This commit is contained in:
@@ -12,7 +12,7 @@ import { FeederService } from "../modules/feeder/feeder.service";
|
||||
import { Logger } from "@nestjs/common";
|
||||
|
||||
// Watchdog tuning. Both values are in milliseconds.
// WATCHDOG_INTERVAL_MS: how often the watchdog inspects the feeder's last-activity timestamp.
const WATCHDOG_INTERVAL_MS = 60_000; // Check every 1 minute
// WATCHDOG_TIMEOUT_MS: idle time after which the watchdog terminates the process.
// Declared exactly once: a second `const` with the same name is a SyntaxError.
const WATCHDOG_TIMEOUT_MS = 3 * 60_000; // Kill if no activity for 3 minutes
|
||||
|
||||
async function bootstrap() {
|
||||
process.env.FEEDER_MODE = "historical";
|
||||
@@ -31,15 +31,31 @@ async function bootstrap() {
|
||||
const feederService = app.get(FeederService);
|
||||
|
||||
// ── Watchdog Timer ──────────────────────────────────────────
// Every WATCHDOG_INTERVAL_MS, compare the current time with
// feederService.lastActivityAt. If the feeder has been idle for longer
// than WATCHDOG_TIMEOUT_MS, terminate the process so PM2 can restart it
// and resume from where it left off in DB.
//
// NOTE: process.exit() does not return when it succeeds, so any code
// placed after it only runs if the exit did NOT happen. The SIGKILL
// fallback is therefore armed BEFORE process.exit(1) is called; it covers
// the cases where process.exit() returns or throws instead of terminating
// the process (for example because it has been overridden).
const watchdog = setInterval(() => {
  const idleMs = Date.now() - feederService.lastActivityAt;
  if (idleMs > WATCHDOG_TIMEOUT_MS) {
    logger.error(
      `🐕 WATCHDOG: No activity for ${Math.round(idleMs / 1000)}s. Force-killing for PM2 restart...`,
    );

    // Arm the unconditional fallback first: if the process is still alive
    // 2 seconds from now, send SIGKILL to ourselves. unref() keeps this
    // timer from holding the event loop open on its own.
    setTimeout(() => {
      logger.error("🐕 WATCHDOG: process.exit blocked. Sending SIGKILL...");
      process.kill(process.pid, "SIGKILL");
    }, 2_000).unref();

    // Try a normal exit; on success nothing below this call executes.
    try {
      process.exit(1);
    } catch (err) {
      // Do not swallow silently: record why the exit failed. The SIGKILL
      // timer armed above remains in place as the fallback.
      logger.error(`🐕 WATCHDOG: process.exit threw: ${String(err)}`);
    }
  }
}, WATCHDOG_INTERVAL_MS);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user