fix(predictions): circuit breaker resilience + graceful degradation
Deploy Iddaai Backend / build-and-deploy (push) Successful in 27s
Deploy Iddaai Backend / build-and-deploy (push) Successful in 27s
- Reset consecutiveFailures on cooldown expiry (half-open state) so that a single retry failure doesn't immediately re-open the circuit
- Exclude AI Engine app-level 500s from the circuit breaker count (only network/infra errors are counted: timeout, 502, 503, 504, 429)
- Return null gracefully instead of throwing a 503 when no cache exists
- Add a DB fallback for non-cooldown AI Engine failures
- Remove the blocking wait-and-retry that held requests for up to 20s
This commit is contained in:
@@ -183,8 +183,11 @@ export class AiEngineClient {
|
||||
}
|
||||
|
||||
this.logger.warn(
|
||||
`[${this.serviceName}] AI circuit breaker cooldown elapsed, allowing a recovery attempt`,
|
||||
`[${this.serviceName}] AI circuit breaker cooldown elapsed, allowing a recovery attempt (resetting failures from ${this.consecutiveFailures})`,
|
||||
);
|
||||
// Half-open state: reset failures so a single retry failure doesn't
|
||||
// immediately re-open the circuit at threshold+1
|
||||
this.consecutiveFailures = 0;
|
||||
this.circuitOpenedAt = null;
|
||||
}
|
||||
|
||||
@@ -233,8 +236,18 @@ export class AiEngineClient {
|
||||
if (!error.response) {
|
||||
return true; // Network error, timeout, etc.
|
||||
}
|
||||
// Only count infrastructure-level errors toward circuit breaker:
|
||||
// - No response (network failure) → already handled above
|
||||
// - Timeout (ECONNABORTED) → infrastructure
|
||||
// - 429 (rate limit) → infrastructure
|
||||
// - 502/503/504 (proxy/gateway errors) → infrastructure
|
||||
// Do NOT count 500 (app-level crash in AI Engine) — it may be
|
||||
// match-specific and shouldn't block all other matches.
|
||||
if (error.code === 'ECONNABORTED') {
|
||||
return true;
|
||||
}
|
||||
const status = error.response.status;
|
||||
return status >= 500 || status === 429;
|
||||
return status === 429 || status === 502 || status === 503 || status === 504;
|
||||
}
|
||||
|
||||
private toRequestError(error: unknown): AiEngineRequestError {
|
||||
|
||||
Reference in New Issue
Block a user