diff --git a/src/common/utils/ai-engine-client.ts b/src/common/utils/ai-engine-client.ts
index cd90765..9ab26bf 100644
--- a/src/common/utils/ai-engine-client.ts
+++ b/src/common/utils/ai-engine-client.ts
@@ -69,8 +69,8 @@ export class AiEngineClient {
     this.defaultTimeoutMs = options.timeoutMs ?? 30000;
     this.maxRetries = options.maxRetries ?? 2;
     this.retryDelayMs = options.retryDelayMs ?? 750;
-    this.circuitBreakerThreshold = options.circuitBreakerThreshold ?? 3;
-    this.circuitBreakerCooldownMs = options.circuitBreakerCooldownMs ?? 30000;
+    this.circuitBreakerThreshold = options.circuitBreakerThreshold ?? 5;
+    this.circuitBreakerCooldownMs = options.circuitBreakerCooldownMs ?? 15000;
 
     this.axiosClient = axios.create({
       baseURL: options.baseUrl,
diff --git a/src/modules/predictions/predictions.service.ts b/src/modules/predictions/predictions.service.ts
index c0d611c..9d8a0bf 100755
--- a/src/modules/predictions/predictions.service.ts
+++ b/src/modules/predictions/predictions.service.ts
@@ -246,10 +246,21 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy {
       const status = requestError.status;
       const detail = requestError.detail || requestError.message;
 
+      // ── Cooldown fallback cascade: memCache → DB stored → DB cached → wait & retry ──
       if (
         status === HttpStatus.SERVICE_UNAVAILABLE &&
         this.hasCooldown(detail)
       ) {
+        // 1) In-memory cache (10min TTL)
+        const memCached = this.predictionMemCache.get(matchId);
+        if (memCached && Date.now() - memCached.timestamp < 10 * 60 * 1000) {
+          this.logger.warn(
+            `AI Engine cooldown for ${matchId}; returning mem-cached prediction`,
+          );
+          return memCached.payload;
+        }
+
+        // 2) DB stored prediction (no TTL filter)
         const storedPrediction = await this.getStoredPrediction(matchId);
         if (storedPrediction) {
           this.logger.warn(
@@ -257,6 +268,43 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy {
           );
           return this.enrichPredictionResponse(storedPrediction, matchContext);
         }
+
+        // 3) DB cached prediction (with model version check)
+        const cachedPrediction = await this.getCachedPrediction(matchId);
+        if (cachedPrediction) {
+          this.logger.warn(
+            `AI Engine cooldown for ${matchId}; returning cached prediction`,
+          );
+          return this.enrichPredictionResponse(cachedPrediction, matchContext);
+        }
+
+        // 4) No cached data at all — wait out cooldown and retry once
+        const cooldownMs = this.extractCooldownMs(detail);
+        if (cooldownMs > 0 && cooldownMs <= 20000) {
+          this.logger.warn(
+            `AI Engine cooldown for ${matchId}; no cached data — waiting ${cooldownMs}ms and retrying...`,
+          );
+          await new Promise((resolve) => setTimeout(resolve, cooldownMs + 500));
+          try {
+            const retryResponse =
+              await this.aiEngineClient.post<MatchPredictionDto>(
+                `/v20plus/analyze/${matchId}`,
+                { simulate: true, is_simulation: true, pre_match_only: true },
+              );
+            const retryPrediction = this.enrichPredictionResponse(
+              retryResponse.data,
+              matchContext,
+            );
+            await this.recordPredictionRun(matchId, retryResponse.data);
+            await this.cachePrediction(matchId, retryPrediction);
+            return retryPrediction;
+          } catch (retryErr: unknown) {
+            this.logger.error(
+              `AI Engine retry after cooldown also failed for ${matchId}`,
+            );
+            // Fall through to error handling below
+          }
+        }
       }
 
       this.logger.error(
@@ -1243,6 +1291,19 @@ export class PredictionsService implements OnModuleInit, OnModuleDestroy {
     return false;
   }
 
+  /** Reads cooldownRemainingMs from an object or text `detail`; 0 if none. */
+  private extractCooldownMs(detail: unknown): number {
+    if (typeof detail === "string") {
+      // Key name, then any run of quotes/colons/whitespace, then the digits.
+      const found = /cooldownRemainingMs[":\s]+(\d+)/.exec(detail);
+      return found ? Number(found[1]) : 0;
+    }
+    if (!detail || typeof detail !== "object" || !("cooldownRemainingMs" in detail)) {
+      return 0;
+    }
+    return Number((detail as Record<string, unknown>).cooldownRemainingMs) || 0;
+  }
+
   private async ensureSmartCouponDataReady(matchIds: string[]): Promise<void> {
     const uniqueMatchIds = [...new Set(matchIds.filter((id) => !!id))];
     if (uniqueMatchIds.length === 0) {