galbendavids committed on
Commit
fe3cfdf
·
1 Parent(s): 111c7c5

✅ Fix: Add rate limiting with exponential backoff + response caching to prevent API quota errors

Browse files
Files changed (1) hide show
  1. rag_engine.py +60 -31
rag_engine.py CHANGED
@@ -6,6 +6,8 @@ import os
6
  import re
7
  from collections import defaultdict
8
  from typing import List, Dict, Tuple
 
 
9
 
10
  class RAGEngine:
11
  def __init__(self, data_path=None):
@@ -27,6 +29,11 @@ class RAGEngine:
27
  self.car_normalization = self._build_car_normalization() # עצה 4: נרמול שמות
28
  self.conversation_history = [] # עצה 10: היסטוריית שיחה
29
 
 
 
 
 
 
30
  self._load_and_process_data()
31
  print("RAG Engine Initialized with all 10 optimizations.")
32
 
@@ -324,11 +331,58 @@ class RAGEngine:
324
 
325
  return "\n".join(context_lines)
326
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  def generate_response(self, query: str, history, api_key: str):
328
  """יצירת תשובה חכמה עם כל 10 העצות"""
329
  if not api_key:
330
  return "Error: Gemini API Key is missing."
331
 
 
 
 
 
 
332
  genai.configure(api_key=api_key)
333
 
334
  # עצה 7: זיהוי שאלות השוואתיות
@@ -402,37 +456,12 @@ User Question: {query}
402
 
403
  Answer:"""
404
 
405
- try:
406
- model = genai.GenerativeModel('gemini-2.0-flash')
407
- response = model.generate_content(system_prompt + "\n\n" + prompt)
408
- response_text = response.text
409
- except Exception as e:
410
- error_msg = str(e).lower()
411
- if "api_key" in error_msg or "401" in error_msg or "authentication" in error_msg:
412
- response_text = "❌ API Authentication Failed: Check your Gemini API key in HF Spaces settings"
413
- elif "quota" in error_msg or "429" in error_msg:
414
- response_text = "⚠️ API Rate Limit: Too many requests. Please wait a moment and try again."
415
- elif "permission" in error_msg or "403" in error_msg:
416
- response_text = "❌ API Permission Error: Check your API key has proper permissions"
417
- elif "404" in error_msg or "not found" in error_msg or "not supported" in error_msg:
418
- try:
419
- model = genai.GenerativeModel('gemini-1.5-flash')
420
- response = model.generate_content(system_prompt + "\n\n" + prompt)
421
- response_text = response.text
422
- except Exception as e2:
423
- try:
424
- model = genai.GenerativeModel('gemini-1.5-pro')
425
- response = model.generate_content(system_prompt + "\n\n" + prompt)
426
- response_text = response.text
427
- except Exception as e3:
428
- response_text = f"❌ Model Error: No available models. {str(e3)[:60]}"
429
- else:
430
- try:
431
- model = genai.GenerativeModel('gemini-1.5-flash')
432
- response = model.generate_content(system_prompt + "\n\n" + prompt)
433
- response_text = response.text
434
- except Exception as e2:
435
- response_text = f"❌ Error: {str(e2)[:100]}"
436
 
437
  # עצה 10: שמירת התשובה בהיסטוריה
438
  self._maintain_conversation_history(query, response_text)
 
6
  import re
7
  from collections import defaultdict
8
  from typing import List, Dict, Tuple
9
+ import time
10
+ import hashlib
11
 
12
  class RAGEngine:
13
  def __init__(self, data_path=None):
 
29
  self.car_normalization = self._build_car_normalization() # עצה 4: נרמול שמות
30
  self.conversation_history = [] # עצה 10: היסטוריית שיחה
31
 
32
+ # Rate limiting and caching
33
+ self.response_cache = {} # Cache for identical queries
34
+ self.last_request_time = 0 # Track last API request time
35
+ self.request_delay = 0.5 # Minimum delay between requests (seconds)
36
+
37
  self._load_and_process_data()
38
  print("RAG Engine Initialized with all 10 optimizations.")
39
 
 
331
 
332
  return "\n".join(context_lines)
333
 
334
+ def _get_cache_key(self, query: str) -> str:
335
+ """Generate cache key for query"""
336
+ return hashlib.md5(query.lower().encode()).hexdigest()
337
+
338
+ def _wait_for_rate_limit(self):
339
+ """Enforce minimum delay between API requests to avoid rate limiting"""
340
+ elapsed = time.time() - self.last_request_time
341
+ if elapsed < self.request_delay:
342
+ time.sleep(self.request_delay - elapsed)
343
+ self.last_request_time = time.time()
344
+
345
+ def _call_api_with_backoff(self, system_prompt: str, prompt: str, models: List[str]):
346
+ """Call Gemini API with exponential backoff and retry logic"""
347
+ for attempt, model in enumerate(models):
348
+ try:
349
+ # Wait before API call to respect rate limits
350
+ self._wait_for_rate_limit()
351
+
352
+ model_obj = genai.GenerativeModel(model)
353
+ response = model_obj.generate_content(system_prompt + "\n\n" + prompt)
354
+ return response.text
355
+ except Exception as e:
356
+ error_msg = str(e).lower()
357
+
358
+ # Handle rate limit errors with exponential backoff
359
+ if "429" in error_msg or "rate" in error_msg or "quota" in error_msg:
360
+ wait_time = min(60, 2 ** attempt) # 1, 2, 4, 8, 16, 32, 60 seconds
361
+ if attempt < len(models) - 1:
362
+ # Sleep longer before retry
363
+ time.sleep(wait_time)
364
+ continue
365
+ else:
366
+ return "⚠️ API Rate Limit: Too many requests. Please wait a moment and try again."
367
+
368
+ # Try next model for other errors
369
+ if attempt < len(models) - 1:
370
+ continue
371
+ else:
372
+ return f"❌ Model Error: {error_msg[:100]}"
373
+
374
+ return "❌ Failed to get response from API"
375
+
376
  def generate_response(self, query: str, history, api_key: str):
377
  """יצירת תשובה חכמה עם כל 10 העצות"""
378
  if not api_key:
379
  return "Error: Gemini API Key is missing."
380
 
381
+ # Check cache for identical queries
382
+ cache_key = self._get_cache_key(query)
383
+ if cache_key in self.response_cache:
384
+ return self.response_cache[cache_key]
385
+
386
  genai.configure(api_key=api_key)
387
 
388
  # עצה 7: זיהוי שאלות השוואתיות
 
456
 
457
  Answer:"""
458
 
459
+ # Use new rate-limited API call with backoff and caching
460
+ models_to_try = ['gemini-2.0-flash', 'gemini-1.5-flash', 'gemini-1.5-pro']
461
+ response_text = self._call_api_with_backoff(system_prompt, prompt, models_to_try)
462
+
463
+ # Cache the response for identical future queries
464
+ self.response_cache[cache_key] = response_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
465
 
466
  # עצה 10: שמירת התשובה בהיסטוריה
467
  self._maintain_conversation_history(query, response_text)