galbendavids committed on
Commit
fe3cfdf
·
1 Parent(s): 111c7c5

✅ Fix: Add rate limiting with exponential backoff + response caching to prevent API quota errors

Browse files
Files changed (1) hide show
  1. rag_engine.py +60 -31
rag_engine.py CHANGED
@@ -6,6 +6,8 @@ import os
6
  import re
7
  from collections import defaultdict
8
  from typing import List, Dict, Tuple
 
 
9
 
10
  class RAGEngine:
11
  def __init__(self, data_path=None):
@@ -27,6 +29,11 @@ class RAGEngine:
27
  self.car_normalization = self._build_car_normalization() # עצה 4: נרמול שמות
28
  self.conversation_history = [] # עצה 10: היסטוריית שיחה
29
 
 
 
 
 
 
30
  self._load_and_process_data()
31
  print("RAG Engine Initialized with all 10 optimizations.")
32
 
@@ -324,11 +331,58 @@ class RAGEngine:
324
 
325
  return "\n".join(context_lines)
326
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  def generate_response(self, query: str, history, api_key: str):
328
  """יצירת תשובה חכמה עם כל 10 העצות"""
329
  if not api_key:
330
  return "Error: Gemini API Key is missing."
331
 
 
 
 
 
 
332
  genai.configure(api_key=api_key)
333
 
334
  # עצה 7: זיהוי שאלות השוואתיות
@@ -402,37 +456,12 @@ User Question: {query}
402
 
403
  Answer:"""
404
 
405
- try:
406
- model = genai.GenerativeModel('gemini-2.0-flash')
407
- response = model.generate_content(system_prompt + "\n\n" + prompt)
408
- response_text = response.text
409
- except Exception as e:
410
- error_msg = str(e).lower()
411
- if "api_key" in error_msg or "401" in error_msg or "authentication" in error_msg:
412
- response_text = "❌ API Authentication Failed: Check your Gemini API key in HF Spaces settings"
413
- elif "quota" in error_msg or "429" in error_msg:
414
- response_text = "⚠️ API Rate Limit: Too many requests. Please wait a moment and try again."
415
- elif "permission" in error_msg or "403" in error_msg:
416
- response_text = "❌ API Permission Error: Check your API key has proper permissions"
417
- elif "404" in error_msg or "not found" in error_msg or "not supported" in error_msg:
418
- try:
419
- model = genai.GenerativeModel('gemini-1.5-flash')
420
- response = model.generate_content(system_prompt + "\n\n" + prompt)
421
- response_text = response.text
422
- except Exception as e2:
423
- try:
424
- model = genai.GenerativeModel('gemini-1.5-pro')
425
- response = model.generate_content(system_prompt + "\n\n" + prompt)
426
- response_text = response.text
427
- except Exception as e3:
428
- response_text = f"❌ Model Error: No available models. {str(e3)[:60]}"
429
- else:
430
- try:
431
- model = genai.GenerativeModel('gemini-1.5-flash')
432
- response = model.generate_content(system_prompt + "\n\n" + prompt)
433
- response_text = response.text
434
- except Exception as e2:
435
- response_text = f"❌ Error: {str(e2)[:100]}"
436
 
437
  # עצה 10: שמירת התשובה בהיסטוריה
438
  self._maintain_conversation_history(query, response_text)
 
6
  import re
7
  from collections import defaultdict
8
  from typing import List, Dict, Tuple
9
+ import time
10
+ import hashlib
11
 
12
  class RAGEngine:
13
  def __init__(self, data_path=None):
 
29
  self.car_normalization = self._build_car_normalization() # עצה 4: נרמול שמות
30
  self.conversation_history = [] # עצה 10: היסטוריית שיחה
31
 
32
+ # Rate limiting and caching
33
+ self.response_cache = {} # Cache for identical queries
34
+ self.last_request_time = 0 # Track last API request time
35
+ self.request_delay = 0.5 # Minimum delay between requests (seconds)
36
+
37
  self._load_and_process_data()
38
  print("RAG Engine Initialized with all 10 optimizations.")
39
 
 
331
 
332
  return "\n".join(context_lines)
333
 
334
+ def _get_cache_key(self, query: str) -> str:
335
+ """Generate cache key for query"""
336
+ return hashlib.md5(query.lower().encode()).hexdigest()
337
+
338
+ def _wait_for_rate_limit(self):
339
+ """Enforce minimum delay between API requests to avoid rate limiting"""
340
+ elapsed = time.time() - self.last_request_time
341
+ if elapsed < self.request_delay:
342
+ time.sleep(self.request_delay - elapsed)
343
+ self.last_request_time = time.time()
344
+
345
+ def _call_api_with_backoff(self, system_prompt: str, prompt: str, models: List[str]):
346
+ """Call Gemini API with exponential backoff and retry logic"""
347
+ for attempt, model in enumerate(models):
348
+ try:
349
+ # Wait before API call to respect rate limits
350
+ self._wait_for_rate_limit()
351
+
352
+ model_obj = genai.GenerativeModel(model)
353
+ response = model_obj.generate_content(system_prompt + "\n\n" + prompt)
354
+ return response.text
355
+ except Exception as e:
356
+ error_msg = str(e).lower()
357
+
358
+ # Handle rate limit errors with exponential backoff
359
+ if "429" in error_msg or "rate" in error_msg or "quota" in error_msg:
360
+ wait_time = min(60, 2 ** attempt) # 1, 2, 4, 8, 16, 32, 60 seconds
361
+ if attempt < len(models) - 1:
362
+ # Sleep longer before retry
363
+ time.sleep(wait_time)
364
+ continue
365
+ else:
366
+ return "⚠️ API Rate Limit: Too many requests. Please wait a moment and try again."
367
+
368
+ # Try next model for other errors
369
+ if attempt < len(models) - 1:
370
+ continue
371
+ else:
372
+ return f"❌ Model Error: {error_msg[:100]}"
373
+
374
+ return "❌ Failed to get response from API"
375
+
376
  def generate_response(self, query: str, history, api_key: str):
377
  """יצירת תשובה חכמה עם כל 10 העצות"""
378
  if not api_key:
379
  return "Error: Gemini API Key is missing."
380
 
381
+ # Check cache for identical queries
382
+ cache_key = self._get_cache_key(query)
383
+ if cache_key in self.response_cache:
384
+ return self.response_cache[cache_key]
385
+
386
  genai.configure(api_key=api_key)
387
 
388
  # עצה 7: זיהוי שאלות השוואתיות
 
456
 
457
  Answer:"""
458
 
459
+ # Use new rate-limited API call with backoff and caching
460
+ models_to_try = ['gemini-2.0-flash', 'gemini-1.5-flash', 'gemini-1.5-pro']
461
+ response_text = self._call_api_with_backoff(system_prompt, prompt, models_to_try)
462
+
463
+ # Cache the response for identical future queries
464
+ self.response_cache[cache_key] = response_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
465
 
466
  # עצה 10: שמירת התשובה בהיסטוריה
467
  self._maintain_conversation_history(query, response_text)