VietCat commited on
Commit
eab288c
·
1 Parent(s): f6c9376

add quota manager

Browse files
app/config.py CHANGED
@@ -31,9 +31,11 @@ class Settings(BaseSettings):
31
  log_level: str = os.getenv("LOG_LEVEL", "INFO") or "INFO"
32
 
33
  # Gemini Configuration
34
- gemini_api_key: str = os.getenv("GEMINI_API_KEY") or ""
 
 
 
35
  gemini_base_url: str = os.getenv("GEMINI_BASE_URL", "https://generativelanguage.googleapis.com/v1/models/gemini-2.5-flash:generateContent") or ""
36
- gemini_model: str = os.getenv("GEMINI_MODEL", "gemini-2.5-flash") or ""
37
 
38
  # LLM (chat/completion) provider/model
39
  llm_provider: str = os.getenv("LLM_PROVIDER", "gemini") or ""
 
31
  log_level: str = os.getenv("LOG_LEVEL", "INFO") or "INFO"
32
 
33
  # Gemini Configuration
34
+ # Hỗ trợ nhiều API key và model cho Gemini
35
+ # Định nghĩa biến môi trường: GEMINI_API_KEYS="key1,key2,..."; GEMINI_MODELS="model1,model2,..."
36
+ gemini_api_keys: list[str] = os.getenv("GEMINI_API_KEYS", "").split(",") if os.getenv("GEMINI_API_KEYS") else []
37
+ gemini_models: list[str] = os.getenv("GEMINI_MODELS", "").split(",") if os.getenv("GEMINI_MODELS") else []
38
  gemini_base_url: str = os.getenv("GEMINI_BASE_URL", "https://generativelanguage.googleapis.com/v1/models/gemini-2.5-flash:generateContent") or ""
 
39
 
40
  # LLM (chat/completion) provider/model
41
  llm_provider: str = os.getenv("LLM_PROVIDER", "gemini") or ""
app/gemini_client.py CHANGED
@@ -2,48 +2,75 @@ from google.generativeai.embedding import embed_content
2
  from google.generativeai.client import configure
3
  from google.generativeai.generative_models import GenerativeModel
4
  from loguru import logger
 
5
 
6
  class GeminiClient:
7
- def __init__(self, api_key: str, model: str = "gemini-1.5-flash-latest"):
8
- self.api_key = api_key
9
- self.model = model
10
- configure(api_key=api_key)
11
- self._model = GenerativeModel(model)
12
 
13
  def generate_text(self, prompt: str, **kwargs) -> str:
14
- try:
15
- response = self._model.generate_content(prompt, **kwargs)
16
- if hasattr(response, 'usage_metadata'):
17
- logger.info(f"[GEMINI][USAGE] Prompt Token Count: {response.usage_metadata.prompt_token_count} - Candidate Token Count: {response.usage_metadata.candidates_token_count} - Total Token Count: {response.usage_metadata.total_token_count}")
18
- # response có thể là GenerativeModelResponse, lấy text hoặc trả về str
19
- if hasattr(response, 'text'):
20
- logger.info(f"[GEMINI][TEXT_RESPONSE] {response.text}")
21
- return response.text
22
- elif hasattr(response, 'candidates') and response.candidates:
23
- logger.info(f"[GEMINI][CANDIDATES_RESPONSE] {response.candidates[0].content.parts[0].text}")
24
- return response.candidates[0].content.parts[0].text
25
- logger.info(f"[GEMINI][RAW_RESPONSE] {response}")
26
- return str(response)
27
- except Exception as e:
28
- logger.error(f"[GEMINI] Error: {e}")
29
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  def count_tokens(self, prompt: str) -> int:
32
- try:
33
- return self._model.count_tokens(prompt).total_tokens
34
- except Exception as e:
35
- logger.error(f"[GEMINI] Token count error: {e}")
36
- return 0
 
 
 
37
 
38
  def create_embedding(self, text: str) -> list:
39
- try:
40
- response = embed_content(
41
- model=self.model,
42
- content=text,
43
- task_type="retrieval_query"
44
- )
45
- logger.info(f"[GEMINI][EMBEDDING][RAW_RESPONSE] {response['embedding'][:10]} ..... {response['embedding'][-10:]}")
46
- return response['embedding']
47
- except Exception as e:
48
- logger.error(f"[GEMINI][EMBEDDING] Error: {e}")
49
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from google.generativeai.client import configure
3
  from google.generativeai.generative_models import GenerativeModel
4
  from loguru import logger
5
+ from .request_limit_manager import RequestLimitManager
6
 
7
  class GeminiClient:
8
+ def __init__(self):
9
+ self.limit_manager = RequestLimitManager("gemini")
 
 
 
10
 
11
  def generate_text(self, prompt: str, **kwargs) -> str:
12
+ last_error = None
13
+ for key, model in self.limit_manager.iterate_key_model():
14
+ try:
15
+ configure(api_key=key)
16
+ _model = GenerativeModel(model)
17
+ response = _model.generate_content(prompt, **kwargs)
18
+ self.limit_manager.log_request(key, model, success=True)
19
+ if hasattr(response, 'usage_metadata'):
20
+ logger.info(f"[GEMINI][USAGE] Prompt Token Count: {response.usage_metadata.prompt_token_count} - Candidate Token Count: {response.usage_metadata.candidates_token_count} - Total Token Count: {response.usage_metadata.total_token_count}")
21
+ if hasattr(response, 'text'):
22
+ logger.info(f"[GEMINI][TEXT_RESPONSE] {response.text}")
23
+ return response.text
24
+ elif hasattr(response, 'candidates') and response.candidates:
25
+ logger.info(f"[GEMINI][CANDIDATES_RESPONSE] {response.candidates[0].content.parts[0].text}")
26
+ return response.candidates[0].content.parts[0].text
27
+ logger.info(f"[GEMINI][RAW_RESPONSE] {response}")
28
+ return str(response)
29
+ except Exception as e:
30
+ import re
31
+ msg = str(e)
32
+ if "429" in msg or "rate limit" in msg.lower():
33
+ retry_delay = 60
34
+ m = re.search(r'retry_delay.*?seconds: (\d+)', msg)
35
+ if m:
36
+ retry_delay = int(m.group(1))
37
+ self.limit_manager.log_request(key, model, success=False, retry_delay=retry_delay)
38
+ last_error = e
39
+ continue
40
+ raise last_error or RuntimeError("No available Gemini API key/model")
41
 
42
  def count_tokens(self, prompt: str) -> int:
43
+ for key, model in self.limit_manager.iterate_key_model():
44
+ try:
45
+ configure(api_key=key)
46
+ _model = GenerativeModel(model)
47
+ return _model.count_tokens(prompt).total_tokens
48
+ except Exception:
49
+ continue
50
+ return 0
51
 
52
  def create_embedding(self, text: str) -> list:
53
+ last_error = None
54
+ for key, model in self.limit_manager.iterate_key_model():
55
+ try:
56
+ configure(api_key=key)
57
+ response = embed_content(
58
+ model=model,
59
+ content=text,
60
+ task_type="retrieval_query"
61
+ )
62
+ self.limit_manager.log_request(key, model, success=True)
63
+ logger.info(f"[GEMINI][EMBEDDING][RAW_RESPONSE] {response['embedding'][:10]} ..... {response['embedding'][-10:]}")
64
+ return response['embedding']
65
+ except Exception as e:
66
+ import re
67
+ msg = str(e)
68
+ if "429" in msg or "rate limit" in msg.lower():
69
+ retry_delay = 60
70
+ m = re.search(r'retry_delay.*?seconds: (\d+)', msg)
71
+ if m:
72
+ retry_delay = int(m.group(1))
73
+ self.limit_manager.log_request(key, model, success=False, retry_delay=retry_delay)
74
+ last_error = e
75
+ continue
76
+ raise last_error or RuntimeError("No available Gemini API key/model")
app/llm.py CHANGED
@@ -88,9 +88,7 @@ class LLMClient:
88
 
89
  def _setup_gemini(self, config: Dict[str, Any]):
90
  """Cấu hình cho Gemini."""
91
- self.api_key = config.get("api_key", "")
92
- self.model = config.get("model", "gemini-1.5-flash-latest")
93
- self.gemini_client = GeminiClient(self.api_key, self.model)
94
 
95
  @timing_decorator_async
96
  async def generate_text(
@@ -479,7 +477,6 @@ if __name__ == "__main__":
479
  llm_client = create_llm_client(
480
  provider=settings.llm_provider,
481
  model=settings.llm_model,
482
- api_key=settings.gemini_api_key,
483
  # ... các config khác nếu cần ...
484
  )
485
 
 
88
 
89
  def _setup_gemini(self, config: Dict[str, Any]):
90
  """Cấu hình cho Gemini."""
91
+ self.gemini_client = GeminiClient()
 
 
92
 
93
  @timing_decorator_async
94
  async def generate_text(
 
477
  llm_client = create_llm_client(
478
  provider=settings.llm_provider,
479
  model=settings.llm_model,
 
480
  # ... các config khác nếu cần ...
481
  )
482
 
app/main.py CHANGED
@@ -63,9 +63,8 @@ VEHICLE_KEYWORDS = ["xe máy", "ô tô", "xe đạp", "xe hơi"]
63
  # Khởi tạo LLM client Gemini
64
  llm_client = create_llm_client(
65
  provider="gemini",
66
- api_key=settings.gemini_api_key,
67
  base_url=settings.gemini_base_url,
68
- model=settings.gemini_model
69
  )
70
 
71
  reranker = Reranker()
 
63
  # Khởi tạo LLM client Gemini
64
  llm_client = create_llm_client(
65
  provider="gemini",
 
66
  base_url=settings.gemini_base_url,
67
+ model=settings.gemini_models[0] if settings.gemini_models else "gemini-2.5-flash"
68
  )
69
 
70
  reranker = Reranker()
app/request_limit_manager.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import threading
3
+ from typing import Dict, List, Tuple, Optional, Iterator, Union
4
+ from app.config import get_settings
5
+
6
+ class RequestLimitManager:
7
+ def __init__(self, provider: str):
8
+ self.provider = provider
9
+ self.lock = threading.Lock()
10
+ self._init_keys_models()
11
+
12
+ def _init_keys_models(self):
13
+ settings = get_settings()
14
+ if self.provider == "gemini":
15
+ self.api_keys: List[str] = [k.strip() for k in getattr(settings, 'gemini_api_keys', []) if k.strip()]
16
+ self.models: List[str] = [m.strip() for m in getattr(settings, 'gemini_models', []) if m.strip()]
17
+ # Có thể mở rộng cho provider khác ở đây
18
+ self.status: Dict[str, Dict[str, Dict[str, Union[str, float]]]] = {}
19
+ now = time.time()
20
+ for key in self.api_keys:
21
+ self.status[key] = {}
22
+ for model in self.models:
23
+ self.status[key][model] = {"status": "active", "timestamp": now}
24
+ self.default_key: Optional[str] = self.api_keys[0] if self.api_keys else None
25
+ self.default_model: Optional[str] = self.models[0] if self.models else None
26
+
27
+ def log_request(self, key: str, model: str, success: bool, retry_delay: Optional[int] = None):
28
+ with self.lock:
29
+ now = time.time()
30
+ if key not in self.status:
31
+ self.status[key] = {}
32
+ if model not in self.status[key]:
33
+ self.status[key][model] = {"status": "active", "timestamp": now}
34
+ if success:
35
+ self.status[key][model]["status"] = "active"
36
+ self.status[key][model]["timestamp"] = now
37
+ else:
38
+ self.status[key][model]["status"] = "blocked"
39
+ self.status[key][model]["timestamp"] = now + (retry_delay or 60)
40
+
41
+ def iterate_key_model(self) -> Iterator[Tuple[str, str]]:
42
+ now = time.time()
43
+ keys = self.api_keys[:]
44
+ models = self.models[:]
45
+ # Ưu tiên default key/model nếu có
46
+ if self.default_key and self.default_key in keys:
47
+ keys.remove(self.default_key)
48
+ keys = [self.default_key] + keys
49
+ if self.default_model and self.default_model in models:
50
+ models.remove(self.default_model)
51
+ models = [self.default_model] + models
52
+ for key in keys:
53
+ for model in models:
54
+ info = self.status.get(key, {}).get(model, {"status": "active", "timestamp": 0.0})
55
+ status = info.get("status", "active")
56
+ ts = float(info.get("timestamp", 0.0))
57
+ if status == "active":
58
+ yield key, model
59
+ elif status == "blocked" and now > ts:
60
+ # Đã hết thời gian block, cho thử lại
61
+ yield key, model
62
+ # Nếu không có key/model nào hợp lệ, không yield gì
app/reranker.py CHANGED
@@ -10,7 +10,7 @@ class Reranker:
10
  self.provider = getattr(settings, 'rerank_provider', settings.llm_provider)
11
  self.model = getattr(settings, 'rerank_model', settings.llm_model)
12
  if self.provider == 'gemini':
13
- self.client = GeminiClient(settings.gemini_api_key, model=self.model)
14
  # elif self.provider == 'openai':
15
  # self.client = OpenAIClient(settings.openai_api_key, model=self.model)
16
  # elif self.provider == 'cohere':
 
10
  self.provider = getattr(settings, 'rerank_provider', settings.llm_provider)
11
  self.model = getattr(settings, 'rerank_model', settings.llm_model)
12
  if self.provider == 'gemini':
13
+ self.client = GeminiClient()
14
  # elif self.provider == 'openai':
15
  # self.client = OpenAIClient(settings.openai_api_key, model=self.model)
16
  # elif self.provider == 'cohere':