import os
import time
from typing import List, Optional

from dotenv import load_dotenv, find_dotenv
from google import genai
from google.genai import types

from ..utils import log
from ..utils.common_utils import exponential_backoff


class GeminiClient:
    """Thin wrapper around the google-genai client with multi-turn history,
    token-usage tracking, and retry on empty responses."""

    def __init__(self, model: str, api_key: Optional[str] = None):
        self.model = model
        self._init_environment(api_key)
        self.histories: List[types.Content] = []
        self.token_usages: dict = {}
        self.__first_turn = True

    def _init_environment(self, api_key: Optional[str] = None) -> None:
        # Fall back to GOOGLE_API_KEY from the nearest .env file when no key is given.
        if not api_key:
            dotenv_path = find_dotenv(usecwd=True)
            load_dotenv(dotenv_path, override=True)
            api_key = os.environ.get("GOOGLE_API_KEY")
        self.client = genai.Client(api_key=api_key)

    def reset_history(self, verbose: bool = True) -> None:
        self.__first_turn = True
        self.histories = []
        self.token_usages = {}
        if verbose:
            log('Conversation history has been reset.', color=True)

    def __make_payload(self, user_prompt: str) -> List[types.Content]:
        return [types.Content(role='user', parts=[types.Part.from_text(text=user_prompt)])]

    def __call__(self, user_prompt: str, system_prompt: Optional[str] = None,
                 using_multi_turn: bool = True, greeting: Optional[str] = None,
                 verbose: bool = True, **kwargs) -> str:
        if not using_multi_turn:
            self.reset_history(verbose)

        # Seed the conversation with an optional model-side greeting on the first turn.
        if greeting and self.__first_turn:
            self.histories.append(
                types.Content(role='model', parts=[types.Part.from_text(text=greeting)]))
        self.__first_turn = False

        self.histories += self.__make_payload(user_prompt)

        count = 0
        max_retry = kwargs.pop('max_retry', 5)
        kwargs.pop('seed', None)     # not a valid Gemini param
        kwargs.pop('verbose', None)  # internal flag, not for the API

        # Minimise thinking tokens unless the caller supplied a config.
        if 'thinking_config' not in kwargs:
            if 'gemini-2' in self.model.lower():
                kwargs['thinking_config'] = types.ThinkingConfig(thinking_budget=0)
            elif 'gemini-3' in self.model.lower():
                kwargs['thinking_config'] = types.ThinkingConfig(thinking_level="minimal")

        while True:
            response = self.client.models.generate_content(
                model=self.model,
                contents=self.histories,
                config=types.GenerateContentConfig(
                    system_instruction=system_prompt,
                    **kwargs
                )
            )

            # Record per-attempt token usage; the metadata fields may be None.
            if response.usage_metadata:
                usage = response.usage_metadata
                self.token_usages.setdefault("prompt_tokens", []).append(usage.prompt_token_count or 0)
                self.token_usages.setdefault("completion_tokens", []).append(usage.candidates_token_count or 0)
                self.token_usages.setdefault("total_tokens", []).append(usage.total_token_count or 0)
                self.token_usages.setdefault("reasoning_tokens", []).append(usage.thoughts_token_count or 0)

            # Accept the response as soon as it carries text. This is checked
            # before the retry cap so a valid final attempt is not discarded.
            if response.text is not None:
                break

            # Give up with a canned reply once the retry budget is exhausted.
            if count >= max_retry:
                replace_text = 'Could you tell me again?'
                self.histories.append(
                    types.Content(role='model', parts=[types.Part.from_text(text=replace_text)]))
                return replace_text

            # Empty response: back off and try again.
            time.sleep(exponential_backoff(count))
            count += 1

        self.histories.append(
            types.Content(role='model', parts=[types.Part.from_text(text=response.text)]))
        return response.text
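

# --- Usage sketch (illustrative; not part of this module) ---
# A minimal example assuming this file lives inside a package alongside the
# ../utils helpers it imports, and that GOOGLE_API_KEY is available in the
# environment or a .env file. The import path and model name below are
# assumptions, not pinned anywhere in this module.
#
#     from mypkg.clients.gemini_client import GeminiClient  # hypothetical path
#
#     client = GeminiClient(model="gemini-2.0-flash")        # model name assumed
#
#     # Multi-turn by default: the second call sees the first exchange.
#     answer = client("What is the capital of France?",
#                     system_prompt="Answer in one word.")
#     follow_up = client("And its population?")
#
#     # Extra kwargs are forwarded to GenerateContentConfig; max_retry is
#     # consumed locally before the API call.
#     summary = client("Summarise our chat.", temperature=0.2, max_retry=3)
#
#     print(client.token_usages)  # one list entry per API attempt, per metric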