"""LLM handler with Groq/Gemini failover and per-client rate-limit cooldowns."""
import time
import json
from typing import Dict, List, Callable, Any, Union
import google.generativeai as genai
from groq import Groq, RateLimitError
# Assuming your config.py looks something like this for the example to be runnable
from dataclasses import dataclass
@dataclass
class APIKey:
    """Credentials and model selection for a single LLM provider account."""

    # Provider discriminator checked by LLMHandler: "groq" or "gemini".
    provider: str
    # Raw API key; only its last 4 characters appear in log/client ids.
    key: str
    # Model name passed through to the provider's SDK.
    model: str
# A simple callback for demonstration
def no_op_callback(message: str):
    """Default progress callback: echo the status message to stdout."""
    print(message)
class LLMHandler:
    """Multi-provider LLM client pool with rate-limit failover.

    Wraps a pool of Groq and Gemini clients built from ``APIKey`` entries.
    When a client raises (rate limit or any other error) it is placed on a
    short cooldown and the next client in the pool is tried.
    """

    def __init__(self, api_keys: List[APIKey], progress_callback: Callable[[str], None] = no_op_callback):
        """Configure one client per API key.

        Args:
            api_keys: Provider/key/model triples to build clients from.
            progress_callback: Receives human-readable status messages.
        """
        self.clients: List[Dict[str, Any]] = []
        self.progress_callback = progress_callback
        for key in api_keys:
            try:
                if key.provider == "groq":
                    # Disable the Groq library's internal retries so a rate
                    # limit surfaces immediately and our failover logic can
                    # rotate to the next client instead of blocking here.
                    client = Groq(api_key=key.key, max_retries=0)
                    self.clients.append({
                        "provider": "groq",
                        "client": client,
                        "model": key.model,
                        "id": f"groq_{key.key[-4:]}",
                    })
                elif key.provider == "gemini":
                    # Gemini's SDK is less explicit about HTTP retries in its
                    # standard configuration; the cooldown handling below
                    # covers it regardless.
                    genai.configure(api_key=key.key)
                    self.clients.append({
                        "provider": "gemini",
                        "client": genai.GenerativeModel(key.model),
                        "model": key.model,
                        "id": f"gemini_{key.key[-4:]}",
                    })
                else:
                    # BUG FIX: an unknown provider previously fell through to
                    # the success log below, which indexes self.clients[-1] —
                    # IndexError on an empty pool, or logging the wrong id.
                    self.progress_callback(f"Unknown provider '{key.provider}' for key ending in {key.key[-4:]}; skipping.")
                    continue
                self.progress_callback(f"Successfully configured client: {self.clients[-1]['id']}")
            except Exception as e:
                self.progress_callback(f"Failed to configure client for key ending in {key.key[-4:]}: {e}")
        if not self.clients:
            self.progress_callback("Warning: No LLM clients were successfully configured.")
        # Maps client id -> timestamp of the failure that started its cooldown.
        self.cooldowns: Dict[str, float] = {}
        self.cooldown_period = 30  # seconds

    @staticmethod
    def _strip_markdown_fences(text: str) -> str:
        """Remove a surrounding ```json ... ``` (or bare ```) code fence.

        BUG FIX: the original used str.lstrip("```json") / str.rstrip("```"),
        which strip *character sets* (any of `` ` j s o n ``), not prefixes —
        a payload beginning or ending with those characters would be mangled.
        """
        text = text.strip()
        if text.startswith("```"):
            text = text[3:]
            if text.startswith("json"):
                text = text[len("json"):]
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    def _attempt_generation(self, generation_logic: Callable[[Dict], Any]) -> Any:
        """Run ``generation_logic`` against each available client in turn.

        Handles the shared iteration, cooldown bookkeeping, and error
        handling for all generation methods.

        Args:
            generation_logic: A function that takes a client_info dictionary
                and executes the specific LLM call, returning the processed
                content.

        Returns:
            Whatever ``generation_logic`` returns for the first client that
            succeeds.

        Raises:
            ValueError: If no clients are configured.
            RuntimeError: If every client fails or is on cooldown.
        """
        if not self.clients:
            raise ValueError("No LLM clients configured.")
        for client_info in self.clients:
            client_id = client_info["id"]
            # Skip clients still cooling down; clear expired cooldowns.
            if client_id in self.cooldowns:
                if time.time() - self.cooldowns[client_id] < self.cooldown_period:
                    self.progress_callback(f"Skipping {client_id} (on cooldown).")
                    continue
                else:
                    self.progress_callback(f"Cooldown expired for {client_id}.")
                    del self.cooldowns[client_id]
            try:
                # Execute the specific generation logic passed to this method.
                return generation_logic(client_info)
            except RateLimitError:
                self.progress_callback(f"Rate limit hit for {client_id}. Placing it on a {self.cooldown_period}s cooldown.")
                self.cooldowns[client_id] = time.time()
                continue  # Try the next client.
            except Exception as e:
                # Catches other errors (bad API keys, parsing failures, ...).
                self.progress_callback(f"An error occurred with {client_id}: {e}. Placing on cooldown and trying next client.")
                self.cooldowns[client_id] = time.time()  # Faulty clients cool down too.
                continue
        # The loop completed without returning: every client failed.
        raise RuntimeError("Failed to get a response from any available LLM provider.")

    def generate_documentation(self, prompt: str) -> Dict:
        """Generate structured JSON documentation using available clients.

        Returns:
            The parsed JSON object.

        Raises:
            ValueError: If no clients are configured.
            RuntimeError: If every configured client fails.
        """
        def _generate(client_info: Dict) -> Dict:
            self.progress_callback(f"Attempting to generate JSON docs with {client_info['id']} ({client_info['model']})...")
            if client_info["provider"] == "groq":
                response = client_info["client"].chat.completions.create(
                    messages=[{"role": "user", "content": prompt}],
                    model=client_info["model"],
                    temperature=0.1,
                    response_format={"type": "json_object"},
                )
                content = response.choices[0].message.content
            elif client_info["provider"] == "gemini":
                # Gemini has no JSON response mode here: the prompt itself
                # must request JSON, and the reply may arrive wrapped in a
                # markdown code fence that we strip before parsing.
                response = client_info["client"].generate_content(prompt)
                content = self._strip_markdown_fences(response.text)
            return json.loads(content)

        return self._attempt_generation(_generate)

    def generate_text_response(self, prompt: str) -> str:
        """Generate a plain text response using available clients.

        Raises:
            ValueError: If no clients are configured.
            RuntimeError: If every configured client fails.
        """
        def _generate(client_info: Dict) -> str:
            self.progress_callback(f"Attempting to generate text with {client_info['id']} ({client_info['model']})...")
            if client_info["provider"] == "groq":
                response = client_info["client"].chat.completions.create(
                    messages=[{"role": "user", "content": prompt}],
                    model=client_info["model"],
                    temperature=0.2,
                )
                return response.choices[0].message.content
            elif client_info["provider"] == "gemini":
                response = client_info["client"].generate_content(prompt)
                return response.text.strip()

        return self._attempt_generation(_generate)