Spaces:

Soumik555
/

FastApi

Running

App Files Files Community

Soumik Bose committed on Dec 15, 2025

Commit

bbde124

1 Parent(s): 03e9433

okay

Browse files

Files changed (1) hide show

cerebras_instance_provider.py +61 -24

cerebras_instance_provider.py CHANGED Viewed

@@ -1,48 +1,85 @@
 # instance_provider.py
 import os
-from typing import List, Optional
-from pydantic_ai.models.openai import OpenAIModel
-from pydantic_ai.providers.openai import OpenAIProvider
 from dotenv import load_dotenv
 load_dotenv()
 class InstanceProvider:
-    """Manages one OpenAIModel per Cerebras API key and hands them out in rotation"""
     def __init__(self):
-        self.instances: List[OpenAIModel] = []
         self.current_index = 0
         self._initialize_instances()
     def _initialize_instances(self):
-        """Create an OpenAIModel for every non-empty key in CEREBRAS_API_KEYS"""
         api_keys = os.getenv("CEREBRAS_API_KEYS", "").split(",")
         base_url = os.getenv("CEREBRAS_BASE_URL")
-        model_name = os.getenv("CEREBRAS_MODEL")
         for key in api_keys:
             key = key.strip()
             if key:
-                self.instances.append(
-                    OpenAIModel(
-                        model_name,
-                        provider=OpenAIProvider(
-                            base_url=base_url,
-                            api_key=key
-                        )
                     )
-                )
-    def get_next_instance(self) -> Optional[OpenAIModel]:
-        """Return the instance at current_index and advance it, wrapping around; None if there are none"""
-        if not self.instances:
             return None
-        instance = self.instances[self.current_index]
-        self.current_index = (self.current_index + 1) % len(self.instances)
-        return instance
     def get_total_instances(self) -> int:
-        """Return how many instances were created"""
-        return len(self.instances)

 # instance_provider.py
 import os
+import logging
+from typing import List, Optional, Tuple
+from openai import OpenAI
 from dotenv import load_dotenv
 load_dotenv()
+# Configure the root logger at INFO level when this module is imported
+# (basicConfig has no effect if the root logger already has handlers),
+# then create the module-level logger used by InstanceProvider.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 class InstanceProvider:
+    """Manages multiple Cerebras/OpenAI clients with simple rotation"""
     def __init__(self):
+        self.clients: List[OpenAI] = []
         self.current_index = 0
+        self.model_name = os.getenv("CEREBRAS_MODEL") or "llama3.1-70b"
         self._initialize_instances()
     def _initialize_instances(self):
+        """Load all API keys and create OpenAI clients"""
+        # Split keys by comma
         api_keys = os.getenv("CEREBRAS_API_KEYS", "").split(",")
         base_url = os.getenv("CEREBRAS_BASE_URL")
         for key in api_keys:
             key = key.strip()
             if key:
+                try:
+                    # Create a standard OpenAI client for this key
+                    client = OpenAI(
+                        base_url=base_url,
+                        api_key=key
                     )
+                    self.clients.append(client)
+                except Exception as e:
+                    logger.error(f"Failed to initialize key {key[:4]}...: {e}")
+    def get_next_instance(self) -> Optional[Tuple[OpenAI, str]]:
+        """
+        Get next client in rotation.
+        Returns: Tuple (OpenAI_Client, Model_Name)
+        """
+        if not self.clients:
             return None
+        # Get current client
+        client = self.clients[self.current_index]
+        # Rotate index for the next call (Round Robin)
+        self.current_index = (self.current_index + 1) % len(self.clients)
+        return client, self.model_name
     def get_total_instances(self) -> int:
+        """Return total number of active clients available"""
+        return len(self.clients)
+    def chat_completion_with_retry(self, messages: list, **kwargs):
+        """
+        Helper function that automatically retries across all instances
+        if one fails.
+        """
+        total_attempts = self.get_total_instances()
+        for attempt in range(total_attempts):
+            client, model = self.get_next_instance()
+            try:
+                # Execute the API call
+                response = client.chat.completions.create(
+                    model=model,
+                    messages=messages,
+                    **kwargs
+                )
+                return response
+            except Exception as e:
+                logger.warning(f"Instance failed (Attempt {attempt+1}/{total_attempts}): {e}")
+                # Loop continues to next instance automatically
+                continue
+        raise RuntimeError(f"All {total_attempts} instances failed.")