Commit da1009f · Parent(s): 9814b43
fixing pydantic issue v11

main/api.py CHANGED (+35 -24)
@@ -14,20 +14,25 @@ class InferenceApi(LitAPI):
         super().__init__()
         self.logger = logging.getLogger(__name__)
         self.logger.info("Initializing Inference API")
-        self.
+        self._device = None
+        self.stream = False  # Add stream flag for compatibility with LitAPI

     async def setup(self, device: Optional[str] = None):
-        """Setup method required by LitAPI
+        """Setup method required by LitAPI"""
         self._device = device
-        self.
-
+        self.logger.info(f"Inference API setup completed on device: {device}")
+
+    async def _get_client(self):
+        """Get or create HTTP client as needed"""
+        return httpx.AsyncClient(
+            base_url="http://localhost:8002",
             timeout=60.0
         )
-        self.logger.info(f"Inference API setup completed on device: {device}")

     def predict(self, x: str, **kwargs) -> Iterator[str]:
         """
         Non-async prediction method that yields results.
+        Implements required LitAPI method.
         """
         loop = asyncio.get_event_loop()
         async def async_gen():
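Note: the body of predict() is cut off by this hunk, but the pattern it sets up, driving an async generator from LitAPI's synchronous predict() via an event loop, can be sketched as follows. This is a rough sketch rather than the file's actual implementation; token_stream and predict_sync are illustrative names.

# Sketch only: one way to expose an async generator through a synchronous
# iterator interface. Helper names are illustrative, not taken from the file.
import asyncio
from typing import AsyncIterator, Iterator

async def token_stream() -> AsyncIterator[str]:
    # Stand-in for the async streaming call made against the backend.
    for tok in ("Hello", " ", "world"):
        await asyncio.sleep(0)
        yield tok

def predict_sync() -> Iterator[str]:
    loop = asyncio.new_event_loop()
    try:
        agen = token_stream()
        while True:
            try:
                # Pull one item at a time from the async generator.
                yield loop.run_until_complete(agen.__anext__())
            except StopAsyncIteration:
                break
    finally:
        loop.close()

print("".join(predict_sync()))  # -> Hello world

asyncio.get_event_loop(), as used in the diff, behaves similarly when no loop is running, but creating and closing a dedicated loop avoids the deprecation warnings newer Python versions emit for implicit loop creation.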
@@ -53,17 +58,21 @@ class InferenceApi(LitAPI):
            yield response

     def decode_request(self, request: Any, **kwargs) -> str:
-        """
+        """
+        Convert the request payload to input format.
+        Implements required LitAPI method.
+        """
         if isinstance(request, dict) and "prompt" in request:
             return request["prompt"]
         return request

     def encode_response(self, output: Iterator[str], **kwargs) -> Dict[str, Any]:
-        """
-
+        """
+        Convert the model output to a response payload.
+        Implements required LitAPI method.
+        """
         if self.stream:
             return {"generated_text": output}
-        # For non-streaming, take the first (and only) item from the iterator
         try:
             result = next(output)
             return {"generated_text": result}
@@ -80,17 +89,18 @@ class InferenceApi(LitAPI):
         self.logger.debug(f"Forwarding generation request for prompt: {prompt[:50]}...")

         try:
-
-
-
-
-
-
-
-
-
-
-
+            async with await self._get_client() as client:
+                response = await client.post(
+                    "/api/v1/generate",
+                    json={
+                        "prompt": prompt,
+                        "system_message": system_message,
+                        "max_new_tokens": max_new_tokens
+                    }
+                )
+                response.raise_for_status()
+                data = response.json()
+                return data["generated_text"]

         except Exception as e:
             self.logger.error(f"Error in generate_response: {str(e)}")
@@ -106,7 +116,8 @@ class InferenceApi(LitAPI):
         self.logger.debug(f"Forwarding streaming request for prompt: {prompt[:50]}...")

         try:
-
+            client = await self._get_client()
+            async with client.stream(
                 "POST",
                 "/api/v1/generate/stream",
                 json={
@@ -118,12 +129,12 @@ class InferenceApi(LitAPI):
                 response.raise_for_status()
                 async for chunk in response.aiter_text():
                     yield chunk
+            await client.aclose()

         except Exception as e:
             self.logger.error(f"Error in generate_stream: {str(e)}")
             raise

     async def cleanup(self):
-        """Cleanup method -
-
-        await self.client.aclose()
+        """Cleanup method - no longer needed as clients are created per-request"""
+        pass
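For streaming, the client has to stay open until the response body is fully consumed, which is why the hunk closes it explicitly after the async for loop instead of wrapping the whole call in a context manager. Below is a standalone sketch of the same httpx pattern; it is not from the commit, generate_stream is an illustrative name, and the try/finally is a slightly more defensive variation on the explicit aclose().

# Standalone sketch of the streaming pattern against the assumed backend.
import asyncio
from typing import AsyncIterator
import httpx

async def generate_stream(prompt: str) -> AsyncIterator[str]:
    # Client is created per request and closed once the stream is exhausted.
    client = httpx.AsyncClient(base_url="http://localhost:8002", timeout=60.0)
    try:
        async with client.stream(
            "POST",
            "/api/v1/generate/stream",
            json={"prompt": prompt},
        ) as response:
            response.raise_for_status()
            async for chunk in response.aiter_text():
                yield chunk
    finally:
        # Close even if the consumer stops iterating early.
        await client.aclose()

async def main() -> None:
    async for chunk in generate_stream("Hello"):
        print(chunk, end="", flush=True)

# asyncio.run(main())  # requires the backend on localhost:8002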