Spaces:

TeamGenKI
/

Inference-API

Runtime error

App Files Community

AurelioAguirre committed on Jan 9, 2025

Commit

db5664e

1 Parent(s): 19d483b

fixing pydantic v4

Browse files

Files changed (2) hide show

main/api.py +19 -36
main/main.py +1 -1

main/api.py CHANGED Viewed

@@ -1,36 +1,33 @@
 import httpx
-from typing import Optional, Iterator, Union, Any
 import logging
 from litserve import LitAPI
 class InferenceApi(LitAPI):
-    def __init__(self, config: dict):
         """Initialize the Inference API with configuration."""
         super().__init__()
         self.logger = logging.getLogger(__name__)
         self.logger.info("Initializing Inference API")
-        # Get base URL from config
-        self.base_url = config["llm_server"]["base_url"]
-        self.timeout = config["llm_server"].get("timeout", 60)
-        self.client = None  # Will be initialized in setup()
-        # Set request timeout from config
-        self.request_timeout = float(self.timeout)
     async def setup(self, device: Optional[str] = None):
         """Setup method required by LitAPI - initialize HTTP client"""
-        self._device = device  # Store device as required by LitAPI
         self.client = httpx.AsyncClient(
-            base_url=self.base_url,
-            timeout=self.timeout
         )
         self.logger.info(f"Inference API setup completed on device: {device}")
-    async def predict(self, x: str, **kwargs) -> Union[str, Iterator[str]]:
         """
         Main prediction method required by LitAPI.
-        If streaming is enabled, yields chunks; otherwise returns via yield.
         """
         if self.stream:
             async for chunk in self.generate_stream(x, **kwargs):
@@ -41,21 +38,16 @@ class InferenceApi(LitAPI):
     def decode_request(self, request: Any, **kwargs) -> str:
         """Convert the request payload to input format."""
-        # For our case, we expect the request to be text
         if isinstance(request, dict) and "prompt" in request:
             return request["prompt"]
         return request
-    def encode_response(self, output: Union[str, Iterator[str]], **kwargs) -> Union[str, Iterator[str]]:
         """Convert the model output to a response payload."""
-        if self.stream:
-            # For streaming, yield each chunk wrapped in a dict
-            async def stream_wrapper():
-                async for chunk in output:
-                    yield {"generated_text": chunk}
-        else:
-            # For non-streaming, return complete response
-            return {"generated_text": output}
     async def generate_response(
             self,
@@ -88,7 +80,7 @@ class InferenceApi(LitAPI):
             prompt: str,
             system_message: Optional[str] = None,
             max_new_tokens: Optional[int] = None
-    ) -> Iterator[str]:
         """Generate a streaming response by forwarding the request to the LLM Server."""
         self.logger.debug(f"Forwarding streaming request for prompt: {prompt[:50]}...")
@@ -110,16 +102,7 @@ class InferenceApi(LitAPI):
             self.logger.error(f"Error in generate_stream: {str(e)}")
             raise
-    # ... [rest of the methods remain the same: generate_embedding, check_system_status, etc.]
     async def cleanup(self):
         """Cleanup method - close HTTP client"""
         if self.client:
-            await self.client.aclose()
-    def log(self, key: str, value: Any):
-        """Override log method to use our logger if queue not set"""
-        if self._logger_queue is None:
-            self.logger.info(f"Log event: {key}={value}")
-        else:
-            super().log(key, value)

 import httpx
+from typing import Optional, AsyncIterator, Dict, Any
 import logging
 from litserve import LitAPI
+from pydantic import BaseModel
+class GenerationResponse(BaseModel):
+    generated_text: str
 class InferenceApi(LitAPI):
+    def __init__(self):
         """Initialize the Inference API with configuration."""
         super().__init__()
         self.logger = logging.getLogger(__name__)
         self.logger.info("Initializing Inference API")
+        self.client = None
     async def setup(self, device: Optional[str] = None):
         """Setup method required by LitAPI - initialize HTTP client"""
+        self._device = device
         self.client = httpx.AsyncClient(
+            base_url="http://localhost:8002",  # We'll need to make this configurable
+            timeout=60.0
         )
         self.logger.info(f"Inference API setup completed on device: {device}")
+    async def predict(self, x: str, **kwargs) -> AsyncIterator[str]:
         """
         Main prediction method required by LitAPI.
+        Always yields, either chunks in streaming mode or complete response in non-streaming mode.
         """
         if self.stream:
             async for chunk in self.generate_stream(x, **kwargs):
     def decode_request(self, request: Any, **kwargs) -> str:
         """Convert the request payload to input format."""
         if isinstance(request, dict) and "prompt" in request:
             return request["prompt"]
         return request
+    def encode_response(self, output: AsyncIterator[str], **kwargs) -> AsyncIterator[Dict[str, str]]:
         """Convert the model output to a response payload."""
+        async def wrapper():
+            async for chunk in output:
+                yield {"generated_text": chunk}
+        return wrapper()
     async def generate_response(
             self,
             prompt: str,
             system_message: Optional[str] = None,
             max_new_tokens: Optional[int] = None
+    ) -> AsyncIterator[str]:
         """Generate a streaming response by forwarding the request to the LLM Server."""
         self.logger.debug(f"Forwarding streaming request for prompt: {prompt[:50]}...")
             self.logger.error(f"Error in generate_stream: {str(e)}")
             raise
     async def cleanup(self):
         """Cleanup method - close HTTP client"""
         if self.client:
+            await self.client.aclose()

main/main.py CHANGED Viewed

@@ -32,7 +32,7 @@ def create_app():
         config = load_config()
         # Initialize API and router
-        api = InferenceApi(config)
         init_router(config)
         # Create LitServer instance

         config = load_config()
         # Initialize API and router
+        api = InferenceApi()
         init_router(config)
         # Create LitServer instance