Spaces:

TeamGenKI
/

Inference-API

Runtime error

App Files Community

AurelioAguirre committed on Jan 9, 2025

Commit

63fdbaa

1 Parent(s): ff91b48

fixing pydantic issue v6

Browse files

Files changed (1) hide show

main/api.py +30 -9

main/api.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import httpx
-from typing import Optional, AsyncIterator, Dict, Any
 import logging
 from litserve import LitAPI
 from pydantic import BaseModel
@@ -24,10 +25,25 @@ class InferenceApi(LitAPI):
         )
         self.logger.info(f"Inference API setup completed on device: {device}")
-    async def predict(self, x: str, **kwargs) -> AsyncIterator[str]:
         """
-        Main prediction method required by LitAPI.
-        Always yields, either chunks in streaming mode or complete response in non-streaming mode.
         """
         if self.stream:
             async for chunk in self.generate_stream(x, **kwargs):
@@ -42,12 +58,17 @@ class InferenceApi(LitAPI):
             return request["prompt"]
         return request
-    def encode_response(self, output: AsyncIterator[str], **kwargs) -> AsyncIterator[Dict[str, str]]:
         """Convert the model output to a response payload."""
-        async def wrapper():
-            async for chunk in output:
-                yield {"generated_text": chunk}
-        return wrapper()
     async def generate_response(
             self,

 import httpx
+from typing import Optional, AsyncIterator, Dict, Any, Iterator
 import logging
+import asyncio
 from litserve import LitAPI
 from pydantic import BaseModel
         )
         self.logger.info(f"Inference API setup completed on device: {device}")
+    def predict(self, x: str, **kwargs) -> Iterator[str]:
         """
+        Non-async prediction method that yields results.
+        """
+        loop = asyncio.get_event_loop()
+        async def async_gen():
+            async for item in self._async_predict(x, **kwargs):
+                yield item
+        gen = async_gen()
+        while True:
+            try:
+                yield loop.run_until_complete(gen.__anext__())
+            except StopAsyncIteration:
+                break
+    async def _async_predict(self, x: str, **kwargs) -> AsyncIterator[str]:
+        """
+        Internal async prediction method.
         """
         if self.stream:
             async for chunk in self.generate_stream(x, **kwargs):
             return request["prompt"]
         return request
+    def encode_response(self, output: Iterator[str], **kwargs) -> Dict[str, Any]:
         """Convert the model output to a response payload."""
+        # For streaming responses
+        if self.stream:
+            return {"generated_text": output}
+        # For non-streaming, take the first (and only) item from the iterator
+        try:
+            result = next(output)
+            return {"generated_text": result}
+        except StopIteration:
+            return {"generated_text": ""}
     async def generate_response(
             self,