Spaces:

ka1kuk
/

LLM-api

Sleeping

App Files Files Community

ka1kuk committed on Mar 17, 2024

Commit

0e70dd6

verified ·

1 Parent(s): d25ec2d

Update apis/chat_api.py

Browse files

Files changed (1) hide show

apis/chat_api.py +20 -22

apis/chat_api.py CHANGED Viewed

@@ -188,36 +188,34 @@ class ChatAPIApp:
             data_response = streamer.chat_return_dict(stream_response)
             return data_response
-    async def chat_embedding(self, input_text: str, model_name: str, api_key: str):
-        api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_name}"
         headers = {"Authorization": f"Bearer {api_key}"}
-        response = requests.post(api_url, headers=headers, json={"inputs": input_text})
-        result = response.json()
-        # Improved error handling and logging
-        if "error" in result:
-            logging.error(f"Error from Hugging Face API: {result['error']}")
-            # More detailed error message
-            error_detail = result.get('error', 'No detailed error message provided.')
-            raise RuntimeError(f"The model is currently loading, please re-run the query. Detail: {error_detail}")
-        if isinstance(result, list) and len(result) > 0 and isinstance(result[0], list):
-            return [item for sublist in result for item in sublist]  # Flatten list of lists
-        else:
-            logging.error(f"Unexpected response format: {result}")
-            raise RuntimeError("Unexpected response format.")
-    async def embedding(self, request: QueryRequest, api_key: str = Depends(extract_api_key)):
-        try:
-            embeddings = await self.chat_embedding(request.input, request.model, api_key)
-            data = [{"object": "embedding", "index": i, "embedding": embedding} for i, embedding in enumerate(embeddings)]
             return EmbeddingResponse(
                 object="list",
                 data=data,
-                model=request.model,
-                usage={"prompt_tokens": len(request.input), "total_tokens": len(request.input)}
             )
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))
     def setup_routes(self):

             data_response = streamer.chat_return_dict(stream_response)
             return data_response
+    async def embedding(request: QueryRequest, api_key: str = Depends(extract_api_key)):
+        api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{request.model_name}"
         headers = {"Authorization": f"Bearer {api_key}"}
+        try:
+            response = requests.post(api_url, headers=headers, json={"inputs": request.input_text})
+            result = response.json()
+            if "error" in result:
+                logging.error(f"Error from Hugging Face API: {result.get('error', 'No detailed error message provided.')}")
+                raise HTTPException(status_code=503, detail="The model is currently loading, please re-run the query.")
+            if not (isinstance(result, list) and len(result) > 0 and isinstance(result[0], list)):
+                logging.error(f"Unexpected response format: {result}")
+                raise HTTPException(status_code=500, detail="Unexpected response format.")
+            # Assuming each embedding is a list of lists of floats, flatten it
+            flattened_embeddings = [sum(embedding, []) for embedding in result]
+            data = [{"object": "embedding", "index": i, "embedding": embedding} for i, embedding in enumerate(flattened_embeddings)]
             return EmbeddingResponse(
                 object="list",
                 data=data,
+                model=request.model_name,
+                usage={"prompt_tokens": len(request.input_text), "total_tokens": len(request.input_text)}
             )
         except Exception as e:
+            logging.error(f"An error occurred: {str(e)}")
             raise HTTPException(status_code=500, detail=str(e))
     def setup_routes(self):