Spaces:

ka1kuk
/

LLM-api

Sleeping

ka1kuk committed on Mar 16, 2024

Commit

2c5aa7b

verified ·

1 Parent(s): 6ffbd42

Update apis/chat_api.py

Files changed (1) hide show

apis/chat_api.py CHANGED Viewed

@@ -187,10 +187,10 @@ class ChatAPIApp:
             data_response = streamer.chat_return_dict(stream_response)
             return data_response
-    async def chat_embedding(self, texts, model_name, api_key: str = Depends(extract_api_key)):
         api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_name}"
         headers = {"Authorization": f"Bearer {api_key}"}
-        response = requests.post(api_url, headers=headers, json={"inputs": texts})
         result = response.json()
         if isinstance(result, list) and len(result) > 0 and isinstance(result[0], list):
             # Assuming each embedding is a list of lists of floats, flatten it
@@ -215,7 +215,7 @@ class ChatAPIApp:
                             "object": "list",
                             "data": data,
                             "model": request.model_name,
-                            "usage": {"prompt_tokens": len(request.texts), "total_tokens": len(request.texts)}
                         }
                     except RuntimeError as e:
                         if attempt < 2:  # Don't sleep on the last attempt

             data_response = streamer.chat_return_dict(stream_response)
             return data_response
+    async def chat_embedding(self, input, model_name, api_key: str = Depends(extract_api_key)):
         api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_name}"
         headers = {"Authorization": f"Bearer {api_key}"}
+        response = requests.post(api_url, headers=headers, json={"inputs": input})
         result = response.json()
         if isinstance(result, list) and len(result) > 0 and isinstance(result[0], list):
             # Assuming each embedding is a list of lists of floats, flatten it
                             "object": "list",
                             "data": data,
                             "model": request.model_name,
+                            "usage": {"prompt_tokens": len(request.input), "total_tokens": len(request.input)}
                         }
                     except RuntimeError as e:
                         if attempt < 2:  # Don't sleep on the last attempt