Update apis/chat_api.py
Browse files
- apis/chat_api.py +3 -3
apis/chat_api.py
CHANGED
|
@@ -187,10 +187,10 @@ class ChatAPIApp:
|
|
| 187 |
data_response = streamer.chat_return_dict(stream_response)
|
| 188 |
return data_response
|
| 189 |
|
| 190 |
-
async def chat_embedding(self,
|
| 191 |
api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_name}"
|
| 192 |
headers = {"Authorization": f"Bearer {api_key}"}
|
| 193 |
-
response = requests.post(api_url, headers=headers, json={"inputs":
|
| 194 |
result = response.json()
|
| 195 |
if isinstance(result, list) and len(result) > 0 and isinstance(result[0], list):
|
| 196 |
# Assuming each embedding is a list of lists of floats, flatten it
|
|
@@ -215,7 +215,7 @@ class ChatAPIApp:
|
|
| 215 |
"object": "list",
|
| 216 |
"data": data,
|
| 217 |
"model": request.model_name,
|
| 218 |
-
"usage": {"prompt_tokens": len(request.
|
| 219 |
}
|
| 220 |
except RuntimeError as e:
|
| 221 |
if attempt < 2: # Don't sleep on the last attempt
|
|
|
|
| 187 |
data_response = streamer.chat_return_dict(stream_response)
|
| 188 |
return data_response
|
| 189 |
|
| 190 |
+
async def chat_embedding(self, input, model_name, api_key: str = Depends(extract_api_key)):
|
| 191 |
api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_name}"
|
| 192 |
headers = {"Authorization": f"Bearer {api_key}"}
|
| 193 |
+
response = requests.post(api_url, headers=headers, json={"inputs": input})
|
| 194 |
result = response.json()
|
| 195 |
if isinstance(result, list) and len(result) > 0 and isinstance(result[0], list):
|
| 196 |
# Assuming each embedding is a list of lists of floats, flatten it
|
|
|
|
| 215 |
"object": "list",
|
| 216 |
"data": data,
|
| 217 |
"model": request.model_name,
|
| 218 |
+
"usage": {"prompt_tokens": len(request.input), "total_tokens": len(request.input)}
|
| 219 |
}
|
| 220 |
except RuntimeError as e:
|
| 221 |
if attempt < 2: # Don't sleep on the last attempt
|