Update apis/chat_api.py
Browse files — apis/chat_api.py (+20 −20)
apis/chat_api.py
CHANGED
|
@@ -187,26 +187,26 @@ class ChatAPIApp:
|
|
| 187 |
return data_response
|
| 188 |
|
| 189 |
async def embedding(request: QueryRequest):
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
|
| 211 |
def setup_routes(self):
|
| 212 |
for prefix in ["", "/v1", "/api", "/api/v1"]:
|
|
|
|
| 187 |
return data_response
|
| 188 |
|
| 189 |
async def embedding(request: QueryRequest):
    """Compute embeddings for ``request.texts`` via the Hugging Face API.

    Retries up to 3 times when the upstream call raises ``RuntimeError``
    (the HF Inference API raises this while a model is still loading),
    waiting 10 s between attempts. On success, returns an OpenAI-style
    embedding response dict.

    Raises:
        HTTPException: 503 if the model is still loading after all retries;
            500 for any other unexpected error.
    """
    try:
        for attempt in range(3):  # retry while the HF model warms up
            try:
                embeddings = await send_request_to_hugging_face(
                    request.texts, request.model_name, request.api_key
                )
                data = [
                    {"object": "embedding", "index": i, "embedding": embedding}
                    for i, embedding in enumerate(embeddings)
                ]
                # NOTE(review): usage counts the number of input texts, not
                # actual tokens — presumably a placeholder; confirm upstream.
                return {
                    "object": "list",
                    "data": data,
                    "model": request.model_name,
                    "usage": {
                        "prompt_tokens": len(request.texts),
                        "total_tokens": len(request.texts),
                    },
                }
            except RuntimeError:
                # BUG FIX: the original raised 503 immediately after the
                # first failure (the sleep ran, then the raise executed
                # unconditionally), so attempts 2 and 3 never happened.
                # Sleep and retry; raise only once the final attempt fails.
                if attempt < 2:
                    await asyncio.sleep(10)  # give the model time to load
                    continue
                raise HTTPException(
                    status_code=503,
                    detail="The model is currently loading, please try again later.",
                )
    except HTTPException:
        # BUG FIX: HTTPException subclasses Exception, so the original's
        # broad handler below rewrapped the intended 503 as a 500.
        # Re-raise our own HTTP errors untouched.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
| 210 |
|
| 211 |
def setup_routes(self):
|
| 212 |
for prefix in ["", "/v1", "/api", "/api/v1"]:
|