Spaces:
Running
Running
Dmitry Beresnev committed on
Commit ·
e1e4b82
1
Parent(s): e9b8569
fix payload processing
Browse files
app.py
CHANGED
|
@@ -497,6 +497,10 @@ class ChatCompletionRequest(BaseModel):
|
|
| 497 |
ge=0.0,
|
| 498 |
le=2.0
|
| 499 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 500 |
|
| 501 |
model_config = {
|
| 502 |
"json_schema_extra": {
|
|
@@ -896,15 +900,22 @@ async def chat_completions(request: ChatCompletionRequest, raw_request: Request)
|
|
| 896 |
raise HTTPException(status_code=500, detail="Current model not loaded")
|
| 897 |
|
| 898 |
# Forward to llama-server using aiohttp
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 899 |
async with http_session.post(
|
| 900 |
f"{cached_model.url}/v1/chat/completions",
|
| 901 |
-
json={
|
| 902 |
-
"messages": request.messages,
|
| 903 |
-
"max_tokens": request.max_tokens,
|
| 904 |
-
"temperature": request.temperature,
|
| 905 |
-
}
|
| 906 |
) as response:
|
| 907 |
-
response.raise_for_status()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 908 |
result = await response.json()
|
| 909 |
|
| 910 |
# Update metrics
|
|
|
|
| 497 |
ge=0.0,
|
| 498 |
le=2.0
|
| 499 |
)
|
| 500 |
+
model: Optional[str] = Field(
|
| 501 |
+
default=None,
|
| 502 |
+
description="Optional model name (ignored by this server; use /switch-model)."
|
| 503 |
+
)
|
| 504 |
|
| 505 |
model_config = {
|
| 506 |
"json_schema_extra": {
|
|
|
|
| 900 |
raise HTTPException(status_code=500, detail="Current model not loaded")
|
| 901 |
|
| 902 |
# Forward to llama-server using aiohttp
|
| 903 |
+
payload = {
|
| 904 |
+
"messages": request.messages,
|
| 905 |
+
"max_tokens": request.max_tokens,
|
| 906 |
+
"temperature": request.temperature,
|
| 907 |
+
}
|
| 908 |
async with http_session.post(
|
| 909 |
f"{cached_model.url}/v1/chat/completions",
|
| 910 |
+
json=payload
|
|
|
|
|
|
|
|
|
|
|
|
|
| 911 |
) as response:
|
| 912 |
+
if response.status >= 400:
|
| 913 |
+
error_text = await response.text()
|
| 914 |
+
logger.error(
|
| 915 |
+
f"request_id={request_id} llama-server {response.status} "
|
| 916 |
+
f"error_body={error_text[:1000]}"
|
| 917 |
+
)
|
| 918 |
+
response.raise_for_status()
|
| 919 |
result = await response.json()
|
| 920 |
|
| 921 |
# Update metrics
|