Dmitry Beresnev committed
Commit e1e4b82 · 1 Parent(s): e9b8569

fix payload processing

Files changed (1)
  1. app.py +17 -6
app.py CHANGED
@@ -497,6 +497,10 @@ class ChatCompletionRequest(BaseModel):
         ge=0.0,
         le=2.0
     )
+    model: Optional[str] = Field(
+        default=None,
+        description="Optional model name (ignored by this server; use /switch-model)."
+    )
 
     model_config = {
         "json_schema_extra": {
@@ -896,15 +900,22 @@ async def chat_completions(request: ChatCompletionRequest, raw_request: Request)
         raise HTTPException(status_code=500, detail="Current model not loaded")
 
     # Forward to llama-server using aiohttp
+    payload = {
+        "messages": request.messages,
+        "max_tokens": request.max_tokens,
+        "temperature": request.temperature,
+    }
     async with http_session.post(
         f"{cached_model.url}/v1/chat/completions",
-        json={
-            "messages": request.messages,
-            "max_tokens": request.max_tokens,
-            "temperature": request.temperature,
-        }
+        json=payload
     ) as response:
-        response.raise_for_status()
+        if response.status >= 400:
+            error_text = await response.text()
+            logger.error(
+                f"request_id={request_id} llama-server {response.status} "
+                f"error_body={error_text[:1000]}"
+            )
+        response.raise_for_status()
         result = await response.json()
 
     # Update metrics
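A hedged client-side sketch of the behavior after this commit (not part of the change itself). It assumes the FastAPI proxy is reachable at http://localhost:8000 and exposes POST /v1/chat/completions mirroring the llama-server path it forwards to, and that responses follow the OpenAI-compatible shape llama-server returns; the URL and the "model" value below are illustrative assumptions. With this commit, the optional `model` field is accepted by `ChatCompletionRequest` but ignored (model selection happens via /switch-model), and upstream error bodies are logged server-side before `raise_for_status()` propagates the failure.

```python
# Hypothetical smoke test for this change; the endpoint URL and the
# "model" value are assumptions, not taken from the repository.
import asyncio

import aiohttp


async def main() -> None:
    payload = {
        # Accepted since this commit, but ignored by the server;
        # switching models is done via /switch-model instead.
        "model": "any-name",
        "messages": [{"role": "user", "content": "Hello!"}],
        "max_tokens": 64,
        "temperature": 0.7,
    }
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "http://localhost:8000/v1/chat/completions", json=payload
        ) as response:
            if response.status >= 400:
                # The proxy has already logged the upstream error body
                # (truncated to 1000 chars) before failing on its side.
                print("error:", response.status, await response.text())
                return
            result = await response.json()
            # OpenAI-compatible response shape (assumption).
            print(result["choices"][0]["message"]["content"])


asyncio.run(main())
```

Building the payload once and logging the upstream body before `raise_for_status()` means a failing llama-server response is no longer reduced to a bare status code in the proxy logs.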