Dmitry Beresnev committed
Commit e1e4b82 · 1 Parent(s): e9b8569

fix payload processing

Files changed (1)
  1. app.py +17 -6
app.py CHANGED
@@ -497,6 +497,10 @@ class ChatCompletionRequest(BaseModel):
         ge=0.0,
         le=2.0
     )
+    model: Optional[str] = Field(
+        default=None,
+        description="Optional model name (ignored by this server; use /switch-model)."
+    )
 
     model_config = {
         "json_schema_extra": {
@@ -896,15 +900,22 @@ async def chat_completions(request: ChatCompletionRequest, raw_request: Request)
         raise HTTPException(status_code=500, detail="Current model not loaded")
 
     # Forward to llama-server using aiohttp
+    payload = {
+        "messages": request.messages,
+        "max_tokens": request.max_tokens,
+        "temperature": request.temperature,
+    }
     async with http_session.post(
         f"{cached_model.url}/v1/chat/completions",
-        json={
-            "messages": request.messages,
-            "max_tokens": request.max_tokens,
-            "temperature": request.temperature,
-        }
+        json=payload
     ) as response:
-        response.raise_for_status()
+        if response.status >= 400:
+            error_text = await response.text()
+            logger.error(
+                f"request_id={request_id} llama-server {response.status} "
+                f"error_body={error_text[:1000]}"
+            )
+        response.raise_for_status()
         result = await response.json()
 
     # Update metrics
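A hedged client-side sketch of the behavior after this commit (not part of the change itself). It assumes the FastAPI proxy is reachable at http://localhost:8000 and exposes POST /v1/chat/completions mirroring the llama-server path it forwards to, and that responses follow the OpenAI-compatible shape llama-server returns; the URL and the "model" value below are illustrative assumptions. With this commit, the optional `model` field is accepted by `ChatCompletionRequest` but ignored (model selection happens via /switch-model), and upstream error bodies are logged server-side before `raise_for_status()` propagates the failure.

```python
# Hypothetical smoke test for this change; the endpoint URL and the
# "model" value are assumptions, not taken from the repository.
import asyncio

import aiohttp


async def main() -> None:
    payload = {
        # Accepted since this commit, but ignored by the server;
        # switching models is done via /switch-model instead.
        "model": "any-name",
        "messages": [{"role": "user", "content": "Hello!"}],
        "max_tokens": 64,
        "temperature": 0.7,
    }
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "http://localhost:8000/v1/chat/completions", json=payload
        ) as response:
            if response.status >= 400:
                # The proxy has already logged the upstream error body
                # (truncated to 1000 chars) before failing on its side.
                print("error:", response.status, await response.text())
                return
            result = await response.json()
            # OpenAI-compatible response shape (assumption).
            print(result["choices"][0]["message"]["content"])


asyncio.run(main())
```

Building the payload once and logging the upstream body before `raise_for_status()` means a failing llama-server response is no longer reduced to a bare status code in the proxy logs.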