Peter Larnholt committed on
Commit
fa4aba4
·
1 Parent(s): 0e074c5

Add outlines package and fix Gradio message format compatibility

Browse files

- Add outlines>=0.0.37 for complete guided decoding support
- Strip Gradio-specific fields (metadata, options) from messages
before sending to vLLM OpenAI-compatible API
- This fixes the 400 Bad Request error from vLLM

Files changed (2) hide show
  1. app.py +3 -1
  2. requirements.txt +1 -0
app.py CHANGED
@@ -86,7 +86,9 @@ def ensure_ready():
86
  def chat_fn(user_message: str, history: list[dict]):
87
  if not ensure_ready():
88
  return "⏳ Model is loading… please retry shortly."
89
- messages = [{"role":"system","content":SYSTEM_PROMPT}] + history + [{"role":"user","content":user_message}]
 
 
90
  payload = {"model":"excom-ai","messages":messages,"temperature":0.4}
91
  r = requests.post(f"http://127.0.0.1:{API_PORT}/v1/chat/completions", json=payload, timeout=600)
92
  if not r.ok:
 
86
  def chat_fn(user_message: str, history: list[dict]):
87
  if not ensure_ready():
88
  return "⏳ Model is loading… please retry shortly."
89
+ # Strip Gradio-specific fields (metadata, options) for OpenAI compatibility
90
+ clean_history = [{"role": m["role"], "content": m["content"]} for m in history]
91
+ messages = [{"role":"system","content":SYSTEM_PROMPT}] + clean_history + [{"role":"user","content":user_message}]
92
  payload = {"model":"excom-ai","messages":messages,"temperature":0.4}
93
  r = requests.post(f"http://127.0.0.1:{API_PORT}/v1/chat/completions", json=payload, timeout=600)
94
  if not r.ok:
requirements.txt CHANGED
@@ -11,4 +11,5 @@ transformers>=4.44
11
  accelerate>=0.30
12
 
13
  # Required for vLLM's outlines guided decoding backend
 
14
  airportsdata>=20240400
 
11
  accelerate>=0.30
12
 
13
  # Required for vLLM's outlines guided decoding backend
14
+ outlines>=0.0.37
15
  airportsdata>=20240400