Spaces:
Paused
Paused
Peter Larnholt
committed on
Commit
·
fa4aba4
1
Parent(s):
0e074c5
Add outlines package and fix Gradio message format compatibility
Browse files
- Add outlines>=0.0.37 for complete guided decoding support
- Strip Gradio-specific fields (metadata, options) from messages
before sending to vLLM OpenAI-compatible API
- This fixes the 400 Bad Request error from vLLM
- app.py +3 -1
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -86,7 +86,9 @@ def ensure_ready():
|
|
| 86 |
def chat_fn(user_message: str, history: list[dict]):
|
| 87 |
if not ensure_ready():
|
| 88 |
return "⏳ Model is loading… please retry shortly."
|
| 89 |
-
|
|
|
|
|
|
|
| 90 |
payload = {"model":"excom-ai","messages":messages,"temperature":0.4}
|
| 91 |
r = requests.post(f"http://127.0.0.1:{API_PORT}/v1/chat/completions", json=payload, timeout=600)
|
| 92 |
if not r.ok:
|
|
|
|
| 86 |
def chat_fn(user_message: str, history: list[dict]):
|
| 87 |
if not ensure_ready():
|
| 88 |
return "⏳ Model is loading… please retry shortly."
|
| 89 |
+
# Strip Gradio-specific fields (metadata, options) for OpenAI compatibility
|
| 90 |
+
clean_history = [{"role": m["role"], "content": m["content"]} for m in history]
|
| 91 |
+
messages = [{"role":"system","content":SYSTEM_PROMPT}] + clean_history + [{"role":"user","content":user_message}]
|
| 92 |
payload = {"model":"excom-ai","messages":messages,"temperature":0.4}
|
| 93 |
r = requests.post(f"http://127.0.0.1:{API_PORT}/v1/chat/completions", json=payload, timeout=600)
|
| 94 |
if not r.ok:
|
requirements.txt
CHANGED
|
@@ -11,4 +11,5 @@ transformers>=4.44
|
|
| 11 |
accelerate>=0.30
|
| 12 |
|
| 13 |
# Required for vLLM's outlines guided decoding backend
|
|
|
|
| 14 |
airportsdata>=20240400
|
|
|
|
| 11 |
accelerate>=0.30
|
| 12 |
|
| 13 |
# Required for vLLM's outlines guided decoding backend
|
| 14 |
+
outlines>=0.0.37
|
| 15 |
airportsdata>=20240400
|