Spaces:
Paused
Paused
Peter Larnholt
committed on
Commit
·
65659d1
1
Parent(s):
fa4aba4
Upgrade vLLM to 0.6.4.post1 and remove explicit outlines dependencies
Browse files
- Upgrade from 0.6.3.post1 to 0.6.4.post1 for bug fixes and stability
- Remove explicit outlines/airportsdata - let vLLM manage its own deps
- vLLM 0.6.4.post1 has better outlines integration and may fix the
silent 500 error during text generation
- app.py +2 -1
- requirements.txt +1 -5
app.py
CHANGED
|
@@ -32,7 +32,8 @@ if "AWQ" in MODEL_ID.upper():
|
|
| 32 |
|
| 33 |
def launch_vllm():
|
| 34 |
print(f"[vLLM] Launch: {MODEL_ID}")
|
| 35 |
-
|
|
|
|
| 36 |
|
| 37 |
def wait_vllm_ready(timeout=900, interval=3):
|
| 38 |
url = f"http://127.0.0.1:{API_PORT}/v1/models"
|
|
|
|
| 32 |
|
| 33 |
def launch_vllm():
|
| 34 |
print(f"[vLLM] Launch: {MODEL_ID}")
|
| 35 |
+
# Capture stderr to see any crashes/errors during generation
|
| 36 |
+
subprocess.Popen(VLLM_ARGS, stderr=subprocess.STDOUT)
|
| 37 |
|
| 38 |
def wait_vllm_ready(timeout=900, interval=3):
|
| 39 |
url = f"http://127.0.0.1:{API_PORT}/v1/models"
|
requirements.txt
CHANGED
|
@@ -4,12 +4,8 @@ gradio>=4.38
|
|
| 4 |
requests>=2.31
|
| 5 |
|
| 6 |
# vLLM + CUDA 12.1
|
| 7 |
-
vllm==0.6.3.post1
|
| 8 |
--extra-index-url https://download.pytorch.org/whl/cu121
|
| 9 |
torch==2.4.0
|
| 10 |
transformers>=4.44
|
| 11 |
accelerate>=0.30
|
| 12 |
-
|
| 13 |
-
# Required for vLLM's outlines guided decoding backend
|
| 14 |
-
outlines>=0.0.37
|
| 15 |
-
airportsdata>=20240400
|
|
|
|
| 4 |
requests>=2.31
|
| 5 |
|
| 6 |
# vLLM + CUDA 12.1
|
| 7 |
+
vllm==0.6.4.post1
|
| 8 |
--extra-index-url https://download.pytorch.org/whl/cu121
|
| 9 |
torch==2.4.0
|
| 10 |
transformers>=4.44
|
| 11 |
accelerate>=0.30
|
|
|
|
|
|
|
|
|
|
|
|