Spaces:
Paused
Paused
Peter Larnholt
committed on
Commit
·
2e9c870
1
Parent(s):
3356350
Fix invalid --disable-guided-decoding flag and add airportsdata dependency
Browse files
The --disable-guided-decoding flag doesn't exist in vLLM 0.6.3.post1.
Instead, ensure the outlines backend works properly by adding the airportsdata
dependency, which is required for guided decoding imports.
- app.py +0 -1
- requirements.txt +3 -0
app.py
CHANGED
|
@@ -27,7 +27,6 @@ VLLM_ARGS = [
|
|
| 27 |
"--gpu-memory-utilization", "0.90",
|
| 28 |
"--trust-remote-code",
|
| 29 |
"--disable-log-requests", # reduce log noise
|
| 30 |
-
"--disable-guided-decoding", # skip guided decoding (outlines) to avoid import issues
|
| 31 |
]
|
| 32 |
if "AWQ" in MODEL_ID.upper():
|
| 33 |
VLLM_ARGS += ["--quantization", "awq_marlin"] # faster AWQ kernel if available
|
|
|
|
| 27 |
"--gpu-memory-utilization", "0.90",
|
| 28 |
"--trust-remote-code",
|
| 29 |
"--disable-log-requests", # reduce log noise
|
|
|
|
| 30 |
]
|
| 31 |
if "AWQ" in MODEL_ID.upper():
|
| 32 |
VLLM_ARGS += ["--quantization", "awq_marlin"] # faster AWQ kernel if available
|
requirements.txt
CHANGED
|
@@ -9,3 +9,6 @@ vllm==0.6.3.post1
|
|
| 9 |
torch==2.4.0
|
| 10 |
transformers>=4.44
|
| 11 |
accelerate>=0.30
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
torch==2.4.0
|
| 10 |
transformers>=4.44
|
| 11 |
accelerate>=0.30
|
| 12 |
+
|
| 13 |
+
# Required for vLLM's outlines guided decoding backend
|
| 14 |
+
airportsdata>=20240400
|