Spaces:
Paused
Paused
Peter Larnholt
committed on
Commit
·
4142581
1
Parent(s):
82c6e6d
Remove guided decoding backend override and add airportsdata dependency
Browse files
- app.py +0 -1
- requirements.txt +3 -0
app.py
CHANGED
|
@@ -27,7 +27,6 @@ VLLM_ARGS = [
|
|
| 27 |
"--gpu-memory-utilization", "0.90",
|
| 28 |
"--trust-remote-code",
|
| 29 |
"--disable-log-requests", # reduce log noise
|
| 30 |
-
"--guided-decoding-backend", "lm-format-enforcer", # avoid outlines issues
|
| 31 |
]
|
| 32 |
if "AWQ" in MODEL_ID.upper():
|
| 33 |
VLLM_ARGS += ["--quantization", "awq_marlin"] # faster AWQ kernel if available
|
|
|
|
| 27 |
"--gpu-memory-utilization", "0.90",
|
| 28 |
"--trust-remote-code",
|
| 29 |
"--disable-log-requests", # reduce log noise
|
|
|
|
| 30 |
]
|
| 31 |
if "AWQ" in MODEL_ID.upper():
|
| 32 |
VLLM_ARGS += ["--quantization", "awq_marlin"] # faster AWQ kernel if available
|
requirements.txt
CHANGED
|
@@ -9,3 +9,6 @@ vllm==0.6.3.post1
|
|
| 9 |
torch==2.4.0
|
| 10 |
transformers>=4.44
|
| 11 |
accelerate>=0.30
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
torch==2.4.0
|
| 10 |
transformers>=4.44
|
| 11 |
accelerate>=0.30
|
| 12 |
+
|
| 13 |
+
# Required for vLLM guided decoding (even if not actively used)
|
| 14 |
+
airportsdata>=20240400
|