Spaces:

plarnholt
/

excom-ai-demo

Paused

Peter Larnholt committed on Oct 9

Commit

82c6e6d

1 Parent(s): 952d32f

Switch guided decoding backend to lm-format-enforcer to avoid outlines issues

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,6 +26,8 @@ VLLM_ARGS = [
     "--max-model-len", "8192",               # fits A10G 24GB
     "--gpu-memory-utilization", "0.90",
     "--trust-remote-code",
 ]
 if "AWQ" in MODEL_ID.upper():
     VLLM_ARGS += ["--quantization", "awq_marlin"]  # faster AWQ kernel if available

     "--max-model-len", "8192",               # fits A10G 24GB
     "--gpu-memory-utilization", "0.90",
     "--trust-remote-code",
+    "--disable-log-requests",                # reduce log noise
+    "--guided-decoding-backend", "lm-format-enforcer",  # avoid outlines issues
 ]
 if "AWQ" in MODEL_ID.upper():
     VLLM_ARGS += ["--quantization", "awq_marlin"]  # faster AWQ kernel if available