Peter Larnholt committed on
Commit
82c6e6d
·
1 Parent(s): 952d32f

Switch guided decoding backend to lm-format-enforcer to avoid outlines issues

Browse files
Files changed (1) hide show
  1. app.py +2 -0
app.py CHANGED
@@ -26,6 +26,8 @@ VLLM_ARGS = [
26
  "--max-model-len", "8192", # fits A10G 24GB
27
  "--gpu-memory-utilization", "0.90",
28
  "--trust-remote-code",
 
 
29
  ]
30
  if "AWQ" in MODEL_ID.upper():
31
  VLLM_ARGS += ["--quantization", "awq_marlin"] # faster AWQ kernel if available
 
26
  "--max-model-len", "8192", # fits A10G 24GB
27
  "--gpu-memory-utilization", "0.90",
28
  "--trust-remote-code",
29
+ "--disable-log-requests", # reduce log noise
30
+ "--guided-decoding-backend", "lm-format-enforcer", # avoid outlines issues
31
  ]
32
  if "AWQ" in MODEL_ID.upper():
33
  VLLM_ARGS += ["--quantization", "awq_marlin"] # faster AWQ kernel if available