Peter Larnholt committed on
Commit
65659d1
·
1 Parent(s): fa4aba4

Upgrade vLLM to 0.6.4.post1 and remove explicit outlines dependencies

Browse files

- Upgrade from 0.6.3.post1 to 0.6.4.post1 for bug fixes and stability
- Remove explicit outlines/airportsdata - let vLLM manage its own deps
- vLLM 0.6.4.post1 has better outlines integration and may fix the
silent 500 error during text generation

Files changed (2) hide show
  1. app.py +2 -1
  2. requirements.txt +1 -5
app.py CHANGED
@@ -32,7 +32,8 @@ if "AWQ" in MODEL_ID.upper():
32
 
33
  def launch_vllm():
34
  print(f"[vLLM] Launch: {MODEL_ID}")
35
- subprocess.Popen(VLLM_ARGS)
 
36
 
37
  def wait_vllm_ready(timeout=900, interval=3):
38
  url = f"http://127.0.0.1:{API_PORT}/v1/models"
 
32
 
33
  def launch_vllm():
34
  print(f"[vLLM] Launch: {MODEL_ID}")
35
+ # Capture stderr to see any crashes/errors during generation
36
+ subprocess.Popen(VLLM_ARGS, stderr=subprocess.STDOUT)
37
 
38
  def wait_vllm_ready(timeout=900, interval=3):
39
  url = f"http://127.0.0.1:{API_PORT}/v1/models"
requirements.txt CHANGED
@@ -4,12 +4,8 @@ gradio>=4.38
4
  requests>=2.31
5
 
6
  # vLLM + CUDA 12.1
7
- vllm==0.6.3.post1
8
  --extra-index-url https://download.pytorch.org/whl/cu121
9
  torch==2.4.0
10
  transformers>=4.44
11
  accelerate>=0.30
12
-
13
- # Required for vLLM's outlines guided decoding backend
14
- outlines>=0.0.37
15
- airportsdata>=20240400
 
4
  requests>=2.31
5
 
6
  # vLLM + CUDA 12.1
7
+ vllm==0.6.4.post1
8
  --extra-index-url https://download.pytorch.org/whl/cu121
9
  torch==2.4.0
10
  transformers>=4.44
11
  accelerate>=0.30