Elfsong committed on
Commit
12a6f32
·
1 Parent(s): 24170ac

chore: Update GPU mapping for model launches by commenting out unused configurations and adjust GPU memory utilization to 0.3. Add additional dependencies to requirements.txt for enhanced functionality.

Browse files
Files changed (2) hide show
  1. app.py +15 -8
  2. requirements.txt +7 -1
app.py CHANGED
@@ -20,13 +20,20 @@ MODELS = dict()
20
 
21
  # Launch models via vLLM
22
  model_gpu_mapping = [
23
- (0, 1000), (0, 1500),
24
- (1, 2000), (1, 2500),
25
- (2, 3000), (2, 3500),
26
- (3, 4000), (3, 4500),
27
- (4, 5000), (4, 5500),
28
- (5, 6000), (5, 6500),
29
- (6, 7000), (6, 7500),
 
 
 
 
 
 
 
30
  ]
31
 
32
  for index, (gpu_id, iter_num) in enumerate(model_gpu_mapping):
@@ -44,7 +51,7 @@ for index, (gpu_id, iter_num) in enumerate(model_gpu_mapping):
44
  "--model", model_name,
45
  "--port", str(port),
46
  "--quantization", "bitsandbytes",
47
- "--gpu-memory-utilization", "0.4",
48
  "--trust-remote-code",
49
  ],
50
  env={**os.environ, "CUDA_VISIBLE_DEVICES": str(gpu_id)},
 
20
 
21
  # Launch models via vLLM
22
  model_gpu_mapping = [
23
+ # (0, 1000),
24
+ # (0, 1500),
25
+ # (1, 2000),
26
+ # (1, 2500),
27
+ # (2, 3000),
28
+ # (2, 3500),
29
+ (2, 4000),
30
+ # (3, 4500),
31
+ (2, 5000),
32
+ # (4, 5500),
33
+ (3, 6000),
34
+ # (5, 6500),
35
+ (3, 7000),
36
+ # (6, 7500),
37
  ]
38
 
39
  for index, (gpu_id, iter_num) in enumerate(model_gpu_mapping):
 
51
  "--model", model_name,
52
  "--port", str(port),
53
  "--quantization", "bitsandbytes",
54
+ "--gpu-memory-utilization", "0.3",
55
  "--trust-remote-code",
56
  ],
57
  env={**os.environ, "CUDA_VISIBLE_DEVICES": str(gpu_id)},
requirements.txt CHANGED
@@ -1,2 +1,8 @@
1
  gradio
2
- gradio[oauth]
 
 
 
 
 
 
 
1
  gradio
2
+ gradio[oauth]
3
+ vllm
4
+ bitsandbytes
5
+ transformers
6
+ datasets
7
+ ninja
8
+ flash-attn