Elfsong committed on
Commit
034dd7d
·
1 Parent(s): 12a6f32

refactor: Simplify model configuration by replacing dynamic GPU mapping with a static dictionary, and enhance the bot response function to accept a seed value for reproducible responses.

Browse files
Files changed (1) hide show
  1. app.py +17 -52
app.py CHANGED
@@ -5,63 +5,26 @@
5
 
6
  import os
7
  import json
 
8
  import datetime
9
  import gradio as gr
10
  import pandas as pd
11
- import subprocess
12
- import time
13
  from pathlib import Path
14
  from huggingface_hub import CommitScheduler
15
  from huggingface_hub import InferenceClient
16
 
17
  HF_TOKEN = os.getenv("HF_TOKEN")
18
 
19
- MODELS = dict()
20
-
21
- # Launch models via vLLM
22
- model_gpu_mapping = [
23
- # (0, 1000),
24
- # (0, 1500),
25
- # (1, 2000),
26
- # (1, 2500),
27
- # (2, 3000),
28
- # (2, 3500),
29
- (2, 4000),
30
- # (3, 4500),
31
- (2, 5000),
32
- # (4, 5500),
33
- (3, 6000),
34
- # (5, 6500),
35
- (3, 7000),
36
- # (6, 7500),
37
- ]
38
-
39
- for index, (gpu_id, iter_num) in enumerate(model_gpu_mapping):
40
- formatted_iter_num = f"{iter_num:07d}"
41
- model_name = f"Elfsong/VLM_stage_2_iter_{formatted_iter_num}"
42
- arena_key = f"Local-Model-{iter_num:05d}"
43
-
44
- port = 9000 + index
45
- print(f"🚀 Launching {model_name} on port {port} (GPU {gpu_id}) ...")
46
- log_file = open(f"./logs/vllm_{formatted_iter_num}.log", "w")
47
-
48
- subprocess.Popen(
49
- [
50
- "python", "-m", "vllm.entrypoints.openai.api_server",
51
- "--model", model_name,
52
- "--port", str(port),
53
- "--quantization", "bitsandbytes",
54
- "--gpu-memory-utilization", "0.3",
55
- "--trust-remote-code",
56
- ],
57
- env={**os.environ, "CUDA_VISIBLE_DEVICES": str(gpu_id)},
58
- stdout=log_file,
59
- stderr=log_file,
60
- )
61
-
62
- time.sleep(5) # Wait for initialization
63
- MODELS[arena_key] = f"http://localhost:{port}/v1"
64
- print(f"✅ Launched {len(MODELS)} models. Check logs in ./logs/ directory.")
65
 
66
  DATA_DIR = Path("logs")
67
  DATA_DIR.mkdir(exist_ok=True)
@@ -88,7 +51,7 @@ def save_feedback(model_name, history, feedback_data: gr.LikeData):
88
 
89
  print(f"Feedback logged for {model_name}")
90
 
91
- def bot_response(user_message, history, model_name, system_message, thinking_mode, max_tokens, temperature, top_p):
92
  if not user_message or user_message.strip() == "":
93
  yield history, ""
94
  return
@@ -114,6 +77,7 @@ def bot_response(user_message, history, model_name, system_message, thinking_mod
114
  temperature=temperature,
115
  top_p=top_p,
116
  model=model_name,
 
117
  )
118
 
119
  response_text = ""
@@ -145,6 +109,7 @@ with gr.Blocks() as demo:
145
  max_t = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens")
146
  temp = gr.Slider(minimum=0.0, maximum=2.0, value=0.0, step=0.05, label="Temperature")
147
  top_p_val = gr.Slider(minimum=0.0, maximum=1.0, value=1.0, step=0.05, label="Top-p")
 
148
 
149
  gr.Markdown("# ⚔️ Chatbot Arena")
150
 
@@ -164,12 +129,12 @@ with gr.Blocks() as demo:
164
  btn_b = gr.Button("Send to Model B")
165
 
166
  # --- Bind Events ---
167
- a_inputs = [msg_a, chatbot_a, model_a_name, system_msg, thinking_mode, max_t, temp, top_p_val]
168
  msg_a.submit(bot_response, a_inputs, [chatbot_a, msg_a])
169
  btn_a.click(bot_response, a_inputs, [chatbot_a, msg_a])
170
  chatbot_a.like(save_feedback, [model_a_name, chatbot_a], None)
171
 
172
- b_inputs = [msg_b, chatbot_b, model_b_name, system_msg, thinking_mode, max_t, temp, top_p_val]
173
  msg_b.submit(bot_response, b_inputs, [chatbot_b, msg_b])
174
  btn_b.click(bot_response, b_inputs, [chatbot_b, msg_b])
175
  chatbot_b.like(save_feedback, [model_b_name, chatbot_b], None)
@@ -185,4 +150,4 @@ with gr.Blocks() as demo:
185
  )
186
 
187
  if __name__ == "__main__":
188
- demo.launch(server_name="0.0.0.0", share=False)
 
5
 
6
  import os
7
  import json
8
+ import random
9
  import datetime
10
  import gradio as gr
11
  import pandas as pd
 
 
12
  from pathlib import Path
13
  from huggingface_hub import CommitScheduler
14
  from huggingface_hub import InferenceClient
15
 
16
  HF_TOKEN = os.getenv("HF_TOKEN")
17
 
18
+ # Model configuration - these should match the models launched by launch_models.py
19
+ MODELS = {
20
+ "Local-Model-00500": "http://localhost:9000/v1",
21
+ "Local-Model-01000": "http://localhost:9001/v1",
22
+ "Local-Model-01500": "http://localhost:9002/v1",
23
+ "Local-Model-02000": "http://localhost:9003/v1",
24
+ "Local-Model-02500": "http://localhost:9004/v1",
25
+ "Local-Model-03000": "http://localhost:9005/v1",
26
+ "Local-Model-03500": "http://localhost:9006/v1",
27
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  DATA_DIR = Path("logs")
30
  DATA_DIR.mkdir(exist_ok=True)
 
51
 
52
  print(f"Feedback logged for {model_name}")
53
 
54
+ def bot_response(user_message, history, model_name, system_message, thinking_mode, max_tokens, temperature, top_p, seed_val):
55
  if not user_message or user_message.strip() == "":
56
  yield history, ""
57
  return
 
77
  temperature=temperature,
78
  top_p=top_p,
79
  model=model_name,
80
+ seed=seed_val,
81
  )
82
 
83
  response_text = ""
 
109
  max_t = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens")
110
  temp = gr.Slider(minimum=0.0, maximum=2.0, value=0.0, step=0.05, label="Temperature")
111
  top_p_val = gr.Slider(minimum=0.0, maximum=1.0, value=1.0, step=0.05, label="Top-p")
112
+ seed_val = gr.Slider(minimum=-1, maximum=4294967295, value=random.randint(0, 4294967295), step=1, label="Seed")
113
 
114
  gr.Markdown("# ⚔️ Chatbot Arena")
115
 
 
129
  btn_b = gr.Button("Send to Model B")
130
 
131
  # --- Bind Events ---
132
+ a_inputs = [msg_a, chatbot_a, model_a_name, system_msg, thinking_mode, max_t, temp, top_p_val, seed_val]
133
  msg_a.submit(bot_response, a_inputs, [chatbot_a, msg_a])
134
  btn_a.click(bot_response, a_inputs, [chatbot_a, msg_a])
135
  chatbot_a.like(save_feedback, [model_a_name, chatbot_a], None)
136
 
137
+ b_inputs = [msg_b, chatbot_b, model_b_name, system_msg, thinking_mode, max_t, temp, top_p_val, seed_val]
138
  msg_b.submit(bot_response, b_inputs, [chatbot_b, msg_b])
139
  btn_b.click(bot_response, b_inputs, [chatbot_b, msg_b])
140
  chatbot_b.like(save_feedback, [model_b_name, chatbot_b], None)
 
150
  )
151
 
152
  if __name__ == "__main__":
153
+ demo.launch(share=True)