# coding: utf-8
# Author: Du Mingzhe (dumingzhex@gmail.com)
# Date: 2025-12-21
"""Chatbot Arena.

Launches a fleet of local vLLM OpenAI-compatible servers (one checkpoint per
port, two checkpoints per GPU) and serves a side-by-side Gradio UI where two
models can be chatted with independently.  Like/dislike feedback is appended
to a JSONL file that a CommitScheduler periodically syncs to a Hugging Face
dataset repository.
"""

import os
import json
import datetime
import subprocess
import time
from pathlib import Path

import gradio as gr
import pandas as pd
from huggingface_hub import CommitScheduler
from huggingface_hub import InferenceClient

HF_TOKEN = os.getenv("HF_TOKEN")
MODELS = dict()  # arena display name -> local OpenAI-compatible endpoint URL

# Directory shared by the vLLM launch logs and the feedback file.
# Created *before* the launch loop below, which opens log files inside it
# (the original created it only after the loop, so the first open() failed
# whenever ./logs did not already exist).
DATA_DIR = Path("logs")
DATA_DIR.mkdir(exist_ok=True)
FEEDBACK_FILE = DATA_DIR / "feedback.jsonl"

# (gpu_id, checkpoint iteration) pairs -- two checkpoints share each GPU.
model_gpu_mapping = [
    (0, 1000), (0, 1500),
    (1, 2000), (1, 2500),
    (2, 3000), (2, 3500),
    (3, 4000), (3, 4500),
    (4, 5000), (4, 5500),
    (5, 6000), (5, 6500),
    (6, 7000), (6, 7500),
]

# Launch one vLLM OpenAI-compatible server per checkpoint.
for index, (gpu_id, iter_num) in enumerate(model_gpu_mapping):
    formatted_iter_num = f"{iter_num:07d}"
    model_name = f"Elfsong/VLM_stage_2_iter_{formatted_iter_num}"
    arena_key = f"Local-Model-{iter_num:05d}"
    port = 9000 + index
    print(f"🚀 Launching {model_name} on port {port} (GPU {gpu_id}) ...")
    log_file = open(f"./logs/vllm_{formatted_iter_num}.log", "w")
    subprocess.Popen(
        [
            "python", "-m", "vllm.entrypoints.openai.api_server",
            "--model", model_name,
            "--port", str(port),
            "--quantization", "bitsandbytes",
            "--gpu-memory-utilization", "0.4",
            "--trust-remote-code",
        ],
        env={**os.environ, "CUDA_VISIBLE_DEVICES": str(gpu_id)},
        stdout=log_file,
        stderr=log_file,
    )
    # The child inherits its own copy of the fd; close the parent's handle so
    # we don't leak 14 open files (the original never closed them).
    log_file.close()
    time.sleep(5)  # Wait for initialization
    MODELS[arena_key] = f"http://localhost:{port}/v1"

print(f"✅ Launched {len(MODELS)} models. Check logs in ./logs/ directory.")

# Periodically push the feedback folder to a HF dataset repo.
scheduler = CommitScheduler(
    repo_id="Elfsong/arena_feedback",
    repo_type="dataset",
    folder_path=DATA_DIR,
    every=5,  # Sync every 5 minutes
)


def save_feedback(model_name, history, feedback_data: gr.LikeData):
    """Append one like/dislike vote (with the full conversation) to FEEDBACK_FILE.

    Args:
        model_name: Arena key of the model being voted on.
        history: Chat history as a list of {"role", "content"} dicts.
        feedback_data: Gradio LikeData carrying the message index and vote.
    """
    new_entry = {
        "timestamp": datetime.datetime.now().isoformat(),
        "model_name": model_name,
        "message_index": feedback_data.index,
        "vote": feedback_data.value,
        "is_liked": feedback_data.liked,
        "conversation": history,
    }
    # Hold the scheduler lock while appending so a background commit never
    # uploads a half-written line (recommended CommitScheduler usage).
    with scheduler.lock:
        with open(FEEDBACK_FILE, "a", encoding="utf-8") as f:
            f.write(json.dumps(new_entry, ensure_ascii=False) + "\n")
    print(f"Feedback logged for {model_name}")


def bot_response(user_message, history, model_name, system_message, thinking_mode, max_tokens, temperature, top_p):
    """Stream a chat completion into `history`, yielding UI updates.

    Yields (history, textbox_update) pairs: the input box stays disabled while
    tokens stream in and is cleared + re-enabled on the final yield.  Errors
    from the backend are rendered into the assistant bubble instead of raised.
    """
    if not user_message or user_message.strip() == "":
        yield history, ""
        return

    token = HF_TOKEN
    if model_name.startswith("Local-"):
        # Local vLLM servers only need a placeholder token.
        local_endpoint = MODELS.get(model_name)
        client = InferenceClient(base_url=local_endpoint, token="vllm-token")
    else:
        client = InferenceClient(token=token, model=model_name)

    history.append({"role": "user", "content": user_message})
    history.append({"role": "assistant", "content": ""})

    # BUG FIX: the original wrote `system_message + "/set think" if thinking_mode
    # else "/set nothink"`; the conditional binds looser than `+`, so the system
    # prompt was silently dropped whenever thinking mode was off.
    system_content = system_message + ("/set think" if thinking_mode else "/set nothink")
    api_messages = [{"role": "system", "content": system_content}] + history[:-1]

    try:
        stream = client.chat_completion(
            api_messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            model=model_name,
        )
        response_text = ""
        for chunk in stream:
            if not chunk.choices or len(chunk.choices) == 0:
                continue
            token_content = chunk.choices[0].delta.content
            if token_content is not None:
                response_text += token_content
                history[-1]["content"] = response_text
                # Continuously yield update UI, while keeping input box
                # unavailable to prevent double clicks
                yield history, gr.update(interactive=False)
    except Exception as e:
        # If error, display error message in assistant dialog
        history[-1]["content"] = f"**Error:** {str(e)}"

    # --- Final Yield: Restore input box availability and clear content ---
    yield history, gr.update(value="", interactive=True)


with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.Markdown("## Configuration")
        # gr.LoginButton()
        system_msg = gr.Textbox(value="You are a helpful assistant.", label="System Prompt")
        thinking_mode = gr.Checkbox(value=False, label="Thinking Mode")
        max_t = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens")
        temp = gr.Slider(minimum=0.0, maximum=2.0, value=0.0, step=0.05, label="Temperature")
        top_p_val = gr.Slider(minimum=0.0, maximum=1.0, value=1.0, step=0.05, label="Top-p")

    gr.Markdown("# ⚔️ Chatbot Arena")

    with gr.Row():
        # --- Model A ---
        with gr.Column():
            model_a_name = gr.Dropdown(list(MODELS.keys()), label="Model A", value=list(MODELS.keys())[0])
            # type="messages" matches the {"role", "content"} dicts built in
            # bot_response (the legacy tuples default would mis-render them).
            chatbot_a = gr.Chatbot(label="Model A Output", type="messages")
            msg_a = gr.Textbox(placeholder="Send message to Model A...", label="Model A Input")
            btn_a = gr.Button("Send to Model A")
        # --- Model B ---
        with gr.Column():
            model_b_name = gr.Dropdown(list(MODELS.keys()), label="Model B", value=list(MODELS.keys())[-1])
            chatbot_b = gr.Chatbot(label="Model B Output", type="messages")
            msg_b = gr.Textbox(placeholder="Send message to Model B...", label="Model B Input")
            btn_b = gr.Button("Send to Model B")

    # --- Bind Events ---
    a_inputs = [msg_a, chatbot_a, model_a_name, system_msg, thinking_mode, max_t, temp, top_p_val]
    msg_a.submit(bot_response, a_inputs, [chatbot_a, msg_a])
    btn_a.click(bot_response, a_inputs, [chatbot_a, msg_a])
    chatbot_a.like(save_feedback, [model_a_name, chatbot_a], None)

    b_inputs = [msg_b, chatbot_b, model_b_name, system_msg, thinking_mode, max_t, temp, top_p_val]
    msg_b.submit(bot_response, b_inputs, [chatbot_b, msg_b])
    btn_b.click(bot_response, b_inputs, [chatbot_b, msg_b])
    chatbot_b.like(save_feedback, [model_b_name, chatbot_b], None)

    def clear_chats():
        """Reset both chat panes to empty histories."""
        return [], []

    gr.Button("🗑️ Clear Chats").click(
        fn=clear_chats,
        inputs=None,
        outputs=[chatbot_a, chatbot_b],
    )


if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", share=False)