# coding: utf-8
# Author: Du Mingzhe (dumingzhex@gmail.com)
# Date: 2025-12-21
"""Chatbot Arena.

Launches a fleet of local vLLM OpenAI-compatible servers (one checkpoint per
port, two checkpoints per GPU) and serves a side-by-side Gradio UI where two
models can be chatted with independently.  Like/dislike feedback is appended
to a JSONL file that a CommitScheduler periodically syncs to a Hugging Face
dataset repository.
"""

import os
import json
import datetime
import subprocess
import time
from pathlib import Path

import gradio as gr
import pandas as pd
from huggingface_hub import CommitScheduler
from huggingface_hub import InferenceClient

HF_TOKEN = os.getenv("HF_TOKEN")
MODELS = dict()  # arena display name -> local OpenAI-compatible endpoint URL

# Directory shared by the vLLM launch logs and the feedback file.
# Created *before* the launch loop below, which opens log files inside it
# (the original created it only after the loop, so the first open() failed
# whenever ./logs did not already exist).
DATA_DIR = Path("logs")
DATA_DIR.mkdir(exist_ok=True)
FEEDBACK_FILE = DATA_DIR / "feedback.jsonl"

# (gpu_id, checkpoint iteration) pairs -- two checkpoints share each GPU.
model_gpu_mapping = [
    (0, 1000), (0, 1500),
    (1, 2000), (1, 2500),
    (2, 3000), (2, 3500),
    (3, 4000), (3, 4500),
    (4, 5000), (4, 5500),
    (5, 6000), (5, 6500),
    (6, 7000), (6, 7500),
]

# Launch one vLLM OpenAI-compatible server per checkpoint.
for index, (gpu_id, iter_num) in enumerate(model_gpu_mapping):
    formatted_iter_num = f"{iter_num:07d}"
    model_name = f"Elfsong/VLM_stage_2_iter_{formatted_iter_num}"
    arena_key = f"Local-Model-{iter_num:05d}"
    port = 9000 + index
    print(f"🚀 Launching {model_name} on port {port} (GPU {gpu_id}) ...")
    log_file = open(f"./logs/vllm_{formatted_iter_num}.log", "w")
    subprocess.Popen(
        [
            "python", "-m", "vllm.entrypoints.openai.api_server",
            "--model", model_name,
            "--port", str(port),
            "--quantization", "bitsandbytes",
            "--gpu-memory-utilization", "0.4",
            "--trust-remote-code",
        ],
        env={**os.environ, "CUDA_VISIBLE_DEVICES": str(gpu_id)},
        stdout=log_file,
        stderr=log_file,
    )
    # The child inherits its own copy of the fd; close the parent's handle so
    # we don't leak 14 open files (the original never closed them).
    log_file.close()
    time.sleep(5)  # Wait for initialization
    MODELS[arena_key] = f"http://localhost:{port}/v1"

print(f"✅ Launched {len(MODELS)} models. Check logs in ./logs/ directory.")

# Periodically push the feedback folder to a HF dataset repo.
scheduler = CommitScheduler(
    repo_id="Elfsong/arena_feedback",
    repo_type="dataset",
    folder_path=DATA_DIR,
    every=5,  # Sync every 5 minutes
)


def save_feedback(model_name, history, feedback_data: gr.LikeData):
    """Append one like/dislike vote (with the full conversation) to FEEDBACK_FILE.

    Args:
        model_name: Arena key of the model being voted on.
        history: Chat history as a list of {"role", "content"} dicts.
        feedback_data: Gradio LikeData carrying the message index and vote.
    """
    new_entry = {
        "timestamp": datetime.datetime.now().isoformat(),
        "model_name": model_name,
        "message_index": feedback_data.index,
        "vote": feedback_data.value,
        "is_liked": feedback_data.liked,
        "conversation": history,
    }
    # Hold the scheduler lock while appending so a background commit never
    # uploads a half-written line (recommended CommitScheduler usage).
    with scheduler.lock:
        with open(FEEDBACK_FILE, "a", encoding="utf-8") as f:
            f.write(json.dumps(new_entry, ensure_ascii=False) + "\n")
    print(f"Feedback logged for {model_name}")


def bot_response(user_message, history, model_name, system_message, thinking_mode, max_tokens, temperature, top_p):
    """Stream a chat completion into `history`, yielding UI updates.

    Yields (history, textbox_update) pairs: the input box stays disabled while
    tokens stream in and is cleared + re-enabled on the final yield.  Errors
    from the backend are rendered into the assistant bubble instead of raised.
    """
    if not user_message or user_message.strip() == "":
        yield history, ""
        return

    token = HF_TOKEN
    if model_name.startswith("Local-"):
        # Local vLLM servers only need a placeholder token.
        local_endpoint = MODELS.get(model_name)
        client = InferenceClient(base_url=local_endpoint, token="vllm-token")
    else:
        client = InferenceClient(token=token, model=model_name)

    history.append({"role": "user", "content": user_message})
    history.append({"role": "assistant", "content": ""})

    # BUG FIX: the original wrote `system_message + "/set think" if thinking_mode
    # else "/set nothink"`; the conditional binds looser than `+`, so the system
    # prompt was silently dropped whenever thinking mode was off.
    system_content = system_message + ("/set think" if thinking_mode else "/set nothink")
    api_messages = [{"role": "system", "content": system_content}] + history[:-1]

    try:
        stream = client.chat_completion(
            api_messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            model=model_name,
        )
        response_text = ""
        for chunk in stream:
            if not chunk.choices or len(chunk.choices) == 0:
                continue
            token_content = chunk.choices[0].delta.content
            if token_content is not None:
                response_text += token_content
                history[-1]["content"] = response_text
                # Continuously yield update UI, while keeping input box
                # unavailable to prevent double clicks
                yield history, gr.update(interactive=False)
    except Exception as e:
        # If error, display error message in assistant dialog
        history[-1]["content"] = f"**Error:** {str(e)}"

    # --- Final Yield: Restore input box availability and clear content ---
    yield history, gr.update(value="", interactive=True)


with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.Markdown("## Configuration")
        # gr.LoginButton()
        system_msg = gr.Textbox(value="You are a helpful assistant.", label="System Prompt")
        thinking_mode = gr.Checkbox(value=False, label="Thinking Mode")
        max_t = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens")
        temp = gr.Slider(minimum=0.0, maximum=2.0, value=0.0, step=0.05, label="Temperature")
        top_p_val = gr.Slider(minimum=0.0, maximum=1.0, value=1.0, step=0.05, label="Top-p")

    gr.Markdown("# ⚔️ Chatbot Arena")

    with gr.Row():
        # --- Model A ---
        with gr.Column():
            model_a_name = gr.Dropdown(list(MODELS.keys()), label="Model A", value=list(MODELS.keys())[0])
            # type="messages" matches the {"role", "content"} dicts built in
            # bot_response (the legacy tuples default would mis-render them).
            chatbot_a = gr.Chatbot(label="Model A Output", type="messages")
            msg_a = gr.Textbox(placeholder="Send message to Model A...", label="Model A Input")
            btn_a = gr.Button("Send to Model A")
        # --- Model B ---
        with gr.Column():
            model_b_name = gr.Dropdown(list(MODELS.keys()), label="Model B", value=list(MODELS.keys())[-1])
            chatbot_b = gr.Chatbot(label="Model B Output", type="messages")
            msg_b = gr.Textbox(placeholder="Send message to Model B...", label="Model B Input")
            btn_b = gr.Button("Send to Model B")

    # --- Bind Events ---
    a_inputs = [msg_a, chatbot_a, model_a_name, system_msg, thinking_mode, max_t, temp, top_p_val]
    msg_a.submit(bot_response, a_inputs, [chatbot_a, msg_a])
    btn_a.click(bot_response, a_inputs, [chatbot_a, msg_a])
    chatbot_a.like(save_feedback, [model_a_name, chatbot_a], None)

    b_inputs = [msg_b, chatbot_b, model_b_name, system_msg, thinking_mode, max_t, temp, top_p_val]
    msg_b.submit(bot_response, b_inputs, [chatbot_b, msg_b])
    btn_b.click(bot_response, b_inputs, [chatbot_b, msg_b])
    chatbot_b.like(save_feedback, [model_b_name, chatbot_b], None)

    def clear_chats():
        """Reset both chat panes to empty histories."""
        return [], []

    gr.Button("🗑️ Clear Chats").click(
        fn=clear_chats,
        inputs=None,
        outputs=[chatbot_a, chatbot_b],
    )


if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", share=False)