# NOTE: "Spaces: Sleeping" status header left over from the Hugging Face
# Space page extraction — not part of the application code.
import json
import random
import html
import markdown
from typing import List, Dict, Any, Tuple

import gradio as gr
import pandas as pd
from datasets import load_dataset

# Battles pre-selected from the arena dump (see the commented-out loader below
# for how the full dataset would be pulled from the Hugging Face Hub).
df = pd.read_json("selected_battles.json")

# load arena battles
# ds = load_dataset("lmarena-ai/arena-human-preference-100k")
# df = ds['train'].to_pandas()
# Expected columns in this dataset family:
#   ['question_id','model_a','model_b','winner','conversation_a','conversation_b',
#    'turn','anony','language','tstamp','conv_metadata','is_code','is_refusal',
#    'dedup_tag','category_tag','judge_hash', ...]
# winner ∈ {model_a, model_b, tie, both_bad}; conversations are full threads.
# See the dataset card on Hugging Face for details.
# Dropdown options - sorted by frequency
def get_sorted_options(column_name, frame=None):
    """Return dropdown choices for *column_name*, most frequent first.

    Args:
        column_name: Name of the column to summarize.
        frame: DataFrame to read from; defaults to the module-level ``df``.
            (Parameter added for testability; existing callers are unchanged.)

    Returns:
        ``["(Any)"]`` followed by the column's distinct non-null values in
        descending frequency order, or just ``["(Any)"]`` if the column is
        absent.
    """
    frame = df if frame is None else frame
    if column_name not in frame.columns:
        return ["(Any)"]
    counts = frame[column_name].dropna().value_counts()
    return ["(Any)"] + counts.index.tolist()
# Pre-computed dropdown choices, each list starting with the "(Any)" wildcard.
models_a = get_sorted_options("model_a")
models_b = get_sorted_options("model_b")
languages = get_sorted_options("language")
| def _ensure_messages(x: Any) -> List[Dict[str, Any]]: | |
| """ | |
| conversation_a / conversation_b can be: | |
| - a Python list of {role, content} dicts | |
| - a JSON string encoding that list | |
| Normalize to a list of dicts with 'role' and 'content'. | |
| """ | |
| if isinstance(x, list): | |
| return x | |
| if isinstance(x, str): | |
| try: | |
| val = json.loads(x) | |
| if isinstance(val, list): | |
| return val | |
| except Exception: | |
| pass | |
| # Last resort: wrap as a single assistant message | |
| return [{"role": "assistant", "content": str(x)}] | |
| def _winner_text(row: pd.Series) -> str: | |
| w = str(row.get("winner", "")).strip().lower() | |
| mapping = { | |
| "model_a": "Preference: Model A", | |
| "model_b": "Preference: Model B", | |
| "tie": "Preference: Tie", | |
| "both_bad": "Preference: Tie (both bad)", | |
| } | |
| return mapping.get(w, "Preference: (unknown)") | |
| def _bubble_html(messages: List[Dict[str, Any]], side_label: str) -> str: | |
| """ | |
| Make a chat-like interface with proper user/assistant bubbles. | |
| User messages are on the left, assistant messages on the right. | |
| """ | |
| # Tailwind-like inline styles (no external CSS) | |
| css = """ | |
| <style> | |
| .chat-container {padding:12px; border-radius:16px; background:#fafafa; box-shadow:0 1px 3px rgba(0,0,0,.08);} | |
| .model-label {font-weight:600; font-size:14px; margin-bottom:12px; opacity:.8; text-align:center;} | |
| .message {margin:12px 0; display:flex; align-items:flex-start;} | |
| .message.user {justify-content:flex-start;} | |
| .message.assistant {justify-content:flex-end;} | |
| .bubble {max-width:70%; padding:10px 14px; border-radius:18px; word-wrap:break-word;} | |
| .bubble.user {background:#e9eef7; color:#2c3e50; margin-right:auto;} | |
| .bubble.assistant {background:#eaf7ea; color:#2c3e50; margin-left:auto;} | |
| .role-label {font-size:11px; font-weight:500; margin-bottom:4px; opacity:.7;} | |
| .role-label.assistant {text-align:right;} | |
| .bubble pre {background:#f5f5f5; padding:8px; border-radius:4px; overflow-x:auto; margin:8px 0;} | |
| .bubble code {background:#f0f0f0; padding:2px 4px; border-radius:3px; font-family:monospace;} | |
| .bubble p {margin:8px 0;} | |
| .bubble ul, .bubble ol {margin:8px 0; padding-left:20px;} | |
| .bubble blockquote {border-left:3px solid #ddd; padding-left:12px; margin:8px 0; color:#666;} | |
| </style> | |
| """ | |
| body = [f'<div class="chat-container">'] | |
| # Only show model label at top for User side | |
| if side_label != "Assistant": | |
| body.append(f'<div class="model-label">{side_label}</div>') | |
| first_assistant_message = True | |
| for m in messages: | |
| role = (m.get("role") or "").lower() | |
| content = str(m.get("content", "")).strip() | |
| if not content: | |
| continue | |
| # Convert markdown to HTML | |
| try: | |
| rendered_content = markdown.markdown(content, extensions=['fenced_code', 'codehilite', 'tables']) | |
| except: | |
| # Fallback to escaped content if markdown rendering fails | |
| rendered_content = html.escape(content) | |
| if role in ("user", "system"): | |
| role_display = "User" if role == "user" else "System" | |
| body.append(f''' | |
| <div class="message user"> | |
| <div> | |
| <div class="role-label">{role_display}</div> | |
| <div class="bubble user">{rendered_content}</div> | |
| </div> | |
| </div> | |
| ''') | |
| else: | |
| # For assistant messages, include the model name in the first message | |
| if first_assistant_message and side_label == "Assistant": | |
| content = f"{side_label}: {content}" | |
| try: | |
| rendered_content = markdown.markdown(content, extensions=['fenced_code', 'codehilite', 'tables']) | |
| except: | |
| rendered_content = html.escape(content) | |
| first_assistant_message = False | |
| body.append(f''' | |
| <div class="message assistant"> | |
| <div> | |
| <div class="role-label assistant">Assistant</div> | |
| <div class="bubble assistant">{rendered_content}</div> | |
| </div> | |
| </div> | |
| ''') | |
| body.append("</div>") | |
| return css + "\n".join(body) | |
def filter_df(model_a_sel, model_b_sel, lang_sel, frame=None):
    """Filter battles by Model A, Model B and language.

    Args:
        model_a_sel / model_b_sel / lang_sel: Dropdown selections; the
            sentinel "(Any)" disables that filter. Language values are
            compared as strings.
        frame: DataFrame to filter; defaults to the module-level ``df``.
            (Parameter added for testability, mirroring
            ``get_sorted_options``; existing callers are unchanged.)

    Returns:
        The matching rows with a fresh 0..n-1 index.
    """
    sub = df if frame is None else frame
    if model_a_sel != "(Any)":
        sub = sub[sub["model_a"] == model_a_sel]
    if model_b_sel != "(Any)":
        sub = sub[sub["model_b"] == model_b_sel]
    # Guarded: some exports of this dataset family lack the language column.
    if "language" in sub.columns and lang_sel != "(Any)":
        sub = sub[sub["language"].astype(str) == lang_sel]
    return sub.reset_index(drop=True)
def format_row(row: pd.Series) -> Tuple[str, str, str, str, str]:
    """Build the viewer's widget contents for one battle row.

    Returns a 5-tuple (header, left_html, right_html, footer_html, meta);
    header and meta are currently returned empty.
    """
    conv_a = _ensure_messages(row["conversation_a"])
    conv_b = _ensure_messages(row["conversation_b"])

    # Headline prompt = first user turn of conversation A (computed but
    # not currently surfaced in the returned tuple).
    first_user = next(
        (str(m.get("content", "")).strip()
         for m in conv_a
         if (m.get("role") or "").lower() == "user"),
        "",
    )

    panel_a = _bubble_html(conv_a, f"Model A: {row['model_a']}")
    panel_b = _bubble_html(conv_b, f"Model B: {row['model_b']}")

    # Subtle preference footer with a soft yellow background.
    verdict = _winner_text(row)
    footer_html = f"""
<div style="
    background: #fff8e1;
    color: #5d4037;
    padding: 10px 16px;
    margin: 12px 0;
    border-radius: 6px;
    font-weight: 600;
    font-size: 14px;
    text-align: center;
    box-shadow: 0 1px 3px rgba(0,0,0,0.08);
    border: 1px solid #ffcc02;
">
    {verdict}
</div>
"""
    return "", panel_a, panel_b, footer_html, ""
# --- Gradio UI ----------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo")) as demo:
    gr.Markdown("# Chatbot Arena Battle Viewer (100k)")
    gr.Markdown(
        "Filter by **Model A**, **Model B**, and **Language**, then browse side-by-side conversations. "
        "Data: `lmarena-ai/arena-human-preference-100k`."
    )
    # Filter controls.
    with gr.Row():
        dd_a = gr.Dropdown(models_a, label="Model A", value="(Any)")
        dd_b = gr.Dropdown(models_b, label="Model B", value="(Any)")
        # languages[0] is the "(Any)" sentinel prepended by get_sorted_options.
        dd_l = gr.Dropdown(languages, label="Language", value=languages[0])
    # Navigation controls.
    with gr.Row():
        btn_rand = gr.Button("Random match")
        btn_prev = gr.Button("◀ Prev")
        btn_next = gr.Button("Next ▶")
    # Browsing state: positions into the filtered frame, and a pointer into
    # them (-1 when no rows match the filters).
    st_indices = gr.State([])
    st_ptr = gr.State(0)
    header_md = gr.Markdown()
    # Side-by-side conversation panels.
    with gr.Row():
        left_html = gr.HTML()
        right_html = gr.HTML()
    footer_md = gr.HTML()
    meta_md = gr.Markdown()

    def apply_filters(a, b, l):
        # Re-filter from scratch and show the first matching row.
        sub = filter_df(a, b, l)
        idxs = list(range(len(sub)))
        ptr = 0 if idxs else -1
        if ptr >= 0:
            row = sub.iloc[ptr]
            head, left, right, foot, meta = format_row(row)
        else:
            head = left = right = foot = meta = "_No rows match your filters._"
        return idxs, ptr, head, left, right, foot, meta

    def nav(a, b, l, indices, ptr, direction):
        # Step forward/backward through the filtered rows with wrap-around.
        # NOTE(review): the frame is re-filtered here rather than reusing
        # `indices`, so `indices` is effectively unused input state.
        sub = filter_df(a, b, l)
        if not len(sub):
            return [], -1, "_No rows match your filters._", "", "", "", ""
        idxs = list(range(len(sub)))
        # Reset a stale pointer (e.g. after the filters shrank the frame).
        if ptr is None or ptr < 0 or ptr >= len(sub):
            ptr = 0
        if direction == "next":
            ptr = (ptr + 1) % len(sub)
        elif direction == "prev":
            ptr = (ptr - 1) % len(sub)
        row = sub.iloc[ptr]
        head, left, right, foot, meta = format_row(row)
        return idxs, ptr, head, left, right, foot, meta

    def rand(a, b, l):
        # Jump to a uniformly random row among the current matches.
        sub = filter_df(a, b, l)
        if not len(sub):
            return [], -1, "_No rows match your filters._", "", "", "", ""
        r = random.randrange(len(sub))
        row = sub.iloc[r]
        head, left, right, foot, meta = format_row(row)
        return list(range(len(sub)), ) if False else (list(range(len(sub))), r, head, left, right, foot, meta)[0], r, head, left, right, foot, meta

    # Auto-update when dropdowns change
    dd_a.change(apply_filters, [dd_a, dd_b, dd_l],
                [st_indices, st_ptr, header_md, left_html, right_html, footer_md, meta_md])
    dd_b.change(apply_filters, [dd_a, dd_b, dd_l],
                [st_indices, st_ptr, header_md, left_html, right_html, footer_md, meta_md])
    dd_l.change(apply_filters, [dd_a, dd_b, dd_l],
                [st_indices, st_ptr, header_md, left_html, right_html, footer_md, meta_md])
    # The inline gr.State("next"/"prev") acts as a constant extra input that
    # tells `nav` which direction to step.
    btn_next.click(nav, [dd_a, dd_b, dd_l, st_indices, st_ptr, gr.State("next")],
                   [st_indices, st_ptr, header_md, left_html, right_html, footer_md, meta_md])
    btn_prev.click(nav, [dd_a, dd_b, dd_l, st_indices, st_ptr, gr.State("prev")],
                   [st_indices, st_ptr, header_md, left_html, right_html, footer_md, meta_md])
    btn_rand.click(rand, [dd_a, dd_b, dd_l],
                   [st_indices, st_ptr, header_md, left_html, right_html, footer_md, meta_md])
    # Populate the viewer once on page load.
    gr.on([demo.load], apply_filters, [dd_a, dd_b, dd_l],
          [st_indices, st_ptr, header_md, left_html, right_html, footer_md, meta_md])

if __name__ == "__main__":
    demo.launch()