#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Data-Viewer tab for ICBCBench. """ import gradio as gr import random import re from tabs.shared_data import get_entry, get_index def make_user_task_markdown(item_id, prompt): return f"""### User Task 🎯 **Task ID:** {item_id} **Description:** {prompt}""" def make_article_markdown(article: str) -> str: if article and isinstance(article, str): processed_article = re.sub(r'\n{2,}', '\n\n', article) table_pattern = r'(\|[^\n]*\n(?:[\|\s\-:]+\n)?(?:\|[^\n]*\n)*)' tables = [] def replace_table(match): tables.append(match.group(1)) return f'__TABLE_PLACEHOLDER_{len(tables)-1}__' processed_article = re.sub(table_pattern, replace_table, processed_article) processed_article = re.sub(r'(? str: """Build score cards for ICBCBench data viewer.""" track = entry.get("track", "subjective") # ICBCBench fields overall = entry.get("overall_score") objective = entry.get("objective_score") subjective = entry.get("subjective_score") expert = entry.get("expert_score") citation = entry.get("citation_score") source = entry.get("source_quality_score") confidence = entry.get("confidence") correct = entry.get("correct") # Legacy DeepResearch Bench fields comp = entry.get("comprehensiveness_score") insight = entry.get("insight_score") inst = entry.get("instruction_following_score") read = entry.get("readability_score") def fmt(val): if val is None: return "N/A" try: return f"{float(val):.2f}" except (TypeError, ValueError): return str(val) if track == "objective": scores_data = [ ("Overall
Score", fmt(overall)), ("Objective
Score", fmt(objective)), ("Confidence", fmt(confidence)), ("Correct", "Yes" if correct is True else ("No" if correct is False else "N/A")), ] else: scores_data = [ ("Overall
Score", fmt(overall)), ("Subjective
Score", fmt(subjective)), ("Expert
Score", fmt(expert)), ("Citation", fmt(citation)), ("Source
Quality", fmt(source)), ] # Add legacy dimensions if ICBCBench fields not available if subjective is None and any(v is not None for v in [comp, insight, inst, read]): scores_data = [ ("Overall
Score", fmt(overall)), ("Comprehen-
siveness", fmt(comp)), ("Insight
Score", fmt(insight)), ("Instruction
Following", fmt(inst)), ("Readability
Score", fmt(read)), ] html_items_str = "" for title, score in scores_data: html_items_str += f"""

{title}

{score}

""" return f"""

{html_items_str}

""" # ---------- 生成 Tab ---------- def create_data_viewer_tab(): with gr.Tab("🔍Data Viewer"): gr.HTML( """ """ ) with gr.Row(): model_dd = gr.Dropdown(label="Select Model", choices=[], interactive=True) task_dd = gr.Dropdown(label="Select Task", choices=[], interactive=True) user_md = gr.Markdown(value="Loading data…", elem_classes=["card", "scrollable-sm"]) article_md = gr.Markdown(elem_classes=["card", "scrollable-lg"]) scores_html = gr.HTML() def _build_task_choices(tasks): choices = [] for task in tasks: item_id = str(task["id"]) prompt = task.get("prompt", "") limit = 60 preview = prompt[:limit] + ("…" if len(prompt) > limit else "") choices.append(f"{item_id}. {preview}") return choices def fetch(model, task_disp): if not model or not task_disp: msg = "请选择模型和任务。" return make_user_task_markdown("--", msg), make_article_markdown(msg), "" item_id = task_disp.split(".", 1)[0].strip() entry = get_entry(model, item_id) if not entry: err = f"未找到模型 **{model}** 对应任务 **{item_id}** 的内容或分数。" return make_user_task_markdown(item_id, err), make_article_markdown(err), "" prompt = entry.get("prompt", "") article = entry.get("article", "") scores_content = make_scores_html(entry) return make_user_task_markdown(item_id, prompt), make_article_markdown(article), scores_content def on_load(): index = get_index() models = index.get("models", []) tasks = index.get("tasks", []) if not models or not tasks: return gr.update(choices=[], value=None), gr.update(choices=[], value=None), \ make_user_task_markdown("--", "No data"), make_article_markdown(""), "" task_choices = _build_task_choices(tasks) init_model = random.choice(models) if models else None init_task = random.choice(task_choices) if task_choices else None u, a, s = ( make_user_task_markdown("--", "请选择模型和任务。"), make_article_markdown("请选择模型和任务。"), "" ) return (gr.update(choices=models, value=init_model), gr.update(choices=task_choices, value=init_task), u, a, s) model_dd.change(fetch, inputs=[model_dd, task_dd], outputs=[user_md, article_md, scores_html]) task_dd.change(fetch, inputs=[model_dd, task_dd], outputs=[user_md, article_md, scores_html]) return on_load, [model_dd, task_dd, user_md, article_md, scores_html]