#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Data-Viewer tab for ICBCBench.
"""
import gradio as gr
import random
import re
from tabs.shared_data import get_entry, get_index
def make_user_task_markdown(item_id, prompt):
    """Return the Markdown block that introduces a user task.

    Args:
        item_id: Identifier of the task; interpolated as-is into the text.
        prompt: Task description; interpolated as-is into the text.

    Returns:
        A three-line Markdown string: a "User Task" heading, the task ID
        line, and the description line.
    """
    # The string body is kept flush-left on purpose: leading whitespace
    # inside the triple-quoted literal would become part of the Markdown.
    return f"""### User Task 🎯
**Task ID:** {item_id}
**Description:** {prompt}"""
def make_article_markdown(article: str) -> str:
if article and isinstance(article, str):
processed_article = re.sub(r'\n{2,}', '\n\n', article)
table_pattern = r'(\|[^\n]*\n(?:[\|\s\-:]+\n)?(?:\|[^\n]*\n)*)'
tables = []
def replace_table(match):
tables.append(match.group(1))
return f'__TABLE_PLACEHOLDER_{len(tables)-1}__'
processed_article = re.sub(table_pattern, replace_table, processed_article)
processed_article = re.sub(r'(? str:
"""Build score cards for ICBCBench data viewer."""
track = entry.get("track", "subjective")
# ICBCBench fields
overall = entry.get("overall_score")
objective = entry.get("objective_score")
subjective = entry.get("subjective_score")
expert = entry.get("expert_score")
citation = entry.get("citation_score")
source = entry.get("source_quality_score")
confidence = entry.get("confidence")
correct = entry.get("correct")
# Legacy DeepResearch Bench fields
comp = entry.get("comprehensiveness_score")
insight = entry.get("insight_score")
inst = entry.get("instruction_following_score")
read = entry.get("readability_score")
def fmt(val):
    """Format a score value for display on a score card.

    Args:
        val: The raw score; may be ``None``, a number, or any other object.

    Returns:
        ``"N/A"`` when ``val`` is ``None``; the value rendered with two
        decimals when ``float(val)`` succeeds; otherwise ``str(val)``.
    """
    if val is None:
        return "N/A"
    try:
        return f"{float(val):.2f}"
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers ints too large to convert to float; show
        # them verbatim like any other non-numeric value instead of
        # letting the exception escape.
        return str(val)
if track == "objective":
scores_data = [
("Overall
Score", fmt(overall)),
("Objective
Score", fmt(objective)),
("Confidence", fmt(confidence)),
("Correct", "Yes" if correct is True else ("No" if correct is False else "N/A")),
]
else:
scores_data = [
("Overall
Score", fmt(overall)),
("Subjective
Score", fmt(subjective)),
("Expert
Score", fmt(expert)),
("Citation", fmt(citation)),
("Source
Quality", fmt(source)),
]
# Add legacy dimensions if ICBCBench fields not available
if subjective is None and any(v is not None for v in [comp, insight, inst, read]):
scores_data = [
("Overall
Score", fmt(overall)),
("Comprehen-
siveness", fmt(comp)),
("Insight
Score", fmt(insight)),
("Instruction
Following", fmt(inst)),
("Readability
Score", fmt(read)),
]
html_items_str = ""
for title, score in scores_data:
html_items_str += f"""
{score}