"""
Data-Viewer tab for ICBCBench.
"""
|
|
import html
import math
import random
import re

import gradio as gr

from tabs.shared_data import get_entry, get_index
|
|
|
|
def make_user_task_markdown(item_id, prompt) -> str:
    """Return the Markdown shown in the "User Task" card.

    Args:
        item_id: Task identifier; interpolated as-is after "Task ID:".
        prompt: Task description; interpolated as-is after "Description:".

    Returns:
        A Markdown string with a level-3 heading followed by the task id
        and the description, each in its own paragraph.
    """
    return (
        "### User Task 🎯\n\n"
        f"**Task ID:** {item_id}\n\n"
        f"**Description:** {prompt}"
    )
|
|
|
|
def make_article_markdown(article: str) -> str:
    """Return the Markdown shown in the "Generated Article" card.

    For a non-empty string the text is normalised before display:

    1. runs of two or more newlines are collapsed to exactly two;
    2. pipe tables are swapped for placeholders so the following rewrites
       cannot alter them, and are restored verbatim at the end;
    3. bold-labelled bullets (``* **Label**``) that were glued onto the
       preceding text are moved onto their own line;
    4. a blank line is inserted between two consecutive non-empty lines,
       unless the first starts with ``*`` or ``#`` or the second starts
       with ``*``.

    Args:
        article: Raw article text. ``None`` is rendered as an empty body;
            any other falsy or non-string value is interpolated unchanged.

    Returns:
        A Markdown string: a level-3 heading followed by the article body.
    """
    if article and isinstance(article, str):
        text = re.sub(r'\n{2,}', '\n\n', article)

        # Protect tables: header row, optional separator row, body rows.
        table_pattern = r'(\|[^\n]*\n(?:[\|\s\-:]+\n)?(?:\|[^\n]*\n)*)'
        tables = []

        def stash_table(match):
            tables.append(match.group(1))
            return f'__TABLE_PLACEHOLDER_{len(tables) - 1}__'

        text = re.sub(table_pattern, stash_table, text)

        # "...text* **Label**:" -> start the labelled bullet in a new paragraph.
        text = re.sub(r'(?<!\n)\*\s*\*\*([^*]+?)\*\*:', r'\n\n* **\1**:', text)
        # "* **A**: body * **B" -> break before the second labelled bullet.
        text = re.sub(
            r'\*\s*\*\*([^*]+?)\*\*:\s*([^*]*?)\s*\*\s*\*\*',
            r'* **\1**: \2\n * **',
            text,
        )
        # "[12]* **Label" -> break after the citation. The citation marker is
        # captured and re-emitted so it is not dropped from the article.
        text = re.sub(
            r'(?<!\n)(\[\d+[^\]]*\])\*\s*\*\*',
            r'\1\n\n* **',
            text,
        )

        # Turn single line breaks between plain text lines into paragraph
        # breaks; bullets and lines following a heading are left alone.
        lines = text.split('\n')
        spaced = []
        last_index = len(lines) - 1
        for index, line in enumerate(lines):
            spaced.append(line)
            if index == last_index:
                continue
            current = line.strip()
            following = lines[index + 1].strip()
            if (current and following
                    and not current.startswith(('*', '#'))
                    and not following.startswith('*')):
                spaced.append('')

        text = '\n'.join(spaced)
        for index, table in enumerate(tables):
            text = text.replace(f'__TABLE_PLACEHOLDER_{index}__', table)
    else:
        text = article if article is not None else ""

    return f"### Generated Article 📖\n\n{text}"
|
|
|
|
def make_scores_html(entry: dict) -> str:
    """Build the HTML score-card strip for one ICBCBench entry.

    The cards shown depend on ``entry["track"]`` (default ``"subjective"``):

    * ``"objective"``: overall, objective, confidence and correct.
    * anything else: overall, subjective, expert, citation and source
      quality. If ``subjective_score`` is missing but at least one of the
      per-dimension scores (comprehensiveness, insight, instruction
      following, readability) is present, those four are shown next to the
      overall score instead.

    Args:
        entry: Mapping with the score fields; every field is optional.

    Returns:
        An HTML fragment: one bordered container holding a flex row with
        one card per score.
    """
    track = entry.get("track", "subjective")

    def fmt(val):
        """Format a score as text that is safe to embed in HTML."""
        if val is None:
            return "N/A"
        try:
            number = float(val)
        except (TypeError, ValueError):
            # Non-numeric values are shown verbatim; escape them because the
            # result is interpolated straight into markup.
            return html.escape(str(val))
        if math.isnan(number):
            # A NaN score carries no information; show it like a missing one.
            return "N/A"
        return f"{number:.2f}"

    overall = entry.get("overall_score")

    if track == "objective":
        correct = entry.get("correct")
        if correct is True:
            correct_text = "Yes"
        elif correct is False:
            correct_text = "No"
        else:
            correct_text = "N/A"
        scores_data = [
            ("Overall<br>Score", fmt(overall)),
            ("Objective<br>Score", fmt(entry.get("objective_score"))),
            ("Confidence", fmt(entry.get("confidence"))),
            ("Correct", correct_text),
        ]
    else:
        subjective = entry.get("subjective_score")
        comp = entry.get("comprehensiveness_score")
        insight = entry.get("insight_score")
        inst = entry.get("instruction_following_score")
        read = entry.get("readability_score")

        # NOTE(review): the per-dimension fallback is applied only on the
        # non-objective track; confirm objective entries never rely on it.
        dimensions_only = subjective is None and any(
            v is not None for v in (comp, insight, inst, read)
        )
        if dimensions_only:
            scores_data = [
                ("Overall<br>Score", fmt(overall)),
                ("Comprehen-<br>siveness", fmt(comp)),
                ("Insight<br>Score", fmt(insight)),
                ("Instruction<br>Following", fmt(inst)),
                ("Readability<br>Score", fmt(read)),
            ]
        else:
            scores_data = [
                ("Overall<br>Score", fmt(overall)),
                ("Subjective<br>Score", fmt(subjective)),
                ("Expert<br>Score", fmt(entry.get("expert_score"))),
                ("Citation", fmt(entry.get("citation_score"))),
                ("Source<br>Quality", fmt(entry.get("source_quality_score"))),
            ]

    # Titles are trusted literals (they contain <br> on purpose); only the
    # score text comes from the entry.
    html_items_str = "".join(
        f"""
        <div style="text-align: center; padding: 8px 5px; flex-grow: 1; flex-basis: 0;">
            <h4 style="margin: 0 0 6px 0; font-size: 1.1em; color: #4a4a4a; font-weight: 600;">{title}</h4>
            <p style="margin: 0; font-size: 1.2em; font-weight: bold; color: #333;">{score}</p>
        </div>
        """
        for title, score in scores_data
    )

    return f"""
    <div style="background:#fff; border:1px solid #e0e0e0; border-radius:8px; padding: 18px 15px; margin:18px 0; box-shadow:0 2px 4px rgba(0,0,0,.06);">
        <div style="display: flex; justify-content: space-between; align-items: flex-start;">
            {html_items_str}
        </div>
    </div>"""
|
|
|
|
| |
def create_data_viewer_tab():
    """Create the "Data Viewer" tab: two dropdowns, two Markdown cards and a score strip.

    The function creates Gradio components, so it presumably has to be
    called inside an active ``gr.Blocks`` context (the caller is not visible
    from this file; confirm).

    Returns:
        A ``(on_load, components)`` pair. ``components`` is
        ``[model_dd, task_dd, user_md, article_md, scores_html]`` and
        ``on_load()`` returns five values in that same order, so the caller
        can wire it to a load event with ``components`` as the outputs.
    """
    # User-facing placeholder shown while no model/task pair is selected.
    select_prompt = "请选择模型和任务。"

    with gr.Tab("🔍Data Viewer"):
        gr.HTML(
            """
            <style>
            .card{background:#fff;border:1px solid #e0e0e0;border-radius:8px;padding:22px 24px;margin:18px 0;box-shadow:0 2px 4px rgba(0,0,0,.06);}
            .scrollable-sm{max-height:260px;overflow-y:auto;}
            .scrollable-lg{max-height:700px;overflow-y:auto;}
            .card p{color:#424242 !important;line-height:1.75;margin:0 0 14px 0;text-align:justify;}
            .card ul,.card ol{margin:12px 0 12px 24px;color:#424242 !important;}
            .card li{margin:4px 0;color:#424242 !important;}
            .card blockquote{border-left:4px solid #3498db;margin:18px 0;padding:14px 18px;background:#f8f9fa;font-style:italic;color:#555 !important;}
            .card pre{background:#f8f8f8;color:#333 !important;padding:18px;border-radius:6px;overflow-x:auto;border:1px solid #e0e0e0;}
            .card strong,.card b{font-weight:700 !important;}
            .card::-webkit-scrollbar{width:10px}
            .card::-webkit-scrollbar-track{background:#f5f5f5;border-radius:5px}
            .card::-webkit-scrollbar-thumb{background:#c0c0c0;border-radius:5px}
            .card::-webkit-scrollbar-thumb:hover{background:#a0a0a0}
            </style>
            """
        )

        with gr.Row():
            model_dd = gr.Dropdown(label="Select Model", choices=[], interactive=True)
            task_dd = gr.Dropdown(label="Select Task", choices=[], interactive=True)

        user_md = gr.Markdown(value="Loading data…", elem_classes=["card", "scrollable-sm"])
        article_md = gr.Markdown(elem_classes=["card", "scrollable-lg"])
        scores_html = gr.HTML()

        def _build_task_choices(tasks):
            """Return one "<id>. <prompt preview>" label per task."""
            limit = 60
            choices = []
            for task in tasks:
                item_id = str(task["id"])
                # A task whose prompt is present but None must not break slicing.
                prompt = task.get("prompt") or ""
                preview = prompt[:limit] + ("…" if len(prompt) > limit else "")
                choices.append(f"{item_id}. {preview}")
            return choices

        def fetch(model, task_disp):
            """Return (task markdown, article markdown, scores HTML) for the selection."""
            if not model or not task_disp:
                return (
                    make_user_task_markdown("--", select_prompt),
                    make_article_markdown(select_prompt),
                    "",
                )

            # The id is whatever precedes the first "." of the dropdown label.
            # NOTE(review): an id that itself contains "." would be truncated.
            item_id = task_disp.split(".", 1)[0].strip()
            entry = get_entry(model, item_id)
            if not entry:
                err = f"未找到模型 **{model}** 对应任务 **{item_id}** 的内容或分数。"
                return make_user_task_markdown(item_id, err), make_article_markdown(err), ""

            prompt = entry.get("prompt", "")
            article = entry.get("article", "")
            return (
                make_user_task_markdown(item_id, prompt),
                make_article_markdown(article),
                make_scores_html(entry),
            )

        def on_load():
            """Populate both dropdowns and pick a random initial model and task."""
            index = get_index()
            models = index.get("models", [])
            tasks = index.get("tasks", [])
            if not models or not tasks:
                return (
                    gr.update(choices=[], value=None),
                    gr.update(choices=[], value=None),
                    make_user_task_markdown("--", "No data"),
                    make_article_markdown(""),
                    "",
                )

            # Both lists are non-empty here, so random.choice cannot fail.
            task_choices = _build_task_choices(tasks)
            init_model = random.choice(models)
            init_task = random.choice(task_choices)

            # Only placeholder content is returned for the cards; presumably the
            # dropdown change events then call fetch for the selection (confirm).
            return (
                gr.update(choices=models, value=init_model),
                gr.update(choices=task_choices, value=init_task),
                make_user_task_markdown("--", select_prompt),
                make_article_markdown(select_prompt),
                "",
            )

        card_outputs = [user_md, article_md, scores_html]
        model_dd.change(fetch, inputs=[model_dd, task_dd], outputs=card_outputs)
        task_dd.change(fetch, inputs=[model_dd, task_dd], outputs=card_outputs)

    return on_load, [model_dd, task_dd, user_md, article_md, scores_html]
|
|