| """ |
| 图表问答数据集审核系统 - Gradio 5.x 应用 |
| 用于人工审核每个图表对应的问题和答案是否合理正确 |
| """ |
| import gradio as gr |
| from data_manager import DataManager, data_manager |
| from typing import Dict, List, Optional, Tuple, Any |
| import json |
| import os |
| import base64 |
|
|
| |
|
|
class AppState:
    """Hold the currently selected chart and its position in the path list.

    The four ``current_*`` fields mirror the dropdown selection, while
    ``current_index`` points into ``all_paths`` (-1 until a position is set).
    """

    def __init__(self):
        # Nothing is selected at start-up; -1 means "no entry of all_paths yet".
        self.current_index: int = -1
        self.all_paths: List[Dict] = []
        self.current_source: str = ""
        self.current_chart_type: str = ""
        self.current_chart_id: str = ""
        self.current_model: str = ""

        self.refresh_paths()

    def refresh_paths(self):
        """Reload the list of chart paths from the data manager."""
        self.all_paths = data_manager.get_all_chart_paths()

    def get_current_path(self) -> Optional[Dict]:
        """Return the path entry at ``current_index``, or None if out of range."""
        idx = self.current_index
        if idx < 0 or idx >= len(self.all_paths):
            return None
        return self.all_paths[idx]

    def set_position(self, source: str, chart_type: str, chart_id: str, model: str):
        """Record the given selection and sync ``current_index`` to it.

        The index is only updated when an entry of ``all_paths`` matches all
        four fields; otherwise it keeps its previous value.
        """
        self.current_source, self.current_chart_type = source, chart_type
        self.current_chart_id, self.current_model = chart_id, model

        # First matching entry wins, same as a loop with an early break.
        found = next(
            (
                pos
                for pos, entry in enumerate(self.all_paths)
                if entry['source'] == source
                and entry['chart_type'] == chart_type
                and entry['chart_id'] == chart_id
                and entry['model'] == model
            ),
            None,
        )
        if found is not None:
            self.current_index = found

    def navigate(self, direction: int) -> bool:
        """Move to the neighbouring chart in ``all_paths``.

        Args:
            direction: 1 for the next chart, -1 for the previous one.

        Returns:
            True if the move stayed inside the list and the state was
            updated, False otherwise (state is left untouched).
        """
        target = self.current_index + direction
        if not 0 <= target < len(self.all_paths):
            return False

        self.current_index = target
        entry = self.all_paths[target]
        # Copy the entry's fields onto the matching current_* attributes.
        for attr, key in (
            ('current_source', 'source'),
            ('current_chart_type', 'chart_type'),
            ('current_chart_id', 'chart_id'),
            ('current_model', 'model'),
        ):
            setattr(self, attr, entry[key])
        return True
|
|
# Module-level AppState instance used by the handler functions in this file.
# Constructing it calls data_manager.get_all_chart_paths() (via refresh_paths).
state = AppState()
|
|
|
|
| |
|
|
def get_dataset_choices() -> Tuple[List[str], List[str], List[str], List[str]]:
    """Collect the dropdown options that match the current selection state.

    Returns:
        A ``(sources, chart_types, charts, models)`` tuple. ``chart_types`` is
        only filled when a source is selected in ``state``; ``charts`` and
        ``models`` additionally require a selected chart type.
    """
    structure = data_manager.get_dataset_structure()

    # Read 'sources' through .get() everywhere so a structure without that
    # key yields empty lists instead of raising KeyError half-way through.
    all_sources = structure.get('sources', {})
    sources = list(all_sources.keys())

    chart_types: List[str] = []
    charts: List[str] = []
    models: List[str] = []

    if state.current_source:
        source_data = all_sources.get(state.current_source, {})
        type_map = source_data.get('chart_types', {})
        chart_types = list(type_map.keys())

        if state.current_chart_type:
            charts = data_manager.get_chart_list(state.current_source, state.current_chart_type)
            models = type_map.get(state.current_chart_type, {}).get('models', [])

    return sources, chart_types, charts, models
|
|
|
|
def update_chart_type_dropdown(source: str):
    """Return a chart-type dropdown populated for ``source``.

    Also stores ``source`` as the current source in the shared state. The
    first chart type (if any) becomes the selected value.
    """
    state.current_source = source

    sources = data_manager.get_dataset_structure().get('sources', {})
    type_map = sources.get(source, {}).get('chart_types', {})
    type_names = list(type_map.keys())

    if type_names:
        selected = type_names[0]
    else:
        selected = None
    return gr.Dropdown(choices=type_names, value=selected)
|
|
|
|
def update_chart_dropdown(source: str, chart_type: str):
    """Return refreshed chart-ID and model dropdowns for a source/chart type.

    Stores both arguments in the shared state, then builds two dropdowns
    whose selected value is the first available choice (or None when empty).

    Returns:
        A ``(chart_dropdown, model_dropdown)`` tuple.
    """
    state.current_source = source
    state.current_chart_type = chart_type

    chart_ids = data_manager.get_chart_list(source, chart_type)
    sources = data_manager.get_dataset_structure().get('sources', {})
    type_entry = sources.get(source, {}).get('chart_types', {}).get(chart_type, {})
    model_names = type_entry.get('models', [])

    first_chart = chart_ids[0] if chart_ids else None
    first_model = model_names[0] if model_names else None

    chart_box = gr.Dropdown(choices=chart_ids, value=first_chart)
    model_box = gr.Dropdown(choices=model_names, value=first_model)
    return (chart_box, model_box)
|
|
|
|
def create_embedded_html(html_content: str, chart_id: str = "") -> str:
    """Return markup that displays a chart's HTML, or a placeholder if empty.

    Non-empty content is base64-encoded and embedded in an iframe through a
    ``data:`` URI. Empty content produces a "no chart" placeholder box that
    mentions ``chart_id``.

    Args:
        html_content: Full HTML document (or fragment) of the chart.
        chart_id: Identifier shown in the placeholder; falls back to a
            generic "unknown" label when empty.

    Returns:
        An HTML snippet as a string.
    """
    # Local import keeps this function self-contained.
    from html import escape

    if not html_content:
        # chart_id ends up inside markup, so neutralise any HTML metacharacters.
        shown_id = escape(str(chart_id)) if chart_id else '未知'
        return f"""
        <div style="display:flex;flex-direction:column;align-items:center;justify-content:center;
        min-height:400px;color:#999;border:2px dashed #ddd;border-radius:12px;background:#fafafa;">
        <div style="font-size:48px;margin-bottom:16px;">📭</div>
        <div style="font-size:18px;font-weight:500;">暂无图表内容</div>
        <div style="font-size:14px;margin-top:8px;">图表 ID: {shown_id}</div>
        <div style="font-size:12px;margin-top:16px;color:#888;">请检查数据集目录中是否存在该图表的 HTML 文件</div>
        </div>
        """

    html_base64 = base64.b64encode(html_content.encode('utf-8')).decode('ascii')

    # The payload is UTF-8 bytes; declaring the charset in the data URI stops
    # the browser from guessing an encoding when the chart HTML itself has no
    # <meta charset>, which would garble non-ASCII text.
    return f"""
    <iframe
    src="data:text/html;charset=utf-8;base64,{html_base64}"
    style="width:100%;height:500px;border:1px solid #e0e0e0;border-radius:8px;background:#fff;"
    sandbox="allow-scripts allow-same-origin"
    loading="lazy"
    ></iframe>
    """
|
|
|
|
def _md_cell(value: Any) -> str:
    """Make a value safe to place inside a single Markdown table cell."""
    # Line breaks end a table row and a bare pipe starts a new cell, so
    # collapse the former and escape the latter.
    return ' '.join(str(value).splitlines()).replace('|', '\\|')


def _format_label_markdown(label_info: Dict) -> str:
    """Render the chart's label metadata as a Markdown table."""
    if not label_info:
        return "### ⚠️ 暂无标签信息"

    rows = [
        ('编号', 'Number'),
        ('类型', 'Type'),
        ('来源', 'Source'),
        ('主题', 'Topic'),
        ('描述', 'Describe'),
    ]
    lines = [
        "### 图表信息",
        "",
        "| 属性 | 值 |",
        "|------|-----|",
    ]
    lines.extend(
        f"| **{title}** | {_md_cell(label_info.get(key, '-'))} |"
        for title, key in rows
    )
    lines.append(f"| **链接** | [查看原图]({label_info.get('Weblink', '#')}) |")
    return "\n".join(lines)


def load_chart_data(source: str, chart_type: str, chart_id: str, model: str):
    """Load one chart and return the values for every dependent UI component.

    Args:
        source: Selected data source.
        chart_type: Selected chart type.
        chart_id: Selected chart identifier.
        model: Selected model.

    Returns:
        A list of eight items, in this order: chart HTML, label Markdown,
        Q&A list as a JSON string, review statistics text, progress text,
        existing reviews (keyed by ``qa_id``) as a JSON string, the Q&A
        selector ``gr.Radio`` update, and a debug line. When any argument is
        empty, placeholder values are returned instead and no data is loaded.
    """
    if not all([source, chart_type, chart_id, model]):
        return [
            create_embedded_html(""),
            "### 请在左侧选择图表",
            "[]",
            "等待加载数据...",
            "请在左侧选择图表",
            "{}",
            gr.Radio(choices=[], value=None),
            ""
        ]

    # Keep the shared navigation state in sync with the dropdown selection.
    state.set_position(source, chart_type, chart_id, model)

    # Tolerate a missing record or missing fields: fall back to empty values
    # so the placeholder is shown instead of the callback raising.
    chart_data = data_manager.get_chart_data(source, chart_type, chart_id) or {}
    html_content = chart_data.get('html_content') or ''
    label_info = chart_data.get('label_info') or {}

    embedded_html = create_embedded_html(html_content, chart_id)
    debug_info = f"📁 {source}/{chart_type}/{chart_id} | HTML: {len(html_content)} 字符"
    label_text = _format_label_markdown(label_info)

    qa_list = data_manager.get_qa_list(source, chart_type, model, chart_id)

    # Index previously saved reviews by Q&A id for lookup on selection.
    existing_reviews = {
        review['qa_id']: review
        for review in data_manager.get_reviews_by_chart(chart_id, model)
    }

    progress_text = f"当前: {state.current_index + 1} / {len(state.all_paths)} 个图表"

    stats = data_manager.get_review_stats()
    status_text = f"已审核: {stats['total']} | ✅正确: {stats['correct']} | ❌错误: {stats['incorrect']} | ✏️需修改: {stats['needs_modification']}"

    # The "Q<n>:" prefix is what the selection handler parses back into an
    # index; the ellipsis is only added when the question was really cut.
    qa_choices = [
        f"Q{i + 1}: {qa.question[:50]}{'...' if len(qa.question) > 50 else ''}"
        for i, qa in enumerate(qa_list)
    ] if qa_list else []

    return [
        embedded_html,
        label_text,
        json.dumps([{"id": qa.id, "question": qa.question, "answer": qa.answer} for qa in qa_list]),
        status_text,
        progress_text,
        json.dumps(existing_reviews),
        gr.Radio(choices=qa_choices, value=qa_choices[0] if qa_choices else None),
        debug_info
    ]
|
|
|
|
def navigate_prev():
    """Step to the previous chart and return updates for the four dropdowns.

    Returns:
        A tuple of dropdown updates (source, chart type, chart ID, model)
        carrying the new selection, or a list of four no-op dropdown updates
        when there is no previous chart.
    """
    path = state.get_current_path() if state.navigate(-1) else None
    if not path:
        return [gr.Dropdown() for _ in range(4)]

    field_order = ('source', 'chart_type', 'chart_id', 'model')
    return tuple(gr.Dropdown(value=path[field]) for field in field_order)
|
|
|
|
def navigate_next():
    """Step to the next chart and return updates for the four dropdowns.

    Returns:
        A tuple of dropdown updates (source, chart type, chart ID, model)
        carrying the new selection, or a list of four no-op dropdown updates
        when there is no next chart.
    """
    path = state.get_current_path() if state.navigate(1) else None
    if not path:
        return [gr.Dropdown() for _ in range(4)]

    field_order = ('source', 'chart_type', 'chart_id', 'model')
    return tuple(gr.Dropdown(value=path[field]) for field in field_order)
|
|
|
|
def save_review_handler(
    qa_id: str,
    chart_id: str,
    source: str,
    chart_type: str,
    model: str,
    original_question: str,
    original_answer: str,
    status: str,
    modified_question: str,
    modified_answer: str,
    issue_type: str,
    comment: str,
    reviewer: str
) -> str:
    """Persist one review record and return a status line for the UI.

    All arguments are copied unchanged into the record handed to
    ``data_manager.save_review``. When ``qa_id`` is empty nothing is saved
    and an error message is returned instead.

    Returns:
        A message containing the refreshed review statistics, or the
        "select a Q&A pair first" error text.
    """
    if not qa_id:
        return "❌ 请先选择一个问答对"

    review_data = dict(
        qa_id=qa_id,
        chart_id=chart_id,
        source=source,
        chart_type=chart_type,
        model=model,
        original_question=original_question,
        original_answer=original_answer,
        status=status,
        modified_question=modified_question,
        modified_answer=modified_answer,
        issue_type=issue_type,
        comment=comment,
        reviewer=reviewer,
    )
    data_manager.save_review(review_data)

    # Re-read the totals so the message reflects the record just written.
    stats = data_manager.get_review_stats()
    return f"✅ 已保存! 总计: {stats['total']} | ✅正确: {stats['correct']} | ❌错误: {stats['incorrect']} | ✏️需修改: {stats['needs_modification']}"
|
|
|
|
def export_reviews_handler():
    """Export all reviews through the data manager and report the location.

    Returns:
        A message containing whatever path ``data_manager.export_reviews``
        returned for the ``./reviews_export.json`` target.
    """
    export_target = "./reviews_export.json"
    written_to = data_manager.export_reviews(export_target)
    return f"✅ 审核记录已导出至: {written_to}"
|
|
|
|
| |
|
|
def create_ui():
    """Build the Gradio Blocks interface for reviewing chart Q&A pairs.

    The layout has a status row on top and three columns below it: dataset
    selection and navigation, the chart display, and the review form. After
    the components are created, the event handlers are wired up in this
    order: initial load, cascading dropdown updates, chart loading, Q&A
    selection, prev/next navigation, save, and export.

    Returns:
        The assembled ``gr.Blocks`` app; the caller is responsible for
        launching it.
    """
    custom_css = """
    .chart-container {
        min-height: 520px;
    }

    .control-panel {
        background: #f8f9fa;
        padding: 15px;
        border-radius: 8px;
        margin-bottom: 10px;
    }

    .debug-panel {
        font-size: 12px;
        color: #666;
        padding: 8px;
        background: #f5f5f5;
        border-radius: 4px;
        margin-top: 10px;
    }
    """

    with gr.Blocks(
        title="图表问答数据集审核系统",
        theme=gr.themes.Soft(),
        css=custom_css
    ) as app:

        # Per-session stores: the Q&A list and existing reviews of the loaded
        # chart, both kept as JSON strings.
        qa_data_json = gr.State(value="[]")
        current_reviews_json = gr.State(value="{}")

        gr.Markdown("""
        # 📊 图表问答数据集审核系统

        审核每个图表对应的问题和答案是否合理正确。使用 ← → 按钮切换图表。
        """)

        # ---- Status row -------------------------------------------------
        with gr.Row():
            status_text = gr.Textbox(
                label="审核统计",
                value="等待加载数据...",
                interactive=False,
                show_label=False,
                scale=2
            )
            progress_text = gr.Textbox(
                label="进度",
                value="请在左侧选择图表",
                interactive=False,
                show_label=False,
                scale=1
            )

        with gr.Row():

            # ---- Left column: dataset selection and navigation ----------
            with gr.Column(scale=1, min_width=250):
                gr.Markdown("### 📁 数据选择")

                source_dropdown = gr.Dropdown(
                    label="数据来源 (Source)",
                    choices=[],
                    interactive=True
                )

                chart_type_dropdown = gr.Dropdown(
                    label="图表类型 (Chart Type)",
                    choices=[],
                    interactive=True
                )

                chart_dropdown = gr.Dropdown(
                    label="图表 ID",
                    choices=[],
                    interactive=True
                )

                model_dropdown = gr.Dropdown(
                    label="模型 (Model)",
                    choices=[],
                    interactive=True
                )

                gr.Markdown("---")

                with gr.Row():
                    prev_btn = gr.Button("⬅️ 上一个")
                    next_btn = gr.Button("➡️ 下一个")

                export_btn = gr.Button("📥 导出审核记录", variant="secondary")
                # Hidden until an export has produced a message to show.
                export_result = gr.Textbox(label="", visible=False)

                reviewer_input = gr.Textbox(
                    label="审核人",
                    value="default",
                    interactive=True
                )

                debug_info = gr.Textbox(
                    label="调试信息",
                    value="",
                    interactive=False,
                    show_label=False,
                    elem_classes=["debug-panel"]
                )

            # ---- Middle column: chart display ---------------------------
            with gr.Column(scale=2, min_width=400):
                gr.Markdown("### 📈 图表展示")

                html_display = gr.HTML(
                    value="<div style='text-align:center;padding:50px;color:#999;'>请选择图表</div>",
                    elem_classes=["chart-container"]
                )

            # ---- Right column: labels and review form -------------------
            with gr.Column(scale=2, min_width=400):

                gr.Markdown("### 📝 图表标签")
                label_display = gr.Markdown(
                    value="暂无信息",
                    elem_classes=["control-panel"]
                )

                gr.Markdown("### ❓ 问答审核")

                # Invisible carrier for the id of the selected Q&A pair.
                current_qa_id = gr.Textbox(visible=False, value="")

                qa_question_display = gr.Textbox(
                    label="问题",
                    interactive=False,
                    lines=2,
                    value=""
                )
                qa_answer_display = gr.Textbox(
                    label="答案",
                    interactive=False,
                    lines=1,
                    value=""
                )

                qa_selector = gr.Radio(
                    label="选择要审核的问答对",
                    choices=[],
                    interactive=True
                )

                gr.Markdown("---")
                gr.Markdown("#### 审核操作")

                status_radio = gr.Radio(
                    label="审核状态",
                    choices=[
                        ("✅ 正确", "correct"),
                        ("❌ 错误", "incorrect"),
                        ("✏️ 需修改", "needs_modification"),
                        ("⏳ 待定", "pending")
                    ],
                    value="pending",
                    interactive=True
                )

                # The empty string means "no issue type" but is not one of the
                # listed choices. allow_custom_value lets that value (and
                # free-text issue types) pass Gradio's choice validation, so
                # saving a review without an issue type is not rejected.
                issue_type_dropdown = gr.Dropdown(
                    label="问题类型",
                    choices=[
                        "问题歧义",
                        "答案错误",
                        "图表不清晰",
                        "问题不合理",
                        "答案格式错误",
                        "其他"
                    ],
                    interactive=True,
                    allow_custom_value=True,
                    value=""
                )

                modified_question = gr.Textbox(
                    label="修改后的问题",
                    placeholder="如需修改问题,请在此输入...",
                    lines=2,
                    interactive=True,
                    value=""
                )

                modified_answer = gr.Textbox(
                    label="修改后的答案",
                    placeholder="如需修改答案,请在此输入...",
                    lines=1,
                    interactive=True,
                    value=""
                )

                comment_textbox = gr.Textbox(
                    label="评论/备注",
                    placeholder="请输入审核意见或备注...",
                    lines=2,
                    interactive=True,
                    value=""
                )

                save_btn = gr.Button("💾 保存审核结果", variant="primary")
                # Hidden until a save has produced a message to show.
                save_result = gr.Textbox(label="", visible=False)

        # ---- Event wiring ------------------------------------------------

        def init_dataset():
            """Fill the source dropdown and preselect the first source."""
            structure = data_manager.get_dataset_structure()
            sources = list(structure.get('sources', {}).keys())
            return gr.Dropdown(choices=sources, value=sources[0] if sources else None)

        app.load(
            fn=init_dataset,
            outputs=[source_dropdown]
        )

        # Cascade: source -> chart types -> chart IDs and models.
        source_dropdown.change(
            fn=update_chart_type_dropdown,
            inputs=[source_dropdown],
            outputs=[chart_type_dropdown]
        )

        chart_type_dropdown.change(
            fn=update_chart_dropdown,
            inputs=[source_dropdown, chart_type_dropdown],
            outputs=[chart_dropdown, model_dropdown]
        )

        # Changing either the model or the chart reloads the chart data.
        model_dropdown.change(
            fn=load_chart_data,
            inputs=[source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown],
            outputs=[
                html_display, label_display, qa_data_json, status_text, progress_text,
                current_reviews_json, qa_selector, debug_info
            ]
        )

        chart_dropdown.change(
            fn=load_chart_data,
            inputs=[source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown],
            outputs=[
                html_display, label_display, qa_data_json, status_text, progress_text,
                current_reviews_json, qa_selector, debug_info
            ]
        )

        def on_qa_selected(qa_index_str, qa_json, reviews_json):
            """Populate the review form for the Q&A pair picked in the selector.

            Returns eight values matching the outputs wired below: Q&A id,
            question, answer, status radio update, issue type, modified
            question, modified answer and comment. Blank form values are
            returned when nothing is selected or the selection cannot be
            resolved.
            """
            if not qa_index_str or not qa_json:
                return ["", "", "", gr.Radio(value="pending"), "", "", "", ""]

            try:
                qa_list = json.loads(qa_json)
                reviews = json.loads(reviews_json)

                # Selector labels look like "Q<n>: <question>"; n is 1-based.
                index = int(qa_index_str.split(":")[0].replace("Q", "")) - 1
                # Reject out-of-range values explicitly: a negative index
                # would otherwise silently wrap around to the end of the list.
                if not 0 <= index < len(qa_list):
                    raise IndexError(f"QA index {index} out of range")
                qa = qa_list[index]

                # JSON object keys are always strings, so look the review up
                # by the string form of the id.
                review = reviews.get(str(qa['id']), {})

                return [
                    qa['id'],
                    qa['question'],
                    qa['answer'],
                    gr.Radio(value=review.get('status', 'pending')),
                    review.get('issue_type', ''),
                    review.get('modified_question', ''),
                    review.get('modified_answer', ''),
                    review.get('comment', '')
                ]
            except Exception as e:
                # UI callback boundary: log and fall back to a blank form
                # rather than surfacing an error to the reviewer.
                print(f"Error in on_qa_selected: {e}")
                return ["", "", "", gr.Radio(value="pending"), "", "", "", ""]

        qa_selector.change(
            fn=on_qa_selected,
            inputs=[qa_selector, qa_data_json, current_reviews_json],
            outputs=[
                current_qa_id, qa_question_display, qa_answer_display,
                status_radio, issue_type_dropdown, modified_question, modified_answer, comment_textbox
            ]
        )

        # NOTE(review): these handlers update the source/type dropdowns,
        # which presumably re-triggers the cascading .change handlers above
        # and resets chart/model to their first choices — confirm that
        # prev/next land on the intended chart across chart-type boundaries.
        prev_btn.click(
            fn=navigate_prev,
            outputs=[source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown]
        )

        next_btn.click(
            fn=navigate_next,
            outputs=[source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown]
        )

        # Save, then reveal the (initially hidden) result box.
        save_btn.click(
            fn=save_review_handler,
            inputs=[
                current_qa_id,
                chart_dropdown,
                source_dropdown,
                chart_type_dropdown,
                model_dropdown,
                qa_question_display,
                qa_answer_display,
                status_radio,
                modified_question,
                modified_answer,
                issue_type_dropdown,
                comment_textbox,
                reviewer_input
            ],
            outputs=[save_result]
        ).then(
            fn=lambda: gr.Textbox(visible=True),
            outputs=[save_result]
        )

        # Export, then reveal the (initially hidden) result box.
        export_btn.click(
            fn=export_reviews_handler,
            outputs=[export_result]
        ).then(
            fn=lambda: gr.Textbox(visible=True),
            outputs=[export_result]
        )

    return app
|
|
|
|
| |
|
|
if __name__ == "__main__":
    # Script entry point: build the interface, then start the server with
    # the fixed host, port and share settings below.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    app = create_ui()
    app.launch(**launch_options)