""" 图表问答数据集审核系统 - Gradio 5.x 应用 用于人工审核每个图表对应的问题和答案是否合理正确 """ import gradio as gr from data_manager import DataManager, data_manager from typing import Dict, List, Optional, Tuple, Any import json import os import base64 # ============== 全局状态 ============== class AppState: """应用状态管理""" def __init__(self): self.current_source: str = "" self.current_chart_type: str = "" self.current_chart_id: str = "" self.current_model: str = "" self.all_paths: List[Dict] = [] self.current_index: int = -1 # 初始化时获取所有路径 self.refresh_paths() def refresh_paths(self): """刷新所有图表路径""" self.all_paths = data_manager.get_all_chart_paths() def get_current_path(self) -> Optional[Dict]: """获取当前路径信息""" if 0 <= self.current_index < len(self.all_paths): return self.all_paths[self.current_index] return None def set_position(self, source: str, chart_type: str, chart_id: str, model: str): """设置当前位置""" self.current_source = source self.current_chart_type = chart_type self.current_chart_id = chart_id self.current_model = model # 更新索引 for i, path in enumerate(self.all_paths): if (path['source'] == source and path['chart_type'] == chart_type and path['chart_id'] == chart_id and path['model'] == model): self.current_index = i break def navigate(self, direction: int) -> bool: """ 导航到上一个或下一个图表 Args: direction: 1 表示下一个,-1 表示上一个 Returns: 是否成功导航 """ new_index = self.current_index + direction if 0 <= new_index < len(self.all_paths): self.current_index = new_index path = self.all_paths[new_index] self.current_source = path['source'] self.current_chart_type = path['chart_type'] self.current_chart_id = path['chart_id'] self.current_model = path['model'] return True return False state = AppState() # ============== UI 更新函数 ============== def get_dataset_choices() -> Tuple[List[str], List[str], List[str], List[str]]: """获取数据集的选择项""" structure = data_manager.get_dataset_structure() sources = list(structure.get('sources', {}).keys()) chart_types = [] charts = [] models = [] if state.current_source: source_data = structure['sources'].get(state.current_source, {}) chart_types = list(source_data.get('chart_types', {}).keys()) if state.current_chart_type: charts = data_manager.get_chart_list(state.current_source, state.current_chart_type) ct_data = source_data.get('chart_types', {}).get(state.current_chart_type, {}) models = ct_data.get('models', []) return sources, chart_types, charts, models def update_chart_type_dropdown(source: str): """更新图表类型下拉框""" state.current_source = source structure = data_manager.get_dataset_structure() chart_types = list(structure.get('sources', {}).get(source, {}).get('chart_types', {}).keys()) return gr.Dropdown(choices=chart_types, value=chart_types[0] if chart_types else None) def update_chart_dropdown(source: str, chart_type: str): """更新图表和模型下拉框""" state.current_source = source state.current_chart_type = chart_type charts = data_manager.get_chart_list(source, chart_type) structure = data_manager.get_dataset_structure() ct_data = structure.get('sources', {}).get(source, {}).get('chart_types', {}).get(chart_type, {}) models = ct_data.get('models', []) return ( gr.Dropdown(choices=charts, value=charts[0] if charts else None), gr.Dropdown(choices=models, value=models[0] if models else None) ) def create_embedded_html(html_content: str, chart_id: str = "") -> str: """ 创建嵌入式的 HTML 显示 使用 data URI 方式嵌入 HTML 内容到 iframe 中 """ if not html_content: return f"""
📭
暂无图表内容
图表 ID: {chart_id or '未知'}
请检查数据集目录中是否存在该图表的 HTML 文件
""" # 使用 base64 编码 HTML 内容,避免引号转义问题 html_bytes = html_content.encode('utf-8') html_base64 = base64.b64encode(html_bytes).decode('utf-8') # 使用 data URI iframe_html = f""" """ return iframe_html def load_chart_data(source: str, chart_type: str, chart_id: str, model: str): """ 加载图表数据并返回所有 UI 更新 Returns: 包含所有 UI 组件更新值的元组 """ if not all([source, chart_type, chart_id, model]): return [ create_embedded_html(""), # html_display "### 请在左侧选择图表", # label_info "[]", # qa_data (JSON string) "等待加载数据...", # status_text "请在左侧选择图表", # progress_text "{}", # current_qa_reviews (JSON string) gr.Radio(choices=[], value=None), # qa_selector "" # debug_info ] # 更新状态 state.set_position(source, chart_type, chart_id, model) # 获取图表数据 chart_data = data_manager.get_chart_data(source, chart_type, chart_id) html_content = chart_data.get('html_content', '') label_info = chart_data.get('label_info', {}) # 创建嵌入式 HTML embedded_html = create_embedded_html(html_content, chart_id) # 调试信息 debug_info = f"📁 {source}/{chart_type}/{chart_id} | HTML: {len(html_content)} 字符" # 格式化标签信息 if label_info: label_text = f""" ### 图表信息 | 属性 | 值 | |------|-----| | **编号** | {label_info.get('Number', '-')} | | **类型** | {label_info.get('Type', '-')} | | **来源** | {label_info.get('Source', '-')} | | **主题** | {label_info.get('Topic', '-')} | | **描述** | {label_info.get('Describe', '-')} | | **链接** | [查看原图]({label_info.get('Weblink', '#')}) | """ else: label_text = "### ⚠️ 暂无标签信息" # 获取 QA 列表 qa_list = data_manager.get_qa_list(source, chart_type, model, chart_id) # 获取已有的审核记录 existing_reviews = {} for review in data_manager.get_reviews_by_chart(chart_id, model): existing_reviews[review['qa_id']] = review # 更新进度文本 progress_text = f"当前: {state.current_index + 1} / {len(state.all_paths)} 个图表" # 状态文本 stats = data_manager.get_review_stats() status_text = f"已审核: {stats['total']} | ✅正确: {stats['correct']} | ❌错误: {stats['incorrect']} | ✏️需修改: {stats['needs_modification']}" # QA 选择器选项 qa_choices = [f"Q{i+1}: {qa.question[:50]}..." for i, qa in enumerate(qa_list)] if qa_list else [] return [ embedded_html, # html_display label_text, # label_info json.dumps([{"id": qa.id, "question": qa.question, "answer": qa.answer} for qa in qa_list]), # qa_data (JSON string) status_text, # status_text progress_text, # progress_text json.dumps(existing_reviews), # current_qa_reviews (JSON string) gr.Radio(choices=qa_choices, value=qa_choices[0] if qa_choices else None), # qa_selector debug_info # debug_info ] def navigate_prev(): """导航到上一个图表""" if state.navigate(-1): path = state.get_current_path() if path: return ( gr.Dropdown(value=path['source']), gr.Dropdown(value=path['chart_type']), gr.Dropdown(value=path['chart_id']), gr.Dropdown(value=path['model']) ) return [gr.Dropdown(), gr.Dropdown(), gr.Dropdown(), gr.Dropdown()] def navigate_next(): """导航到下一个图表""" if state.navigate(1): path = state.get_current_path() if path: return ( gr.Dropdown(value=path['source']), gr.Dropdown(value=path['chart_type']), gr.Dropdown(value=path['chart_id']), gr.Dropdown(value=path['model']) ) return [gr.Dropdown(), gr.Dropdown(), gr.Dropdown(), gr.Dropdown()] def save_review_handler( qa_id: str, chart_id: str, source: str, chart_type: str, model: str, original_question: str, original_answer: str, status: str, modified_question: str, modified_answer: str, issue_type: str, comment: str, reviewer: str ) -> str: """保存审核记录""" if not qa_id: return "❌ 请先选择一个问答对" review_data = { "qa_id": qa_id, "chart_id": chart_id, "source": source, "chart_type": chart_type, "model": model, "original_question": original_question, "original_answer": original_answer, "status": status, "modified_question": modified_question, "modified_answer": modified_answer, "issue_type": issue_type, "comment": comment, "reviewer": reviewer } result = data_manager.save_review(review_data) # 返回更新后的统计 stats = data_manager.get_review_stats() return f"✅ 已保存! 总计: {stats['total']} | ✅正确: {stats['correct']} | ❌错误: {stats['incorrect']} | ✏️需修改: {stats['needs_modification']}" def export_reviews_handler(): """导出审核记录""" output_path = data_manager.export_reviews("./reviews_export.json") return f"✅ 审核记录已导出至: {output_path}" # ============== 创建 Gradio 界面 ============== def create_ui(): """创建 Gradio 界面""" # 自定义 CSS custom_css = """ .chart-container { min-height: 520px; } .control-panel { background: #f8f9fa; padding: 15px; border-radius: 8px; margin-bottom: 10px; } .debug-panel { font-size: 12px; color: #666; padding: 8px; background: #f5f5f5; border-radius: 4px; margin-top: 10px; } """ with gr.Blocks( title="图表问答数据集审核系统", theme=gr.themes.Soft(), css=custom_css ) as app: # 隐藏的状态存储(使用 JSON 字符串) qa_data_json = gr.State(value="[]") current_reviews_json = gr.State(value="{}") # ==================== 标题栏 ==================== gr.Markdown(""" # 📊 图表问答数据集审核系统 审核每个图表对应的问题和答案是否合理正确。使用 ← → 按钮切换图表。 """) # ==================== 顶部状态栏 ==================== with gr.Row(): status_text = gr.Textbox( label="审核统计", value="等待加载数据...", interactive=False, show_label=False, scale=2 ) progress_text = gr.Textbox( label="进度", value="请在左侧选择图表", interactive=False, show_label=False, scale=1 ) # ==================== 主内容区 ==================== with gr.Row(): # ===== 左侧:导航控制 ===== with gr.Column(scale=1, min_width=250): gr.Markdown("### 📁 数据选择") source_dropdown = gr.Dropdown( label="数据来源 (Source)", choices=[], interactive=True ) chart_type_dropdown = gr.Dropdown( label="图表类型 (Chart Type)", choices=[], interactive=True ) chart_dropdown = gr.Dropdown( label="图表 ID", choices=[], interactive=True ) model_dropdown = gr.Dropdown( label="模型 (Model)", choices=[], interactive=True ) gr.Markdown("---") # 导航按钮 with gr.Row(): prev_btn = gr.Button("⬅️ 上一个") next_btn = gr.Button("➡️ 下一个") # 导出按钮 export_btn = gr.Button("📥 导出审核记录", variant="secondary") export_result = gr.Textbox(label="", visible=False) # 审核人设置 reviewer_input = gr.Textbox( label="审核人", value="default", interactive=True ) # 调试信息 debug_info = gr.Textbox( label="调试信息", value="", interactive=False, show_label=False, elem_classes=["debug-panel"] ) # ===== 中间:图表展示 ===== with gr.Column(scale=2, min_width=400): gr.Markdown("### 📈 图表展示") # HTML 图表展示(使用 iframe) html_display = gr.HTML( value="
请选择图表
", elem_classes=["chart-container"] ) # ===== 右侧:标签信息和 QA 审核 ===== with gr.Column(scale=2, min_width=400): # 标签信息 gr.Markdown("### 📝 图表标签") label_display = gr.Markdown( value="暂无信息", elem_classes=["control-panel"] ) # QA 审核区 gr.Markdown("### ❓ 问答审核") # 当前选中的 QA 信息(隐藏) current_qa_id = gr.Textbox(visible=False, value="") # QA 显示 qa_question_display = gr.Textbox( label="问题", interactive=False, lines=2, value="" ) qa_answer_display = gr.Textbox( label="答案", interactive=False, lines=1, value="" ) # QA 选择器 qa_selector = gr.Radio( label="选择要审核的问答对", choices=[], interactive=True ) gr.Markdown("---") gr.Markdown("#### 审核操作") # 审核状态选择 status_radio = gr.Radio( label="审核状态", choices=[ ("✅ 正确", "correct"), ("❌ 错误", "incorrect"), ("✏️ 需修改", "needs_modification"), ("⏳ 待定", "pending") ], value="pending", interactive=True ) # 问题类型 issue_type_dropdown = gr.Dropdown( label="问题类型", choices=[ "问题歧义", "答案错误", "图表不清晰", "问题不合理", "答案格式错误", "其他" ], interactive=True, value="" ) # 修改后的问题和答案 modified_question = gr.Textbox( label="修改后的问题", placeholder="如需修改问题,请在此输入...", lines=2, interactive=True, value="" ) modified_answer = gr.Textbox( label="修改后的答案", placeholder="如需修改答案,请在此输入...", lines=1, interactive=True, value="" ) # 评论 comment_textbox = gr.Textbox( label="评论/备注", placeholder="请输入审核意见或备注...", lines=2, interactive=True, value="" ) # 保存按钮 save_btn = gr.Button("💾 保存审核结果", variant="primary") save_result = gr.Textbox(label="", visible=False) # ==================== 事件绑定 ==================== # 初始化数据集选择 def init_dataset(): structure = data_manager.get_dataset_structure() sources = list(structure.get('sources', {}).keys()) return gr.Dropdown(choices=sources, value=sources[0] if sources else None) app.load( fn=init_dataset, outputs=[source_dropdown] ) # Source 变化 -> 更新 Chart Type source_dropdown.change( fn=update_chart_type_dropdown, inputs=[source_dropdown], outputs=[chart_type_dropdown] ) # Chart Type 变化 -> 更新 Chart 和 Model chart_type_dropdown.change( fn=update_chart_dropdown, inputs=[source_dropdown, chart_type_dropdown], outputs=[chart_dropdown, model_dropdown] ) # 选择图表 -> 加载数据 model_dropdown.change( fn=load_chart_data, inputs=[source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown], outputs=[ html_display, label_display, qa_data_json, status_text, progress_text, current_reviews_json, qa_selector, debug_info ] ) chart_dropdown.change( fn=load_chart_data, inputs=[source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown], outputs=[ html_display, label_display, qa_data_json, status_text, progress_text, current_reviews_json, qa_selector, debug_info ] ) # QA 选择器变化 -> 更新审核面板 def on_qa_selected(qa_index_str, qa_json, reviews_json): if not qa_index_str or not qa_json: return ["", "", "", gr.Radio(value="pending"), "", "", "", ""] try: qa_list = json.loads(qa_json) reviews = json.loads(reviews_json) # 解析索引 index = int(qa_index_str.split(":")[0].replace("Q", "")) - 1 qa = qa_list[index] # 检查是否有现有审核 review = reviews.get(qa['id'], {}) return [ qa['id'], # current_qa_id qa['question'], # qa_question_display qa['answer'], # qa_answer_display gr.Radio(value=review.get('status', 'pending')), # status_radio review.get('issue_type', ''), # issue_type_dropdown review.get('modified_question', ''), # modified_question review.get('modified_answer', ''), # modified_answer review.get('comment', '') # comment_textbox ] except Exception as e: print(f"Error in on_qa_selected: {e}") return ["", "", "", gr.Radio(value="pending"), "", "", "", ""] qa_selector.change( fn=on_qa_selected, inputs=[qa_selector, qa_data_json, current_reviews_json], outputs=[ current_qa_id, qa_question_display, qa_answer_display, status_radio, issue_type_dropdown, modified_question, modified_answer, comment_textbox ] ) # 导航按钮 prev_btn.click( fn=navigate_prev, outputs=[source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown] ) next_btn.click( fn=navigate_next, outputs=[source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown] ) # 保存审核 save_btn.click( fn=save_review_handler, inputs=[ current_qa_id, chart_dropdown, source_dropdown, chart_type_dropdown, model_dropdown, qa_question_display, qa_answer_display, status_radio, modified_question, modified_answer, issue_type_dropdown, comment_textbox, reviewer_input ], outputs=[save_result] ).then( fn=lambda: gr.Textbox(visible=True), outputs=[save_result] ) # 导出 export_btn.click( fn=export_reviews_handler, outputs=[export_result] ).then( fn=lambda: gr.Textbox(visible=True), outputs=[export_result] ) return app # ============== 主入口 ============== if __name__ == "__main__": app = create_ui() app.launch( server_name="0.0.0.0", server_port=7860, share=True )