Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import json | |
| from eval_qwen3_vl import run_evaluation | |
| from config import OUTPUT_PATH | |
def start_evaluation():
    """Run the evaluation and build the two strings shown in the UI.

    Returns:
        A ``(summary, details)`` tuple of strings. On success ``summary``
        reports the accuracy, the question count and ``OUTPUT_PATH``, and
        ``details`` is a JSON dump of at most the first ten entries of the
        results. If anything raises, ``summary`` carries the error text and
        ``details`` is an empty string.
    """
    try:
        accuracy, all_results = run_evaluation()

        # Only the first ten entries are displayed so the page stays short.
        if len(all_results) > 10:
            preview = all_results[:10]
        else:
            preview = all_results

        # NOTE(review): one is subtracted from the result count; presumably
        # the results carry one non-question entry -- confirm against
        # run_evaluation.
        question_count = len(all_results) - 1

        summary = (
            f"✅ 评测完成!总准确率:{accuracy:.2%}\n"
            f"📊 共评测 {question_count} 题(VisuLogic 1000题)\n"
            f"📁 完整结果已保存到:{OUTPUT_PATH}"
        )
        details = json.dumps(preview, ensure_ascii=False, indent=2)
        return summary, details
    except Exception as err:
        # UI boundary: report the failure in the result box instead of raising.
        return f"❌ 评测出错:{err!s}", ""
# Build the Gradio interface.
with gr.Blocks(title="Qwen3-VL VisuLogic 评测") as demo:
    gr.Markdown("# Qwen3-VL VisuLogic 评测工具")
    gr.Markdown("### 一键运行 VisuLogic 1000题 视觉逻辑推理评测")
    with gr.Row():
        # Gradio's Button expects a short size name such as "sm" or "lg";
        # "large" is not one of the accepted values.
        run_btn = gr.Button("🚀 开始评测", size="lg")
    acc_output = gr.Textbox(label="评测结果", lines=5)
    sample_output = gr.Textbox(label="前10题详情", lines=10)
    # Clicking the button runs start_evaluation and writes its two return
    # values into the two text boxes, in order.
    run_btn.click(start_evaluation, outputs=[acc_output, sample_output])

# Start the Gradio server when run as a script, listening on all interfaces
# at port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)