| """ |
| 生成示例数据脚本 |
| 用于测试审核系统 |
| """ |
| import os |
| import json |
| from pathlib import Path |
|
|
# Chart types generated for each chart library. The keys double as directory
# names and their order determines the order in which files are written.
_CHART_TYPES_BY_SOURCE = {
    "Apache_Echarts": ["bar", "line", "pie"],
    "Plotly": ["scatter", "bar", "heatmap"],
    "ChartJS": ["line", "doughnut", "radar"],
}

# Model names; each one gets its own question/answer directory per chart type.
_MODELS = ["gpt-4", "claude-3", "gemini-pro"]


def _render_chart_html(chart_id, source, chart_type, index):
    """Return the standalone HTML page for one sample chart."""
    # NOTE(review): the page always loads and initialises ECharts, whatever
    # `source` is, and passes `chart_type` straight through as the series
    # type. Kept as-is because the files are only placeholder review data.
    return f"""<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>{chart_id}</title>
<script src="https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"></script>
<style>
body {{ margin: 0; padding: 20px; font-family: Arial, sans-serif; }}
#chart {{ width: 100%; height: 400px; }}
.title {{ text-align: center; color: #333; margin-bottom: 20px; }}
</style>
</head>
<body>
<h2 class="title">示例图表 - {source} - {chart_type} #{index}</h2>
<div id="chart"></div>
<script>
var chart = echarts.init(document.getElementById('chart'));
var option = {{
title: {{ text: 'Sample {chart_type.capitalize()} Chart' }},
tooltip: {{}},
xAxis: {{ data: ['A', 'B', 'C', 'D', 'E'] }},
yAxis: {{}},
series: [{{
type: '{chart_type}',
data: [Math.random() * 100, Math.random() * 100, Math.random() * 100, Math.random() * 100, Math.random() * 100]
}}]
}};
chart.setOption(option);
</script>
</body>
</html>"""


def _write_json(path, payload):
    """Write `payload` to `path` as indented UTF-8 JSON, keeping non-ASCII text."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)


def create_sample_dataset(
    base_path="./dataset",
    *,
    charts_per_type: int = 3,
    questions_per_chart: int = 2,
) -> None:
    """Create a sample dataset on disk for exercising the review system.

    For every (source, chart type) pair the function writes, below
    ``base_path``:

    * ``web/<source>/<type>/<chart_id>.html`` - a small HTML chart page;
    * ``label/<source>/<type>/<chart_id>.json`` - the chart's label record;
    * ``question_answer/<source>/<type>/<model>/<chart_id>_q<N>.json`` -
      one question/answer record per model and question number.

    Directories are created as needed and existing files are overwritten.
    A short summary is printed once everything has been written.

    Args:
        base_path: Root directory of the dataset (string or path object).
            Defaults to ``./dataset``.
        charts_per_type: Number of charts generated per chart type.
        questions_per_chart: Number of question/answer records generated per
            chart and model.
    """
    root = Path(base_path)

    # Counted from the files actually written, so the summary cannot drift
    # away from the loop bounds.
    chart_count = 0
    qa_count = 0

    for source, chart_types in _CHART_TYPES_BY_SOURCE.items():
        for chart_type in chart_types:
            web_dir = root / "web" / source / chart_type
            label_dir = root / "label" / source / chart_type
            qa_dirs = [
                root / "question_answer" / source / chart_type / model
                for model in _MODELS
            ]
            for directory in (web_dir, label_dir, *qa_dirs):
                directory.mkdir(parents=True, exist_ok=True)

            for i in range(1, charts_per_type + 1):
                number = f"{i:04d}"
                chart_id = f"chart_{number}_{chart_type}"

                html_path = web_dir / f"{chart_id}.html"
                html_path.write_text(
                    _render_chart_html(chart_id, source, chart_type, i),
                    encoding="utf-8",
                )

                _write_json(
                    label_dir / f"{chart_id}.json",
                    {
                        "Number": number,
                        "Type": chart_type,
                        "Source": source,
                        "Weblink": f"https://example.com/{source}/{chart_type}/{i}",
                        "Topic": f"Sample {chart_type} chart #{i}",
                        "Describe": f"This is a sample {chart_type} chart for testing the review system. It demonstrates the visualization capabilities of {source}.",
                        "Other": "",
                    },
                )
                chart_count += 1

                # `j` is the model's position in _MODELS; it offsets the
                # answer so each model's records differ.
                for j, qa_dir in enumerate(qa_dirs):
                    for q in range(1, questions_per_chart + 1):
                        _write_json(
                            qa_dir / f"{chart_id}_q{q}.json",
                            {
                                "id": f"{chart_id}_q{q}",
                                "chart": chart_id,
                                "question": f"在图表 {chart_id} 中,第 {q} 个数据点的值是多少?",
                                "answer": f"约为 {50 + q * 10 + j * 5}",
                            },
                        )
                        qa_count += 1

    print("✅ 示例数据集创建完成!")
    print(f"📁 数据集位置: {root.absolute()}")
    print(f"📊 共创建 {chart_count} 个图表")
    print(f"❓ 共创建 {qa_count} 个问答对")
|
|
|
|
# Script entry point: generate the sample dataset only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    create_sample_dataset()
|
|