Spaces:

Pekku
/

Chart

Sleeping

App Files Files Community

Chart / create_sample_data.py

adddrett

clean init

9fce90e about 1 month ago

raw

history blame contribute delete

4.51 kB

	"""
	生成示例数据脚本
	用于测试审核系统
	"""
	import os
	import json
	from pathlib import Path

	def _render_chart_html(chart_id, source, chart_type, index):
	    """Return the standalone HTML page for one sample chart.

	    Args:
	        chart_id: Identifier used as the page ``<title>``.
	        source: Charting-library name shown in the page heading.
	        chart_type: Chart type shown in the heading and used as the series type.
	        index: 1-based position of the chart within its chart type.

	    Returns:
	        The complete HTML document as a string.
	    """
	    # NOTE(review): the page always loads ECharts and passes ``chart_type``
	    # straight through as the series type, whatever ``source`` is. Whether
	    # every sample type (e.g. "doughnut") renders there is not checked here.
	    # Literal braces of the CSS/JS are doubled because this is an f-string.
	    return f"""<!DOCTYPE html>
	<html>
	<head>
	<meta charset="UTF-8">
	<title>{chart_id}</title>
	<script src="https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"></script>
	<style>
	body {{ margin: 0; padding: 20px; font-family: Arial, sans-serif; }}
	#chart {{ width: 100%; height: 400px; }}
	.title {{ text-align: center; color: #333; margin-bottom: 20px; }}
	</style>
	</head>
	<body>
	<h2 class="title">示例图表 - {source} - {chart_type} #{index}</h2>
	<div id="chart"></div>
	<script>
	var chart = echarts.init(document.getElementById('chart'));
	var option = {{
	title: {{ text: 'Sample {chart_type.capitalize()} Chart' }},
	tooltip: {{}},
	xAxis: {{ data: ['A', 'B', 'C', 'D', 'E'] }},
	yAxis: {{}},
	series: [{{
	type: '{chart_type}',
	data: [Math.random() * 100, Math.random() * 100, Math.random() * 100, Math.random() * 100, Math.random() * 100]
	}}]
	}};
	chart.setOption(option);
	</script>
	</body>
	</html>"""


	def _write_json(path, payload):
	    """Write *payload* to *path* as indented UTF-8 JSON, keeping non-ASCII text unescaped."""
	    with open(path, "w", encoding="utf-8") as f:
	        json.dump(payload, f, ensure_ascii=False, indent=2)


	def create_sample_dataset(
	    base_path: "str | os.PathLike" = "./dataset",
	    *,
	    charts_per_type: int = 3,
	    questions_per_chart: int = 2,
	) -> None:
	    """Create a small sample dataset for testing the review system.

	    The following tree is written below ``base_path`` (existing directories
	    are reused, existing files are overwritten)::

	        web/<source>/<chart_type>/<chart_id>.html
	        label/<source>/<chart_type>/<chart_id>.json
	        question_answer/<source>/<chart_type>/<model>/<chart_id>_q<k>.json

	    A short summary is printed once everything has been written.

	    Args:
	        base_path: Root directory of the dataset. Defaults to ``./dataset``.
	        charts_per_type: Number of sample charts generated for every
	            (source, chart type) pair.
	        questions_per_chart: Number of question/answer files generated for
	            every (chart, model) pair.

	    Raises:
	        ValueError: If either count is negative.
	    """
	    if charts_per_type < 0 or questions_per_chart < 0:
	        raise ValueError(
	            "charts_per_type and questions_per_chart must be non-negative"
	        )

	    root = Path(base_path)

	    # Sample configuration: chart types per charting library, and the models
	    # for which question/answer files are produced.
	    chart_types = {
	        "Apache_Echarts": ["bar", "line", "pie"],
	        "Plotly": ["scatter", "bar", "heatmap"],
	        "ChartJS": ["line", "doughnut", "radar"],
	    }
	    models = ["gpt-4", "claude-3", "gemini-pro"]

	    for source, types in chart_types.items():
	        for chart_type in types:
	            web_dir = root / "web" / source / chart_type
	            label_dir = root / "label" / source / chart_type
	            # One QA directory per model, computed once and reused below.
	            qa_dirs = [
	                root / "question_answer" / source / chart_type / model
	                for model in models
	            ]
	            for directory in (web_dir, label_dir, *qa_dirs):
	                directory.mkdir(parents=True, exist_ok=True)

	            for i in range(1, charts_per_type + 1):
	                number = f"{i:04d}"
	                chart_id = f"chart_{number}_{chart_type}"

	                # HTML page for the chart.
	                html_path = web_dir / f"{chart_id}.html"
	                html_path.write_text(
	                    _render_chart_html(chart_id, source, chart_type, i),
	                    encoding="utf-8",
	                )

	                # Label (metadata) file for the chart.
	                _write_json(
	                    label_dir / f"{chart_id}.json",
	                    {
	                        "Number": number,
	                        "Type": chart_type,
	                        "Source": source,
	                        "Weblink": f"https://example.com/{source}/{chart_type}/{i}",
	                        "Topic": f"Sample {chart_type} chart #{i}",
	                        "Describe": f"This is a sample {chart_type} chart for testing the review system. It demonstrates the visualization capabilities of {source}.",
	                        "Other": "",
	                    },
	                )

	                # Question/answer files. The answer is a synthetic value that
	                # varies with the question number and the model's position.
	                for j, qa_dir in enumerate(qa_dirs):
	                    for q in range(1, questions_per_chart + 1):
	                        _write_json(
	                            qa_dir / f"{chart_id}_q{q}.json",
	                            {
	                                "id": f"{chart_id}_q{q}",
	                                "chart": chart_id,
	                                "question": f"在图表 {chart_id} 中，第 {q} 个数据点的值是多少？",
	                                "answer": f"约为 {50 + q * 10 + j * 5}",
	                            },
	                        )

	    print("✅ 示例数据集创建完成！")
	    print(f"📁 数据集位置: {root.absolute()}")

	    # Summary figures come from the same counts that drive the loops above,
	    # so they cannot drift out of sync with what was actually written.
	    total_charts = sum(len(types) for types in chart_types.values()) * charts_per_type
	    total_qa = total_charts * len(models) * questions_per_chart
	    print(f"📊 共创建 {total_charts} 个图表")
	    print(f"❓ 共创建 {total_qa} 个问答对")


	if __name__ == "__main__":
	    # Generate the sample dataset only when this file is run as a script,
	    # not when it is imported as a module.
	    create_sample_dataset()