Spaces:

pengfali
/

GeoLLM

Runtime error

GeoLLM / test_1.py

Pengfa Li

Upload folder using huggingface_hub

badcf3c verified 6 months ago

1.2 kB

	####################################################
	# 统计json文件中text出现的次数,现在没什么用
	####################################################


	import json

	def analyze_text_content(file_path):
	    """Count the ``text`` entries of a JSON file and report the repeated ones.

	    The parsed JSON document is iterated and every item is indexed with the
	    ``'text'`` key, so the file must contain a sequence of objects that each
	    provide that key.

	    Args:
	        file_path: Path of the JSON file to read; it is opened as UTF-8.

	    Returns:
	        A ``(total_texts, duplicates)`` tuple. ``total_texts`` is the number
	        of items read. ``duplicates`` is a plain dict mapping each text that
	        occurs more than once to its occurrence count, ordered by first
	        appearance in the file.

	    Raises:
	        OSError: If the file cannot be opened.
	        json.JSONDecodeError: If the file content is not valid JSON.
	        KeyError: If an item has no ``'text'`` key.
	    """
	    # Imported here so the function does not rely on a new file-level import.
	    from collections import Counter

	    # Load the whole JSON document.
	    with open(file_path, 'r', encoding='utf-8') as f:
	        data = json.load(f)

	    # Collect every text value, in file order.
	    text_list = [item['text'] for item in data]

	    # Counter replaces the hand-written dict tally and, like a dict,
	    # keeps keys in first-seen order.
	    text_count = Counter(text_list)

	    # Keep only the texts that occur more than once.
	    duplicates = {text: count for text, count in text_count.items() if count > 1}

	    return len(text_list), duplicates

	# Run the analysis on a fixed output file and print a short report.
	file_path = r"F:\GeoLLM\output\zero_shot\deepseek-ai\DeepSeek-R1.json"
	total, duplicates = analyze_text_content(file_path)

	# Always report the total number of entries first.
	print(f"文件中共包含 {total} 个text")

	if not duplicates:
	    # Nothing occurred more than once.
	    print("\n没有发现重复的text")
	else:
	    # List each repeated text together with its occurrence count.
	    print("\n重复的text项：")
	    for text, count in duplicates.items():
	        print(f"出现 {count} 次的text: {text}")