####################################################
# Count how often each "text" value occurs in a JSON file.
# (Marked by the original author as not very useful at the moment.)
####################################################
import json
from collections import Counter

# Input file analysed when this module is executed as a script.
DEFAULT_FILE_PATH = r"F:\GeoLLM\output\zero_shot\deepseek-ai\DeepSeek-R1.json"


def analyze_text_content(file_path):
    """Count the "text" entries in a JSON file and report the repeated ones.

    The file is read as UTF-8 and parsed with ``json.load``; the parsed value
    is iterated and each element is indexed with the key ``'text'``.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        A ``(total_texts, duplicates)`` tuple. ``total_texts`` is the number
        of elements in the parsed data. ``duplicates`` is a dict mapping each
        text that occurs more than once to its occurrence count, ordered by
        first appearance in the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If an element has no ``'text'`` key.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Collect every text value, keeping the order in which they appear.
    text_list = [item['text'] for item in data]
    total_texts = len(text_list)

    # Counter keeps first-seen insertion order, so the filtered dict lists
    # duplicates in the same order as a manual dict-based tally would.
    text_count = Counter(text_list)
    duplicates = {text: count for text, count in text_count.items() if count > 1}

    return total_texts, duplicates


def main():
    """Analyse DEFAULT_FILE_PATH and print the total and any repeated texts."""
    total, duplicates = analyze_text_content(DEFAULT_FILE_PATH)
    print(f"文件中共包含 {total} 个text")

    if duplicates:
        print("\n重复的text项:")
        for text, count in duplicates.items():
            print(f"出现 {count} 次的text: {text}")
    else:
        print("\n没有发现重复的text")


# Run the report only when executed directly, so importing this module does
# not touch the file system or print anything.
if __name__ == "__main__":
    main()