"""Pair each request's user question with the model's answer.

Reads two JSON Lines files -- one holding the original requests and one
holding the model responses -- joins them on ``custom_id`` and writes a
plain-text report with one group of three lines per request.
"""

import json
from typing import Any, Dict, Iterator, List, Tuple


def _as_dict(value: Any) -> Dict[str, Any]:
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _iter_jsonl(file_path: str) -> Iterator[Dict[str, Any]]:
    """Yield every JSON object found in a UTF-8 JSON Lines file.

    Blank or whitespace-only lines are skipped, as are lines whose JSON value
    is not an object. A line that is not valid JSON still raises
    ``json.JSONDecodeError`` so corrupt input is not silently ignored.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # A trailing newline or spacer line is not a record.
                continue
            entry = json.loads(line)
            if isinstance(entry, dict):
                yield entry


def read_original_file(file_path: str) -> Tuple[List[str], Dict[str, Any]]:
    """Read the request file and collect each request's user question.

    For every record with a truthy ``custom_id``, the content of the first
    message whose ``role`` is ``'user'`` (under ``body.messages``) is taken as
    the question. Records without a ``custom_id``, without a user message, or
    whose first user message has no content are skipped.

    Args:
        file_path: Path to the JSON Lines request file.

    Returns:
        A ``(custom_id_order, original_data)`` tuple: the ids in file order,
        and a mapping from id to the user question.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    original_data = {}
    custom_id_order = []

    for entry in _iter_jsonl(file_path):
        custom_id = entry.get('custom_id')
        if not custom_id:
            continue

        # Extract the user question. ``body`` / ``messages`` may be present
        # but null, so fall back to empty containers instead of crashing.
        body = _as_dict(entry.get('body'))
        messages = body.get('messages') or []
        user_content = None
        for msg in messages:
            if isinstance(msg, dict) and msg.get('role') == 'user':
                user_content = msg.get('content')
                break

        if user_content is not None:
            original_data[custom_id] = user_content
            custom_id_order.append(custom_id)

    return custom_id_order, original_data


def read_output_file(file_path: str) -> Dict[str, Any]:
    """Read the model output file and collect each response's text.

    For every record with a truthy ``custom_id``, the text is taken from
    ``response.body.choices[0].message.content``. Any missing or null step
    along that path (for example a record whose ``response`` is null) yields
    an empty string rather than an error.

    Args:
        file_path: Path to the JSON Lines output file.

    Returns:
        A mapping from ``custom_id`` to the model output.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    output_data = {}

    for entry in _iter_jsonl(file_path):
        custom_id = entry.get('custom_id')
        if not custom_id:
            continue

        # Extract the model output, tolerating null at every nesting level.
        response = _as_dict(entry.get('response'))
        body = _as_dict(response.get('body'))
        choices = body.get('choices')
        model_output = ''
        if isinstance(choices, list) and choices:
            message = _as_dict(_as_dict(choices[0]).get('message'))
            content = message.get('content')
            if content is not None:
                model_output = content

        output_data[custom_id] = model_output

    return output_data


def _write_results(save_path, custom_id_order, original_data, output_data):
    """Write one three-line group per id, separated by single blank lines."""
    groups = []
    for cid in custom_id_order:
        original_question = original_data.get(cid, '')
        model_output = output_data.get(cid, '')
        groups.append(
            f"custom_id: {cid}\n"
            f"原问题: {original_question}\n"
            f"大模型输出: {model_output}\n"
        )

    with open(save_path, 'w', encoding='utf-8') as f:
        # Joining with '\n' puts a blank line between groups and none after
        # the last one.
        f.write('\n'.join(groups))


def main():
    """Prompt for the three file paths, then build and save the report."""
    # Ask the user for the input and output paths.
    original_path = input("请输入原始请求文件路径:").strip()
    output_path = input("请输入大模型输出文件路径:").strip()
    save_path = input("请输入结果保存路径:").strip()

    # Load both files.
    custom_id_order, original_data = read_original_file(original_path)
    output_data = read_output_file(output_path)

    # Write the result file.
    _write_results(save_path, custom_id_order, original_data, output_data)


if __name__ == "__main__":
    main()