tools / utils /upload /compare.py
Adinosaur's picture
Upload folder using huggingface_hub
1c980b1 verified
import json
def read_original_file(file_path):
    """Collect the first user message of every request in a JSONL file.

    Each non-blank line is parsed as one JSON object. Entries without a
    truthy ``custom_id`` are ignored, as are entries that have no message
    with ``role == 'user'`` (or whose first such message has no content).

    Args:
        file_path: Path of the UTF-8 encoded JSONL request file.

    Returns:
        A ``(custom_id_order, original_data)`` tuple. ``custom_id_order``
        lists the kept custom ids in file order; ``original_data`` maps each
        custom id to the content of its first user message.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    original_data = {}
    custom_id_order = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            # Blank lines (e.g. a trailing newline at end of file) are not
            # JSON documents; skip them instead of letting json.loads raise.
            if not line:
                continue
            entry = json.loads(line)
            custom_id = entry.get('custom_id')
            if not custom_id:
                continue
            # Extract the user question. ``or`` also covers keys that are
            # present but null, which a plain .get() default would not.
            body = entry.get('body') or {}
            messages = body.get('messages') or []
            user_content = next(
                (msg.get('content') for msg in messages
                 if msg.get('role') == 'user'),
                None,
            )
            if user_content is not None:
                original_data[custom_id] = user_content
                custom_id_order.append(custom_id)
    return custom_id_order, original_data
def read_output_file(file_path):
    """Collect the model output of every entry in a JSONL result file.

    Each non-blank line is parsed as one JSON object. The output text is
    read from ``response.body.choices[0].message.content``; any missing or
    null level along that path yields an empty string rather than an error.
    Entries without a truthy ``custom_id`` are ignored.

    Args:
        file_path: Path of the UTF-8 encoded JSONL output file.

    Returns:
        A dict mapping each custom id to its model output (``''`` when the
        entry carries no output).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    output_data = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            # Blank lines (e.g. a trailing newline at end of file) are not
            # JSON documents; skip them instead of letting json.loads raise.
            if not line:
                continue
            entry = json.loads(line)
            custom_id = entry.get('custom_id')
            if not custom_id:
                continue
            # Extract the model output. ``or`` also covers keys that are
            # present but null, which a plain .get() default would not.
            response = entry.get('response') or {}
            body = response.get('body') or {}
            choices = body.get('choices') or []
            model_output = ''
            if choices:
                message = choices[0].get('message') or {}
                content = message.get('content')
                # A null content would otherwise be reported as "None".
                if content is not None:
                    model_output = content
            output_data[custom_id] = model_output
    return output_data
def main():
    """Prompt for three paths and write a question/answer comparison report.

    Reads the request file and the model-output file, then writes one group
    of three lines per custom id (id, original question, model output) to
    the save path. Groups are separated by a single blank line, and no
    blank line follows the last group.
    """
    # Ask for the paths interactively, in this fixed order.
    prompts = (
        "请输入原始请求文件路径:",
        "请输入大模型输出文件路径:",
        "请输入结果保存路径:",
    )
    original_path, output_path, save_path = [
        input(prompt).strip() for prompt in prompts
    ]

    # Load both sides of the comparison.
    ordered_ids, questions = read_original_file(original_path)
    answers = read_output_file(output_path)

    # Write the report: joining with "\n" puts exactly one blank line
    # between consecutive groups and none after the final one.
    with open(save_path, 'w', encoding='utf-8') as report:
        groups = [
            f"custom_id: {cid}\n"
            f"原问题: {questions.get(cid, '')}\n"
            f"大模型输出: {answers.get(cid, '')}\n"
            for cid in ordered_ids
        ]
        report.write("\n".join(groups))
# Run the interactive comparison only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()