File size: 2,570 Bytes
1c980b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import json

def read_original_file(file_path):
    """Read a JSONL request file and collect the first user message per request.

    Each non-blank line is parsed as one JSON object. A record is kept only
    when it has a truthy ``custom_id`` and its ``body.messages`` list contains
    a message whose ``role`` is ``'user'`` with a non-null ``content``.

    Args:
        file_path: Path of the UTF-8 encoded JSONL request file.

    Returns:
        A tuple ``(custom_id_order, original_data)``:
        ``custom_id_order`` lists each kept ``custom_id`` once, in order of
        first appearance; ``original_data`` maps ``custom_id`` to the content
        of the first user message (the last record wins if an id repeats).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If the file cannot be opened.
    """
    original_data = {}
    custom_id_order = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            # Blank lines (e.g. a trailing empty line) are not records;
            # json.loads('') would raise, so skip them.
            if not line:
                continue

            entry = json.loads(line)
            custom_id = entry.get('custom_id')
            if not custom_id:
                continue

            # Extract the user question. ``or`` also covers keys that are
            # present but explicitly null, which ``.get(key, default)`` does not.
            body = entry.get('body') or {}
            messages = body.get('messages') or []
            user_content = None
            for msg in messages:
                if msg.get('role') == 'user':
                    user_content = msg.get('content')
                    break

            if user_content is not None:
                # Record the position only once so a repeated id does not
                # produce duplicate records downstream.
                if custom_id not in original_data:
                    custom_id_order.append(custom_id)
                original_data[custom_id] = user_content

    return custom_id_order, original_data

def read_output_file(file_path):
    """Read a JSONL result file and collect the model output per request.

    Each non-blank line is parsed as one JSON object. For every record with a
    truthy ``custom_id`` the text at
    ``response.body.choices[0].message.content`` is stored. Any missing or
    null step along that path yields an empty string instead of an error, so
    records without a usable response (presumably failed requests) still get
    an entry.

    Args:
        file_path: Path of the UTF-8 encoded JSONL result file.

    Returns:
        A dict mapping ``custom_id`` to the model output (``''`` when absent).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If the file cannot be opened.
    """
    output_data = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            # Blank lines are not records; json.loads('') would raise.
            if not line:
                continue

            entry = json.loads(line)
            custom_id = entry.get('custom_id')
            if not custom_id:
                continue

            # Extract the model output. ``or`` guards against keys that are
            # present but null (e.g. "response": null), where
            # ``.get(key, {})`` would return None and break the next lookup.
            response = entry.get('response') or {}
            body = response.get('body') or {}
            choices = body.get('choices') or []
            model_output = ''
            if choices:
                message = choices[0].get('message') or {}
                content = message.get('content')
                # A null content must not leak out as None, which the caller
                # would render as the literal text "None".
                if content is not None:
                    model_output = content

            output_data[custom_id] = model_output

    return output_data

def main():
    """Interactively merge a request file and a result file into a text report.

    Prompts on standard input for three paths (request file, model output
    file, report destination), loads both inputs, and writes one record per
    request in the order returned by ``read_original_file``. Each record has
    three lines (id, original question, model output); consecutive records
    are separated by a single blank line and nothing follows the last one.
    """
    # Ask the user for the three paths.
    request_file = input("请输入原始请求文件路径:").strip()
    response_file = input("请输入大模型输出文件路径:").strip()
    report_file = input("请输入结果保存路径:").strip()

    # Load both inputs.
    ordered_ids, questions = read_original_file(request_file)
    answers = read_output_file(response_file)

    # Write the report.
    with open(report_file, 'w', encoding='utf-8') as report:
        for position, request_id in enumerate(ordered_ids):
            # Emit the separator before every record except the first, which
            # leaves no blank line after the final record.
            if position > 0:
                report.write('\n')

            question = questions.get(request_id, '')
            answer = answers.get(request_id, '')
            report.write(
                f"custom_id: {request_id}\n"
                f"原问题: {question}\n"
                f"大模型输出: {answer}\n"
            )

# Run the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()