File size: 7,727 Bytes
7155cf2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import os
import re
from transformers import AutoTokenizer
import json
import matplotlib.pyplot as plt

# Parent folder path: the directory whose sub-folders (one per evaluation
# setting) are scanned by the script section below.
parent_folder = "/mnt/lyc/wuxinrui/Qwen2.5-Math/evaluation"
# Regex for setting-folder names of the form
# MODEL-<...>-TIP-<...>-STAGE-add-DATA-<...>. In the visible code it is only
# referenced from a commented-out filter in the script section.
pattern = re.compile(r"MODEL-.*-TIP-.*-STAGE-add-DATA-.*")

# Parallel lists filled by the script section: full paths of the selected
# setting folders and their bare folder names.
entry_list = []
setting_names = []
# Alternative tokenizer (use whichever tokenizer is appropriate):
# tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-1.8B", trust_remote_code=True)
# NOTE: the tokenizer is loaded at import time from a local checkpoint path.
tokenizer = AutoTokenizer.from_pretrained("/mnt/lyc/wuxinrui/LLaMA-Factory/TCMv4_8ratio/1_5B_TCMv4_8ratio_models/models", trust_remote_code=True)

def calculate_token_length(text):
    """Return how many token ids the module-level tokenizer yields for ``text``."""
    encoded = tokenizer(text)
    input_ids = encoded['input_ids']
    return len(input_ids)

# Tag whose occurrences are counted in every answer text.
THINK_CLOSE_TAG = "</think>"

# Column order of the two CSV reports; the names double as metric dict keys.
TOKEN_LENGTH_COLUMNS = (
    "budget_length",
    "avg_token_length",
    "total_tokens",
    "avg_delta_length",
    "valid_delta_count",
    "avg_delta_length/BUDGET",
)
FINAL_ANSWER_COLUMNS = (
    "budget_length",
    "single_final_answer_count",
    "multiple_final_answer_count",
    "total_answer_count",
    "multiple_final_answer_ratio",
)


def summarize_answers(answer_texts, budget_length, length_fn=None):
    """Aggregate token-length and ``</think>`` statistics for a set of answers.

    Args:
        answer_texts: Iterable of answer strings.
        budget_length: Integer budget the token lengths are compared against
            (the script parses it from the sub-directory name).
        length_fn: Callable mapping a string to its token count. Defaults to
            ``calculate_token_length``.

    Returns:
        A ``(token_length_data, final_answer_data)`` tuple of dicts whose keys
        match ``TOKEN_LENGTH_COLUMNS`` and ``FINAL_ANSWER_COLUMNS``.
    """
    if length_fn is None:
        length_fn = calculate_token_length

    total_tokens = 0
    answer_count = 0
    single_count = 0
    multiple_count = 0
    total_delta_length = 0
    valid_delta_count = 0

    for answer_text in answer_texts:
        token_length = length_fn(answer_text)
        total_tokens += token_length
        answer_count += 1

        # Only answers strictly shorter than the budget contribute to the
        # "unused budget" (delta) statistics.
        if token_length < budget_length:
            total_delta_length += budget_length - token_length
            valid_delta_count += 1

        # Count non-overlapping occurrences directly. Removing the first tag
        # and searching again could fabricate a match across the splice point.
        tag_count = answer_text.count(THINK_CLOSE_TAG)
        if tag_count == 1:
            single_count += 1
        elif tag_count > 1:
            multiple_count += 1

    avg_delta_length = (
        total_delta_length / valid_delta_count if valid_delta_count > 0 else 0
    )
    token_length_data = {
        "budget_length": budget_length,
        "avg_token_length": total_tokens / answer_count if answer_count > 0 else 0,
        "total_tokens": total_tokens,
        "avg_delta_length": avg_delta_length,
        # Guard against a budget of 0, which would otherwise divide by zero.
        "avg_delta_length/BUDGET": (
            avg_delta_length / budget_length if budget_length else 0
        ),
        "valid_delta_count": valid_delta_count,
    }
    final_answer_data = {
        "budget_length": budget_length,
        "single_final_answer_count": single_count,
        "multiple_final_answer_count": multiple_count,
        "total_answer_count": answer_count,
        "multiple_final_answer_ratio": (
            multiple_count / answer_count if answer_count > 0 else 0
        ),
    }
    return token_length_data, final_answer_data


def format_csv(columns, rows):
    """Render ``rows`` (dicts) as CSV text with ``columns`` as header and order."""
    lines = [",".join(columns)]
    lines.extend(",".join(str(row[col]) for col in columns) for row in rows)
    return "\n".join(lines) + "\n"


def _read_answer_texts(path):
    """Yield ``record['code'][0]`` for every non-blank JSON line in ``path``."""
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            yield json.loads(line)['code'][0]


def _collect_metrics(entry):
    """Walk the budget sub-directories of ``entry`` and summarize each file.

    Every sub-directory whose name parses as an integer is treated as one
    budget; every file below it whose name does not contain ``"metrics"``
    yields one row in each returned list.

    Returns:
        ``(token_length_metrics, final_answer_metrics)``, both sorted by
        ``budget_length``.
    """
    token_length_metrics = []
    final_answer_metrics = []

    for sub_entry in os.listdir(entry):
        sub_dir = os.path.join(entry, sub_entry)
        if not os.path.isdir(sub_dir):
            continue
        try:
            budget_length = int(sub_entry)
        except ValueError:
            print(f"Skipping non-numeric budget directory: {sub_dir}")
            continue

        for root, _dirs, files in os.walk(sub_dir):
            for file in files:
                if "metrics" in file:
                    continue
                cot_answer_path = os.path.join(root, file)
                token_length_data, final_answer_data = summarize_answers(
                    _read_answer_texts(cot_answer_path), budget_length
                )
                token_length_metrics.append(token_length_data)
                final_answer_metrics.append(final_answer_data)

    # Sort by budget_length.
    token_length_metrics.sort(key=lambda x: x['budget_length'])
    final_answer_metrics.sort(key=lambda x: x['budget_length'])
    return token_length_metrics, final_answer_metrics


def _save_line_plot(x_values, y_values, title, y_label, color, out_path):
    """Draw one metric against budget length and save it as a 300-dpi image."""
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(x_values, y_values, marker='o', color=color, linewidth=2)
    ax.set_title(title)
    ax.set_xlabel('Budget Length')
    ax.set_ylabel(y_label)
    ax.grid(True)
    fig.tight_layout()
    fig.savefig(out_path, dpi=300)
    plt.close(fig)


def main():
    """Compute, plot and export the metrics for every selected setting folder.

    For each selected folder this writes ``token_length_metrics.csv``,
    ``token_length_metrics.png``, ``delta_length_metrics.png`` and
    ``final_answer_metrics.csv`` into that folder.
    """
    for folder in os.listdir(parent_folder):
        # To process every matching setting instead of this single one,
        # use `pattern.match(folder)` as the condition.
        if folder == "MODEL-DS_QW_1_5B-TIP-prompt_v2-STAGE-2-DATA-math500":
            entry_list.append(os.path.join(parent_folder, folder))
            setting_names.append(folder)

    for entry, setting_name in zip(entry_list, setting_names):
        token_length_metrics, final_answer_metrics = _collect_metrics(entry)
        budget_lengths = [data['budget_length'] for data in token_length_metrics]

        length_csv = os.path.join(entry, "token_length_metrics.csv")
        with open(length_csv, "w", encoding="utf-8") as f:
            f.write(format_csv(TOKEN_LENGTH_COLUMNS, token_length_metrics))

        # Chart: average token length.
        pic_name = os.path.join(entry, "token_length_metrics.png")
        _save_line_plot(
            budget_lengths,
            [data['avg_token_length'] for data in token_length_metrics],
            f"Average Token Length by Budget Length - {setting_name}",
            'Average Token Length',
            'blue',
            pic_name,
        )
        print(f"Saved figure as {pic_name}")

        # Chart: average delta length (budget - actual).
        delta_pic_name = os.path.join(entry, "delta_length_metrics.png")
        _save_line_plot(
            budget_lengths,
            [data['avg_delta_length'] for data in token_length_metrics],
            f"Average Delta Length by Budget Length - {setting_name}",
            'Average Delta Length (budget - actual)',
            'green',
            delta_pic_name,
        )
        print(f"Saved delta figure as {delta_pic_name}")

        # Save the `</think>` occurrence statistics.
        final_answer_csv = os.path.join(entry, "final_answer_metrics.csv")
        with open(final_answer_csv, "w", encoding="utf-8") as f:
            f.write(format_csv(FINAL_ANSWER_COLUMNS, final_answer_metrics))


if __name__ == "__main__":
    main()