File size: 7,727 Bytes
7155cf2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import os
import re
from transformers import AutoTokenizer
import json
import matplotlib.pyplot as plt
# 父文件夹路径
parent_folder = "/mnt/lyc/wuxinrui/Qwen2.5-Math/evaluation"
pattern = re.compile(r"MODEL-.*-TIP-.*-STAGE-add-DATA-.*")
entry_list = []
setting_names = []
# tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-1.8B", trust_remote_code=True) # 使用合适的tokenizer
tokenizer = AutoTokenizer.from_pretrained("/mnt/lyc/wuxinrui/LLaMA-Factory/TCMv4_8ratio/1_5B_TCMv4_8ratio_models/models", trust_remote_code=True)
def calculate_token_length(text):
"""计算文本的token长度"""
tokens = tokenizer(text)['input_ids']
return len(tokens)
if __name__ == "__main__":
for folder in os.listdir(parent_folder):
# if pattern.match(folder):
if folder == "MODEL-DS_QW_1_5B-TIP-prompt_v2-STAGE-2-DATA-math500":
entry_list.append(os.path.join(parent_folder, folder))
setting_names.append(folder)
for entry, setting_name in zip(entry_list, setting_names):
token_length_metrics = []
final_answer_metrics = []
for sub_entry in os.listdir(entry):
if not os.path.isdir(os.path.join(entry, sub_entry)):
continue
for root, dirs, files in os.walk(os.path.join(entry, sub_entry)):
for file in files:
if "metrics" in file:
continue
cot_answer_path = os.path.join(root, file)
with open(cot_answer_path, "r") as f:
token_length_data = {}
final_answer_data = {}
budget_length = int(sub_entry)
token_length_data['budget_length'] = budget_length
final_answer_data['budget_length'] = budget_length
total_tokens = 0
answer_count = 0
single_final_answer_count = 0
multiple_final_answer_count = 0
total_delta_length = 0 # 新增:总长度差
valid_delta_count = 0 # 新增:有效计数(长度小于budget的)
for line in f:
data = json.loads(line)
answer_text = data['code'][0]
token_length = calculate_token_length(answer_text)
total_tokens += token_length
answer_count += 1
# 计算delta_length(新增部分)
if token_length < budget_length:
delta = budget_length - token_length
total_delta_length += delta
valid_delta_count += 1
# 判断 **Final Answer** 的数量
first_match = answer_text.find("</think>")
if first_match != -1:
modified_text = answer_text[:first_match] + answer_text[first_match + len("</think>"):]
second_match = modified_text.find("</think>")
if second_match == -1:
single_final_answer_count += 1
else:
multiple_final_answer_count += 1
avg_token_length = total_tokens / answer_count if answer_count > 0 else 0
token_length_data['avg_token_length'] = avg_token_length
token_length_data['total_tokens'] = total_tokens
# 新增:计算平均delta_length
avg_delta_length = total_delta_length / valid_delta_count if valid_delta_count > 0 else 0
token_length_data['avg_delta_length'] = avg_delta_length
token_length_data['avg_delta_length/BUDGET'] = avg_delta_length / token_length_data['budget_length']
token_length_data['valid_delta_count'] = valid_delta_count
token_length_metrics.append(token_length_data)
final_answer_data['single_final_answer_count'] = single_final_answer_count
final_answer_data['multiple_final_answer_count'] = multiple_final_answer_count
final_answer_data['total_answer_count'] = answer_count
final_answer_data['multiple_final_answer_ratio'] = multiple_final_answer_count / answer_count if answer_count > 0 else 0
final_answer_metrics.append(final_answer_data)
# 按budget_length排序
token_length_metrics.sort(key=lambda x: x['budget_length'])
final_answer_metrics.sort(key=lambda x: x['budget_length'])
# 绘制图表 - 平均token长度
fig, ax = plt.subplots(figsize=(10, 6))
budget_lengths = [data['budget_length'] for data in token_length_metrics]
avg_lengths = [data['avg_token_length'] for data in token_length_metrics]
length_csv = os.path.join(entry, "token_length_metrics.csv")
with open(length_csv, "w") as f:
f.write("budget_length,avg_token_length,total_tokens,avg_delta_length,valid_delta_count, avg_delta_length/BUDGET]\n")
for data in token_length_metrics:
f.write(f"{data['budget_length']},{data['avg_token_length']},{data['total_tokens']},{data['avg_delta_length']},{data['valid_delta_count']}, {data['avg_delta_length/BUDGET']}\n")
ax.plot(budget_lengths, avg_lengths, marker='o', color='blue', linewidth=2)
ax.set_title(f"Average Token Length by Budget Length - {setting_name}")
ax.set_xlabel('Budget Length')
ax.set_ylabel('Average Token Length')
ax.grid(True)
plt.tight_layout()
pic_name = os.path.join(entry, "token_length_metrics.png")
plt.savefig(pic_name, dpi=300)
print(f"Saved figure as {pic_name}")
plt.close()
# 新增:绘制delta_length图表
fig, ax = plt.subplots(figsize=(10, 6))
avg_deltas = [data['avg_delta_length'] for data in token_length_metrics]
ax.plot(budget_lengths, avg_deltas, marker='o', color='green', linewidth=2)
ax.set_title(f"Average Delta Length by Budget Length - {setting_name}")
ax.set_xlabel('Budget Length')
ax.set_ylabel('Average Delta Length (budget - actual)')
ax.grid(True)
plt.tight_layout()
delta_pic_name = os.path.join(entry, "delta_length_metrics.png")
plt.savefig(delta_pic_name, dpi=300)
print(f"Saved delta figure as {delta_pic_name}")
plt.close()
# 保存 **Final Answer** 的统计结果
final_answer_csv = os.path.join(entry, "final_answer_metrics.csv")
with open(final_answer_csv, "w") as f:
f.write("budget_length,single_final_answer_count,multiple_final_answer_count,total_answer_count,multiple_final_answer_ratio\n")
for data in final_answer_metrics:
f.write(f"{data['budget_length']},{data['single_final_answer_count']},{data['multiple_final_answer_count']},{data['total_answer_count']},{data['multiple_final_answer_ratio']}\n") |