"""Summarise token-length and final-answer statistics for budgeted evaluation runs.

For the selected evaluation setting folder, every integer-named sub-folder is
treated as a token budget.  Each non-"metrics" file below it is read as JSONL;
the first element of each record's ``code`` field is tokenised and counted.
Per-file summaries are written to two CSV files and two PNG charts inside the
setting folder.
"""

import csv
import json
import os
import re

import matplotlib.pyplot as plt
from transformers import AutoTokenizer

# Parent folder that holds one sub-folder per evaluation setting.
parent_folder = "/mnt/lyc/wuxinrui/Qwen2.5-Math/evaluation"

# Regex for setting-folder names.  It is not used for selection at the moment:
# the script picks the single folder named by TARGET_SETTING instead.
pattern = re.compile(r"MODEL-.*-TIP-.*-STAGE-add-DATA-.*")

# The only setting folder that is currently analysed.
TARGET_SETTING = "MODEL-DS_QW_1_5B-TIP-prompt_v2-STAGE-2-DATA-math500"

# Marker whose occurrences are counted in every answer.
# NOTE(review): the literal was an empty string in the previous revision, which
# made every answer count as "multiple".  The value below is restored from the
# accompanying comment and the metric names -- confirm it is the intended marker.
FINAL_ANSWER_MARKER = "**Final Answer**"

TOKEN_LENGTH_FIELDS = [
    "budget_length",
    "avg_token_length",
    "total_tokens",
    "avg_delta_length",
    "valid_delta_count",
    "avg_delta_length/BUDGET",
]
FINAL_ANSWER_FIELDS = [
    "budget_length",
    "single_final_answer_count",
    "multiple_final_answer_count",
    "total_answer_count",
    "multiple_final_answer_ratio",
]

entry_list = []
setting_names = []

# Tokenizer used for all length measurements; loaded once at import time.
tokenizer = AutoTokenizer.from_pretrained(
    "/mnt/lyc/wuxinrui/LLaMA-Factory/TCMv4_8ratio/1_5B_TCMv4_8ratio_models/models",
    trust_remote_code=True,
)


def calculate_token_length(text):
    """Return the number of token ids the module-level tokenizer yields for *text*."""
    tokens = tokenizer(text)['input_ids']
    return len(tokens)


def _summarize_answer_file(path, budget_length):
    """Compute token-length and marker statistics for one JSONL answer file.

    Args:
        path: File to read; each non-blank line must be a JSON object whose
            ``code`` field is a sequence with the answer text at index 0.
        budget_length: Token budget associated with the file.

    Returns:
        A ``(token_length_data, final_answer_data)`` pair of dicts.
    """
    total_tokens = 0
    answer_count = 0
    single_count = 0
    multiple_count = 0
    total_delta = 0
    valid_delta_count = 0  # answers strictly shorter than the budget

    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline) in the JSONL file.
                continue
            answer_text = json.loads(line)['code'][0]

            token_length = calculate_token_length(answer_text)
            total_tokens += token_length
            answer_count += 1

            # Unused budget is only accumulated for answers under the budget.
            if token_length < budget_length:
                total_delta += budget_length - token_length
                valid_delta_count += 1

            occurrences = answer_text.count(FINAL_ANSWER_MARKER)
            if occurrences == 1:
                single_count += 1
            elif occurrences > 1:
                multiple_count += 1

    avg_delta = total_delta / valid_delta_count if valid_delta_count > 0 else 0
    token_length_data = {
        'budget_length': budget_length,
        'avg_token_length': total_tokens / answer_count if answer_count > 0 else 0,
        'total_tokens': total_tokens,
        'avg_delta_length': avg_delta,
        # A budget of 0 would otherwise raise ZeroDivisionError.
        'avg_delta_length/BUDGET': avg_delta / budget_length if budget_length else 0,
        'valid_delta_count': valid_delta_count,
    }
    final_answer_data = {
        'budget_length': budget_length,
        'single_final_answer_count': single_count,
        'multiple_final_answer_count': multiple_count,
        'total_answer_count': answer_count,
        'multiple_final_answer_ratio': multiple_count / answer_count if answer_count > 0 else 0,
    }
    return token_length_data, final_answer_data


def _collect_metrics(entry):
    """Walk the budget sub-folders of *entry* and summarise every answer file.

    Returns:
        ``(token_length_metrics, final_answer_metrics)``: two lists with one
        dict per processed file, in directory-walk order.
    """
    token_length_metrics = []
    final_answer_metrics = []

    for sub_entry in os.listdir(entry):
        sub_path = os.path.join(entry, sub_entry)
        if not os.path.isdir(sub_path):
            continue
        try:
            budget_length = int(sub_entry)
        except ValueError:
            # A stray non-numeric folder should not abort the whole run.
            print(f"Skipping {sub_path}: folder name is not an integer budget")
            continue

        for root, _dirs, files in os.walk(sub_path):
            for file_name in files:
                if "metrics" in file_name:
                    continue
                token_data, answer_data = _summarize_answer_file(
                    os.path.join(root, file_name), budget_length
                )
                token_length_metrics.append(token_data)
                final_answer_metrics.append(answer_data)

    return token_length_metrics, final_answer_metrics


def _write_csv(path, fieldnames, rows):
    """Write *rows* (dicts keyed by *fieldnames*) to *path* with a header row."""
    with open(path, "w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, lineterminator="\n")
        writer.writeheader()
        writer.writerows(rows)


def _save_line_plot(x_values, y_values, color, title, ylabel, path):
    """Plot *y_values* against *x_values* as a marked line and save it to *path*."""
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(x_values, y_values, marker='o', color=color, linewidth=2)
    ax.set_title(title)
    ax.set_xlabel('Budget Length')
    ax.set_ylabel(ylabel)
    ax.grid(True)
    fig.tight_layout()
    fig.savefig(path, dpi=300)
    plt.close(fig)


def main():
    """Analyse the selected setting folder(s) and write CSV and PNG summaries."""
    for folder in os.listdir(parent_folder):
        if folder == TARGET_SETTING:
            entry_list.append(os.path.join(parent_folder, folder))
            setting_names.append(folder)

    for entry, setting_name in zip(entry_list, setting_names):
        token_length_metrics, final_answer_metrics = _collect_metrics(entry)

        # Order rows by budget so the CSVs and line charts read left to right.
        token_length_metrics.sort(key=lambda x: x['budget_length'])
        final_answer_metrics.sort(key=lambda x: x['budget_length'])

        budget_lengths = [data['budget_length'] for data in token_length_metrics]
        avg_lengths = [data['avg_token_length'] for data in token_length_metrics]
        avg_deltas = [data['avg_delta_length'] for data in token_length_metrics]

        _write_csv(
            os.path.join(entry, "token_length_metrics.csv"),
            TOKEN_LENGTH_FIELDS,
            token_length_metrics,
        )

        # Chart: average token length per budget.
        pic_name = os.path.join(entry, "token_length_metrics.png")
        _save_line_plot(
            budget_lengths,
            avg_lengths,
            'blue',
            f"Average Token Length by Budget Length - {setting_name}",
            'Average Token Length',
            pic_name,
        )
        print(f"Saved figure as {pic_name}")

        # Chart: average unused budget per budget.
        delta_pic_name = os.path.join(entry, "delta_length_metrics.png")
        _save_line_plot(
            budget_lengths,
            avg_deltas,
            'green',
            f"Average Delta Length by Budget Length - {setting_name}",
            'Average Delta Length (budget - actual)',
            delta_pic_name,
        )
        print(f"Saved delta figure as {delta_pic_name}")

        # Final-answer marker statistics.
        _write_csv(
            os.path.join(entry, "final_answer_metrics.csv"),
            FINAL_ANSWER_FIELDS,
            final_answer_metrics,
        )


if __name__ == "__main__":
    main()