| import os |
| import re |
| from transformers import AutoTokenizer |
| import json |
| import matplotlib.pyplot as plt |
|
|
| |
| parent_folder = "/mnt/lyc/wuxinrui/Qwen2.5-Math/evaluation" |
| pattern = re.compile(r"MODEL-.*-TIP-.*-STAGE-add-DATA-.*") |
|
|
| entry_list = [] |
| setting_names = [] |
| |
| tokenizer = AutoTokenizer.from_pretrained("/mnt/lyc/wuxinrui/LLaMA-Factory/TCMv4_8ratio/1_5B_TCMv4_8ratio_models/models", trust_remote_code=True) |
|
|
| def calculate_token_length(text): |
| """计算文本的token长度""" |
| tokens = tokenizer(text)['input_ids'] |
| return len(tokens) |
|
|
| if __name__ == "__main__": |
| for folder in os.listdir(parent_folder): |
| |
| if folder == "MODEL-7B_TCMv4_8ratio_v1_global_step_180-TIP-8ratio-STAGE-2-DATA-math500": |
| entry_list.append(os.path.join(parent_folder, folder)) |
| setting_names.append(folder) |
|
|
| for entry, setting_name in zip(entry_list, setting_names): |
| token_length_metrics = [] |
| final_answer_metrics = [] |
| |
| for sub_entry in os.listdir(entry): |
| if not os.path.isdir(os.path.join(entry, sub_entry)): |
| continue |
| |
| for root, dirs, files in os.walk(os.path.join(entry, sub_entry)): |
| for file in files: |
| if "metrics" in file: |
| continue |
| |
| cot_answer_path = os.path.join(root, file) |
| |
| with open(cot_answer_path, "r") as f: |
| token_length_data = {} |
| final_answer_data = {} |
| budget_length = int(sub_entry) |
| token_length_data['budget_length'] = budget_length |
| final_answer_data['budget_length'] = budget_length |
| total_tokens = 0 |
| answer_count = 0 |
| single_final_answer_count = 0 |
| multiple_final_answer_count = 0 |
| total_delta_length = 0 |
| valid_delta_count = 0 |
| |
| for line in f: |
| data = json.loads(line) |
| answer_text = data['code'][0] |
| token_length = calculate_token_length(answer_text) |
| total_tokens += token_length |
| answer_count += 1 |
| |
| |
| if token_length < budget_length: |
| delta = budget_length - token_length |
| total_delta_length += delta |
| valid_delta_count += 1 |
| |
| |
| first_match = answer_text.find("</think>") |
| if first_match != -1: |
| modified_text = answer_text[:first_match] + answer_text[first_match + len("</think>"):] |
| second_match = modified_text.find("</think>") |
| |
| if second_match == -1: |
| single_final_answer_count += 1 |
| else: |
| multiple_final_answer_count += 1 |
| |
| avg_token_length = total_tokens / answer_count if answer_count > 0 else 0 |
| token_length_data['avg_token_length'] = avg_token_length |
| token_length_data['total_tokens'] = total_tokens |
| |
| |
| avg_delta_length = total_delta_length / valid_delta_count if valid_delta_count > 0 else 0 |
| token_length_data['avg_delta_length'] = avg_delta_length |
| token_length_data['avg_delta_length/BUDGET'] = avg_delta_length / token_length_data['budget_length'] |
| token_length_data['valid_delta_count'] = valid_delta_count |
| |
| token_length_metrics.append(token_length_data) |
| |
| final_answer_data['single_final_answer_count'] = single_final_answer_count |
| final_answer_data['multiple_final_answer_count'] = multiple_final_answer_count |
| final_answer_data['total_answer_count'] = answer_count |
| final_answer_data['multiple_final_answer_ratio'] = multiple_final_answer_count / answer_count if answer_count > 0 else 0 |
| final_answer_metrics.append(final_answer_data) |
| |
| |
| token_length_metrics.sort(key=lambda x: x['budget_length']) |
| final_answer_metrics.sort(key=lambda x: x['budget_length']) |
| |
| |
| fig, ax = plt.subplots(figsize=(10, 6)) |
| budget_lengths = [data['budget_length'] for data in token_length_metrics] |
| avg_lengths = [data['avg_token_length'] for data in token_length_metrics] |
| |
| length_csv = os.path.join(entry, "token_length_metrics.csv") |
| with open(length_csv, "w") as f: |
| f.write("budget_length,avg_token_length,total_tokens,avg_delta_length,valid_delta_count, avg_delta_length/BUDGET]\n") |
| for data in token_length_metrics: |
| f.write(f"{data['budget_length']},{data['avg_token_length']},{data['total_tokens']},{data['avg_delta_length']},{data['valid_delta_count']}, {data['avg_delta_length/BUDGET']}\n") |
| |
| ax.plot(budget_lengths, avg_lengths, marker='o', color='blue', linewidth=2) |
| ax.set_title(f"Average Token Length by Budget Length - {setting_name}") |
| ax.set_xlabel('Budget Length') |
| ax.set_ylabel('Average Token Length') |
| ax.grid(True) |
| |
| plt.tight_layout() |
| pic_name = os.path.join(entry, "token_length_metrics.png") |
| plt.savefig(pic_name, dpi=300) |
| print(f"Saved figure as {pic_name}") |
| plt.close() |
| |
| |
| fig, ax = plt.subplots(figsize=(10, 6)) |
| avg_deltas = [data['avg_delta_length'] for data in token_length_metrics] |
| |
| ax.plot(budget_lengths, avg_deltas, marker='o', color='green', linewidth=2) |
| ax.set_title(f"Average Delta Length by Budget Length - {setting_name}") |
| ax.set_xlabel('Budget Length') |
| ax.set_ylabel('Average Delta Length (budget - actual)') |
| ax.grid(True) |
| |
| plt.tight_layout() |
| delta_pic_name = os.path.join(entry, "delta_length_metrics.png") |
| plt.savefig(delta_pic_name, dpi=300) |
| print(f"Saved delta figure as {delta_pic_name}") |
| plt.close() |
| |
| |
| final_answer_csv = os.path.join(entry, "final_answer_metrics.csv") |
| with open(final_answer_csv, "w") as f: |
| f.write("budget_length,single_final_answer_count,multiple_final_answer_count,total_answer_count,multiple_final_answer_ratio\n") |
| for data in final_answer_metrics: |
| f.write(f"{data['budget_length']},{data['single_final_answer_count']},{data['multiple_final_answer_count']},{data['total_answer_count']},{data['multiple_final_answer_ratio']}\n") |