import re
import sys

import pandas as pd

# A decimal number: optional sign, digits with optional fraction (or a bare
# fraction such as ".5"), and an optional exponent. The looser class
# ``[\d.]+`` would also swallow a trailing sentence period ("0.95.") and make
# float() fail, and it cannot match scientific notation such as "1e-05".
_NUMBER = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'

# Base-training line, e.g.
# "Epoch 3/50 - Train Loss: 0.41, Train Acc: 0.88, Val Loss: 0.52, Val Acc: 0.85"
_BASE_PATTERN = re.compile(
    r'Epoch (\d+)/50 - Train Loss: (' + _NUMBER + r'), Train Acc: (' + _NUMBER
    + r'), Val Loss: (' + _NUMBER + r'), Val Acc: (' + _NUMBER + r')'
)

# Fine-tuning line, e.g. "Fine-tuning Epoch 3/50 - Train Acc: 0.91, Val Acc: 0.90"
_FINE_TUNE_PATTERN = re.compile(
    r'Fine-tuning Epoch (\d+)/50 - Train Acc: (' + _NUMBER
    + r'), Val Acc: (' + _NUMBER + r')'
)


def parse_log_file(log_file_path):
    """Extract per-epoch metrics from a training log and save them as CSV.

    The log is scanned for base-training lines and fine-tuning lines (see the
    module-level patterns). Each non-empty group of records is written to the
    current working directory:

    * ``base_training_metrics.csv`` with columns
      ``epoch, train_loss, train_acc, val_loss, val_acc``
    * ``fine_tuning_metrics.csv`` with columns ``epoch, train_acc, val_acc``

    A CSV file is only written (and a summary line printed) when at least one
    matching record was found for it.

    Args:
        log_file_path: Path of the UTF-8 encoded log file to parse.

    Returns:
        None.

    Raises:
        OSError: If the log file cannot be opened (e.g. FileNotFoundError).
    """
    with open(log_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Collect base-training records; only epochs up to 50 count as base training.
    base_training_data = [
        {
            'epoch': int(match.group(1)),
            'train_loss': float(match.group(2)),
            'train_acc': float(match.group(3)),
            'val_loss': float(match.group(4)),
            'val_acc': float(match.group(5)),
        }
        for match in _BASE_PATTERN.finditer(content)
        if int(match.group(1)) <= 50
    ]

    # Collect fine-tuning records (accuracy only; these lines carry no loss).
    fine_tuning_data = [
        {
            'epoch': int(match.group(1)),
            'train_acc': float(match.group(2)),
            'val_acc': float(match.group(3)),
        }
        for match in _FINE_TUNE_PATTERN.finditer(content)
    ]

    # Convert to DataFrames and save as CSV, skipping empty result sets.
    if base_training_data:
        base_df = pd.DataFrame(base_training_data)
        base_df.to_csv('base_training_metrics.csv', index=False)
        print(f"基础训练数据已保存到 base_training_metrics.csv,共 {len(base_training_data)} 条记录")

    if fine_tuning_data:
        fine_tune_df = pd.DataFrame(fine_tuning_data)
        fine_tune_df.to_csv('fine_tuning_metrics.csv', index=False)
        print(f"微调训练数据已保存到 fine_tuning_metrics.csv,共 {len(fine_tuning_data)} 条记录")


if __name__ == '__main__':
    # Log file path: first command-line argument if given, otherwise the
    # original default file name.
    log_file_path = sys.argv[1] if len(sys.argv) > 1 else '2025-04-11_14-13-49_train.log'
    parse_log_file(log_file_path)