File size: 2,318 Bytes
c3ece38 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import os
import re
import sys

import pandas as pd
# Text form of a single metric value: a plain or scientific-notation decimal,
# or nan/inf. Stricter than ``[\d.]+`` so that float() never sees malformed
# text such as "0.85." or "1.2.3".
_METRIC_NUMBER = r'([-+]?(?:(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?|nan|inf))'


def _collect_rows(pattern, content, value_columns, max_epoch=None):
    """Return one dict per match of *pattern* in *content*.

    Group 1 of the pattern is the epoch number; the remaining groups are
    converted to float and stored under *value_columns*, in order. Matches
    whose epoch exceeds *max_epoch* are skipped when *max_epoch* is given.
    """
    rows = []
    for match in re.finditer(pattern, content):
        epoch = int(match.group(1))
        if max_epoch is not None and epoch > max_epoch:
            continue
        row = {'epoch': epoch}
        row.update(zip(value_columns, map(float, match.groups()[1:])))
        rows.append(row)
    return rows


def _save_rows(rows, filename, output_dir, label):
    """Write *rows* to a CSV file and print a confirmation; do nothing if empty."""
    if not rows:
        return
    if output_dir is None:
        # Bare filename: resolved against the current working directory.
        path = filename
    else:
        os.makedirs(output_dir, exist_ok=True)
        path = os.path.join(output_dir, filename)
    pd.DataFrame(rows).to_csv(path, index=False)
    print(f"{label}已保存到 {path},共 {len(rows)} 条记录")


def parse_log_file(log_file_path, *, total_epochs=50, output_dir=None):
    """Extract per-epoch metrics from a training log and save them as CSV.

    Two kinds of lines are recognised anywhere in the log text:

    * ``Epoch N/T - Train Loss: x, Train Acc: x, Val Loss: x, Val Acc: x``
      -> ``base_training_metrics.csv``
    * ``Fine-tuning Epoch N/T - Train Acc: x, Val Acc: x``
      -> ``fine_tuning_metrics.csv``

    A CSV file is only written when at least one matching line was found.

    Args:
        log_file_path: Path of the UTF-8 encoded log file to read.
        total_epochs: The ``T`` expected after the slash in ``Epoch N/T``.
            Base-training rows with ``N`` greater than this are ignored.
        output_dir: Directory for the CSV files (created if missing). When
            ``None`` the files are written to the current working directory.

    Raises:
        OSError: If the log file cannot be read or a CSV cannot be written.
    """
    with open(log_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    total_epochs = int(total_epochs)
    epoch_tag = rf'Epoch (\d+)/{total_epochs}'

    # Base-training lines carry both loss and accuracy values.
    base_pattern = (
        epoch_tag
        + r' - Train Loss: ' + _METRIC_NUMBER
        + r', Train Acc: ' + _METRIC_NUMBER
        + r', Val Loss: ' + _METRIC_NUMBER
        + r', Val Acc: ' + _METRIC_NUMBER
    )
    # Only epochs up to the configured total count as base training.
    base_training_data = _collect_rows(
        base_pattern,
        content,
        ('train_loss', 'train_acc', 'val_loss', 'val_acc'),
        max_epoch=total_epochs,
    )

    # Fine-tuning lines only report accuracies. They never match the base
    # pattern because that one requires "Train Loss" right after the epoch.
    fine_tune_pattern = (
        r'Fine-tuning ' + epoch_tag
        + r' - Train Acc: ' + _METRIC_NUMBER
        + r', Val Acc: ' + _METRIC_NUMBER
    )
    fine_tuning_data = _collect_rows(
        fine_tune_pattern,
        content,
        ('train_acc', 'val_acc'),
    )

    # Convert to DataFrames and save as CSV.
    _save_rows(base_training_data, 'base_training_metrics.csv', output_dir, '基础训练数据')
    _save_rows(fine_tuning_data, 'fine_tuning_metrics.csv', output_dir, '微调训练数据')
if __name__ == '__main__':
    # Log file to parse: the first command-line argument when one is given,
    # otherwise the default training log next to the script's working directory.
    log_file_path = sys.argv[1] if len(sys.argv) > 1 else '2025-04-11_14-13-49_train.log'
    parse_log_file(log_file_path)