EvanDu's picture
Upload folder using huggingface_hub
c3ece38 verified
import re
import pandas as pd
# One unsigned decimal number, optionally in scientific notation (e.g. "0.95",
# "1.", ".5", "2.5e-01").  Unlike a plain "[\d.]+" character class it cannot
# swallow a sentence-ending period ("0.95." -> "0.95") and it does not cut a
# value such as "1e-05" down to "1".
_METRIC_NUMBER = r'((?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)'


def _write_metrics_csv(records, filename, label, output_dir=None):
    """Write *records* (a list of dicts) to a CSV file and print a summary line.

    Nothing is written or printed when *records* is empty.  When *output_dir*
    is given it is created if missing and the file is placed inside it;
    otherwise *filename* is used as-is (relative to the working directory).
    """
    if not records:
        return
    if output_dir:
        import os  # local import: only needed for the optional output_dir

        os.makedirs(output_dir, exist_ok=True)
        csv_path = os.path.join(output_dir, filename)
    else:
        csv_path = filename
    pd.DataFrame(records).to_csv(csv_path, index=False)
    print(f"{label}已保存到 {csv_path},共 {len(records)} 条记录")


def parse_log_file(log_file_path, total_epochs=50, output_dir=None):
    """Extract per-epoch metrics from a training log and save them as CSV files.

    Two kinds of log entries are recognised anywhere in the file:

    * base training:
      ``Epoch <n>/<total> - Train Loss: x, Train Acc: x, Val Loss: x, Val Acc: x``
    * fine-tuning:
      ``Fine-tuning Epoch <n>/<total> - Train Acc: x, Val Acc: x``

    Base-training rows are written to ``base_training_metrics.csv`` and
    fine-tuning rows to ``fine_tuning_metrics.csv``.  A file is only written
    (and a summary line only printed) when at least one matching entry exists.

    Args:
        log_file_path: Path of the UTF-8 encoded log file to read.
        total_epochs: The ``<total>`` that must appear after the slash in an
            entry.  Base-training entries whose epoch number exceeds it are
            dropped.  Pass ``None`` to accept any total and keep every epoch.
            Defaults to 50.
        output_dir: Directory for the CSV files; created if it does not exist.
            ``None`` (default) writes into the current working directory.

    Returns:
        None.

    Raises:
        OSError: If the log file cannot be opened or a CSV cannot be written.
    """
    with open(log_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    if total_epochs is None:
        epoch_limit = None
        total = r'\d+'
    else:
        epoch_limit = int(total_epochs)
        total = str(epoch_limit)

    base_pattern = re.compile(
        rf'Epoch (\d+)/{total} - Train Loss: {_METRIC_NUMBER}, '
        rf'Train Acc: {_METRIC_NUMBER}, Val Loss: {_METRIC_NUMBER}, '
        rf'Val Acc: {_METRIC_NUMBER}'
    )
    fine_tune_pattern = re.compile(
        rf'Fine-tuning Epoch (\d+)/{total} - Train Acc: {_METRIC_NUMBER}, '
        rf'Val Acc: {_METRIC_NUMBER}'
    )

    # Collect the base-training entries.
    base_training_data = []
    for match in base_pattern.finditer(content):
        epoch = int(match.group(1))
        # Epochs beyond the configured total are not treated as base training.
        if epoch_limit is not None and epoch > epoch_limit:
            continue
        train_loss, train_acc, val_loss, val_acc = (
            float(value) for value in match.groups()[1:]
        )
        base_training_data.append({
            'epoch': epoch,
            'train_loss': train_loss,
            'train_acc': train_acc,
            'val_loss': val_loss,
            'val_acc': val_acc,
        })

    # Collect the fine-tuning entries (these log accuracy only, no loss).
    fine_tuning_data = [
        {
            'epoch': int(match.group(1)),
            'train_acc': float(match.group(2)),
            'val_acc': float(match.group(3)),
        }
        for match in fine_tune_pattern.finditer(content)
    ]

    # Convert to DataFrames and save as CSV.
    _write_metrics_csv(
        base_training_data, 'base_training_metrics.csv', '基础训练数据', output_dir
    )
    _write_metrics_csv(
        fine_tuning_data, 'fine_tuning_metrics.csv', '微调训练数据', output_dir
    )
if __name__ == '__main__':
    # Script entry point: parse the training log named below, looked up
    # relative to the current working directory.
    parse_log_file(log_file_path='2025-04-11_14-13-49_train.log')