# Page header captured together with this file (not part of the config):
#   Chordia / configs / training_config.yaml
#   Corolin's picture
#   first commit
#   0a6452f
# 训练配置文件
# Training Configuration
# Basic information about this training run.
# NOTE(review): the nesting was reconstructed because the source text had
# lost its indentation; confirm against the code that loads this file.
training_info:
  experiment_name: "emotion_prediction_v1"
  description: "基于MLP的情绪与生理状态变化预测模型训练"
  seed: 42
# Data configuration.
# NOTE(review): the nesting was reconstructed because the source text had
# lost its indentation. The placement of `augmentation` (under
# `preprocessing`) and of `preload_to_gpu` (under `dataloader`) is a best
# guess; confirm against the code that loads this file.
data:
  # Dataset paths
  train_data_path: "data/train.csv"
  val_data_path: "data/val.csv"
  test_data_path: "data/test.csv"

  # Data preprocessing
  preprocessing:
    # Feature scaling
    feature_scaling:
      method: "standard"  # standard, min_max, robust, none
      pad_features: "standard"  # scaling method for the PAD features
      vitality_feature: "min_max"  # scaling method for the vitality value

    # Data augmentation
    augmentation:
      enabled: false
      noise_std: 0.01
      mixup_alpha: 0.2

  # Data loading
  dataloader:
    batch_size: 2048
    num_workers: 2
    pin_memory: true
    shuffle: true
    drop_last: false
    normalize_features: true
    normalize_labels: false

    # GPU preloading optimization (experimental feature).
    # WARNING: only suitable for small datasets that fit entirely in GPU
    # memory.
    # Pros: removes the CPU-to-GPU transfer overhead; training speeds up
    #       by 1-5%.
    # Cons: uses more GPU memory, does not support data augmentation, and
    #       is not suitable for large datasets.
    preload_to_gpu:
      enabled: true  # whether GPU preloading is enabled
      # Batch size used on the GPU (can be larger, e.g. 4096/8192)
      batch_size: 8192
      apply_to_validation: true  # also apply to the validation set
      # Input feature dimension (used to split features from labels)
      input_dim: 7
      # Output label dimension (ΔPAD has 3 dims; stress is computed
      # dynamically)
      output_dim: 3
# Training hyperparameters.
# NOTE(review): the nesting was reconstructed because the source text had
# lost its indentation. `early_stopping` is placed under `epochs` and
# `multi_task_weights` under `loss` as a best guess; confirm against the
# code that loads this file.
training:
  # Optimizer settings - AdamW combined with L2 regularization
  optimizer:
    type: "AdamW"  # Adam, SGD, AdamW, RMSprop
    learning_rate: 0.0005  # within the 1e-4 to 1e-3 range
    # L2 regularization strength.
    # NOTE(review): a value of 0 appears to switch off the regularization
    # described in the optimizer comment above; confirm this is intended.
    weight_decay: 0
    betas: [0.9, 0.999]
    # Kept in plain decimal notation (equals 1e-8).
    eps: 0.00000001

  # Learning-rate schedule - cosine decay scheduler
  scheduler:
    type: "CosineAnnealingLR"  # StepLR, CosineAnnealingLR, ReduceLROnPlateau
    T_max: 600  # same as max_epochs
    eta_min: 0.00001  # minimum learning rate
    verbose: true

  # Number of training epochs
  epochs:
    max_epochs: 600
    early_stopping:
      enabled: true
      # NOTE(review): the original note here read "monitor 10-20 epochs",
      # which does not match the configured value of 150; confirm which
      # one is intended.
      patience: 150
      min_delta: 0
      # min_delta: 0.0001
      # Options: val_loss, val_mae, val_r2_robust, val_r2_mean
      monitor: "val_mae"
      mode: "min"

  # Loss function
  loss:
    type: "MSELoss"  # MSELoss, L1Loss, SmoothL1Loss, HuberLoss
    reduction: "mean"
    # Multi-task loss weights
    multi_task_weights:
      delta_pad_p: 1.0  # weight of the P dimension
      delta_pad_a: 20.0  # weight of the A dimension
      delta_pad_d: 20.0  # weight of the D dimension
# Validation configuration.
# NOTE(review): the nesting was reconstructed because the source text had
# lost its indentation; confirm against the code that loads this file.
validation:
  # Validation frequency
  val_frequency: 1  # run validation every this many epochs

  # Validation metrics
  metrics:
    - "MSE"
    - "MAE"
    - "RMSE"
    - "R2"
    - "MAPE"

  # Model selection
  model_selection:
    criterion: "val_loss"  # val_loss, val_mae, val_r2
    mode: "min"
# Logging and saving configuration.
# NOTE(review): the nesting was reconstructed because the source text had
# lost its indentation; confirm against the code that loads this file.
logging:
  # Log level
  level: "INFO"  # DEBUG, INFO, WARNING, ERROR

  # Log file
  log_dir: "logs"
  log_file: "training.log"

  # TensorBoard
  tensorboard:
    enabled: true
    log_dir: "runs"
    comment: ""

  # Progress bar
  progress_bar:
    enabled: true
    update_frequency: 10
# Checkpoint saving.
# NOTE(review): the nesting was reconstructed because the source text had
# lost its indentation. This section is kept at the top level; it may
# instead belong under `logging` - confirm against the code that loads
# this file.
checkpointing:
  # Output directory
  save_dir: "checkpoints"

  # Saving strategy
  save_strategy: "best"  # best, last, all

  # File naming
  filename_template: "model_epoch_{epoch}_val_{val_loss:.4f}.pth"

  # Items stored in each checkpoint
  save_items:
    - "model_state_dict"
    - "optimizer_state_dict"
    - "scheduler_state_dict"
    - "epoch"
    - "loss"
    - "metrics"
    - "config"
# Hardware configuration.
# NOTE(review): the nesting was reconstructed because the source text had
# lost its indentation; confirm against the code that loads this file.
hardware:
  # Device selection
  device: "auto"  # auto, cpu, cuda, mps

  # GPU settings
  gpu:
    id: 0  # GPU ID
    memory_fraction: 0.9  # fraction of GPU memory to use
    allow_growth: true

  # Mixed-precision training
  mixed_precision:
    enabled: true
    opt_level: "O1"  # O0, O1, O2, O3
# Debug configuration.
# NOTE(review): the nesting was reconstructed because the source text had
# lost its indentation; confirm against the code that loads this file.
debug:
  # Debug mode
  enabled: false

  # Fast training (for debugging)
  fast_train:
    enabled: false
    max_epochs: 5
    batch_size: 8
    subset_size: 100

  # Gradient checking
  gradient_checking:
    enabled: false
    clip_value: 1.0

  # Data checking
  data_checking:
    enabled: true
    check_nan: true
    check_inf: true
    check_range: true
# Experiment tracking.
# NOTE(review): the nesting was reconstructed because the source text had
# lost its indentation. `tags` and `params` are placed under `mlflow` as a
# best guess; confirm against the code that loads this file.
experiment_tracking:
  # Whether experiment tracking is enabled
  enabled: false

  # MLflow settings
  mlflow:
    tracking_uri: "http://localhost:5000"
    experiment_name: "emotion_prediction"
    run_name: null
    tags: {}
    params: {}