---
# RVC v2 training configuration
# NumberBlocks One voice cloning
# Model configuration: signal-processing frame parameters plus the feature,
# pitch and speaker-embedding settings.
model:
  name: "RVC v2"
  # NOTE(review): the data section repeats this rate as `sample_rate: 48000`;
  # keep the two values in sync.
  sampling_rate: 48000  # sampling rate
  hop_length: 512  # frame shift (hop)
  win_length: 2048  # window length
  n_fft: 2048  # number of FFT points

  # Feature extraction
  spec_n_mels: 128  # number of mel bands
  spec_fmin: 0  # minimum frequency
  # NOTE(review): 8000 is well below the 24000 Nyquist limit implied by a
  # 48000 sampling rate - confirm the cap is intentional.
  spec_fmax: 8000  # maximum frequency

  # Pitch extraction
  pitch_fmin: 50  # minimum pitch (Hz)
  pitch_fmax: 1100  # maximum pitch (Hz)

  # Embedding
  speaker_embedder: "contentvec"  # speaker embedder
  # NOTE(review): upstream RVC v2 pipelines typically use 768-dim ContentVec
  # features (256 is the v1 size) - confirm against the model code.
  embedder_dim: 256  # embedding dimension
# Training configuration: optimisation hyper-parameters, LR schedule,
# validation cadence, loss weights and data augmentation.
training:
  batch_size: 8  # batch size
  epochs: 500  # number of training epochs
  learning_rate: 0.0001  # learning rate
  weight_decay: 0.0001  # weight decay

  # Optimizer
  optimizer: "AdamW"
  betas: [0.9, 0.999]
  eps: 1.0e-8

  # Learning-rate scheduler
  scheduler: "StepLR"
  step_size: 100  # adjust every 100 epochs
  gamma: 0.5  # learning-rate decay factor

  # Validation
  val_every_n_epochs: 25  # validate once every 25 epochs
  # NOTE(review): the output section sets `save_best_only: true`; confirm how
  # the trainer reconciles that with keeping the top 3 models.
  save_top_k: 3  # keep the 3 best models
  # NOTE(review): unit not stated (epochs vs. validation rounds). With
  # validation every 25 epochs, 50 epochs would mean only 2 checks - confirm.
  early_stopping_patience: 50  # early-stopping patience

  # Loss weights
  loss_mel: 1.0  # mel-spectrogram loss weight
  loss_pitch: 1.0  # pitch loss weight
  loss_kl: 0.1  # KL-divergence loss weight

  # Data augmentation
  # NOTE(review): placement under `training` is inferred; the flattened
  # source lost its indentation - confirm against the config loader.
  augmentation:
    enabled: true
    pitch_shift: [-2, 2]  # pitch shift (semitones)
    time_stretch: [0.9, 1.1]  # time stretch
    gain: [-3, 3]  # gain (dB)
# Data configuration: dataset locations, train/validation split, clip
# length limits and audio preprocessing.
data:
  train_dir: "data/training_data/audio"  # training data directory
  # Same directory as train_dir: the validation set is split off the
  # training set using val_split.
  val_dir: "data/training_data/audio"  # validation data directory
  val_split: 0.1  # validation fraction (10%)
  sample_rate: 48000  # audio sampling rate
  duration: 5.0  # maximum clip duration (seconds)
  min_duration: 1.0  # minimum clip duration (seconds)

  # Preprocessing
  # NOTE(review): placement under `data` is inferred; the flattened source
  # lost its indentation - confirm against the config loader.
  preprocessing:
    normalize: true  # volume normalization
    trim_silence: true  # trim silence
    trim_threshold: -40  # silence threshold (dB)
# Model architecture: three Conv1d stacks (feature extractor, encoder,
# decoder), each described by parallel channel/kernel/stride lists.
# NOTE(review): list lengths follow no single convention - feature_extractor
# has 4 channels / 3 kernels, encoder 3 / 3, decoder 4 / 4. Confirm how the
# model builder pairs channels with kernel_sizes and strides.
architecture:
  # Feature extractor
  feature_extractor:
    type: "Conv1d"
    channels: [1, 64, 128, 256]  # convolution channel counts
    kernel_sizes: [7, 7, 7]  # convolution kernel sizes
    strides: [2, 2, 2]  # strides

  # Encoder
  encoder:
    type: "Conv1d"
    channels: [256, 128, 64]  # convolution channel counts
    kernel_sizes: [3, 3, 3]  # convolution kernel sizes
    strides: [1, 1, 1]  # strides

  # Decoder
  decoder:
    type: "Conv1d"
    channels: [64, 128, 256, 1]  # convolution channel counts
    kernel_sizes: [3, 3, 3, 7]  # convolution kernel sizes
    strides: [1, 1, 1, 1]  # strides
# Output configuration: where checkpoints and logs are written and how
# often checkpoints are saved.
output:
  save_dir: "checkpoints"  # checkpoint directory
  log_dir: "logs"  # log directory
  checkpoint_every_n_epochs: 25  # save once every 25 epochs
  save_best_only: true  # save only the best model
  overwrite: false  # whether to overwrite existing checkpoints
# Inference configuration: chunked processing with overlap/crossfade,
# followed by post-processing of the generated audio.
inference:
  chunk_size: 48000  # chunk size (1 second)
  overlap: 12000  # overlap region (0.25 seconds)
  crossfade_overlap: 0.25  # crossfade overlap

  # Post-processing
  # NOTE(review): placement under `inference` is inferred; the flattened
  # source lost its indentation - confirm against the config loader.
  post_processing:
    normalize: true  # volume normalization
    trim_silence: true  # trim silence
    fade_ms: 50  # fade in/out (milliseconds)
# Logging configuration: verbosity and enabled log sinks.
logging:
  level: "INFO"  # log level
  console: true  # console output
  tensorboard: true  # TensorBoard logging
  wandb: false  # WandB logging (optional)