# NeMo_Canary: examples/audio/conf/predictive_conformer.yaml
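#
# Summary (inferred from the settings below): a predictive speech enhancement
# model in which a Conformer estimator maps the spectrogram of a
# single-channel noisy signal to a single-channel estimate, trained with a
# time-domain MSE loss and validated with SI-SDR.
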
name: predictive_conformer

model:
  type: predictive
  sample_rate: 16000
  skip_nan_grad: false
  num_outputs: 1

  # non-streaming config, use input normalization
  normalize_input: true # normalize the input signal to 0 dBFS

  train_ds:
    shar_path: ???
    use_lhotse: true
    truncate_duration: 4.09 # number of STFT frames = 1 + (truncate_duration * sample_rate) // encoder.hop_length = 512
    truncate_offset_type: random
    batch_size: 64 # batch size may be increased based on the available memory
    shuffle: true
    num_workers: 8
    pin_memory: true
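
    # Frame-count arithmetic behind the truncate_duration comment above:
    #   4.09 s * 16000 Hz = 65440 samples
    #   1 + 65440 // 128 (hop) = 1 + 511 = 512 STFT frames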

  validation_ds:
    manifest_filepath: ???
    input_key: noisy_filepath
    target_key: clean_filepath
    batch_size: 8
    shuffle: false
    num_workers: 4
    pin_memory: true
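
    # Each manifest line is a JSON object whose keys match input_key and
    # target_key above. A hypothetical entry (the paths and the duration field
    # are placeholders/assumptions, not values from this repo):
    #   {"noisy_filepath": "/data/noisy/utt1.wav", "clean_filepath": "/data/clean/utt1.wav", "duration": 4.0}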

  encoder:
    _target_: nemo.collections.audio.modules.transforms.AudioToSpectrogram
    fft_length: 510 # number of subbands in the STFT = fft_length // 2 + 1 = 256
    hop_length: 128
    magnitude_power: 0.5
    scale: 0.33
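
  # With fft_length = 510, the analysis transform yields 510 // 2 + 1 = 256
  # subbands, which is why estimator.feat_in below is set to 256.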

  decoder:
    _target_: nemo.collections.audio.modules.transforms.SpectrogramToAudio
    fft_length: ${model.encoder.fft_length}
    hop_length: ${model.encoder.hop_length}
    magnitude_power: ${model.encoder.magnitude_power}
    scale: ${model.encoder.scale}

  estimator:
    _target_: nemo.collections.audio.parts.submodules.conformer.SpectrogramConformer
    in_channels: 1 # single-channel noisy input
    out_channels: 1 # single-channel estimate
    feat_in: 256 # input feature dimension = number of subbands
    n_layers: 8 # number of layers in the model
    d_model: 512 # hidden size of the model
    subsampling_factor: 1 # subsampling factor for the model
    self_attention_model: 'rel_pos'
    n_heads: 8 # number of attention heads
    # streaming-related arguments
    # - this is a non-streaming config
    conv_context_size: null
    conv_norm_type: 'layer_norm'
    causal_downsampling: false
    att_context_size: [-1, -1]
    att_context_style: 'regular'
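
    # att_context_size [-1, -1] means unlimited left and right attention
    # context, consistent with fully offline (non-streaming) processing.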

  loss:
    _target_: nemo.collections.audio.losses.MSELoss # computed in the time domain

  metrics:
    val:
      sisdr: # output SI-SDR
        _target_: torchmetrics.audio.ScaleInvariantSignalDistortionRatio
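
  # The validation metric above is logged under the name val_sisdr, which is
  # the key monitored by the checkpointing and early-stopping callbacks in
  # exp_manager below.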

  optim:
    name: adamw
    lr: 1e-3
    # optimizer arguments
    betas: [0.9, 0.98]
    weight_decay: 1e-3
    # scheduler setup
    sched:
      name: CosineAnnealing
      # scheduler config override
      warmup_steps: null
      warmup_ratio: 0.1
      min_lr: 1e-5
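
      # With warmup_steps unset, the warmup length is derived from
      # warmup_ratio as a fraction of the total step count, i.e. roughly
      # 0.1 * trainer.max_steps (assuming NeMo's usual scheduler setup).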

trainer:
  devices: -1 # number of GPUs; -1 uses all available GPUs
  num_nodes: 1
  max_epochs: -1
  max_steps: ??? # needs to be set for shar datasets
  limit_train_batches: ??? # number of batches to train on in each pseudo-epoch
  val_check_interval: ??? # run validation after this many training steps
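  # Lhotse shar datasets are iterable and have no natural epoch boundary, so
  # limit_train_batches defines the pseudo-epoch length and val_check_interval
  # paces validation in steps; all three ??? values must be set explicitly.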
  accelerator: auto
  strategy: ddp
  use_distributed_sampler: false # required for Lhotse
  accumulate_grad_batches: 1
  gradient_clip_val: null
  precision: 32 # set to 16 to enable AMP (O1/O2)
  log_every_n_steps: 100 # logging interval
  enable_progress_bar: true
  num_sanity_val_steps: 0 # number of validation steps to run as a sanity check before training starts; 0 disables the check
  check_val_every_n_epoch: null # epoch-based validation is disabled; frequency is controlled by val_check_interval instead
  sync_batchnorm: true
  enable_checkpointing: false # provided by exp_manager
  logger: false # provided by exp_manager

exp_manager:
  exp_dir: null
  name: ${name}
  # logging
  create_tensorboard_logger: true
  # checkpointing
  create_checkpoint_callback: true
  checkpoint_callback_params:
    # in case of multiple validation sets, the first one is used
    monitor: val_sisdr
    mode: max
    save_top_k: 5
    always_save_nemo: true # save checkpoints as .nemo files instead of PTL checkpoints

  # early stopping
  create_early_stopping_callback: true
  early_stopping_callback_params:
    monitor: val_sisdr
    mode: max
    min_delta: 0.0
    patience: 20 # patience counted in validation checks (val_check_interval steps, since check_val_every_n_epoch is null)
    verbose: true
    strict: false # should be false to avoid a runtime error where EarlyStopping reports the monitored metric as unavailable, which can happen with resumed training
  resume_from_checkpoint: null # path to a checkpoint to continue training from; restores the whole state, including epoch, step, LR schedulers, apex, etc.
  # set both of the following to true to resume training from an existing checkpoint
  resume_if_exists: false
  resume_ignore_no_checkpoint: false

  # this section may be used to create a W&B logger
  create_wandb_logger: false
  wandb_logger_kwargs:
    name: null
    project: null
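
# Example launch with Hydra overrides filling in the mandatory ??? values.
# The script name is an assumption (NeMo's audio examples include a generic
# audio-to-audio training entry point; adjust the path to your checkout), and
# all paths and step counts are placeholders:
#
#   python audio_to_audio_train.py --config-path=conf --config-name=predictive_conformer \
#     model.train_ds.shar_path=/path/to/train_shar \
#     model.validation_ds.manifest_filepath=/path/to/val_manifest.json \
#     trainer.max_steps=100000 trainer.limit_train_batches=1000 \
#     trainer.val_check_interval=1000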