# conformer-ASR / config.yaml
# NickolayFM's picture
# Upload logs
# 08276da verified
# Acoustic model. `_target_` names the class to build (presumably via
# Hydra-style instantiation — confirm against the loader).
model:
  _target_: src.model.ConformerModel
  # NOTE(review): presumably the number of mel bins produced by the
  # spectrogram transform — confirm it matches MelSpectrogram's n_mels.
  input_dim: 128
# Experiment-tracking writer (class: src.logger.CometMLWriter).
writer:
  _target_: src.logger.CometMLWriter
  project_name: pytorch_template_asr_example
  workspace: null
  run_name: conformer_30m
  mode: online
  # Sequence of loss names; this run has a single entry.
  loss_names:
    - loss
  log_checkpoints: false
  id_length: 32
  # 32 characters long, matching id_length above.
  run_id: m2guzao93o9ytjxogwt78mftkyiqalsf
# Metrics per phase. Each list item pairs a metric class (`_target_`) with
# the display name it is reported under.
metrics:
  # No metrics are configured for the training phase.
  train: []
  inference:
    - _target_: src.metrics.ArgmaxCERMetric
      name: CER_(Argmax)
    - _target_: src.metrics.ArgmaxWERMetric
      name: WER_(Argmax)
    - _target_: src.metrics.WER
      name: WER
    - _target_: src.metrics.CER
      name: CER
# Dataset partitions. All three use src.datasets.LibrispeechDataset and differ
# only in the LibriSpeech part and the instance-transform set they reference.
datasets:
  train:
    _target_: src.datasets.LibrispeechDataset
    part: train-other-500
    instance_transforms: ${transforms.instance_transforms.train}
  # NOTE(review): validation uses the test-clean part rather than a dev part —
  # confirm this is intentional, since trainer.monitor selects on val metrics.
  val:
    _target_: src.datasets.LibrispeechDataset
    part: test-clean
    instance_transforms: ${transforms.instance_transforms.inference}
  test:
    _target_: src.datasets.LibrispeechDataset
    part: test-other
    instance_transforms: ${transforms.instance_transforms.inference}
# Arguments for torch.utils.data.DataLoader.
dataloader:
  _target_: torch.utils.data.DataLoader
  batch_size: 30
  num_workers: 2
  pin_memory: true
# Data transforms. `instance_transforms` holds one set per phase, referenced
# from the datasets section via ${transforms.instance_transforms.<phase>}.
# `batch_transforms` is disabled (null) for both phases.
transforms:
  instance_transforms:
    train:
      # Mel spectrogram extraction at a 16 kHz sample rate.
      get_spectrogram:
        _target_: torchaudio.transforms.MelSpectrogram
        sample_rate: 16000
      # Waveform augmentations composed in order; each has its own
      # probability `p`.
      # NOTE(review): presumably keyed by the name of the tensor the
      # transform is applied to — confirm against the dataset code.
      audio:
        _target_: torchvision.transforms.v2.Compose
        transforms:
          - _target_: src.transforms.wav_augs.Gain
            sample_rate: 16000
            min_gain_in_db: -6
            max_gain_in_db: 6
            p: 0.2
          - _target_: src.transforms.wav_augs.Shift
            p: 0.2
          - _target_: src.transforms.wav_augs.PitchShift
            min_semitones: -2
            max_semitones: 2
            p: 0.2
          - _target_: src.transforms.wav_augs.Noise
            p: 0.3
    # Inference applies spectrogram extraction only, with no augmentations.
    inference:
      get_spectrogram:
        _target_: torchaudio.transforms.MelSpectrogram
        sample_rate: 16000
  batch_transforms:
    train: null
    inference: null
# Optimizer: torch.optim.AdamW with a base learning rate of 5e-5.
# NOTE(review): lr_scheduler uses OneCycleLR, which presumably drives the
# effective learning rate from its own max_lr — confirm this value matters.
optimizer:
  _target_: torch.optim.AdamW
  lr: 5.0e-05
# Learning-rate schedule: torch.optim.lr_scheduler.OneCycleLR.
lr_scheduler:
  _target_: torch.optim.lr_scheduler.OneCycleLR
  max_lr: 0.0001
  pct_start: 0.1
  # Both values are interpolated from the trainer section, so the schedule
  # length follows trainer.epoch_len and trainer.n_epochs.
  steps_per_epoch: ${trainer.epoch_len}
  epochs: ${trainer.n_epochs}
  anneal_strategy: cos
# Training loss (class: src.loss.CTCLossWrapper); no extra arguments are set.
loss_function:
  _target_: src.loss.CTCLossWrapper
# Text encoder (class: src.text_encoder.CTCTextEncoder); no extra arguments
# are set.
text_encoder:
  _target_: src.text_encoder.CTCTextEncoder
# Training-loop settings.
trainer:
  log_step: 200
  n_epochs: 150
  epoch_len: 1300
  # NOTE(review): presumably the batch entries that get moved to the compute
  # device — confirm against the trainer code.
  device_tensors:
    - spectrogram
    - text_encoded
  # This run resumes from an existing checkpoint file.
  resume_from: checkpoint-epoch62.pth
  device: auto
  override: false
  # NOTE(review): presumably "<mode> <metric name>"; the metric name combines
  # the `val` partition with the `WER_(Argmax)` metric — confirm the format.
  monitor: min val_WER_(Argmax)
  save_period: 5
  # Interpolates to n_epochs, i.e. the same value as the total epoch count.
  early_stop: ${trainer.n_epochs}
  save_dir: saved
  seed: 1