model:
  # Acoustic model, instantiated via the Hydra-style `_target_` path.
  _target_: src.model.ConformerModel
  input_dim: 128  # NOTE(review): presumably n_mels of the MelSpectrogram transform below — confirm
# Experiment tracker; kwargs below are passed to the CometML writer wrapper.
writer:
  _target_: src.logger.CometMLWriter
  project_name: pytorch_template_asr_example
  workspace: null  # NOTE(review): presumably falls back to the account's default workspace — confirm
  run_name: conformer_30m
  mode: online  # NOTE(review): assumed to mean live upload to the Comet server (vs. offline dump) — confirm
  loss_names:  # batch keys that get logged as loss curves — TODO confirm against trainer
  - loss
  log_checkpoints: false  # checkpoints are kept locally only, not uploaded
  id_length: 32  # length of generated run ids (matches the 32-char run_id below)
  run_id: m2guzao93o9ytjxogwt78mftkyiqalsf  # fixed id — NOTE(review): presumably so a resumed run appends to the same experiment; confirm
# Evaluation metrics, split by phase.
metrics:
  train: []  # no metrics computed on training batches
  inference:
  - _target_: src.metrics.ArgmaxCERMetric  # character error rate from argmax (greedy) decoding
    name: CER_(Argmax)
  - _target_: src.metrics.ArgmaxWERMetric  # word error rate from argmax (greedy) decoding
    name: WER_(Argmax)
  - _target_: src.metrics.WER  # NOTE(review): presumably a non-greedy decoding variant of WER — confirm in src.metrics
    name: WER
  - _target_: src.metrics.CER
    name: CER
# LibriSpeech splits; instance_transforms are resolved from the `transforms`
# stanza via OmegaConf `${...}` interpolation.
datasets:
  train:
    _target_: src.datasets.LibrispeechDataset
    part: train-other-500  # the 500h "other" training subset
    instance_transforms: ${transforms.instance_transforms.train}
  val:
    _target_: src.datasets.LibrispeechDataset
    part: test-clean  # NOTE(review): validation runs on the test-clean split — intentional? confirm
    instance_transforms: ${transforms.instance_transforms.inference}
  test:
    _target_: src.datasets.LibrispeechDataset
    part: test-other
    instance_transforms: ${transforms.instance_transforms.inference}
# Shared DataLoader settings (partial kwargs for torch.utils.data.DataLoader).
dataloader:
  _target_: torch.utils.data.DataLoader
  batch_size: 30
  num_workers: 2  # worker subprocesses for data loading
  pin_memory: true  # page-locked host memory; speeds up host-to-GPU copies
# Per-sample transforms: waveform augmentations (train only) plus Mel
# spectrogram extraction for both phases. No batch-level transforms are used.
transforms:
  instance_transforms:
    train:
      get_spectrogram:
        _target_: torchaudio.transforms.MelSpectrogram
        sample_rate: 16000  # LibriSpeech native sample rate
      audio:
        # Waveform-level augmentation pipeline, applied before the spectrogram.
        _target_: torchvision.transforms.v2.Compose
        transforms:
        - _target_: src.transforms.wav_augs.Gain
          sample_rate: 16000
          min_gain_in_db: -6
          max_gain_in_db: 6
          p: 0.2  # NOTE(review): presumably the probability of applying this aug — confirm in wav_augs
        - _target_: src.transforms.wav_augs.Shift
          p: 0.2
        - _target_: src.transforms.wav_augs.PitchShift
          min_semitones: -2
          max_semitones: 2
          p: 0.2
        - _target_: src.transforms.wav_augs.Noise
          p: 0.3
    inference:
      # Inference: spectrogram only, no augmentation.
      get_spectrogram:
        _target_: torchaudio.transforms.MelSpectrogram
        sample_rate: 16000
  batch_transforms:
    train: null
    inference: null
optimizer:
  _target_: torch.optim.AdamW
  lr: 5.0e-05  # initial LR — NOTE(review): presumably superseded by the OneCycleLR schedule; confirm
# One-cycle LR policy; total steps = epoch_len * n_epochs via interpolation.
lr_scheduler:
  _target_: torch.optim.lr_scheduler.OneCycleLR
  max_lr: 0.0001  # peak LR of the cycle
  pct_start: 0.1  # fraction of total steps spent ramping up to max_lr
  steps_per_epoch: ${trainer.epoch_len}
  epochs: ${trainer.n_epochs}
  anneal_strategy: cos  # cosine annealing after the warmup phase
# CTC loss wrapped to accept the project's batch dict.
loss_function:
  _target_: src.loss.CTCLossWrapper
# Text <-> token-id codec used for CTC targets and decoding.
text_encoder:
  _target_: src.text_encoder.CTCTextEncoder
trainer:
  log_step: 200  # NOTE(review): presumably iterations between writer logs — confirm in trainer code
  n_epochs: 150
  epoch_len: 1300  # iterations per "epoch"; also feeds OneCycleLR via steps_per_epoch
  device_tensors:  # batch keys moved to the compute device each step
  - spectrogram
  - text_encoded
  resume_from: checkpoint-epoch62.pth  # continue training from this checkpoint
  device: auto  # NOTE(review): presumably "use CUDA if available" — confirm device-selection logic
  override: false  # NOTE(review): presumably "do not overwrite an existing save_dir" — confirm
  monitor: min val_WER_(Argmax)  # best-model criterion: lowest validation WER (argmax decoding)
  save_period: 5  # save a checkpoint every 5 epochs
  early_stop: ${trainer.n_epochs}  # equals n_epochs => early stopping is effectively disabled
  save_dir: saved
  seed: 1  # RNG seed for reproducibility