# Output and logging locations
output_folder: results/TransformerLM_seg_char
save_folder: results/TransformerLM_seg_char/save
train_log: results/TransformerLM_seg_char/train_log.txt
num_workers: 4

data_folder: results/prepare_seg

tokenizer_file: results/tokenizer_seg_bpe5k_char/5000_char.model

tokenizer: &id001 !new:sentencepiece.SentencePieceProcessor

pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  collect_in: results/TransformerLM_seg_char/tokenizer
  loadables:
    tokenizer: *id001
  paths:
    tokenizer: results/tokenizer_seg_bpe5k_char/5000_char.model

train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: results/TransformerLM_seg_char/train_log.txt

# Training parameters
number_of_epochs: 20
batch_size: 64
lr: 1
accumulation_steps: 2
ckpt_interval_minutes: 15

epoch_counter: &id004 !new:speechbrain.utils.epoch_loop.EpochCounter
  limit: 20

# Dataloader options
train_dataloader_opts:
  batch_size: 64
  num_workers: 4
  shuffle: true
  pin_memory: true

valid_dataloader_opts:
  batch_size: 64
  num_workers: 4

test_dataloader_opts:
  batch_size: 64
  num_workers: 4

# Model dimensions
d_model: 576

# Output layer and special-token indices
output_neurons: 5000
blank_index: 0
bos_index: 1
eos_index: 2
unk_index: 0
pad_index: 0

model: &id002 !new:speechbrain.lobes.models.transformer.TransformerLM.TransformerLM
  vocab: 5000
  d_model: 576
  nhead: 6
  num_encoder_layers: 6
  num_decoder_layers: 0
  d_ffn: 1538
  dropout: 0.2
  activation: !name:torch.nn.GELU
  normalize_before: false

modules:
  model: *id002

lr_annealing: &id003 !new:speechbrain.nnet.schedulers.NoamScheduler
  lr_initial: 1
  n_warmup_steps: 1000
  model_size: 576

checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: results/TransformerLM_seg_char/save
  recoverables:
    model: *id002
    scheduler: *id003
    counter: *id004

log_softmax: !new:speechbrain.nnet.activations.Softmax
  apply_log: true

optimizer: !name:torch.optim.Adam
  lr: 0
  betas: (0.9, 0.98)
  eps: 0.000000001

compute_cost: !name:speechbrain.nnet.losses.nll_loss