---
# Experiment configuration pairing an encoder from the `huggingface` library
# (bert-base-uncased) with a decoder from the `nemo` library; source language
# is `en`, target language is `de`.
#
# NOTE(review): the section nesting below was rebuilt from a copy that had
# lost its indentation. The dataset, optim, tokenizer, encoder, decoder and
# head sections are assumed to live under `model` -- confirm against the
# schema of the code that loads this file.

name: HuggingFaceEncoder
do_training: true
do_testing: false

model:
  # Model-wide settings.
  beam_size: 4
  len_pen: 0.6
  max_generation_delta: -1
  label_smoothing: 0.1
  # encoder_tokenizer and decoder_tokenizer below use different libraries.
  shared_tokenizer: false
  preproc_out_dir: null
  src_language: 'en'
  tgt_language: 'de'

  # Training data. Source/target file names are left null in this file.
  train_ds:
    src_file_name: null
    tgt_file_name: null
    use_tarred_dataset: false
    # Tarred-dataset settings; presumably only consulted when
    # use_tarred_dataset is true -- TODO confirm.
    tar_file_prefix: parallel
    tar_files: null
    metadata_file: null
    lines_per_dataset_fragment: 1000000
    num_batches_per_tarfile: 100
    tar_shuffle_n: 100
    shard_strategy: scatter
    n_preproc_jobs: -2  # NOTE(review): negative value is a sentinel; confirm
    tokens_in_batch: 512
    clean: true
    max_seq_length: 512
    shuffle: true
    num_samples: -1  # presumably "no limit" -- confirm
    drop_last: false
    pin_memory: false
    num_workers: 8

  # Validation data. Same keys as train_ds minus the tarred-dataset options;
  # `clean` and `shuffle` are disabled here.
  validation_ds:
    src_file_name: null
    tgt_file_name: null
    tokens_in_batch: 512
    clean: false
    max_seq_length: 512
    shuffle: false
    num_samples: -1
    drop_last: false
    pin_memory: false
    num_workers: 8

  # Test data. Mirrors validation_ds value for value.
  test_ds:
    src_file_name: null
    tgt_file_name: null
    tokens_in_batch: 512
    clean: false
    max_seq_length: 512
    shuffle: false
    num_samples: -1
    drop_last: false
    pin_memory: false
    num_workers: 8

  # Optimizer and its learning-rate schedule.
  optim:
    name: adam
    lr: 0.001
    betas:
      - 0.9
      - 0.98
    weight_decay: 0.0
    sched:
      name: InverseSquareRootAnnealing
      min_lr: 0.0
      last_epoch: -1
      warmup_ratio: 0.1

  # Tokenizer for the encoder side; all model/vocab fields are unset here.
  encoder_tokenizer:
    library: huggingface
    tokenizer_model: null
    vocab_file: null
    special_tokens: null
    vocab_size: null

  # Tokenizer for the decoder side; all model/vocab fields are unset here.
  decoder_tokenizer:
    library: yttm
    tokenizer_model: null
    vocab_file: null
    special_tokens: null
    vocab_size: null

  # Encoder selected by name from the `huggingface` library.
  # `pretrained: false` -- presumably only the architecture is used, not the
  # published weights; confirm with the loader.
  encoder:
    library: huggingface
    model_name: bert-base-uncased
    pretrained: false

  # Decoder from the `nemo` library, described by explicit hyperparameters
  # because model_name is null.
  decoder:
    library: nemo
    model_name: null
    pretrained: false
    max_sequence_length: 512
    num_token_types: 2
    embedding_dropout: 0.1
    learn_positional_encodings: false
    hidden_size: 512
    inner_size: 2048
    num_layers: 6
    num_attention_heads: 8
    ffn_dropout: 0.1
    attn_score_dropout: 0.1
    attn_layer_dropout: 0.1
    hidden_act: relu
    pre_ln: false

  # Output head.
  head:
    num_layers: 1
    activation: relu
    log_softmax: true
    dropout: 0.0
    use_transformer_init: true

# Trainer settings (presumably forwarded to a PyTorch Lightning Trainer --
# confirm). Checkpointing and logging are switched off in this section.
trainer:
  devices: 4
  num_nodes: 1
  max_epochs: 200
  precision: 16
  accelerator: gpu
  enable_checkpointing: false
  logger: false
  log_every_n_steps: 50
  check_val_every_n_epoch: 1
  benchmark: false

# Experiment manager settings; `name` repeats the top-level experiment name.
exp_manager:
  name: HuggingFaceEncoder
  files_to_copy: []