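# JoeyNMT v2 configuration: South Azerbaijani (azb) -> Persian (fa) translation
# with a shared SentencePiece BPE vocabulary. Assuming the standard JoeyNMT CLI,
# a run would be started with: python -m joeynmt train <this_file>.yaml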
name: "data_sp"
joeynmt_version: "2.0.0"

data:
    train: "RESULTS_azb2fa/data/train"
    dev: "RESULTS_azb2fa/data/validation"
    test: "RESULTS_azb2fa/data/test"
    dataset_type: "huggingface"
    sample_dev_subset: 200
    src:
        lang: "azb"
        max_length: 100
        lowercase: False
        normalize: False
        level: "bpe"
        voc_limit: 2000
        voc_min_freq: 1
        voc_file: "RESULTS_azb2fa/data/vocab.txt"
        tokenizer_type: "sentencepiece"
        tokenizer_cfg:
            model_file: "RESULTS_azb2fa/data/sp.model"
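    # Target side: identical preprocessing; both languages share one joint
    # vocabulary and one SentencePiece model.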
    trg:
        lang: "fa"
        max_length: 100
        lowercase: False
        normalize: False
        level: "bpe"
        voc_limit: 2000
        voc_min_freq: 1
        voc_file: "RESULTS_azb2fa/data/vocab.txt"
        tokenizer_type: "sentencepiece"
        tokenizer_cfg:
            model_file: "RESULTS_azb2fa/data/sp.model"
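    # NOTE (assumption, not stated in this config): a shared BPE model of this
    # size is typically trained with SentencePiece on the concatenated
    # bilingual text, e.g.
    #   spm_train --input=<train.azb+train.fa> --model_prefix=sp \
    #             --vocab_size=2000 --model_type=bpe
    # The input file name is hypothetical; only sp.model and vocab.txt are
    # referenced above.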

testing:
    n_best: 1
    beam_size: 5
    beam_alpha: 1.0
    batch_size: 512
    batch_type: "token"
    max_output_length: 100
    eval_metrics: ["bleu"]
    sacrebleu_cfg:
        tokenize: "13a"
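    # Decoding uses beam search (size 5, length penalty alpha 1.0) and reports
    # sacreBLEU with the "13a" tokenizer. Assuming the standard JoeyNMT CLI,
    # the test split is scored with: python -m joeynmt test <this_file>.yaml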

training:
    random_seed: 42
    optimizer: "adam"
    normalization: "tokens"
    adam_betas: [0.9, 0.999]
    scheduling: "warmupinversesquareroot"
    learning_rate_warmup: 2000
    learning_rate: 0.0002
    learning_rate_min: 0.00000001
    weight_decay: 0.0
    label_smoothing: 0.1
    loss: "crossentropy"
    batch_size: 512
    batch_type: "token"
    batch_multiplier: 4
    early_stopping_metric: "bleu"
    epochs: 500
    updates: 2000000000
    validation_freq: 1000
    logging_freq: 100
    model_dir: "RESULTS_azb2fa/model"
    overwrite: True
    shuffle: True
    use_cuda: True
    print_valid_sents: [0, 1, 2, 3]
    keep_best_ckpts: 3
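    # With batch_multiplier 4, gradients are accumulated over 4 batches of 512
    # tokens, i.e. roughly 2048 tokens per update. updates is set effectively
    # to infinity, so the 500-epoch cap ends training; the 3 checkpoints with
    # the best dev BLEU (validated every 1000 updates) are kept.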

model:
    initializer: "xavier"
    bias_initializer: "zeros"
    init_gain: 1.0
    embed_initializer: "xavier"
    embed_init_gain: 1.0
    tied_embeddings: True
    tied_softmax: True
    encoder:
        type: "transformer"
        num_layers: 2
        num_heads: 4
        embeddings:
            embedding_dim: 256
            scale: True
            dropout: 0.2
        hidden_size: 256
        ff_size: 1024
        dropout: 0.1
        layer_norm: "pre"
    decoder:
        type: "transformer"
        num_layers: 2
        num_heads: 8
        embeddings:
            embedding_dim: 256
            scale: True
            dropout: 0.2
        hidden_size: 256
        ff_size: 1024
        dropout: 0.1
        layer_norm: "pre"
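    # A small pre-layer-norm Transformer: 2 layers per side, model dimension
    # 256, feed-forward size 1024. tied_embeddings shares the source and target
    # embedding matrices (valid here because both sides use the same
    # vocab.txt), and tied_softmax additionally ties the output projection to
    # the target embeddings.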