| authors: false |
| cite: false |
| build-info: "" |
| workspace: -8000 |
| log: train.log |
| log-level: info |
| log-time-zone: PST8PDT |
| quiet: false |
| quiet-translation: true |
| seed: 141414 |
| check-nan: false |
| interpolate-env-vars: true |
| relative-paths: false |
| dump-config: "" |
| sigterm: save-and-exit |
| model: model_files/model.npz |
| pretrained-model: "" |
| ignore-model-config: false |
| type: lm-transformer |
| dim-vocabs: |
| - 8000 |
| dim-emb: 1024 |
| factors-dim-emb: 0 |
| factors-combine: sum |
| lemma-dependency: "" |
| lemma-dim-emb: 0 |
| dim-rnn: 1024 |
| enc-type: bidirectional |
| enc-cell: gru |
| enc-cell-depth: 1 |
| enc-depth: 1 |
| dec-cell: gru |
| dec-cell-base-depth: 2 |
| dec-cell-high-depth: 1 |
| dec-depth: 12 |
| skip: false |
| layer-normalization: false |
| right-left: false |
| input-types: |
| [] |
| tied-embeddings: true |
| tied-embeddings-src: false |
| tied-embeddings-all: true |
| output-omit-bias: true |
| transformer-heads: 8 |
| transformer-no-projection: false |
| transformer-rnn-projection: false |
| transformer-pool: false |
| transformer-dim-ffn: 8192 |
| transformer-decoder-dim-ffn: 8192 |
| transformer-ffn-depth: 2 |
| transformer-decoder-ffn-depth: 0 |
| transformer-ffn-activation: relu |
| transformer-dim-aan: 2048 |
| transformer-aan-depth: 2 |
| transformer-aan-activation: swish |
| transformer-aan-nogate: false |
| transformer-decoder-autoreg: self-attention |
| transformer-tied-layers: [] |
| transformer-guided-alignment-layer: last |
| transformer-preprocess: "" |
| transformer-postprocess-emb: d |
| transformer-postprocess: dan |
| transformer-postprocess-top: "" |
| transformer-train-position-embeddings: false |
| transformer-depth-scaling: true |
| transformer-no-bias: false |
| transformer-no-affine: false |
| bert-mask-symbol: "[MASK]" |
| bert-sep-symbol: "[SEP]" |
| bert-class-symbol: "[CLS]" |
| bert-masking-fraction: 0.15 |
| bert-train-type-embeddings: true |
| bert-type-vocab-size: 2 |
| comet-final-sigmoid: false |
| comet-mix: false |
| comet-mix-norm: false |
| comet-dropout: 0.1 |
| comet-mixup: 0 |
| comet-mixup-reg: false |
| comet-pooler-ffn: |
| - 2048 |
| - 1024 |
| comet-prepend-zero: false |
| dropout-rnn: 0 |
| dropout-src: 0 |
| dropout-trg: 0 |
| transformer-dropout: 0.1 |
| transformer-dropout-attention: 0 |
| transformer-dropout-ffn: 0.1 |
| cost-type: ce-sum |
| multi-loss-type: sum |
| unlikelihood-loss: false |
| overwrite: false |
| overwrite-checkpoint: true |
| no-reload: false |
| train-sets: |
| - stdin |
| vocabs: |
| - vocab |
| sentencepiece-alphas: |
| [] |
| sentencepiece-options: "" |
| sentencepiece-max-lines: 2000000 |
| no-spm-encode: false |
| after-epochs: 0 |
| after-batches: 0 |
| after: 40e |
| disp-freq: 100Mt |
| disp-first: 10 |
| disp-label-counts: true |
| save-freq: 1Gt |
| logical-epoch: |
| - 1Gt |
| max-length: 256 |
| max-length-crop: false |
| tsv: true |
| tsv-fields: 1 |
| shuffle: batches |
| no-restore-corpus: true |
| tempdir: /tmp |
| sqlite: "" |
| sqlite-drop: false |
| devices: |
| - 0 |
| - 1 |
| no-nccl: false |
| sharding: local |
| sync-freq: 200u |
| cpu-threads: 0 |
| mini-batch: 1000 |
| mini-batch-words: 500000 |
| mini-batch-fit: true |
| mini-batch-fit-step: 5 |
| gradient-checkpointing: false |
| maxi-batch: 1000 |
| maxi-batch-sort: trg |
| shuffle-in-ram: true |
| data-threads: 8 |
| all-caps-every: 0 |
| english-title-case-every: 0 |
| mini-batch-words-ref: 0 |
| mini-batch-warmup: 4000 |
| mini-batch-track-lr: false |
| mini-batch-round-up: true |
| optimizer: adam |
| optimizer-params: |
| - 0.9 |
| - 0.999 |
| - 1e-08 |
| - 0.01 |
| optimizer-delay: 1 |
| sync-sgd: true |
| learn-rate: 0.0005 |
| lr-report: true |
| lr-decay: 0 |
| lr-decay-strategy: epoch+stalled |
| lr-decay-start: |
| - 10 |
| - 1 |
| lr-decay-freq: 50000 |
| lr-decay-reset-optimizer: false |
| lr-decay-repeat-warmup: false |
| lr-decay-inv-sqrt: |
| - 4000 |
| lr-warmup: 4000 |
| lr-warmup-start-rate: 0 |
| lr-warmup-cycle: false |
| lr-warmup-at-reload: false |
| label-smoothing: 0.1 |
| factor-weight: 1 |
| clip-norm: 0 |
| exponential-smoothing: 1e-3 |
| exponential-smoothing-replace-freq: 0 |
| guided-alignment: none |
| guided-alignment-cost: ce |
| guided-alignment-weight: 0 |
| data-weighting: "" |
| data-weighting-type: sentence |
| embedding-vectors: |
| [] |
| embedding-normalization: false |
| embedding-fix-src: false |
| embedding-fix-trg: false |
| precision: |
| - float32 |
| - float32 |
| cost-scaling: |
| - 256.f |
| - 10000 |
| - 1.f |
| - 256.f |
| throw-on-divergence: |
| [] |
| custom-fallbacks: |
| [] |
| gradient-norm-average-window: 100 |
| dynamic-gradient-scaling: |
| - 2 |
| - log |
| check-gradient-nan: false |
| normalize-gradient: false |
| train-embedder-rank: |
| [] |
| quantize-bits: 0 |
| quantize-optimization-steps: 0 |
| quantize-log-based: false |
| quantize-biases: false |
| ulr: false |
| ulr-query-vectors: "" |
| ulr-keys-vectors: "" |
| ulr-trainable-transformation: false |
| ulr-dim-emb: 0 |
| ulr-dropout: 0 |
| ulr-softmax-temperature: 1 |
| valid-sets: |
| - dev.de |
| valid-freq: 1Gt |
| valid-metrics: |
| - perplexity |
| - ce-mean-words |
| - bleu |
| - chrf |
| valid-reset-stalled: false |
| valid-reset-all: false |
| early-stopping: 40 |
| early-stopping-epsilon: |
| - 0 |
| early-stopping-on: first |
| beam-size: 4 |
| normalize: 1.0 |
| max-length-factor: 3 |
| word-penalty: 0.0 |
| allow-unk: false |
| n-best: false |
| word-scores: false |
| valid-mini-batch: 32 |
| valid-max-length: 1000 |
| valid-script-path: "" |
| valid-script-args: |
| [] |
| valid-translation-output: valid.trg.output |
| keep-best: true |
| valid-log: valid.log |