{ "tensorboard": true, "tensorboard_log_dir_dated": "tensorboard/Jan-11_15-01-39", "src_vocab_size": 32000, "src_vocab": "fren/fr.eole.vocab", "seed": 1234, "valid_metrics": [ "BLEU" ], "overwrite": true, "share_vocab": false, "save_data": "data", "report_every": 100, "tensorboard_log_dir": "tensorboard", "transforms": [ "sentencepiece", "filtertoolong" ], "tgt_vocab": "fren/en.eole.vocab", "n_sample": 0, "vocab_size_multiple": 8, "tgt_vocab_size": 32000, "training": { "prefetch_factor": 128, "optim": "adamw", "keep_checkpoint": 4, "world_size": 1, "decay_method": "noam", "attention_dropout": [ 0.1 ], "max_grad_norm": 0.0, "param_init_method": "xavier_uniform", "normalization": "tokens", "batch_size_multiple": 8, "gpu_ranks": [ 0 ], "accum_count": [ 20 ], "average_decay": 0.0001, "batch_size": 6000, "compute_dtype": "torch.float16", "adam_beta2": 0.998, "valid_steps": 5000, "dropout_steps": [ 0 ], "train_steps": 200000, "warmup_steps": 5000, "learning_rate": 3.0, "num_workers": 0, "save_checkpoint_steps": 5000, "accum_steps": [ 0 ], "batch_type": "tokens", "dropout": [ 0.1 ], "bucket_size": 256000, "label_smoothing": 0.1, "model_path": "quickmt-fr-en-eole-model", "valid_batch_size": 2048 }, "transforms_configs": { "sentencepiece": { "src_subword_model": "${MODEL_PATH}/fr.spm.model", "tgt_subword_model": "${MODEL_PATH}/en.spm.model" }, "filtertoolong": { "src_seq_length": 256, "tgt_seq_length": 256 } }, "data": { "corpus_1": { "path_src": "fren/train.cleaned.filtered.fr", "path_tgt": "fren/train.cleaned.filtered.en", "path_align": null, "transforms": [ "sentencepiece", "filtertoolong" ], "weight": 200 }, "corpus_2": { "path_src": "../data/newscrawl.backtrans.cleaned.filtered.fr", "path_tgt": "../data/newscrawl.backtrans.cleaned.filtered.en", "path_align": null, "transforms": [ "sentencepiece", "filtertoolong" ], "weight": 35 }, "corpus_3": { "path_src": "../data/madlad.backtrans.cleaned.filtered.fr", "path_tgt": "../data/madlad.backtrans.cleaned.filtered.en", "path_align": null, "transforms": [ "sentencepiece", "filtertoolong" ], "weight": 68 }, "corpus_4": { "path_src": "../data/hansard.fr", "path_tgt": "../data/hansard.en", "path_align": null, "transforms": [ "sentencepiece", "filtertoolong" ], "weight": 5 }, "valid": { "path_src": "fren/dev.fr", "path_tgt": "fren/dev.en", "path_align": null, "transforms": [ "sentencepiece", "filtertoolong" ] } }, "model": { "position_encoding_type": "SinusoidalInterleaved", "share_decoder_embeddings": false, "add_qkvbias": false, "architecture": "transformer", "add_estimator": false, "hidden_size": 768, "share_embeddings": false, "layer_norm": "standard", "add_ffnbias": true, "mlp_activation_fn": "gelu", "heads": 16, "transformer_ff": 4096, "decoder": { "transformer_ff": 4096, "position_encoding_type": "SinusoidalInterleaved", "add_qkvbias": false, "tgt_word_vec_size": 768, "n_positions": null, "decoder_type": "transformer", "hidden_size": 768, "layer_norm": "standard", "add_ffnbias": true, "mlp_activation_fn": "gelu", "heads": 16, "layers": 2 }, "encoder": { "encoder_type": "transformer", "transformer_ff": 4096, "position_encoding_type": "SinusoidalInterleaved", "src_word_vec_size": 768, "add_qkvbias": false, "n_positions": null, "hidden_size": 768, "layer_norm": "standard", "add_ffnbias": true, "mlp_activation_fn": "gelu", "heads": 16, "layers": 12 }, "embeddings": { "tgt_word_vec_size": 768, "word_vec_size": 768, "position_encoding_type": "SinusoidalInterleaved", "src_word_vec_size": 768 } } }