{ "tensorboard_log_dir": "tensorboard", "transforms": [ "sentencepiece", "filtertoolong" ], "report_every": 100, "tensorboard_log_dir_dated": "tensorboard/Mar-22_13-21-16", "src_vocab_size": 20000, "seed": 1234, "tgt_vocab_size": 20000, "overwrite": true, "tgt_vocab": "ar.eole.vocab", "tensorboard": true, "src_vocab": "en.eole.vocab", "valid_metrics": [ "BLEU" ], "share_vocab": false, "save_data": "en-ar/data", "vocab_size_multiple": 8, "n_sample": 0, "training": { "dropout": [ 0.1 ], "bucket_size": 128000, "prefetch_factor": 200, "warmup_steps": 5000, "accum_steps": [ 0 ], "accum_count": [ 8 ], "adam_beta2": 0.998, "normalization": "tokens", "batch_type": "tokens", "attention_dropout": [ 0.0 ], "batch_size": 16384, "learning_rate": 2.0, "keep_checkpoint": 4, "world_size": 1, "optim": "pagedadamw8bit", "average_decay": 0.0001, "param_init_method": "xavier_uniform", "decay_method": "noam", "gpu_ranks": [ 0 ], "max_grad_norm": 2.0, "valid_steps": 1000, "train_steps": 200000, "num_workers": 0, "valid_batch_size": 16384, "dropout_steps": [ 0 ], "model_path": "model", "batch_size_multiple": 8, "label_smoothing": 0.1, "compute_dtype": "torch.float16", "save_checkpoint_steps": 1000 }, "transforms_configs": { "sentencepiece": { "src_subword_nbest": -1, "src_subword_alpha": 0.5, "tgt_subword_model": "${MODEL_PATH}/ar.spm.model", "src_subword_model": "${MODEL_PATH}/en.spm.model" }, "filtertoolong": { "src_seq_length": 256, "tgt_seq_length": 256 } }, "model": { "add_ffnbias": true, "hidden_size": 1024, "heads": 8, "share_embeddings": false, "transformer_ff": 4096, "share_decoder_embeddings": true, "add_qkvbias": false, "add_estimator": false, "norm_eps": 1e-06, "layer_norm": "standard", "architecture": "transformer", "mlp_activation_fn": "gelu", "position_encoding_type": "SinusoidalInterleaved", "embeddings": { "position_encoding_type": "SinusoidalInterleaved", "src_word_vec_size": 1024, "word_vec_size": 1024, "tgt_word_vec_size": 1024 }, "decoder": { "norm_eps": 1e-06, "add_ffnbias": true, "layer_norm": "standard", "hidden_size": 1024, "decoder_type": "transformer", "heads": 8, "n_positions": null, "transformer_ff": 4096, "layers": 2, "mlp_activation_fn": "gelu", "position_encoding_type": "SinusoidalInterleaved", "add_qkvbias": false, "tgt_word_vec_size": 1024 }, "encoder": { "encoder_type": "transformer", "norm_eps": 1e-06, "add_ffnbias": true, "layer_norm": "standard", "hidden_size": 1024, "src_word_vec_size": 1024, "heads": 8, "n_positions": null, "transformer_ff": 4096, "layers": 8, "mlp_activation_fn": "gelu", "position_encoding_type": "SinusoidalInterleaved", "add_qkvbias": false } }, "data": { "corpus_1": { "transforms": [ "sentencepiece", "filtertoolong" ], "path_sco": "hf://quickmt/quickmt-train.ar-en/sco", "path_src": "hf://quickmt/quickmt-train.ar-en/en", "path_tgt": "hf://quickmt/quickmt-train.ar-en/ar", "path_align": null }, "valid": { "transforms": [ "sentencepiece", "filtertoolong" ], "path_src": "flores-dev.en", "path_align": null, "path_tgt": "flores-dev.ar" } } }