{
  "src_vocab": "ja.eole.vocab",
  "save_data": "data",
  "valid_metrics": [
    "BLEU"
  ],
  "tensorboard_log_dir": "tensorboard",
  "report_every": 100,
  "vocab_size_multiple": 8,
  "seed": 1234,
  "transforms": [
    "sentencepiece",
    "filtertoolong"
  ],
  "overwrite": true,
  "src_vocab_size": 32000,
  "tgt_vocab": "en.eole.vocab",
  "share_vocab": false,
  "tensorboard": true,
  "n_sample": 0,
  "tgt_vocab_size": 32000,
  "tensorboard_log_dir_dated": "tensorboard/Dec-24_20-29-50",
  "training": {
    "batch_type": "tokens",
    "param_init_method": "xavier_uniform",
    "batch_size_multiple": 8,
    "learning_rate": 3.0,
    "attention_dropout": [
      0.1
    ],
    "accum_steps": [
      0
    ],
    "batch_size": 15000,
    "model_path": "quickmt-ja-en-eole-model",
    "keep_checkpoint": 4,
    "adam_beta2": 0.998,
    "gpu_ranks": [
      0
    ],
    "average_decay": 0.0001,
    "warmup_steps": 5000,
    "valid_steps": 5000,
    "dropout": [
      0.1
    ],
    "dropout_steps": [
      0
    ],
    "prefetch_factor": 64,
    "max_grad_norm": 0.0,
    "world_size": 1,
    "compute_dtype": "torch.float16",
    "train_steps": 200000,
    "accum_count": [
      8
    ],
    "num_workers": 0,
    "normalization": "tokens",
    "decay_method": "noam",
    "optim": "adamw",
    "valid_batch_size": 2048,
    "bucket_size": 256000,
    "label_smoothing": 0.1,
    "save_checkpoint_steps": 5000
  },
  "model": {
    "layer_norm": "standard",
    "hidden_size": 768,
    "add_estimator": false,
    "mlp_activation_fn": "gelu",
    "share_embeddings": false,
    "heads": 16,
    "transformer_ff": 4096,
    "add_qkvbias": false,
    "share_decoder_embeddings": false,
    "position_encoding_type": "SinusoidalInterleaved",
    "add_ffnbias": true,
    "architecture": "transformer",
    "encoder": {
      "src_word_vec_size": 768,
      "n_positions": null,
      "layer_norm": "standard",
      "encoder_type": "transformer",
      "hidden_size": 768,
      "mlp_activation_fn": "gelu",
      "heads": 16,
      "transformer_ff": 4096,
      "position_encoding_type": "SinusoidalInterleaved",
      "add_ffnbias": true,
      "add_qkvbias": false,
      "layers": 12
    },
    "embeddings": {
      "word_vec_size": 768,
      "src_word_vec_size": 768,
      "tgt_word_vec_size": 768,
      "position_encoding_type": "SinusoidalInterleaved"
    },
    "decoder": {
      "n_positions": null,
      "layer_norm": "standard",
      "hidden_size": 768,
      "mlp_activation_fn": "gelu",
      "tgt_word_vec_size": 768,
      "layers": 2,
      "heads": 16,
      "transformer_ff": 4096,
      "position_encoding_type": "SinusoidalInterleaved",
      "add_ffnbias": true,
      "add_qkvbias": false,
      "decoder_type": "transformer"
    }
  },
  "transforms_configs": {
    "sentencepiece": {
      "tgt_subword_model": "${MODEL_PATH}/en.spm.model",
      "src_subword_model": "${MODEL_PATH}/ja.spm.model"
    },
    "filtertoolong": {
      "tgt_seq_length": 256,
      "src_seq_length": 256
    }
  },
  "data": {
    "corpus_1": {
      "path_tgt": "hf://quickmt/quickmt-train.ja-en/en",
      "path_align": null,
      "path_src": "hf://quickmt/quickmt-train.ja-en/ja",
      "transforms": [
        "sentencepiece",
        "filtertoolong"
      ],
      "weight": 2,
      "path_sco": "hf://quickmt/quickmt-train.ja-en/sco"
    },
    "corpus_2": {
      "path_tgt": "hf://quickmt/newscrawl2024-en-backtranslated-ja/en",
      "path_align": null,
      "path_src": "hf://quickmt/newscrawl2024-en-backtranslated-ja/ja",
      "transforms": [
        "sentencepiece",
        "filtertoolong"
      ],
      "weight": 1,
      "path_sco": "hf://quickmt/newscrawl2024-en-backtranslated-ja/sco"
    },
    "corpus_3": {
      "path_tgt": "hf://quickmt/madlad400-en-backtranslated-ja/en",
      "path_align": null,
      "path_src": "hf://quickmt/madlad400-en-backtranslated-ja/ja",
      "transforms": [
        "sentencepiece",
        "filtertoolong"
      ],
      "weight": 2,
      "path_sco": "hf://quickmt/madlad400-en-backtranslated-ja/sco"
    },
    "valid": {
      "path_align": null,
      "path_src": "valid.ja",
      "transforms": [
        "sentencepiece",
        "filtertoolong"
      ],
      "path_tgt": "valid.en"
    }
  }
}