diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-0/config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-0/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-0/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-0/pytorch_model.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-0/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..e62f1c2a73e204dd71fd38dbd4de66ee58a3ae92 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-0/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4794ea0b9a495e87fc34e316efe1b585527d6e3d15e4d093362ab24ee74ab3f +size 510396521 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-0/special_tokens_map.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-0/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-0/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-0/tokenizer_config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-0/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-0/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-0/training_args.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-0/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..15b98514e767b3fc92f1a33b342cb1dd57904c39 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-0/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe85938b028b043a65c4e8b12b6788c64a3e8013a7acfbf6b8fb24e8dd9b106 +size 3183 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/optimizer.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..0f48af5e1c710f3e5838ebfe62b6968cef741433 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3892e883c516bec96e17c271ec326bd599d52320b9876655707e3b9f15e0e446 +size 995603825 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/pytorch_model.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..f30539ee1fe36d25c0ab80f62ebf947ba4580c8e --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e94ea2560c106c3d3ce3645948e160284e7622d1b2958f0ea3fccac2fd90e5e5 +size 510396521 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/rng_state.pth b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..41bcfe2e514005f5901542463963353873911d8b --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df6fc9ae57df51ecae22fa6bfacbb0bfa78885dfa78bf2f33e5ce6822acb6264 +size 14567 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/scaler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..45cc4a33e17645cb0ed4a911b11c77cb2e7ce7f3 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a3423b2fe42f204bc8fe2c666ff379f9fd753a0f13613064a5e71e86b519e8 +size 559 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/scheduler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..595eec9094e91b3eb24c2de88c461df2c22026ab --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e9b9d31d11c624d89b0c04ad496adf4b5addd3e703848d2583972c703e8da6 +size 623 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/special_tokens_map.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/tokenizer_config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/trainer_state.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..dbb271ddb71cb5816f28f20474132c46afbc7e30 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/trainer_state.json @@ -0,0 +1,34 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.015833333333333, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0217, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.0012, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 5.7133, + "step": 100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.7526070403072e+16, + "trial_name": null, + "trial_params": null +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/training_args.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..15b98514e767b3fc92f1a33b342cb1dd57904c39 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe85938b028b043a65c4e8b12b6788c64a3e8013a7acfbf6b8fb24e8dd9b106 +size 3183 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/optimizer.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..0aea358954aa82a210b72b12563e254471ca4cb6 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ff2b79960f69b7a050c0254caa0544ac4f5fa99793b756bc83414da5b894f7a +size 995604017 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/pytorch_model.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..2180b03c919d35055e8e948cdd77e836d7cfdf1e --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1ebb8aa1bad585e70c3f9299975ca07c45f9ca2ab67a0b2142dbb3c6a16b995 +size 510396521 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/rng_state.pth b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..ffa8df6d3b95313e63e2bef3d6e88eabb1ee2292 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b659b55d858751773a2297a8af7a96675a48b76cb05008e3e070fb8be589cdc8 +size 14567 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/scaler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..b04695d3a30e4bab2b78883d9c849c25c37ef7d7 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f810fc7b695697c440d8985f6042b4ba23a9e1027604c265718b518ca29f1b2b +size 559 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/scheduler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..2a616899591a36b66ac0bd1ceeb324087c181a2a --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0691206f4bd9ca409d6e7104087a4e0eb05df8f8f555a400f6ecc532edba52d8 +size 623 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/special_tokens_map.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/tokenizer_config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/trainer_state.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..044ed933b9dcd9b3c2660b62bdfca0baf4d291b0 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/trainer_state.json @@ -0,0 +1,158 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 37.00083333333333, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0217, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.0012, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 5.7133, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.1467, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 4.9057, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7624, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 4.5953, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 4.3139, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 4.2007, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.0441, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 3.9169, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.8046, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 3.706, + "step": 600 + }, + { + "epoch": 24.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.6192, + "step": 650 + }, + { + "epoch": 25.02, + "learning_rate": 0.0002777777777777778, + "loss": 3.4774, + "step": 700 + }, + { + "epoch": 27.02, + "learning_rate": 0.00025, + "loss": 3.4742, + "step": 750 + }, + { + "epoch": 29.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.411, + "step": 800 + }, + { + "epoch": 31.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.3501, + "step": 850 + }, + { + "epoch": 33.01, + "learning_rate": 0.00016666666666666666, + "loss": 3.2943, + "step": 900 + }, + { + "epoch": 35.0, + "learning_rate": 0.0001388888888888889, + "loss": 3.2435, + "step": 950 + }, + { + "epoch": 37.0, + "learning_rate": 0.00011111111111111109, + "loss": 3.1955, + "step": 1000 + }, + { + "epoch": 37.0, + "eval_loss": 4.1568193435668945, + "eval_runtime": 3.9982, + "eval_samples_per_second": 50.773, + "eval_steps_per_second": 3.251, + "step": 1000 + }, + { + "epoch": 37.0, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 4.1568193435668945, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 63.86805684825764, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 3.9982, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 50.773, + "step": 1000 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.77056825458688e+17, + "trial_name": null, + "trial_params": null +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/training_args.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..15b98514e767b3fc92f1a33b342cb1dd57904c39 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe85938b028b043a65c4e8b12b6788c64a3e8013a7acfbf6b8fb24e8dd9b106 +size 3183 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/optimizer.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..e18fe1236a00b412650f3cfae7fff9d59383eb94 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5312dade7d066e5cd557a5df7c7de05c16bb06ea333b77ac35aeaeaf5693750a +size 995604017 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/pytorch_model.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..a60c9b9c12a059287a5ed9a71c79942bfc684bf5 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a31698ef1c41ffaf01df801a9e92d3cfdeea227b3e19b8bf0f9ef85c4045b5a7 +size 510396521 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/rng_state.pth b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..b5cd226223ad1e4fe68af4d29dfa02b7ecb1918d --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3cbf51f998b1b39c0b0c0dfb3ea6916e4c4bf96f57fe235ab93b204ad3bcc5d +size 14567 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/scaler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..37bf049fbd5fd721203bf0238edc8ff67dbd8f94 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb16c30b686aa43e110b0d33f9d46bf3127b7124542ca8dc34831233d4675a0 +size 559 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/scheduler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..c158169ec0bb09620952d544fd4b4edea0cc9cf4 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1b7713e4bb40428f29080b7d08d4a52f779ac863737861e4724292b2cf6c59 +size 623 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/special_tokens_map.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/tokenizer_config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/trainer_state.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..b77e7a52703e8adca93024b2571fbb394646a89e --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/trainer_state.json @@ -0,0 +1,170 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 40.016666666666666, + "global_step": 1100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0217, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.0012, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 5.7133, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.1467, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 4.9057, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7624, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 4.5953, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 4.3139, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 4.2007, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.0441, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 3.9169, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.8046, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 3.706, + "step": 600 + }, + { + "epoch": 24.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.6192, + "step": 650 + }, + { + "epoch": 25.02, + "learning_rate": 0.0002777777777777778, + "loss": 3.4774, + "step": 700 + }, + { + "epoch": 27.02, + "learning_rate": 0.00025, + "loss": 3.4742, + "step": 750 + }, + { + "epoch": 29.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.411, + "step": 800 + }, + { + "epoch": 31.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.3501, + "step": 850 + }, + { + "epoch": 33.01, + "learning_rate": 0.00016666666666666666, + "loss": 3.2943, + "step": 900 + }, + { + "epoch": 35.0, + "learning_rate": 0.0001388888888888889, + "loss": 3.2435, + "step": 950 + }, + { + "epoch": 37.0, + "learning_rate": 0.00011111111111111109, + "loss": 3.1955, + "step": 1000 + }, + { + "epoch": 37.0, + "eval_loss": 4.1568193435668945, + "eval_runtime": 3.9982, + "eval_samples_per_second": 50.773, + "eval_steps_per_second": 3.251, + "step": 1000 + }, + { + "epoch": 37.0, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 4.1568193435668945, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 63.86805684825764, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 3.9982, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 50.773, + "step": 1000 + }, + { + "epoch": 38.02, + "learning_rate": 8.333333333333333e-05, + "loss": 3.0973, + "step": 1050 + }, + { + "epoch": 40.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 3.1214, + "step": 1100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 3.0458289586176e+17, + "trial_name": null, + "trial_params": null +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/training_args.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..15b98514e767b3fc92f1a33b342cb1dd57904c39 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe85938b028b043a65c4e8b12b6788c64a3e8013a7acfbf6b8fb24e8dd9b106 +size 3183 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/optimizer.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..8f7b986ce2b0accbdeff16f04f3d57926b56b91c --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01d87af56b6da9fc268635e1dfe32d11b8bd98d82bd6c4270bbc7bb5c3a690d4 +size 995604017 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/pytorch_model.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..79606bdca066f0ffdf09d00decb7a0cc64406fbf --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b6b16cad85cae699e713528f709cb28175652190ccf93624f41b01a74ac113e +size 510396521 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/rng_state.pth b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..5a62055d2a4cf010a681440f6d548e9d838b7e0f --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22724403665e347fa884491881d38603434d9ece1ef30562ba13d8e7ca7c236d +size 14567 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/scaler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..8953dddccbefc4703c09dcda27d83c15add2bade --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c7277eaca0850ae3e9b6790b3d002d820169cce0671185e672c28c8ae8e056 +size 559 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/scheduler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..310d39c17fc616a9c83286ed00f0f4cefba9f5df --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:935a8fb09a6e9698d9894853b05e181b3f56098deaaecddde08e55f06bf000c4 +size 623 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/special_tokens_map.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/tokenizer_config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/trainer_state.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..7c0c73e2b96ec8eacfcf7909b8eee5a467679686 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/trainer_state.json @@ -0,0 +1,182 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 44.01, + "global_step": 1200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0217, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.0012, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 5.7133, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.1467, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 4.9057, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7624, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 4.5953, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 4.3139, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 4.2007, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.0441, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 3.9169, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.8046, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 3.706, + "step": 600 + }, + { + "epoch": 24.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.6192, + "step": 650 + }, + { + "epoch": 25.02, + "learning_rate": 0.0002777777777777778, + "loss": 3.4774, + "step": 700 + }, + { + "epoch": 27.02, + "learning_rate": 0.00025, + "loss": 3.4742, + "step": 750 + }, + { + "epoch": 29.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.411, + "step": 800 + }, + { + "epoch": 31.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.3501, + "step": 850 + }, + { + "epoch": 33.01, + "learning_rate": 0.00016666666666666666, + "loss": 3.2943, + "step": 900 + }, + { + "epoch": 35.0, + "learning_rate": 0.0001388888888888889, + "loss": 3.2435, + "step": 950 + }, + { + "epoch": 37.0, + "learning_rate": 0.00011111111111111109, + "loss": 3.1955, + "step": 1000 + }, + { + "epoch": 37.0, + "eval_loss": 4.1568193435668945, + "eval_runtime": 3.9982, + "eval_samples_per_second": 50.773, + "eval_steps_per_second": 3.251, + "step": 1000 + }, + { + "epoch": 37.0, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 4.1568193435668945, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 63.86805684825764, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 3.9982, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 50.773, + "step": 1000 + }, + { + "epoch": 38.02, + "learning_rate": 8.333333333333333e-05, + "loss": 3.0973, + "step": 1050 + }, + { + "epoch": 40.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 3.1214, + "step": 1100 + }, + { + "epoch": 42.01, + "learning_rate": 2.7777777777777772e-05, + "loss": 3.0907, + "step": 1150 + }, + { + "epoch": 44.01, + "learning_rate": 0.0, + "loss": 3.0653, + "step": 1200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 3.32365555040256e+17, + "trial_name": null, + "trial_params": null +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/training_args.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..15b98514e767b3fc92f1a33b342cb1dd57904c39 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-1200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe85938b028b043a65c4e8b12b6788c64a3e8013a7acfbf6b8fb24e8dd9b106 +size 3183 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/optimizer.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..aa03c74ac561de0fc454cb16f177f5c464b60577 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32e5f22abe3cb0150546e63e1f90e1ca7e307db1d4233880c96ce0dc4b4bac7f +size 995603825 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/pytorch_model.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..5bac8ee7fc7d80231f3dc97cad00870a12e5b8a1 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d408c99d7d507fffcdf2d4cbcb652b92bd635273775d23e927998100b00f54 +size 510396521 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/rng_state.pth b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..55df12f1587533cd9110273977a9a4db6115081e --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a45350f255628f9e40f118bea3dc7b01319b280bcb1f46c7b5d3c3a962da6d42 +size 14567 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/scaler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..365b52ebf376498237a843f6d7332e5a49b14902 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb6982c29cd162f49aeb531674acf574eccd46a8f556bec596040d7c3b95200a +size 559 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/scheduler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..36cb8da739c80a63971a62f06f781c40ac0fceb2 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a992625ada6d884e508ff9392d16738b4a4163147f8fcbf9f46be82ecae9888 +size 623 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/special_tokens_map.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/tokenizer_config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/trainer_state.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..1e74f57096e18f0e522651c19acf03531a1e85c5 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/trainer_state.json @@ -0,0 +1,46 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.009166666666666, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0217, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.0012, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 5.7133, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.1467, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 4.9057, + "step": 200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 5.5308729581568e+16, + "trial_name": null, + "trial_params": null +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/training_args.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..15b98514e767b3fc92f1a33b342cb1dd57904c39 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe85938b028b043a65c4e8b12b6788c64a3e8013a7acfbf6b8fb24e8dd9b106 +size 3183 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/optimizer.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..1c11dded05b2b15de0ce0e63aa9671c0355fd8bc --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e15489c89edfbeaf816973b1e2253d1099be5ead2ca9e1d47d99899c468005ef +size 995604017 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/pytorch_model.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..751c48525a3d00d456670092a65a15a2fa64e9d5 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aabea722c77c39f648a5ee625a5fedd13ece28123de883a74ede6ee895eace0e +size 510396521 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/rng_state.pth b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..690fa753c3a986063d985a890e697c34137211b1 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfb1ff97fc8b0debd997bedee90b6cac4abc0f8249a85848ea64351bd560424e +size 14567 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/scaler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..01066cf4761ea9d2f7962f5181762f7b08690b79 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0967b9f865f16344c55f5ccc3cf7d6e8e97ca61dda304e931ca6bad130f48dd1 +size 559 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/scheduler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..681987f21e79cd4685aac32a5e6b74341e25d936 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1db899b266916f792a0898ceb27a87eaf76647f10c29cc0c13ce22f12a12efd +size 623 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/special_tokens_map.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/tokenizer_config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/trainer_state.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..529b83e004b64b163fb7d799b1fb6c2498dbe5ea --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/trainer_state.json @@ -0,0 +1,58 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 11.0025, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0217, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.0012, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 5.7133, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.1467, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 4.9057, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7624, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 4.5953, + "step": 300 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 8.3091388760064e+16, + "trial_name": null, + "trial_params": null +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/training_args.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..15b98514e767b3fc92f1a33b342cb1dd57904c39 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe85938b028b043a65c4e8b12b6788c64a3e8013a7acfbf6b8fb24e8dd9b106 +size 3183 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/optimizer.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..82e5303066c4e415ddb2c7883151257d648e72e7 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e1e94cf696c3ae52092e093b3a54f12fdef6ffb7fd968ce842bd724de1a13c5 +size 995604017 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/pytorch_model.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..5241c8d1070f3de4646aa22224beb549bfa86bc7 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ace39de7081859584f97e766ebee2d829fae77ca7efc31a7511be4262275dcd7 +size 510396521 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/rng_state.pth b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..8329909c0a1b1e67ffe07642f1b0055a5787ac04 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:459a5eb9a09a1849a1ae251e88a692d74dd55e8a211a2a500e3e43401f683629 +size 14567 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/scaler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..9c7aef4199e98d81152810b661dbaffc01963383 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476e510c8ea7edbd2b51d1e76a4e037820a5639381c0d8b5d32dafa492795a1e +size 559 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/scheduler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..967673cfc91836d239beb5a7ede06992232ba309 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db087d678047b5c346bbb8511936612c1fdf223c6fd70321e97369bc31ed76a8 +size 623 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/special_tokens_map.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/tokenizer_config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/trainer_state.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..34fe9af03c56b300c1420ed93d8a2edb2dd87169 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/trainer_state.json @@ -0,0 +1,70 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 14.018333333333333, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0217, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.0012, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 5.7133, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.1467, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 4.9057, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7624, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 4.5953, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 4.3139, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 4.2007, + "step": 400 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.10617459163136e+17, + "trial_name": null, + "trial_params": null +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/training_args.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..15b98514e767b3fc92f1a33b342cb1dd57904c39 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe85938b028b043a65c4e8b12b6788c64a3e8013a7acfbf6b8fb24e8dd9b106 +size 3183 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/optimizer.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..8281d798c49ff2c9428eafe742f7db0d01b34d7c --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b279da727ff71ef50543aa5ea2d235d1a63388118af8bbebb1bfddd71b8728fa +size 995604017 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/pytorch_model.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..93f7494b9b4733fb86c6d5f2f194d39a86ebabef --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca9319ef3b05478339fe076293e5a1cb4f6296f686d266a7307aca28261080ec +size 510396521 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/rng_state.pth b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..1f83492197471e5ee2100332b9ba97ffebbb63df --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ce1eb77aea4a942baddcbf1699a25fd847d9eaf08b0f4bdcda47d7e25876f50 +size 14567 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/scaler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..e8b96c9c2837f2c95b1d07d4fc3f245f9ad1ef62 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa4c7be44c959599b8b43bb9bc3371e9e4e5bbc5758b3ab5afcccfda3e72e67 +size 559 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/scheduler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..77b958181a5b47b022b96b38a6207f274a1b6604 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026fae4a90d56c24de94b10dfa7a75b6ba4e43bd5c1a3fdb2d77356b81cd6f8a +size 623 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/special_tokens_map.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/tokenizer_config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/trainer_state.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..048fe9e8136f8c39e12023391005bf16d2b29a42 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/trainer_state.json @@ -0,0 +1,82 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 18.011666666666667, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0217, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.0012, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 5.7133, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.1467, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 4.9057, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7624, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 4.5953, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 4.3139, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 4.2007, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.0441, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 3.9169, + "step": 500 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.38400118341632e+17, + "trial_name": null, + "trial_params": null +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/training_args.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..15b98514e767b3fc92f1a33b342cb1dd57904c39 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe85938b028b043a65c4e8b12b6788c64a3e8013a7acfbf6b8fb24e8dd9b106 +size 3183 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/optimizer.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..955e4cb48a66a96aea7fb84bc53a13134482b541 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bf8d22d9b8cf11d77ca7fe981a2785e1a353df24730f3707f1c513206b20956 +size 995604017 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/pytorch_model.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..f9134ce3f474f0d4d1a37ef72022c9692311224c --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea9def5271958f4632bf0644f0f49a5152c744e26c42555ed7ab22214578d4a8 +size 510396521 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/rng_state.pth b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..671f7e9721373e3e2eba2ee5795267689b498224 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09411b1af0b4840248cb4dc6472ab7fa246ea85ad915d9fcf16ef8a8e80b0399 +size 14567 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/scaler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..f95b3e36da01561ec333a83ee8419ad225633e06 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8415b86bbce347c0df306b84a695add049c2a3b2d0b6f4dda3bf036d341150 +size 559 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/scheduler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..62e5359badd619e7dad2c51d98fa8043d9948f0b --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700940432b1c2117248896e2ce5a58d93c051d92ea97707f74d76bf1ef24deee +size 623 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/special_tokens_map.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/tokenizer_config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/trainer_state.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..58abf56c86b29963f5aed0995a9e64bb5f7a1ef7 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/trainer_state.json @@ -0,0 +1,94 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 22.005, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0217, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.0012, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 5.7133, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.1467, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 4.9057, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7624, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 4.5953, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 4.3139, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 4.2007, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.0441, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 3.9169, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.8046, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 3.706, + "step": 600 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.66182777520128e+17, + "trial_name": null, + "trial_params": null +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/training_args.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..15b98514e767b3fc92f1a33b342cb1dd57904c39 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe85938b028b043a65c4e8b12b6788c64a3e8013a7acfbf6b8fb24e8dd9b106 +size 3183 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/optimizer.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..79fc67497a35f0aa3cc8a8fc5745a8661397f229 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f17085239f2bb2af3693477ca3a13e8b8e8f05303e98eaada6935f2e4b349b9 +size 995604017 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/pytorch_model.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..7f9a063f5d749fac2a4353455869187dbbc9ff45 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:866229a872d623de293d1e5c54a355827dce8259c1623e149147d27853641383 +size 510396521 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/rng_state.pth b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..131ecadf93a3a5f6db4f7742a6eef6ec550e0b65 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:174b7d72a2ec47b15aae2dc91f0173c4edf260c8d91328a76c79160d698c41f5 +size 14567 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/scaler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..12f2b8ec834e54a2bd7cfdd0e07b0c6e125b6490 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb213daf5cce18a5f92167ca14da9df084d907f2b9796efc4666630f312b58c +size 559 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/scheduler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..42408b6253265af34ae78746144fbba9316e0d7e --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d4910fd408e002ebeff50d62bfb043dcae5ef658777d0c3ee4a3bbb515ec15 +size 623 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/special_tokens_map.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/tokenizer_config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/trainer_state.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..26f63584112af583d02a3441739b15274a3a4900 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/trainer_state.json @@ -0,0 +1,106 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 25.020833333333332, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0217, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.0012, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 5.7133, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.1467, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 4.9057, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7624, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 4.5953, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 4.3139, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 4.2007, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.0441, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 3.9169, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.8046, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 3.706, + "step": 600 + }, + { + "epoch": 24.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.6192, + "step": 650 + }, + { + "epoch": 25.02, + "learning_rate": 0.0002777777777777778, + "loss": 3.4774, + "step": 700 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.937088479232e+17, + "trial_name": null, + "trial_params": null +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/training_args.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..15b98514e767b3fc92f1a33b342cb1dd57904c39 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe85938b028b043a65c4e8b12b6788c64a3e8013a7acfbf6b8fb24e8dd9b106 +size 3183 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/optimizer.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..df730ba143dde3ec3d34affce574c9e9890e39e5 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f94b34a6afd1e1a0498c68499bd82f67b0f35c153c878edce0a84fd291fed75b +size 995604017 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/pytorch_model.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..0b375617236c786c9bcc6beeeed73da6710d19f9 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bdf66c500b707c27013b5f6f7cc86a26f03336deae147220bf301e18f92ed65 +size 510396521 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/rng_state.pth b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..c58472935c91619a49670362cd2c01f289ba57e1 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f51c0b551824c0049ecfeddf20ed1244ade64d34fe68524deba43ee6fb830ad7 +size 14567 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/scaler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..b3c73372264156b02df8dada2192ee3c96dd5fc4 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2074cdcefbaa0a39f736d6b0f7bf018c350d49e85648bc8accc4f756ad816e +size 559 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/scheduler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..75713dbce3771306ca00343ecc497c4f19a01d03 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27fb255c84833fb6ab5d93679cb236a569de9a1c4f805f72a2f60a2bc7c7499 +size 623 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/special_tokens_map.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/tokenizer_config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/trainer_state.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..59c454cec831e351800992cdbea5132823532ea9 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/trainer_state.json @@ -0,0 +1,118 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 29.014166666666668, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0217, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.0012, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 5.7133, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.1467, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 4.9057, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7624, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 4.5953, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 4.3139, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 4.2007, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.0441, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 3.9169, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.8046, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 3.706, + "step": 600 + }, + { + "epoch": 24.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.6192, + "step": 650 + }, + { + "epoch": 25.02, + "learning_rate": 0.0002777777777777778, + "loss": 3.4774, + "step": 700 + }, + { + "epoch": 27.02, + "learning_rate": 0.00025, + "loss": 3.4742, + "step": 750 + }, + { + "epoch": 29.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.411, + "step": 800 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.21491507101696e+17, + "trial_name": null, + "trial_params": null +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/training_args.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..15b98514e767b3fc92f1a33b342cb1dd57904c39 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe85938b028b043a65c4e8b12b6788c64a3e8013a7acfbf6b8fb24e8dd9b106 +size 3183 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/optimizer.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..d32eae0fa193aa80be5445a71c54c9fe8598540a --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:462e0d526f99c08355769d2b229255b960ded363c4b9fdf5a21e1f29033ff9c3 +size 995604017 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/pytorch_model.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..6bc77218508f62d76d87d95c669ac65c5e380f26 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b287b4662b3a7beb1a3d086cfcd9c94cee1e5827bc23fce60ffae1be8ae5d9db +size 510396521 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/rng_state.pth b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..bb860fed02f8da078a2e88dcf4adc1ed3c5878dc --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e68d4540bd4d2020a0eca375ba30c7283631b967977d1b284d0a9e6d648b4b3a +size 14567 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/scaler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..be54cb13c777bc6feccb478ff218e7e21fad482a --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8695f57df923e22b943b0b0f2b9cc7007008e80b53ccee275b3a35963fe67e9 +size 559 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/scheduler.pt b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..539d7c83ea252818dc9cbffac08cf340bb05a454 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cb09e5db72772a15094286e93cbb61d745d9b63863703cf53da0bcb9827821 +size 623 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/special_tokens_map.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/tokenizer_config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/trainer_state.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..ab790c48d51606624fd12a541fc8131c97d821f6 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/trainer_state.json @@ -0,0 +1,130 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 33.0075, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0217, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.0012, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 5.7133, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 5.1467, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 4.9057, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7624, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 4.5953, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 4.3139, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 4.2007, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.0441, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 3.9169, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.8046, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 3.706, + "step": 600 + }, + { + "epoch": 24.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.6192, + "step": 650 + }, + { + "epoch": 25.02, + "learning_rate": 0.0002777777777777778, + "loss": 3.4774, + "step": 700 + }, + { + "epoch": 27.02, + "learning_rate": 0.00025, + "loss": 3.4742, + "step": 750 + }, + { + "epoch": 29.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.411, + "step": 800 + }, + { + "epoch": 31.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.3501, + "step": 850 + }, + { + "epoch": 33.01, + "learning_rate": 0.00016666666666666666, + "loss": 3.2943, + "step": 900 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.49274166280192e+17, + "trial_name": null, + "trial_params": null +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/training_args.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..15b98514e767b3fc92f1a33b342cb1dd57904c39 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe85938b028b043a65c4e8b12b6788c64a3e8013a7acfbf6b8fb24e8dd9b106 +size 3183 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/metrics.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/metrics.json new file mode 100755 index 0000000000000000000000000000000000000000..d6d7499d45f04d51eb5860d388d982f1281f7ea9 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/metrics.json @@ -0,0 +1,2505 @@ +{"num_parameters": 124439808, "trainable_parameters": 124439808, "step": 0} +{"train_info/time_between_train_steps": 3.6202704906463623, "step": 0} +{"info/global_step": 1, "train_info/time_within_train_step": 28.398200035095215, "step": 1} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 19761.71484375, "train_info/memory_reserved": 22624.0, "train_info/memory_max_reserved": 22624.0, "_timestamp": 1740823229, "_runtime": 35}, "step": 1} +{"logs": {"train/loss": 11.0217, "train/learning_rate": 4.9999999999999996e-06, "train/epoch": 0.0, "_timestamp": 1740823229, "_runtime": 35}, "step": 1} +{"train_info/time_between_train_steps": 0.011200904846191406, "step": 1} +{"info/global_step": 2, "train_info/time_within_train_step": 28.049723863601685, "step": 2} +{"train_info/time_between_train_steps": 0.012067794799804688, "step": 2} +{"info/global_step": 3, "train_info/time_within_train_step": 27.93549156188965, "step": 3} +{"train_info/time_between_train_steps": 0.005911111831665039, "step": 3} +{"info/global_step": 4, "train_info/time_within_train_step": 28.076862335205078, "step": 4} +{"train_info/time_between_train_steps": 0.006282329559326172, "step": 4} +{"info/global_step": 5, "train_info/time_within_train_step": 27.87696099281311, "step": 5} +{"train_info/time_between_train_steps": 0.006053447723388672, "step": 5} +{"info/global_step": 6, "train_info/time_within_train_step": 28.136687994003296, "step": 6} +{"train_info/time_between_train_steps": 0.006167411804199219, "step": 6} +{"info/global_step": 7, "train_info/time_within_train_step": 27.906961679458618, "step": 7} +{"train_info/time_between_train_steps": 0.007276058197021484, "step": 7} +{"info/global_step": 8, "train_info/time_within_train_step": 28.00594139099121, "step": 8} +{"train_info/time_between_train_steps": 0.0055828094482421875, "step": 8} +{"info/global_step": 9, "train_info/time_within_train_step": 27.888179779052734, "step": 9} +{"train_info/time_between_train_steps": 0.006288766860961914, "step": 9} +{"info/global_step": 10, "train_info/time_within_train_step": 27.861764669418335, "step": 10} +{"train_info/time_between_train_steps": 0.005463123321533203, "step": 10} +{"info/global_step": 11, "train_info/time_within_train_step": 27.87231755256653, "step": 11} +{"train_info/time_between_train_steps": 0.009516477584838867, "step": 11} +{"info/global_step": 12, "train_info/time_within_train_step": 27.880123376846313, "step": 12} +{"train_info/time_between_train_steps": 0.005652189254760742, "step": 12} +{"info/global_step": 13, "train_info/time_within_train_step": 27.88990616798401, "step": 13} +{"train_info/time_between_train_steps": 0.0063512325286865234, "step": 13} +{"info/global_step": 14, "train_info/time_within_train_step": 27.873393535614014, "step": 14} +{"train_info/time_between_train_steps": 0.005845308303833008, "step": 14} +{"info/global_step": 15, "train_info/time_within_train_step": 27.888432025909424, "step": 15} +{"train_info/time_between_train_steps": 0.005873680114746094, "step": 15} +{"info/global_step": 16, "train_info/time_within_train_step": 27.96719455718994, "step": 16} +{"train_info/time_between_train_steps": 0.005772590637207031, "step": 16} +{"info/global_step": 17, "train_info/time_within_train_step": 27.890626430511475, "step": 17} +{"train_info/time_between_train_steps": 0.006057262420654297, "step": 17} +{"info/global_step": 18, "train_info/time_within_train_step": 27.838449716567993, "step": 18} +{"train_info/time_between_train_steps": 0.0055065155029296875, "step": 18} +{"info/global_step": 19, "train_info/time_within_train_step": 27.91455316543579, "step": 19} +{"train_info/time_between_train_steps": 0.006654500961303711, "step": 19} +{"info/global_step": 20, "train_info/time_within_train_step": 27.91164803504944, "step": 20} +{"train_info/time_between_train_steps": 0.005525350570678711, "step": 20} +{"info/global_step": 21, "train_info/time_within_train_step": 27.85857844352722, "step": 21} +{"train_info/time_between_train_steps": 0.005818367004394531, "step": 21} +{"info/global_step": 22, "train_info/time_within_train_step": 27.833900451660156, "step": 22} +{"train_info/time_between_train_steps": 0.011306524276733398, "step": 22} +{"info/global_step": 23, "train_info/time_within_train_step": 27.923335075378418, "step": 23} +{"train_info/time_between_train_steps": 0.006353139877319336, "step": 23} +{"info/global_step": 24, "train_info/time_within_train_step": 27.910234689712524, "step": 24} +{"train_info/time_between_train_steps": 0.006014823913574219, "step": 24} +{"info/global_step": 25, "train_info/time_within_train_step": 27.917453289031982, "step": 25} +{"train_info/time_between_train_steps": 0.00585174560546875, "step": 25} +{"info/global_step": 26, "train_info/time_within_train_step": 27.92822027206421, "step": 26} +{"train_info/time_between_train_steps": 0.006350278854370117, "step": 26} +{"info/global_step": 27, "train_info/time_within_train_step": 27.884355783462524, "step": 27} +{"train_info/time_between_train_steps": 0.00610804557800293, "step": 27} +{"train_info/time_between_train_steps": 30.28951406478882, "step": 27} +{"info/global_step": 28, "train_info/time_within_train_step": 27.920156240463257, "step": 28} +{"train_info/time_between_train_steps": 0.00586700439453125, "step": 28} +{"info/global_step": 29, "train_info/time_within_train_step": 27.962992191314697, "step": 29} +{"train_info/time_between_train_steps": 0.008761405944824219, "step": 29} +{"info/global_step": 30, "train_info/time_within_train_step": 27.880796194076538, "step": 30} +{"train_info/time_between_train_steps": 0.006161212921142578, "step": 30} +{"info/global_step": 31, "train_info/time_within_train_step": 28.263022661209106, "step": 31} +{"train_info/time_between_train_steps": 0.006543874740600586, "step": 31} +{"info/global_step": 32, "train_info/time_within_train_step": 27.83896541595459, "step": 32} +{"train_info/time_between_train_steps": 0.006333112716674805, "step": 32} +{"info/global_step": 33, "train_info/time_within_train_step": 28.048543214797974, "step": 33} +{"train_info/time_between_train_steps": 0.006093740463256836, "step": 33} +{"info/global_step": 34, "train_info/time_within_train_step": 27.905397415161133, "step": 34} +{"train_info/time_between_train_steps": 0.009023189544677734, "step": 34} +{"info/global_step": 35, "train_info/time_within_train_step": 27.892625331878662, "step": 35} +{"train_info/time_between_train_steps": 0.006220340728759766, "step": 35} +{"info/global_step": 36, "train_info/time_within_train_step": 27.84145760536194, "step": 36} +{"train_info/time_between_train_steps": 0.0059092044830322266, "step": 36} +{"info/global_step": 37, "train_info/time_within_train_step": 27.90415644645691, "step": 37} +{"train_info/time_between_train_steps": 0.00685429573059082, "step": 37} +{"info/global_step": 38, "train_info/time_within_train_step": 27.87406039237976, "step": 38} +{"train_info/time_between_train_steps": 0.005933046340942383, "step": 38} +{"info/global_step": 39, "train_info/time_within_train_step": 27.822991609573364, "step": 39} +{"train_info/time_between_train_steps": 0.006239652633666992, "step": 39} +{"info/global_step": 40, "train_info/time_within_train_step": 27.86901545524597, "step": 40} +{"train_info/time_between_train_steps": 0.007295370101928711, "step": 40} +{"info/global_step": 41, "train_info/time_within_train_step": 27.838231086730957, "step": 41} +{"train_info/time_between_train_steps": 0.006144523620605469, "step": 41} +{"info/global_step": 42, "train_info/time_within_train_step": 27.87339973449707, "step": 42} +{"train_info/time_between_train_steps": 0.01090097427368164, "step": 42} +{"info/global_step": 43, "train_info/time_within_train_step": 27.871135473251343, "step": 43} +{"train_info/time_between_train_steps": 0.0061032772064208984, "step": 43} +{"info/global_step": 44, "train_info/time_within_train_step": 27.890681266784668, "step": 44} +{"train_info/time_between_train_steps": 0.006142139434814453, "step": 44} +{"info/global_step": 45, "train_info/time_within_train_step": 27.880035638809204, "step": 45} +{"train_info/time_between_train_steps": 0.006216526031494141, "step": 45} +{"info/global_step": 46, "train_info/time_within_train_step": 27.966050386428833, "step": 46} +{"train_info/time_between_train_steps": 0.0072553157806396484, "step": 46} +{"info/global_step": 47, "train_info/time_within_train_step": 27.8751060962677, "step": 47} +{"train_info/time_between_train_steps": 0.0060291290283203125, "step": 47} +{"info/global_step": 48, "train_info/time_within_train_step": 27.902859926223755, "step": 48} +{"train_info/time_between_train_steps": 0.0058536529541015625, "step": 48} +{"info/global_step": 49, "train_info/time_within_train_step": 27.88634419441223, "step": 49} +{"train_info/time_between_train_steps": 0.006044149398803711, "step": 49} +{"info/global_step": 50, "train_info/time_within_train_step": 27.85106372833252, "step": 50} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740824628, "_runtime": 1434}, "step": 50} +{"logs": {"train/loss": 8.0012, "train/learning_rate": 0.00025, "train/epoch": 1.02, "_timestamp": 1740824628, "_runtime": 1434}, "step": 50} +{"train_info/time_between_train_steps": 0.011123180389404297, "step": 50} +{"info/global_step": 51, "train_info/time_within_train_step": 27.894611358642578, "step": 51} +{"train_info/time_between_train_steps": 0.005948781967163086, "step": 51} +{"info/global_step": 52, "train_info/time_within_train_step": 27.918092489242554, "step": 52} +{"train_info/time_between_train_steps": 0.0062024593353271484, "step": 52} +{"info/global_step": 53, "train_info/time_within_train_step": 27.83657670021057, "step": 53} +{"train_info/time_between_train_steps": 0.006983041763305664, "step": 53} +{"info/global_step": 54, "train_info/time_within_train_step": 27.874364137649536, "step": 54} +{"train_info/time_between_train_steps": 0.007112741470336914, "step": 54} +{"train_info/time_between_train_steps": 30.114413738250732, "step": 54} +{"info/global_step": 55, "train_info/time_within_train_step": 27.84037971496582, "step": 55} +{"train_info/time_between_train_steps": 0.005398273468017578, "step": 55} +{"info/global_step": 56, "train_info/time_within_train_step": 27.949012517929077, "step": 56} +{"train_info/time_between_train_steps": 0.006712913513183594, "step": 56} +{"info/global_step": 57, "train_info/time_within_train_step": 27.88629174232483, "step": 57} +{"train_info/time_between_train_steps": 0.006211996078491211, "step": 57} +{"info/global_step": 58, "train_info/time_within_train_step": 28.02495527267456, "step": 58} +{"train_info/time_between_train_steps": 0.005639314651489258, "step": 58} +{"info/global_step": 59, "train_info/time_within_train_step": 27.89282512664795, "step": 59} +{"train_info/time_between_train_steps": 0.005789279937744141, "step": 59} +{"info/global_step": 60, "train_info/time_within_train_step": 28.081668853759766, "step": 60} +{"train_info/time_between_train_steps": 0.008344173431396484, "step": 60} +{"info/global_step": 61, "train_info/time_within_train_step": 28.001875400543213, "step": 61} +{"train_info/time_between_train_steps": 0.005860805511474609, "step": 61} +{"info/global_step": 62, "train_info/time_within_train_step": 28.02206063270569, "step": 62} +{"train_info/time_between_train_steps": 0.008215188980102539, "step": 62} +{"info/global_step": 63, "train_info/time_within_train_step": 27.9860417842865, "step": 63} +{"train_info/time_between_train_steps": 0.006884336471557617, "step": 63} +{"info/global_step": 64, "train_info/time_within_train_step": 27.953574419021606, "step": 64} +{"train_info/time_between_train_steps": 0.008183717727661133, "step": 64} +{"info/global_step": 65, "train_info/time_within_train_step": 27.876402139663696, "step": 65} +{"train_info/time_between_train_steps": 0.009216070175170898, "step": 65} +{"info/global_step": 66, "train_info/time_within_train_step": 27.878890991210938, "step": 66} +{"train_info/time_between_train_steps": 0.010279178619384766, "step": 66} +{"info/global_step": 67, "train_info/time_within_train_step": 27.946533203125, "step": 67} +{"train_info/time_between_train_steps": 0.005519866943359375, "step": 67} +{"info/global_step": 68, "train_info/time_within_train_step": 27.88282585144043, "step": 68} +{"train_info/time_between_train_steps": 0.0058345794677734375, "step": 68} +{"info/global_step": 69, "train_info/time_within_train_step": 27.814297676086426, "step": 69} +{"train_info/time_between_train_steps": 0.005526304244995117, "step": 69} +{"info/global_step": 70, "train_info/time_within_train_step": 27.87025284767151, "step": 70} +{"train_info/time_between_train_steps": 0.007002592086791992, "step": 70} +{"info/global_step": 71, "train_info/time_within_train_step": 27.874943733215332, "step": 71} +{"train_info/time_between_train_steps": 0.006529569625854492, "step": 71} +{"info/global_step": 72, "train_info/time_within_train_step": 27.8717303276062, "step": 72} +{"train_info/time_between_train_steps": 0.009815692901611328, "step": 72} +{"info/global_step": 73, "train_info/time_within_train_step": 27.87533211708069, "step": 73} +{"train_info/time_between_train_steps": 0.0067021846771240234, "step": 73} +{"info/global_step": 74, "train_info/time_within_train_step": 27.89749240875244, "step": 74} +{"train_info/time_between_train_steps": 0.0060350894927978516, "step": 74} +{"info/global_step": 75, "train_info/time_within_train_step": 27.821978330612183, "step": 75} +{"train_info/time_between_train_steps": 0.0059435367584228516, "step": 75} +{"info/global_step": 76, "train_info/time_within_train_step": 27.83786392211914, "step": 76} +{"train_info/time_between_train_steps": 0.005833148956298828, "step": 76} +{"info/global_step": 77, "train_info/time_within_train_step": 27.966080904006958, "step": 77} +{"train_info/time_between_train_steps": 0.005998373031616211, "step": 77} +{"info/global_step": 78, "train_info/time_within_train_step": 27.891772508621216, "step": 78} +{"train_info/time_between_train_steps": 0.006107807159423828, "step": 78} +{"info/global_step": 79, "train_info/time_within_train_step": 27.859020948410034, "step": 79} +{"train_info/time_between_train_steps": 0.005742073059082031, "step": 79} +{"info/global_step": 80, "train_info/time_within_train_step": 27.87679958343506, "step": 80} +{"train_info/time_between_train_steps": 0.006368875503540039, "step": 80} +{"info/global_step": 81, "train_info/time_within_train_step": 27.867661952972412, "step": 81} +{"train_info/time_between_train_steps": 0.007950305938720703, "step": 81} +{"train_info/time_between_train_steps": 30.78544282913208, "step": 81} +{"info/global_step": 82, "train_info/time_within_train_step": 27.845990419387817, "step": 82} +{"train_info/time_between_train_steps": 0.008070945739746094, "step": 82} +{"info/global_step": 83, "train_info/time_within_train_step": 28.003783702850342, "step": 83} +{"train_info/time_between_train_steps": 0.005178213119506836, "step": 83} +{"info/global_step": 84, "train_info/time_within_train_step": 27.85000514984131, "step": 84} +{"train_info/time_between_train_steps": 0.005702018737792969, "step": 84} +{"info/global_step": 85, "train_info/time_within_train_step": 28.01048493385315, "step": 85} +{"train_info/time_between_train_steps": 0.00564122200012207, "step": 85} +{"info/global_step": 86, "train_info/time_within_train_step": 27.881004333496094, "step": 86} +{"train_info/time_between_train_steps": 0.006693124771118164, "step": 86} +{"info/global_step": 87, "train_info/time_within_train_step": 27.915112733840942, "step": 87} +{"train_info/time_between_train_steps": 0.005437612533569336, "step": 87} +{"info/global_step": 88, "train_info/time_within_train_step": 27.79124402999878, "step": 88} +{"train_info/time_between_train_steps": 0.005647420883178711, "step": 88} +{"info/global_step": 89, "train_info/time_within_train_step": 27.86949634552002, "step": 89} +{"train_info/time_between_train_steps": 0.010548830032348633, "step": 89} +{"info/global_step": 90, "train_info/time_within_train_step": 27.851351737976074, "step": 90} +{"train_info/time_between_train_steps": 0.0064754486083984375, "step": 90} +{"info/global_step": 91, "train_info/time_within_train_step": 27.968950748443604, "step": 91} +{"train_info/time_between_train_steps": 0.005433559417724609, "step": 91} +{"info/global_step": 92, "train_info/time_within_train_step": 27.889676570892334, "step": 92} +{"train_info/time_between_train_steps": 0.0076656341552734375, "step": 92} +{"info/global_step": 93, "train_info/time_within_train_step": 27.965562343597412, "step": 93} +{"train_info/time_between_train_steps": 0.006625175476074219, "step": 93} +{"info/global_step": 94, "train_info/time_within_train_step": 27.927173376083374, "step": 94} +{"train_info/time_between_train_steps": 0.00556182861328125, "step": 94} +{"info/global_step": 95, "train_info/time_within_train_step": 27.894549131393433, "step": 95} +{"train_info/time_between_train_steps": 0.010082006454467773, "step": 95} +{"info/global_step": 96, "train_info/time_within_train_step": 27.86102557182312, "step": 96} +{"train_info/time_between_train_steps": 0.006885051727294922, "step": 96} +{"info/global_step": 97, "train_info/time_within_train_step": 27.855329275131226, "step": 97} +{"train_info/time_between_train_steps": 0.0055904388427734375, "step": 97} +{"info/global_step": 98, "train_info/time_within_train_step": 27.873061180114746, "step": 98} +{"train_info/time_between_train_steps": 0.005834341049194336, "step": 98} +{"info/global_step": 99, "train_info/time_within_train_step": 27.903571605682373, "step": 99} +{"train_info/time_between_train_steps": 0.0056459903717041016, "step": 99} +{"info/global_step": 100, "train_info/time_within_train_step": 27.884268760681152, "step": 100} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740826085, "_runtime": 2891}, "step": 100} +{"logs": {"train/loss": 5.7133, "train/learning_rate": 0.0005, "train/epoch": 3.02, "_timestamp": 1740826085, "_runtime": 2891}, "step": 100} +{"train_info/time_between_train_steps": 13.655586004257202, "step": 100} +{"info/global_step": 101, "train_info/time_within_train_step": 27.883735418319702, "step": 101} +{"train_info/time_between_train_steps": 0.005805253982543945, "step": 101} +{"info/global_step": 102, "train_info/time_within_train_step": 27.847142219543457, "step": 102} +{"train_info/time_between_train_steps": 0.005617856979370117, "step": 102} +{"info/global_step": 103, "train_info/time_within_train_step": 27.897843837738037, "step": 103} +{"train_info/time_between_train_steps": 0.005794048309326172, "step": 103} +{"info/global_step": 104, "train_info/time_within_train_step": 27.858957052230835, "step": 104} +{"train_info/time_between_train_steps": 0.0056421756744384766, "step": 104} +{"info/global_step": 105, "train_info/time_within_train_step": 27.87535786628723, "step": 105} +{"train_info/time_between_train_steps": 0.005644559860229492, "step": 105} +{"info/global_step": 106, "train_info/time_within_train_step": 27.83743381500244, "step": 106} +{"train_info/time_between_train_steps": 0.005645036697387695, "step": 106} +{"info/global_step": 107, "train_info/time_within_train_step": 27.96540665626526, "step": 107} +{"train_info/time_between_train_steps": 0.0060079097747802734, "step": 107} +{"info/global_step": 108, "train_info/time_within_train_step": 27.92102861404419, "step": 108} +{"train_info/time_between_train_steps": 0.007097482681274414, "step": 108} +{"train_info/time_between_train_steps": 30.054372310638428, "step": 108} +{"info/global_step": 109, "train_info/time_within_train_step": 27.84268307685852, "step": 109} +{"train_info/time_between_train_steps": 0.005582571029663086, "step": 109} +{"info/global_step": 110, "train_info/time_within_train_step": 27.972187757492065, "step": 110} +{"train_info/time_between_train_steps": 0.005593299865722656, "step": 110} +{"info/global_step": 111, "train_info/time_within_train_step": 27.902609825134277, "step": 111} +{"train_info/time_between_train_steps": 0.005812644958496094, "step": 111} +{"info/global_step": 112, "train_info/time_within_train_step": 27.975236654281616, "step": 112} +{"train_info/time_between_train_steps": 0.010628938674926758, "step": 112} +{"info/global_step": 113, "train_info/time_within_train_step": 27.870732307434082, "step": 113} +{"train_info/time_between_train_steps": 0.005522966384887695, "step": 113} +{"info/global_step": 114, "train_info/time_within_train_step": 28.06589412689209, "step": 114} +{"train_info/time_between_train_steps": 0.0058896541595458984, "step": 114} +{"info/global_step": 115, "train_info/time_within_train_step": 27.839991807937622, "step": 115} +{"train_info/time_between_train_steps": 0.005937337875366211, "step": 115} +{"info/global_step": 116, "train_info/time_within_train_step": 27.9061062335968, "step": 116} +{"train_info/time_between_train_steps": 0.005819082260131836, "step": 116} +{"info/global_step": 117, "train_info/time_within_train_step": 27.864696264266968, "step": 117} +{"train_info/time_between_train_steps": 0.005515336990356445, "step": 117} +{"info/global_step": 118, "train_info/time_within_train_step": 27.81730318069458, "step": 118} +{"train_info/time_between_train_steps": 0.005798816680908203, "step": 118} +{"info/global_step": 119, "train_info/time_within_train_step": 27.811917304992676, "step": 119} +{"train_info/time_between_train_steps": 0.0056569576263427734, "step": 119} +{"info/global_step": 120, "train_info/time_within_train_step": 27.87057876586914, "step": 120} +{"train_info/time_between_train_steps": 0.007812976837158203, "step": 120} +{"info/global_step": 121, "train_info/time_within_train_step": 27.81046962738037, "step": 121} +{"train_info/time_between_train_steps": 0.005361795425415039, "step": 121} +{"info/global_step": 122, "train_info/time_within_train_step": 27.94956874847412, "step": 122} +{"train_info/time_between_train_steps": 0.0068781375885009766, "step": 122} +{"info/global_step": 123, "train_info/time_within_train_step": 27.80056118965149, "step": 123} +{"train_info/time_between_train_steps": 0.005762815475463867, "step": 123} +{"info/global_step": 124, "train_info/time_within_train_step": 27.864063262939453, "step": 124} +{"train_info/time_between_train_steps": 0.005503416061401367, "step": 124} +{"info/global_step": 125, "train_info/time_within_train_step": 27.9296977519989, "step": 125} +{"train_info/time_between_train_steps": 0.007180213928222656, "step": 125} +{"info/global_step": 126, "train_info/time_within_train_step": 28.012876987457275, "step": 126} +{"train_info/time_between_train_steps": 0.006676912307739258, "step": 126} +{"info/global_step": 127, "train_info/time_within_train_step": 27.948411226272583, "step": 127} +{"train_info/time_between_train_steps": 0.005771636962890625, "step": 127} +{"info/global_step": 128, "train_info/time_within_train_step": 27.87645196914673, "step": 128} +{"train_info/time_between_train_steps": 0.005978822708129883, "step": 128} +{"info/global_step": 129, "train_info/time_within_train_step": 27.859951972961426, "step": 129} +{"train_info/time_between_train_steps": 0.005511283874511719, "step": 129} +{"info/global_step": 130, "train_info/time_within_train_step": 27.84231209754944, "step": 130} +{"train_info/time_between_train_steps": 0.01008296012878418, "step": 130} +{"info/global_step": 131, "train_info/time_within_train_step": 27.87318253517151, "step": 131} +{"train_info/time_between_train_steps": 0.006036043167114258, "step": 131} +{"info/global_step": 132, "train_info/time_within_train_step": 27.877030849456787, "step": 132} +{"train_info/time_between_train_steps": 0.010370969772338867, "step": 132} +{"info/global_step": 133, "train_info/time_within_train_step": 27.875184774398804, "step": 133} +{"train_info/time_between_train_steps": 0.005702495574951172, "step": 133} +{"info/global_step": 134, "train_info/time_within_train_step": 27.828492879867554, "step": 134} +{"train_info/time_between_train_steps": 0.0060231685638427734, "step": 134} +{"info/global_step": 135, "train_info/time_within_train_step": 27.839632749557495, "step": 135} +{"train_info/time_between_train_steps": 0.006114006042480469, "step": 135} +{"train_info/time_between_train_steps": 30.28325080871582, "step": 135} +{"info/global_step": 136, "train_info/time_within_train_step": 27.960612535476685, "step": 136} +{"train_info/time_between_train_steps": 0.006197929382324219, "step": 136} +{"info/global_step": 137, "train_info/time_within_train_step": 28.008299112319946, "step": 137} +{"train_info/time_between_train_steps": 0.010885477066040039, "step": 137} +{"info/global_step": 138, "train_info/time_within_train_step": 27.850351572036743, "step": 138} +{"train_info/time_between_train_steps": 0.005463123321533203, "step": 138} +{"info/global_step": 139, "train_info/time_within_train_step": 28.184556245803833, "step": 139} +{"train_info/time_between_train_steps": 0.011291742324829102, "step": 139} +{"info/global_step": 140, "train_info/time_within_train_step": 28.04635715484619, "step": 140} +{"train_info/time_between_train_steps": 0.01763319969177246, "step": 140} +{"info/global_step": 141, "train_info/time_within_train_step": 28.077219009399414, "step": 141} +{"train_info/time_between_train_steps": 0.006272315979003906, "step": 141} +{"info/global_step": 142, "train_info/time_within_train_step": 27.944556713104248, "step": 142} +{"train_info/time_between_train_steps": 0.0067598819732666016, "step": 142} +{"info/global_step": 143, "train_info/time_within_train_step": 28.025068044662476, "step": 143} +{"train_info/time_between_train_steps": 0.010085344314575195, "step": 143} +{"info/global_step": 144, "train_info/time_within_train_step": 27.916707038879395, "step": 144} +{"train_info/time_between_train_steps": 0.0068662166595458984, "step": 144} +{"info/global_step": 145, "train_info/time_within_train_step": 27.896459817886353, "step": 145} +{"train_info/time_between_train_steps": 0.005640268325805664, "step": 145} +{"info/global_step": 146, "train_info/time_within_train_step": 27.910611391067505, "step": 146} +{"train_info/time_between_train_steps": 0.005911588668823242, "step": 146} +{"info/global_step": 147, "train_info/time_within_train_step": 27.907285451889038, "step": 147} +{"train_info/time_between_train_steps": 0.005563974380493164, "step": 147} +{"info/global_step": 148, "train_info/time_within_train_step": 27.90671467781067, "step": 148} +{"train_info/time_between_train_steps": 0.005598545074462891, "step": 148} +{"info/global_step": 149, "train_info/time_within_train_step": 28.1685311794281, "step": 149} +{"train_info/time_between_train_steps": 0.00661468505859375, "step": 149} +{"info/global_step": 150, "train_info/time_within_train_step": 27.95040535926819, "step": 150} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740827555, "_runtime": 4361}, "step": 150} +{"logs": {"train/loss": 5.1467, "train/learning_rate": 0.0005833333333333333, "train/epoch": 5.01, "_timestamp": 1740827555, "_runtime": 4361}, "step": 150} +{"train_info/time_between_train_steps": 0.010025501251220703, "step": 150} +{"info/global_step": 151, "train_info/time_within_train_step": 27.926822662353516, "step": 151} +{"train_info/time_between_train_steps": 0.0062541961669921875, "step": 151} +{"info/global_step": 152, "train_info/time_within_train_step": 27.987622499465942, "step": 152} +{"train_info/time_between_train_steps": 0.006011247634887695, "step": 152} +{"info/global_step": 153, "train_info/time_within_train_step": 27.844491243362427, "step": 153} +{"train_info/time_between_train_steps": 0.0058100223541259766, "step": 153} +{"info/global_step": 154, "train_info/time_within_train_step": 28.14706802368164, "step": 154} +{"train_info/time_between_train_steps": 0.010766983032226562, "step": 154} +{"info/global_step": 155, "train_info/time_within_train_step": 27.871870279312134, "step": 155} +{"train_info/time_between_train_steps": 0.0064084529876708984, "step": 155} +{"info/global_step": 156, "train_info/time_within_train_step": 27.85930347442627, "step": 156} +{"train_info/time_between_train_steps": 0.0057828426361083984, "step": 156} +{"info/global_step": 157, "train_info/time_within_train_step": 27.898738622665405, "step": 157} +{"train_info/time_between_train_steps": 0.006072521209716797, "step": 157} +{"info/global_step": 158, "train_info/time_within_train_step": 27.902660131454468, "step": 158} +{"train_info/time_between_train_steps": 0.006133317947387695, "step": 158} +{"info/global_step": 159, "train_info/time_within_train_step": 27.892138242721558, "step": 159} +{"train_info/time_between_train_steps": 0.006120443344116211, "step": 159} +{"info/global_step": 160, "train_info/time_within_train_step": 27.890724897384644, "step": 160} +{"train_info/time_between_train_steps": 0.00567626953125, "step": 160} +{"info/global_step": 161, "train_info/time_within_train_step": 27.88730239868164, "step": 161} +{"train_info/time_between_train_steps": 0.0063097476959228516, "step": 161} +{"info/global_step": 162, "train_info/time_within_train_step": 27.904974699020386, "step": 162} +{"train_info/time_between_train_steps": 0.008433818817138672, "step": 162} +{"train_info/time_between_train_steps": 30.8207745552063, "step": 162} +{"info/global_step": 163, "train_info/time_within_train_step": 27.911091327667236, "step": 163} +{"train_info/time_between_train_steps": 0.008232593536376953, "step": 163} +{"info/global_step": 164, "train_info/time_within_train_step": 28.002625465393066, "step": 164} +{"train_info/time_between_train_steps": 0.0059888362884521484, "step": 164} +{"info/global_step": 165, "train_info/time_within_train_step": 27.88414978981018, "step": 165} +{"train_info/time_between_train_steps": 0.0057904720306396484, "step": 165} +{"info/global_step": 166, "train_info/time_within_train_step": 28.05690884590149, "step": 166} +{"train_info/time_between_train_steps": 0.005982875823974609, "step": 166} +{"info/global_step": 167, "train_info/time_within_train_step": 27.973514318466187, "step": 167} +{"train_info/time_between_train_steps": 0.0064563751220703125, "step": 167} +{"info/global_step": 168, "train_info/time_within_train_step": 28.10438823699951, "step": 168} +{"train_info/time_between_train_steps": 0.006206989288330078, "step": 168} +{"info/global_step": 169, "train_info/time_within_train_step": 28.03047776222229, "step": 169} +{"train_info/time_between_train_steps": 0.006085395812988281, "step": 169} +{"info/global_step": 170, "train_info/time_within_train_step": 27.960110425949097, "step": 170} +{"train_info/time_between_train_steps": 0.005891323089599609, "step": 170} +{"info/global_step": 171, "train_info/time_within_train_step": 27.88709592819214, "step": 171} +{"train_info/time_between_train_steps": 0.005438327789306641, "step": 171} +{"info/global_step": 172, "train_info/time_within_train_step": 27.867131233215332, "step": 172} +{"train_info/time_between_train_steps": 0.015416860580444336, "step": 172} +{"info/global_step": 173, "train_info/time_within_train_step": 27.847069263458252, "step": 173} +{"train_info/time_between_train_steps": 0.00598454475402832, "step": 173} +{"info/global_step": 174, "train_info/time_within_train_step": 27.839613437652588, "step": 174} +{"train_info/time_between_train_steps": 0.0056095123291015625, "step": 174} +{"info/global_step": 175, "train_info/time_within_train_step": 27.86204242706299, "step": 175} +{"train_info/time_between_train_steps": 0.0057888031005859375, "step": 175} +{"info/global_step": 176, "train_info/time_within_train_step": 27.8561794757843, "step": 176} +{"train_info/time_between_train_steps": 0.0058193206787109375, "step": 176} +{"info/global_step": 177, "train_info/time_within_train_step": 27.871508836746216, "step": 177} +{"train_info/time_between_train_steps": 0.005638599395751953, "step": 177} +{"info/global_step": 178, "train_info/time_within_train_step": 27.8356454372406, "step": 178} +{"train_info/time_between_train_steps": 0.00572514533996582, "step": 178} +{"info/global_step": 179, "train_info/time_within_train_step": 27.828516960144043, "step": 179} +{"train_info/time_between_train_steps": 0.007322072982788086, "step": 179} +{"info/global_step": 180, "train_info/time_within_train_step": 27.82610535621643, "step": 180} +{"train_info/time_between_train_steps": 0.0060002803802490234, "step": 180} +{"info/global_step": 181, "train_info/time_within_train_step": 27.872085094451904, "step": 181} +{"train_info/time_between_train_steps": 0.009943485260009766, "step": 181} +{"info/global_step": 182, "train_info/time_within_train_step": 27.96524429321289, "step": 182} +{"train_info/time_between_train_steps": 0.006161928176879883, "step": 182} +{"info/global_step": 183, "train_info/time_within_train_step": 27.845847845077515, "step": 183} +{"train_info/time_between_train_steps": 0.00965428352355957, "step": 183} +{"info/global_step": 184, "train_info/time_within_train_step": 27.901209592819214, "step": 184} +{"train_info/time_between_train_steps": 0.010936498641967773, "step": 184} +{"info/global_step": 185, "train_info/time_within_train_step": 27.84361219406128, "step": 185} +{"train_info/time_between_train_steps": 0.005629777908325195, "step": 185} +{"info/global_step": 186, "train_info/time_within_train_step": 27.88525676727295, "step": 186} +{"train_info/time_between_train_steps": 0.006134748458862305, "step": 186} +{"info/global_step": 187, "train_info/time_within_train_step": 27.8486065864563, "step": 187} +{"train_info/time_between_train_steps": 0.010423421859741211, "step": 187} +{"info/global_step": 188, "train_info/time_within_train_step": 27.871290683746338, "step": 188} +{"train_info/time_between_train_steps": 0.006138801574707031, "step": 188} +{"info/global_step": 189, "train_info/time_within_train_step": 27.895599126815796, "step": 189} +{"train_info/time_between_train_steps": 0.006124973297119141, "step": 189} +{"train_info/time_between_train_steps": 30.054426431655884, "step": 189} +{"info/global_step": 190, "train_info/time_within_train_step": 27.83387804031372, "step": 190} +{"train_info/time_between_train_steps": 0.005517005920410156, "step": 190} +{"info/global_step": 191, "train_info/time_within_train_step": 27.918277263641357, "step": 191} +{"train_info/time_between_train_steps": 0.005349874496459961, "step": 191} +{"info/global_step": 192, "train_info/time_within_train_step": 27.867596864700317, "step": 192} +{"train_info/time_between_train_steps": 0.008391141891479492, "step": 192} +{"info/global_step": 193, "train_info/time_within_train_step": 28.02430748939514, "step": 193} +{"train_info/time_between_train_steps": 0.008971452713012695, "step": 193} +{"info/global_step": 194, "train_info/time_within_train_step": 27.88659644126892, "step": 194} +{"train_info/time_between_train_steps": 0.005773305892944336, "step": 194} +{"info/global_step": 195, "train_info/time_within_train_step": 28.02795672416687, "step": 195} +{"train_info/time_between_train_steps": 0.006061077117919922, "step": 195} +{"info/global_step": 196, "train_info/time_within_train_step": 27.88110876083374, "step": 196} +{"train_info/time_between_train_steps": 0.010294437408447266, "step": 196} +{"info/global_step": 197, "train_info/time_within_train_step": 28.040934085845947, "step": 197} +{"train_info/time_between_train_steps": 0.005918025970458984, "step": 197} +{"info/global_step": 198, "train_info/time_within_train_step": 27.80183434486389, "step": 198} +{"train_info/time_between_train_steps": 0.00556635856628418, "step": 198} +{"info/global_step": 199, "train_info/time_within_train_step": 27.882346153259277, "step": 199} +{"train_info/time_between_train_steps": 0.0071904659271240234, "step": 199} +{"info/global_step": 200, "train_info/time_within_train_step": 27.88114643096924, "step": 200} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740829012, "_runtime": 5818}, "step": 200} +{"logs": {"train/loss": 4.9057, "train/learning_rate": 0.0005555555555555556, "train/epoch": 7.01, "_timestamp": 1740829012, "_runtime": 5818}, "step": 200} +{"train_info/time_between_train_steps": 13.518893241882324, "step": 200} +{"info/global_step": 201, "train_info/time_within_train_step": 27.920449018478394, "step": 201} +{"train_info/time_between_train_steps": 0.006963014602661133, "step": 201} +{"info/global_step": 202, "train_info/time_within_train_step": 27.99615240097046, "step": 202} +{"train_info/time_between_train_steps": 0.0058078765869140625, "step": 202} +{"info/global_step": 203, "train_info/time_within_train_step": 27.879692554473877, "step": 203} +{"train_info/time_between_train_steps": 0.006184577941894531, "step": 203} +{"info/global_step": 204, "train_info/time_within_train_step": 27.8603572845459, "step": 204} +{"train_info/time_between_train_steps": 0.005890607833862305, "step": 204} +{"info/global_step": 205, "train_info/time_within_train_step": 27.876006364822388, "step": 205} +{"train_info/time_between_train_steps": 0.005948543548583984, "step": 205} +{"info/global_step": 206, "train_info/time_within_train_step": 27.852856636047363, "step": 206} +{"train_info/time_between_train_steps": 0.005781888961791992, "step": 206} +{"info/global_step": 207, "train_info/time_within_train_step": 27.87215566635132, "step": 207} +{"train_info/time_between_train_steps": 0.0058650970458984375, "step": 207} +{"info/global_step": 208, "train_info/time_within_train_step": 27.84617590904236, "step": 208} +{"train_info/time_between_train_steps": 0.005849123001098633, "step": 208} +{"info/global_step": 209, "train_info/time_within_train_step": 27.837926149368286, "step": 209} +{"train_info/time_between_train_steps": 0.005818843841552734, "step": 209} +{"info/global_step": 210, "train_info/time_within_train_step": 27.887402057647705, "step": 210} +{"train_info/time_between_train_steps": 0.005793571472167969, "step": 210} +{"info/global_step": 211, "train_info/time_within_train_step": 27.860662937164307, "step": 211} +{"train_info/time_between_train_steps": 0.007351398468017578, "step": 211} +{"info/global_step": 212, "train_info/time_within_train_step": 28.05919098854065, "step": 212} +{"train_info/time_between_train_steps": 0.005784749984741211, "step": 212} +{"info/global_step": 213, "train_info/time_within_train_step": 27.901840448379517, "step": 213} +{"train_info/time_between_train_steps": 0.006338834762573242, "step": 213} +{"info/global_step": 214, "train_info/time_within_train_step": 27.91187620162964, "step": 214} +{"train_info/time_between_train_steps": 0.007100820541381836, "step": 214} +{"info/global_step": 215, "train_info/time_within_train_step": 27.97497868537903, "step": 215} +{"train_info/time_between_train_steps": 0.006434917449951172, "step": 215} +{"info/global_step": 216, "train_info/time_within_train_step": 27.881762981414795, "step": 216} +{"train_info/time_between_train_steps": 0.0066089630126953125, "step": 216} +{"train_info/time_between_train_steps": 30.23724913597107, "step": 216} +{"info/global_step": 217, "train_info/time_within_train_step": 27.82992434501648, "step": 217} +{"train_info/time_between_train_steps": 0.007677555084228516, "step": 217} +{"info/global_step": 218, "train_info/time_within_train_step": 28.000102996826172, "step": 218} +{"train_info/time_between_train_steps": 0.0058557987213134766, "step": 218} +{"info/global_step": 219, "train_info/time_within_train_step": 27.888965368270874, "step": 219} +{"train_info/time_between_train_steps": 0.0074579715728759766, "step": 219} +{"info/global_step": 220, "train_info/time_within_train_step": 27.98069977760315, "step": 220} +{"train_info/time_between_train_steps": 0.0057947635650634766, "step": 220} +{"info/global_step": 221, "train_info/time_within_train_step": 27.815778493881226, "step": 221} +{"train_info/time_between_train_steps": 0.0055615901947021484, "step": 221} +{"info/global_step": 222, "train_info/time_within_train_step": 28.035627841949463, "step": 222} +{"train_info/time_between_train_steps": 0.01250600814819336, "step": 222} +{"info/global_step": 223, "train_info/time_within_train_step": 27.87561821937561, "step": 223} +{"train_info/time_between_train_steps": 0.005967140197753906, "step": 223} +{"info/global_step": 224, "train_info/time_within_train_step": 27.91272759437561, "step": 224} +{"train_info/time_between_train_steps": 0.009638309478759766, "step": 224} +{"info/global_step": 225, "train_info/time_within_train_step": 27.834030389785767, "step": 225} +{"train_info/time_between_train_steps": 0.005598783493041992, "step": 225} +{"info/global_step": 226, "train_info/time_within_train_step": 27.803261041641235, "step": 226} +{"train_info/time_between_train_steps": 0.005437374114990234, "step": 226} +{"info/global_step": 227, "train_info/time_within_train_step": 27.918625354766846, "step": 227} +{"train_info/time_between_train_steps": 0.0056324005126953125, "step": 227} +{"info/global_step": 228, "train_info/time_within_train_step": 27.8224093914032, "step": 228} +{"train_info/time_between_train_steps": 0.010432720184326172, "step": 228} +{"info/global_step": 229, "train_info/time_within_train_step": 27.818904161453247, "step": 229} +{"train_info/time_between_train_steps": 0.005414724349975586, "step": 229} +{"info/global_step": 230, "train_info/time_within_train_step": 27.7933988571167, "step": 230} +{"train_info/time_between_train_steps": 0.0055027008056640625, "step": 230} +{"info/global_step": 231, "train_info/time_within_train_step": 27.798839807510376, "step": 231} +{"train_info/time_between_train_steps": 0.00613093376159668, "step": 231} +{"info/global_step": 232, "train_info/time_within_train_step": 27.810659170150757, "step": 232} +{"train_info/time_between_train_steps": 0.00555729866027832, "step": 232} +{"info/global_step": 233, "train_info/time_within_train_step": 27.823841094970703, "step": 233} +{"train_info/time_between_train_steps": 0.0055637359619140625, "step": 233} +{"info/global_step": 234, "train_info/time_within_train_step": 27.87192988395691, "step": 234} +{"train_info/time_between_train_steps": 0.008113861083984375, "step": 234} +{"info/global_step": 235, "train_info/time_within_train_step": 27.960933446884155, "step": 235} +{"train_info/time_between_train_steps": 0.010746955871582031, "step": 235} +{"info/global_step": 236, "train_info/time_within_train_step": 27.94012188911438, "step": 236} +{"train_info/time_between_train_steps": 0.006575345993041992, "step": 236} +{"info/global_step": 237, "train_info/time_within_train_step": 27.901373624801636, "step": 237} +{"train_info/time_between_train_steps": 0.0056819915771484375, "step": 237} +{"info/global_step": 238, "train_info/time_within_train_step": 27.875269174575806, "step": 238} +{"train_info/time_between_train_steps": 0.005898714065551758, "step": 238} +{"info/global_step": 239, "train_info/time_within_train_step": 27.859103441238403, "step": 239} +{"train_info/time_between_train_steps": 0.005678415298461914, "step": 239} +{"info/global_step": 240, "train_info/time_within_train_step": 27.88172197341919, "step": 240} +{"train_info/time_between_train_steps": 0.006592273712158203, "step": 240} +{"info/global_step": 241, "train_info/time_within_train_step": 27.865704774856567, "step": 241} +{"train_info/time_between_train_steps": 0.005721569061279297, "step": 241} +{"info/global_step": 242, "train_info/time_within_train_step": 27.875682830810547, "step": 242} +{"train_info/time_between_train_steps": 0.006436586380004883, "step": 242} +{"info/global_step": 243, "train_info/time_within_train_step": 27.97995901107788, "step": 243} +{"train_info/time_between_train_steps": 0.006730794906616211, "step": 243} +{"train_info/time_between_train_steps": 30.451430320739746, "step": 243} +{"info/global_step": 244, "train_info/time_within_train_step": 27.814359188079834, "step": 244} +{"train_info/time_between_train_steps": 0.0054967403411865234, "step": 244} +{"info/global_step": 245, "train_info/time_within_train_step": 27.91022491455078, "step": 245} +{"train_info/time_between_train_steps": 0.00542140007019043, "step": 245} +{"info/global_step": 246, "train_info/time_within_train_step": 27.815227508544922, "step": 246} +{"train_info/time_between_train_steps": 0.007851600646972656, "step": 246} +{"info/global_step": 247, "train_info/time_within_train_step": 28.007827520370483, "step": 247} +{"train_info/time_between_train_steps": 0.005717039108276367, "step": 247} +{"info/global_step": 248, "train_info/time_within_train_step": 27.88615345954895, "step": 248} +{"train_info/time_between_train_steps": 0.010200262069702148, "step": 248} +{"info/global_step": 249, "train_info/time_within_train_step": 27.953322410583496, "step": 249} +{"train_info/time_between_train_steps": 0.0057621002197265625, "step": 249} +{"info/global_step": 250, "train_info/time_within_train_step": 27.853726863861084, "step": 250} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740830481, "_runtime": 7287}, "step": 250} +{"logs": {"train/loss": 4.7624, "train/learning_rate": 0.0005277777777777777, "train/epoch": 9.01, "_timestamp": 1740830481, "_runtime": 7287}, "step": 250} +{"train_info/time_between_train_steps": 0.009849071502685547, "step": 250} +{"info/global_step": 251, "train_info/time_within_train_step": 27.940833568572998, "step": 251} +{"train_info/time_between_train_steps": 0.006918191909790039, "step": 251} +{"info/global_step": 252, "train_info/time_within_train_step": 27.883810997009277, "step": 252} +{"train_info/time_between_train_steps": 0.006491661071777344, "step": 252} +{"info/global_step": 253, "train_info/time_within_train_step": 27.885854244232178, "step": 253} +{"train_info/time_between_train_steps": 0.00794076919555664, "step": 253} +{"info/global_step": 254, "train_info/time_within_train_step": 27.86849355697632, "step": 254} +{"train_info/time_between_train_steps": 0.0056498050689697266, "step": 254} +{"info/global_step": 255, "train_info/time_within_train_step": 27.830705404281616, "step": 255} +{"train_info/time_between_train_steps": 0.0056688785552978516, "step": 255} +{"info/global_step": 256, "train_info/time_within_train_step": 27.81626796722412, "step": 256} +{"train_info/time_between_train_steps": 0.005598783493041992, "step": 256} +{"info/global_step": 257, "train_info/time_within_train_step": 27.960545778274536, "step": 257} +{"train_info/time_between_train_steps": 0.01052236557006836, "step": 257} +{"info/global_step": 258, "train_info/time_within_train_step": 27.82056736946106, "step": 258} +{"train_info/time_between_train_steps": 0.0058135986328125, "step": 258} +{"info/global_step": 259, "train_info/time_within_train_step": 27.84479808807373, "step": 259} +{"train_info/time_between_train_steps": 0.005980014801025391, "step": 259} +{"info/global_step": 260, "train_info/time_within_train_step": 27.88333749771118, "step": 260} +{"train_info/time_between_train_steps": 0.01007223129272461, "step": 260} +{"info/global_step": 261, "train_info/time_within_train_step": 27.858604669570923, "step": 261} +{"train_info/time_between_train_steps": 0.0057849884033203125, "step": 261} +{"info/global_step": 262, "train_info/time_within_train_step": 27.866812705993652, "step": 262} +{"train_info/time_between_train_steps": 0.005436897277832031, "step": 262} +{"info/global_step": 263, "train_info/time_within_train_step": 27.85835552215576, "step": 263} +{"train_info/time_between_train_steps": 0.0055522918701171875, "step": 263} +{"info/global_step": 264, "train_info/time_within_train_step": 27.849590301513672, "step": 264} +{"train_info/time_between_train_steps": 0.006013154983520508, "step": 264} +{"info/global_step": 265, "train_info/time_within_train_step": 27.851332426071167, "step": 265} +{"train_info/time_between_train_steps": 0.0059087276458740234, "step": 265} +{"info/global_step": 266, "train_info/time_within_train_step": 27.838671445846558, "step": 266} +{"train_info/time_between_train_steps": 0.005669593811035156, "step": 266} +{"info/global_step": 267, "train_info/time_within_train_step": 27.81215214729309, "step": 267} +{"train_info/time_between_train_steps": 0.008550405502319336, "step": 267} +{"info/global_step": 268, "train_info/time_within_train_step": 27.86396598815918, "step": 268} +{"train_info/time_between_train_steps": 0.005948543548583984, "step": 268} +{"info/global_step": 269, "train_info/time_within_train_step": 27.8576021194458, "step": 269} +{"train_info/time_between_train_steps": 0.006006002426147461, "step": 269} +{"info/global_step": 270, "train_info/time_within_train_step": 27.888773918151855, "step": 270} +{"train_info/time_between_train_steps": 0.006392717361450195, "step": 270} +{"train_info/time_between_train_steps": 30.03133487701416, "step": 270} +{"info/global_step": 271, "train_info/time_within_train_step": 27.863681077957153, "step": 271} +{"train_info/time_between_train_steps": 0.005201816558837891, "step": 271} +{"info/global_step": 272, "train_info/time_within_train_step": 27.9801025390625, "step": 272} +{"train_info/time_between_train_steps": 0.006368398666381836, "step": 272} +{"info/global_step": 273, "train_info/time_within_train_step": 27.821810245513916, "step": 273} +{"train_info/time_between_train_steps": 0.005240678787231445, "step": 273} +{"info/global_step": 274, "train_info/time_within_train_step": 27.936182737350464, "step": 274} +{"train_info/time_between_train_steps": 0.008188724517822266, "step": 274} +{"info/global_step": 275, "train_info/time_within_train_step": 27.877224922180176, "step": 275} +{"train_info/time_between_train_steps": 0.0059511661529541016, "step": 275} +{"info/global_step": 276, "train_info/time_within_train_step": 27.965786695480347, "step": 276} +{"train_info/time_between_train_steps": 0.005654335021972656, "step": 276} +{"info/global_step": 277, "train_info/time_within_train_step": 27.863364219665527, "step": 277} +{"train_info/time_between_train_steps": 0.005667448043823242, "step": 277} +{"info/global_step": 278, "train_info/time_within_train_step": 27.906915187835693, "step": 278} +{"train_info/time_between_train_steps": 0.010577917098999023, "step": 278} +{"info/global_step": 279, "train_info/time_within_train_step": 27.87593698501587, "step": 279} +{"train_info/time_between_train_steps": 0.006474018096923828, "step": 279} +{"info/global_step": 280, "train_info/time_within_train_step": 27.84889578819275, "step": 280} +{"train_info/time_between_train_steps": 0.00551605224609375, "step": 280} +{"info/global_step": 281, "train_info/time_within_train_step": 28.444270849227905, "step": 281} +{"train_info/time_between_train_steps": 0.00549626350402832, "step": 281} +{"info/global_step": 282, "train_info/time_within_train_step": 27.831202030181885, "step": 282} +{"train_info/time_between_train_steps": 0.005551815032958984, "step": 282} +{"info/global_step": 283, "train_info/time_within_train_step": 27.859955310821533, "step": 283} +{"train_info/time_between_train_steps": 0.005890607833862305, "step": 283} +{"info/global_step": 284, "train_info/time_within_train_step": 27.807185888290405, "step": 284} +{"train_info/time_between_train_steps": 0.006435871124267578, "step": 284} +{"info/global_step": 285, "train_info/time_within_train_step": 27.78674840927124, "step": 285} +{"train_info/time_between_train_steps": 0.005597829818725586, "step": 285} +{"info/global_step": 286, "train_info/time_within_train_step": 27.819979429244995, "step": 286} +{"train_info/time_between_train_steps": 0.0055887699127197266, "step": 286} +{"info/global_step": 287, "train_info/time_within_train_step": 27.787051916122437, "step": 287} +{"train_info/time_between_train_steps": 0.0053713321685791016, "step": 287} +{"info/global_step": 288, "train_info/time_within_train_step": 27.89934277534485, "step": 288} +{"train_info/time_between_train_steps": 0.0059778690338134766, "step": 288} +{"info/global_step": 289, "train_info/time_within_train_step": 27.794929265975952, "step": 289} +{"train_info/time_between_train_steps": 0.005518436431884766, "step": 289} +{"info/global_step": 290, "train_info/time_within_train_step": 27.832767724990845, "step": 290} +{"train_info/time_between_train_steps": 0.005460500717163086, "step": 290} +{"info/global_step": 291, "train_info/time_within_train_step": 27.809151887893677, "step": 291} +{"train_info/time_between_train_steps": 0.005501985549926758, "step": 291} +{"info/global_step": 292, "train_info/time_within_train_step": 27.823378324508667, "step": 292} +{"train_info/time_between_train_steps": 0.005417823791503906, "step": 292} +{"info/global_step": 293, "train_info/time_within_train_step": 27.829708337783813, "step": 293} +{"train_info/time_between_train_steps": 0.011628389358520508, "step": 293} +{"info/global_step": 294, "train_info/time_within_train_step": 27.89794158935547, "step": 294} +{"train_info/time_between_train_steps": 0.010825872421264648, "step": 294} +{"info/global_step": 295, "train_info/time_within_train_step": 27.830968618392944, "step": 295} +{"train_info/time_between_train_steps": 0.005656242370605469, "step": 295} +{"info/global_step": 296, "train_info/time_within_train_step": 27.894269466400146, "step": 296} +{"train_info/time_between_train_steps": 0.010895490646362305, "step": 296} +{"info/global_step": 297, "train_info/time_within_train_step": 27.881417274475098, "step": 297} +{"train_info/time_between_train_steps": 0.009895086288452148, "step": 297} +{"train_info/time_between_train_steps": 30.67125415802002, "step": 297} +{"info/global_step": 298, "train_info/time_within_train_step": 27.81615710258484, "step": 298} +{"train_info/time_between_train_steps": 0.005588531494140625, "step": 298} +{"info/global_step": 299, "train_info/time_within_train_step": 28.025782346725464, "step": 299} +{"train_info/time_between_train_steps": 0.00609898567199707, "step": 299} +{"info/global_step": 300, "train_info/time_within_train_step": 27.801010608673096, "step": 300} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740831936, "_runtime": 8742}, "step": 300} +{"logs": {"train/loss": 4.5953, "train/learning_rate": 0.0005, "train/epoch": 11.0, "_timestamp": 1740831936, "_runtime": 8742}, "step": 300} +{"train_info/time_between_train_steps": 13.431044816970825, "step": 300} +{"info/global_step": 301, "train_info/time_within_train_step": 28.183988571166992, "step": 301} +{"train_info/time_between_train_steps": 0.0070323944091796875, "step": 301} +{"info/global_step": 302, "train_info/time_within_train_step": 27.98400616645813, "step": 302} +{"train_info/time_between_train_steps": 0.005719423294067383, "step": 302} +{"info/global_step": 303, "train_info/time_within_train_step": 28.007476329803467, "step": 303} +{"train_info/time_between_train_steps": 0.0060291290283203125, "step": 303} +{"info/global_step": 304, "train_info/time_within_train_step": 27.8350567817688, "step": 304} +{"train_info/time_between_train_steps": 0.0056574344635009766, "step": 304} +{"info/global_step": 305, "train_info/time_within_train_step": 27.889195442199707, "step": 305} +{"train_info/time_between_train_steps": 0.005554676055908203, "step": 305} +{"info/global_step": 306, "train_info/time_within_train_step": 27.874332904815674, "step": 306} +{"train_info/time_between_train_steps": 0.005644559860229492, "step": 306} +{"info/global_step": 307, "train_info/time_within_train_step": 27.864688873291016, "step": 307} +{"train_info/time_between_train_steps": 0.0055828094482421875, "step": 307} +{"info/global_step": 308, "train_info/time_within_train_step": 27.873628854751587, "step": 308} +{"train_info/time_between_train_steps": 0.0055425167083740234, "step": 308} +{"info/global_step": 309, "train_info/time_within_train_step": 27.839917421340942, "step": 309} +{"train_info/time_between_train_steps": 0.00998997688293457, "step": 309} +{"info/global_step": 310, "train_info/time_within_train_step": 27.87177324295044, "step": 310} +{"train_info/time_between_train_steps": 0.005629062652587891, "step": 310} +{"info/global_step": 311, "train_info/time_within_train_step": 27.870466470718384, "step": 311} +{"train_info/time_between_train_steps": 0.005845069885253906, "step": 311} +{"info/global_step": 312, "train_info/time_within_train_step": 27.89648461341858, "step": 312} +{"train_info/time_between_train_steps": 0.005659580230712891, "step": 312} +{"info/global_step": 313, "train_info/time_within_train_step": 27.877453565597534, "step": 313} +{"train_info/time_between_train_steps": 0.008096933364868164, "step": 313} +{"info/global_step": 314, "train_info/time_within_train_step": 27.885167837142944, "step": 314} +{"train_info/time_between_train_steps": 0.00555729866027832, "step": 314} +{"info/global_step": 315, "train_info/time_within_train_step": 27.862312078475952, "step": 315} +{"train_info/time_between_train_steps": 0.0057642459869384766, "step": 315} +{"info/global_step": 316, "train_info/time_within_train_step": 27.865069150924683, "step": 316} +{"train_info/time_between_train_steps": 0.005644798278808594, "step": 316} +{"info/global_step": 317, "train_info/time_within_train_step": 27.85721755027771, "step": 317} +{"train_info/time_between_train_steps": 0.005342960357666016, "step": 317} +{"info/global_step": 318, "train_info/time_within_train_step": 27.974331855773926, "step": 318} +{"train_info/time_between_train_steps": 0.005380868911743164, "step": 318} +{"info/global_step": 319, "train_info/time_within_train_step": 27.862208127975464, "step": 319} +{"train_info/time_between_train_steps": 0.005758523941040039, "step": 319} +{"info/global_step": 320, "train_info/time_within_train_step": 27.867955207824707, "step": 320} +{"train_info/time_between_train_steps": 0.005800724029541016, "step": 320} +{"info/global_step": 321, "train_info/time_within_train_step": 27.923182249069214, "step": 321} +{"train_info/time_between_train_steps": 0.011632442474365234, "step": 321} +{"info/global_step": 322, "train_info/time_within_train_step": 27.882627248764038, "step": 322} +{"train_info/time_between_train_steps": 0.007666349411010742, "step": 322} +{"info/global_step": 323, "train_info/time_within_train_step": 27.90539789199829, "step": 323} +{"train_info/time_between_train_steps": 0.006061553955078125, "step": 323} +{"info/global_step": 324, "train_info/time_within_train_step": 27.89738368988037, "step": 324} +{"train_info/time_between_train_steps": 0.009799480438232422, "step": 324} +{"train_info/time_between_train_steps": 30.326019525527954, "step": 324} +{"info/global_step": 325, "train_info/time_within_train_step": 27.83491611480713, "step": 325} +{"train_info/time_between_train_steps": 0.007859468460083008, "step": 325} +{"info/global_step": 326, "train_info/time_within_train_step": 27.913843154907227, "step": 326} +{"train_info/time_between_train_steps": 0.005438566207885742, "step": 326} +{"info/global_step": 327, "train_info/time_within_train_step": 27.818984746932983, "step": 327} +{"train_info/time_between_train_steps": 0.005705356597900391, "step": 327} +{"info/global_step": 328, "train_info/time_within_train_step": 28.071890592575073, "step": 328} +{"train_info/time_between_train_steps": 0.005586385726928711, "step": 328} +{"info/global_step": 329, "train_info/time_within_train_step": 27.81501603126526, "step": 329} +{"train_info/time_between_train_steps": 0.0054476261138916016, "step": 329} +{"info/global_step": 330, "train_info/time_within_train_step": 27.918297290802002, "step": 330} +{"train_info/time_between_train_steps": 0.005552768707275391, "step": 330} +{"info/global_step": 331, "train_info/time_within_train_step": 27.795741319656372, "step": 331} +{"train_info/time_between_train_steps": 0.007960796356201172, "step": 331} +{"info/global_step": 332, "train_info/time_within_train_step": 27.935148239135742, "step": 332} +{"train_info/time_between_train_steps": 0.005756378173828125, "step": 332} +{"info/global_step": 333, "train_info/time_within_train_step": 27.990936040878296, "step": 333} +{"train_info/time_between_train_steps": 0.005524158477783203, "step": 333} +{"info/global_step": 334, "train_info/time_within_train_step": 27.85382914543152, "step": 334} +{"train_info/time_between_train_steps": 0.011969804763793945, "step": 334} +{"info/global_step": 335, "train_info/time_within_train_step": 27.834710121154785, "step": 335} +{"train_info/time_between_train_steps": 0.009464502334594727, "step": 335} +{"info/global_step": 336, "train_info/time_within_train_step": 27.86616039276123, "step": 336} +{"train_info/time_between_train_steps": 0.005398750305175781, "step": 336} +{"info/global_step": 337, "train_info/time_within_train_step": 27.868229627609253, "step": 337} +{"train_info/time_between_train_steps": 0.010651350021362305, "step": 337} +{"info/global_step": 338, "train_info/time_within_train_step": 27.86155605316162, "step": 338} +{"train_info/time_between_train_steps": 0.005595684051513672, "step": 338} +{"info/global_step": 339, "train_info/time_within_train_step": 27.86681866645813, "step": 339} +{"train_info/time_between_train_steps": 0.005804538726806641, "step": 339} +{"info/global_step": 340, "train_info/time_within_train_step": 27.865009784698486, "step": 340} +{"train_info/time_between_train_steps": 0.005527019500732422, "step": 340} +{"info/global_step": 341, "train_info/time_within_train_step": 27.828630924224854, "step": 341} +{"train_info/time_between_train_steps": 0.005825042724609375, "step": 341} +{"info/global_step": 342, "train_info/time_within_train_step": 27.865234851837158, "step": 342} +{"train_info/time_between_train_steps": 0.006000995635986328, "step": 342} +{"info/global_step": 343, "train_info/time_within_train_step": 27.838578939437866, "step": 343} +{"train_info/time_between_train_steps": 0.005460977554321289, "step": 343} +{"info/global_step": 344, "train_info/time_within_train_step": 27.792906284332275, "step": 344} +{"train_info/time_between_train_steps": 0.00571751594543457, "step": 344} +{"info/global_step": 345, "train_info/time_within_train_step": 27.865906953811646, "step": 345} +{"train_info/time_between_train_steps": 0.007970094680786133, "step": 345} +{"info/global_step": 346, "train_info/time_within_train_step": 27.87007212638855, "step": 346} +{"train_info/time_between_train_steps": 0.0054891109466552734, "step": 346} +{"info/global_step": 347, "train_info/time_within_train_step": 27.81438112258911, "step": 347} +{"train_info/time_between_train_steps": 0.0054399967193603516, "step": 347} +{"info/global_step": 348, "train_info/time_within_train_step": 27.937549829483032, "step": 348} +{"train_info/time_between_train_steps": 0.005755901336669922, "step": 348} +{"info/global_step": 349, "train_info/time_within_train_step": 27.88797402381897, "step": 349} +{"train_info/time_between_train_steps": 0.005613088607788086, "step": 349} +{"info/global_step": 350, "train_info/time_within_train_step": 27.994195699691772, "step": 350} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740833375, "_runtime": 10181}, "step": 350} +{"logs": {"train/loss": 4.3139, "train/learning_rate": 0.00047222222222222224, "train/epoch": 12.02, "_timestamp": 1740833375, "_runtime": 10181}, "step": 350} +{"train_info/time_between_train_steps": 0.012214422225952148, "step": 350} +{"info/global_step": 351, "train_info/time_within_train_step": 27.979339838027954, "step": 351} +{"train_info/time_between_train_steps": 0.006680965423583984, "step": 351} +{"train_info/time_between_train_steps": 30.358925819396973, "step": 351} +{"info/global_step": 352, "train_info/time_within_train_step": 27.824191570281982, "step": 352} +{"train_info/time_between_train_steps": 0.005384683609008789, "step": 352} +{"info/global_step": 353, "train_info/time_within_train_step": 28.020115852355957, "step": 353} +{"train_info/time_between_train_steps": 0.0059871673583984375, "step": 353} +{"info/global_step": 354, "train_info/time_within_train_step": 27.82621717453003, "step": 354} +{"train_info/time_between_train_steps": 0.005692243576049805, "step": 354} +{"info/global_step": 355, "train_info/time_within_train_step": 27.96956491470337, "step": 355} +{"train_info/time_between_train_steps": 0.010503768920898438, "step": 355} +{"info/global_step": 356, "train_info/time_within_train_step": 27.861464262008667, "step": 356} +{"train_info/time_between_train_steps": 0.0067005157470703125, "step": 356} +{"info/global_step": 357, "train_info/time_within_train_step": 27.987279176712036, "step": 357} +{"train_info/time_between_train_steps": 0.010440349578857422, "step": 357} +{"info/global_step": 358, "train_info/time_within_train_step": 27.886709213256836, "step": 358} +{"train_info/time_between_train_steps": 0.005902290344238281, "step": 358} +{"info/global_step": 359, "train_info/time_within_train_step": 27.96164035797119, "step": 359} +{"train_info/time_between_train_steps": 0.005391120910644531, "step": 359} +{"info/global_step": 360, "train_info/time_within_train_step": 27.91954755783081, "step": 360} +{"train_info/time_between_train_steps": 0.00561213493347168, "step": 360} +{"info/global_step": 361, "train_info/time_within_train_step": 27.83403491973877, "step": 361} +{"train_info/time_between_train_steps": 0.005863666534423828, "step": 361} +{"info/global_step": 362, "train_info/time_within_train_step": 27.841620922088623, "step": 362} +{"train_info/time_between_train_steps": 0.0057315826416015625, "step": 362} +{"info/global_step": 363, "train_info/time_within_train_step": 27.98875665664673, "step": 363} +{"train_info/time_between_train_steps": 0.008816719055175781, "step": 363} +{"info/global_step": 364, "train_info/time_within_train_step": 27.8582444190979, "step": 364} +{"train_info/time_between_train_steps": 0.00546574592590332, "step": 364} +{"info/global_step": 365, "train_info/time_within_train_step": 27.878384113311768, "step": 365} +{"train_info/time_between_train_steps": 0.010804176330566406, "step": 365} +{"info/global_step": 366, "train_info/time_within_train_step": 27.866106510162354, "step": 366} +{"train_info/time_between_train_steps": 0.005574703216552734, "step": 366} +{"info/global_step": 367, "train_info/time_within_train_step": 27.8619122505188, "step": 367} +{"train_info/time_between_train_steps": 0.0054509639739990234, "step": 367} +{"info/global_step": 368, "train_info/time_within_train_step": 27.821506023406982, "step": 368} +{"train_info/time_between_train_steps": 0.005709171295166016, "step": 368} +{"info/global_step": 369, "train_info/time_within_train_step": 27.833512544631958, "step": 369} +{"train_info/time_between_train_steps": 0.005522489547729492, "step": 369} +{"info/global_step": 370, "train_info/time_within_train_step": 27.849364519119263, "step": 370} +{"train_info/time_between_train_steps": 0.005628108978271484, "step": 370} +{"info/global_step": 371, "train_info/time_within_train_step": 27.889708757400513, "step": 371} +{"train_info/time_between_train_steps": 0.005568742752075195, "step": 371} +{"info/global_step": 372, "train_info/time_within_train_step": 27.881330728530884, "step": 372} +{"train_info/time_between_train_steps": 0.005843162536621094, "step": 372} +{"info/global_step": 373, "train_info/time_within_train_step": 27.85663342475891, "step": 373} +{"train_info/time_between_train_steps": 0.0056455135345458984, "step": 373} +{"info/global_step": 374, "train_info/time_within_train_step": 27.85307812690735, "step": 374} +{"train_info/time_between_train_steps": 0.005515336990356445, "step": 374} +{"info/global_step": 375, "train_info/time_within_train_step": 27.857134103775024, "step": 375} +{"train_info/time_between_train_steps": 0.005803108215332031, "step": 375} +{"info/global_step": 376, "train_info/time_within_train_step": 27.854702711105347, "step": 376} +{"train_info/time_between_train_steps": 0.005712032318115234, "step": 376} +{"info/global_step": 377, "train_info/time_within_train_step": 27.83741784095764, "step": 377} +{"train_info/time_between_train_steps": 0.011057138442993164, "step": 377} +{"info/global_step": 378, "train_info/time_within_train_step": 27.864508390426636, "step": 378} +{"train_info/time_between_train_steps": 0.0065686702728271484, "step": 378} +{"train_info/time_between_train_steps": 30.160752296447754, "step": 378} +{"info/global_step": 379, "train_info/time_within_train_step": 27.80960178375244, "step": 379} +{"train_info/time_between_train_steps": 0.00540471076965332, "step": 379} +{"info/global_step": 380, "train_info/time_within_train_step": 27.983996868133545, "step": 380} +{"train_info/time_between_train_steps": 0.006939888000488281, "step": 380} +{"info/global_step": 381, "train_info/time_within_train_step": 27.827547788619995, "step": 381} +{"train_info/time_between_train_steps": 0.006138801574707031, "step": 381} +{"info/global_step": 382, "train_info/time_within_train_step": 28.02044367790222, "step": 382} +{"train_info/time_between_train_steps": 0.006123065948486328, "step": 382} +{"info/global_step": 383, "train_info/time_within_train_step": 27.899614095687866, "step": 383} +{"train_info/time_between_train_steps": 0.005707979202270508, "step": 383} +{"info/global_step": 384, "train_info/time_within_train_step": 28.043463468551636, "step": 384} +{"train_info/time_between_train_steps": 0.005612850189208984, "step": 384} +{"info/global_step": 385, "train_info/time_within_train_step": 27.84029197692871, "step": 385} +{"train_info/time_between_train_steps": 0.006232500076293945, "step": 385} +{"info/global_step": 386, "train_info/time_within_train_step": 27.91986608505249, "step": 386} +{"train_info/time_between_train_steps": 0.005757570266723633, "step": 386} +{"info/global_step": 387, "train_info/time_within_train_step": 27.86759305000305, "step": 387} +{"train_info/time_between_train_steps": 0.010422945022583008, "step": 387} +{"info/global_step": 388, "train_info/time_within_train_step": 27.792836904525757, "step": 388} +{"train_info/time_between_train_steps": 0.0056378841400146484, "step": 388} +{"info/global_step": 389, "train_info/time_within_train_step": 27.81405997276306, "step": 389} +{"train_info/time_between_train_steps": 0.005906820297241211, "step": 389} +{"info/global_step": 390, "train_info/time_within_train_step": 27.871445894241333, "step": 390} +{"train_info/time_between_train_steps": 0.00557398796081543, "step": 390} +{"info/global_step": 391, "train_info/time_within_train_step": 27.87499189376831, "step": 391} +{"train_info/time_between_train_steps": 0.005588531494140625, "step": 391} +{"info/global_step": 392, "train_info/time_within_train_step": 27.866706132888794, "step": 392} +{"train_info/time_between_train_steps": 0.008424043655395508, "step": 392} +{"info/global_step": 393, "train_info/time_within_train_step": 27.923279762268066, "step": 393} +{"train_info/time_between_train_steps": 0.0057833194732666016, "step": 393} +{"info/global_step": 394, "train_info/time_within_train_step": 27.860687017440796, "step": 394} +{"train_info/time_between_train_steps": 0.00558161735534668, "step": 394} +{"info/global_step": 395, "train_info/time_within_train_step": 27.82089591026306, "step": 395} +{"train_info/time_between_train_steps": 0.005670070648193359, "step": 395} +{"info/global_step": 396, "train_info/time_within_train_step": 27.862597703933716, "step": 396} +{"train_info/time_between_train_steps": 0.0056612491607666016, "step": 396} +{"info/global_step": 397, "train_info/time_within_train_step": 27.859788417816162, "step": 397} +{"train_info/time_between_train_steps": 0.00584721565246582, "step": 397} +{"info/global_step": 398, "train_info/time_within_train_step": 27.866466522216797, "step": 398} +{"train_info/time_between_train_steps": 0.005438327789306641, "step": 398} +{"info/global_step": 399, "train_info/time_within_train_step": 27.91116762161255, "step": 399} +{"train_info/time_between_train_steps": 0.008869409561157227, "step": 399} +{"info/global_step": 400, "train_info/time_within_train_step": 27.856075286865234, "step": 400} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740834830, "_runtime": 11636}, "step": 400} +{"logs": {"train/loss": 4.2007, "train/learning_rate": 0.00044444444444444436, "train/epoch": 14.02, "_timestamp": 1740834830, "_runtime": 11636}, "step": 400} +{"train_info/time_between_train_steps": 13.364769697189331, "step": 400} +{"info/global_step": 401, "train_info/time_within_train_step": 27.93506169319153, "step": 401} +{"train_info/time_between_train_steps": 0.008486509323120117, "step": 401} +{"info/global_step": 402, "train_info/time_within_train_step": 27.875202894210815, "step": 402} +{"train_info/time_between_train_steps": 0.010068178176879883, "step": 402} +{"info/global_step": 403, "train_info/time_within_train_step": 27.864012956619263, "step": 403} +{"train_info/time_between_train_steps": 0.005923032760620117, "step": 403} +{"info/global_step": 404, "train_info/time_within_train_step": 27.89768147468567, "step": 404} +{"train_info/time_between_train_steps": 0.006226778030395508, "step": 404} +{"info/global_step": 405, "train_info/time_within_train_step": 27.930134534835815, "step": 405} +{"train_info/time_between_train_steps": 0.006249189376831055, "step": 405} +{"train_info/time_between_train_steps": 30.22260808944702, "step": 405} +{"info/global_step": 406, "train_info/time_within_train_step": 27.87432026863098, "step": 406} +{"train_info/time_between_train_steps": 0.006037473678588867, "step": 406} +{"info/global_step": 407, "train_info/time_within_train_step": 27.940497398376465, "step": 407} +{"train_info/time_between_train_steps": 0.005751132965087891, "step": 407} +{"info/global_step": 408, "train_info/time_within_train_step": 27.98459768295288, "step": 408} +{"train_info/time_between_train_steps": 0.007693052291870117, "step": 408} +{"info/global_step": 409, "train_info/time_within_train_step": 27.93605875968933, "step": 409} +{"train_info/time_between_train_steps": 0.009261369705200195, "step": 409} +{"info/global_step": 410, "train_info/time_within_train_step": 27.81739830970764, "step": 410} +{"train_info/time_between_train_steps": 0.0074710845947265625, "step": 410} +{"info/global_step": 411, "train_info/time_within_train_step": 28.040581464767456, "step": 411} +{"train_info/time_between_train_steps": 0.010359525680541992, "step": 411} +{"info/global_step": 412, "train_info/time_within_train_step": 27.88280963897705, "step": 412} +{"train_info/time_between_train_steps": 0.005898714065551758, "step": 412} +{"info/global_step": 413, "train_info/time_within_train_step": 27.92323899269104, "step": 413} +{"train_info/time_between_train_steps": 0.0058116912841796875, "step": 413} +{"info/global_step": 414, "train_info/time_within_train_step": 27.868455171585083, "step": 414} +{"train_info/time_between_train_steps": 0.009870052337646484, "step": 414} +{"info/global_step": 415, "train_info/time_within_train_step": 27.835580348968506, "step": 415} +{"train_info/time_between_train_steps": 0.005522727966308594, "step": 415} +{"info/global_step": 416, "train_info/time_within_train_step": 27.79317331314087, "step": 416} +{"train_info/time_between_train_steps": 0.005731105804443359, "step": 416} +{"info/global_step": 417, "train_info/time_within_train_step": 27.793151378631592, "step": 417} +{"train_info/time_between_train_steps": 0.005482196807861328, "step": 417} +{"info/global_step": 418, "train_info/time_within_train_step": 27.806245803833008, "step": 418} +{"train_info/time_between_train_steps": 0.005552768707275391, "step": 418} +{"info/global_step": 419, "train_info/time_within_train_step": 27.87759828567505, "step": 419} +{"train_info/time_between_train_steps": 0.0060079097747802734, "step": 419} +{"info/global_step": 420, "train_info/time_within_train_step": 27.87320828437805, "step": 420} +{"train_info/time_between_train_steps": 0.010503530502319336, "step": 420} +{"info/global_step": 421, "train_info/time_within_train_step": 27.88043713569641, "step": 421} +{"train_info/time_between_train_steps": 0.005841970443725586, "step": 421} +{"info/global_step": 422, "train_info/time_within_train_step": 27.846530199050903, "step": 422} +{"train_info/time_between_train_steps": 0.010596275329589844, "step": 422} +{"info/global_step": 423, "train_info/time_within_train_step": 27.96095108985901, "step": 423} +{"train_info/time_between_train_steps": 0.005775928497314453, "step": 423} +{"info/global_step": 424, "train_info/time_within_train_step": 27.868441104888916, "step": 424} +{"train_info/time_between_train_steps": 0.0058476924896240234, "step": 424} +{"info/global_step": 425, "train_info/time_within_train_step": 27.87687373161316, "step": 425} +{"train_info/time_between_train_steps": 0.0058422088623046875, "step": 425} +{"info/global_step": 426, "train_info/time_within_train_step": 27.88424563407898, "step": 426} +{"train_info/time_between_train_steps": 0.005568504333496094, "step": 426} +{"info/global_step": 427, "train_info/time_within_train_step": 27.89107370376587, "step": 427} +{"train_info/time_between_train_steps": 0.010552167892456055, "step": 427} +{"info/global_step": 428, "train_info/time_within_train_step": 27.81200933456421, "step": 428} +{"train_info/time_between_train_steps": 0.005734682083129883, "step": 428} +{"info/global_step": 429, "train_info/time_within_train_step": 27.85061764717102, "step": 429} +{"train_info/time_between_train_steps": 0.006000041961669922, "step": 429} +{"info/global_step": 430, "train_info/time_within_train_step": 27.87765073776245, "step": 430} +{"train_info/time_between_train_steps": 0.00913691520690918, "step": 430} +{"info/global_step": 431, "train_info/time_within_train_step": 27.867655038833618, "step": 431} +{"train_info/time_between_train_steps": 0.006556987762451172, "step": 431} +{"info/global_step": 432, "train_info/time_within_train_step": 27.839937210083008, "step": 432} +{"train_info/time_between_train_steps": 0.006502389907836914, "step": 432} +{"train_info/time_between_train_steps": 30.876237869262695, "step": 432} +{"info/global_step": 433, "train_info/time_within_train_step": 27.891247272491455, "step": 433} +{"train_info/time_between_train_steps": 0.008236169815063477, "step": 433} +{"info/global_step": 434, "train_info/time_within_train_step": 27.98596954345703, "step": 434} +{"train_info/time_between_train_steps": 0.0056514739990234375, "step": 434} +{"info/global_step": 435, "train_info/time_within_train_step": 27.887510776519775, "step": 435} +{"train_info/time_between_train_steps": 0.009260892868041992, "step": 435} +{"info/global_step": 436, "train_info/time_within_train_step": 27.99273180961609, "step": 436} +{"train_info/time_between_train_steps": 0.00634002685546875, "step": 436} +{"info/global_step": 437, "train_info/time_within_train_step": 27.88111901283264, "step": 437} +{"train_info/time_between_train_steps": 0.00564885139465332, "step": 437} +{"info/global_step": 438, "train_info/time_within_train_step": 28.3105411529541, "step": 438} +{"train_info/time_between_train_steps": 0.005952596664428711, "step": 438} +{"info/global_step": 439, "train_info/time_within_train_step": 27.928963661193848, "step": 439} +{"train_info/time_between_train_steps": 0.010592222213745117, "step": 439} +{"info/global_step": 440, "train_info/time_within_train_step": 28.045235872268677, "step": 440} +{"train_info/time_between_train_steps": 0.006417751312255859, "step": 440} +{"info/global_step": 441, "train_info/time_within_train_step": 27.94930672645569, "step": 441} +{"train_info/time_between_train_steps": 0.007544755935668945, "step": 441} +{"info/global_step": 442, "train_info/time_within_train_step": 27.97111701965332, "step": 442} +{"train_info/time_between_train_steps": 0.00743556022644043, "step": 442} +{"info/global_step": 443, "train_info/time_within_train_step": 27.909705638885498, "step": 443} +{"train_info/time_between_train_steps": 0.005495309829711914, "step": 443} +{"info/global_step": 444, "train_info/time_within_train_step": 27.848289728164673, "step": 444} +{"train_info/time_between_train_steps": 0.008292913436889648, "step": 444} +{"info/global_step": 445, "train_info/time_within_train_step": 27.90094780921936, "step": 445} +{"train_info/time_between_train_steps": 0.005503177642822266, "step": 445} +{"info/global_step": 446, "train_info/time_within_train_step": 27.852866888046265, "step": 446} +{"train_info/time_between_train_steps": 0.005725860595703125, "step": 446} +{"info/global_step": 447, "train_info/time_within_train_step": 27.879093647003174, "step": 447} +{"train_info/time_between_train_steps": 0.005917787551879883, "step": 447} +{"info/global_step": 448, "train_info/time_within_train_step": 27.89617371559143, "step": 448} +{"train_info/time_between_train_steps": 0.0058367252349853516, "step": 448} +{"info/global_step": 449, "train_info/time_within_train_step": 27.940409898757935, "step": 449} +{"train_info/time_between_train_steps": 0.005570173263549805, "step": 449} +{"info/global_step": 450, "train_info/time_within_train_step": 27.833435535430908, "step": 450} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740836300, "_runtime": 13106}, "step": 450} +{"logs": {"train/loss": 4.0441, "train/learning_rate": 0.00041666666666666664, "train/epoch": 16.02, "_timestamp": 1740836300, "_runtime": 13106}, "step": 450} +{"train_info/time_between_train_steps": 0.011562347412109375, "step": 450} +{"info/global_step": 451, "train_info/time_within_train_step": 27.879051685333252, "step": 451} +{"train_info/time_between_train_steps": 0.0057032108306884766, "step": 451} +{"info/global_step": 452, "train_info/time_within_train_step": 27.84365940093994, "step": 452} +{"train_info/time_between_train_steps": 0.005711555480957031, "step": 452} +{"info/global_step": 453, "train_info/time_within_train_step": 27.988147974014282, "step": 453} +{"train_info/time_between_train_steps": 0.01271820068359375, "step": 453} +{"info/global_step": 454, "train_info/time_within_train_step": 28.028918266296387, "step": 454} +{"train_info/time_between_train_steps": 0.007012605667114258, "step": 454} +{"info/global_step": 455, "train_info/time_within_train_step": 27.916759490966797, "step": 455} +{"train_info/time_between_train_steps": 0.0060274600982666016, "step": 455} +{"info/global_step": 456, "train_info/time_within_train_step": 27.89230513572693, "step": 456} +{"train_info/time_between_train_steps": 0.006140470504760742, "step": 456} +{"info/global_step": 457, "train_info/time_within_train_step": 27.9235520362854, "step": 457} +{"train_info/time_between_train_steps": 0.011152982711791992, "step": 457} +{"info/global_step": 458, "train_info/time_within_train_step": 27.885788440704346, "step": 458} +{"train_info/time_between_train_steps": 0.0060694217681884766, "step": 458} +{"info/global_step": 459, "train_info/time_within_train_step": 27.902823448181152, "step": 459} +{"train_info/time_between_train_steps": 0.0064699649810791016, "step": 459} +{"train_info/time_between_train_steps": 30.146681785583496, "step": 459} +{"info/global_step": 460, "train_info/time_within_train_step": 27.868287086486816, "step": 460} +{"train_info/time_between_train_steps": 0.00682377815246582, "step": 460} +{"info/global_step": 461, "train_info/time_within_train_step": 27.9750497341156, "step": 461} +{"train_info/time_between_train_steps": 0.012323617935180664, "step": 461} +{"info/global_step": 462, "train_info/time_within_train_step": 27.87037682533264, "step": 462} +{"train_info/time_between_train_steps": 0.005789995193481445, "step": 462} +{"info/global_step": 463, "train_info/time_within_train_step": 28.071197509765625, "step": 463} +{"train_info/time_between_train_steps": 0.005922794342041016, "step": 463} +{"info/global_step": 464, "train_info/time_within_train_step": 27.814095735549927, "step": 464} +{"train_info/time_between_train_steps": 0.0069427490234375, "step": 464} +{"info/global_step": 465, "train_info/time_within_train_step": 28.026614904403687, "step": 465} +{"train_info/time_between_train_steps": 0.008072853088378906, "step": 465} +{"info/global_step": 466, "train_info/time_within_train_step": 27.882356643676758, "step": 466} +{"train_info/time_between_train_steps": 0.006090879440307617, "step": 466} +{"info/global_step": 467, "train_info/time_within_train_step": 27.875301361083984, "step": 467} +{"train_info/time_between_train_steps": 0.0055370330810546875, "step": 467} +{"info/global_step": 468, "train_info/time_within_train_step": 27.98504638671875, "step": 468} +{"train_info/time_between_train_steps": 0.009237051010131836, "step": 468} +{"info/global_step": 469, "train_info/time_within_train_step": 27.820528745651245, "step": 469} +{"train_info/time_between_train_steps": 0.005661487579345703, "step": 469} +{"info/global_step": 470, "train_info/time_within_train_step": 27.792689085006714, "step": 470} +{"train_info/time_between_train_steps": 0.005466461181640625, "step": 470} +{"info/global_step": 471, "train_info/time_within_train_step": 27.847057342529297, "step": 471} +{"train_info/time_between_train_steps": 0.006001710891723633, "step": 471} +{"info/global_step": 472, "train_info/time_within_train_step": 27.83088994026184, "step": 472} +{"train_info/time_between_train_steps": 0.010527610778808594, "step": 472} +{"info/global_step": 473, "train_info/time_within_train_step": 27.864502668380737, "step": 473} +{"train_info/time_between_train_steps": 0.005570888519287109, "step": 473} +{"info/global_step": 474, "train_info/time_within_train_step": 27.855641841888428, "step": 474} +{"train_info/time_between_train_steps": 0.010488510131835938, "step": 474} +{"info/global_step": 475, "train_info/time_within_train_step": 27.851510286331177, "step": 475} +{"train_info/time_between_train_steps": 0.005581855773925781, "step": 475} +{"info/global_step": 476, "train_info/time_within_train_step": 27.833488702774048, "step": 476} +{"train_info/time_between_train_steps": 0.0057833194732666016, "step": 476} +{"info/global_step": 477, "train_info/time_within_train_step": 27.87452268600464, "step": 477} +{"train_info/time_between_train_steps": 0.005600690841674805, "step": 477} +{"info/global_step": 478, "train_info/time_within_train_step": 27.859236478805542, "step": 478} +{"train_info/time_between_train_steps": 0.005984067916870117, "step": 478} +{"info/global_step": 479, "train_info/time_within_train_step": 27.862711429595947, "step": 479} +{"train_info/time_between_train_steps": 0.005660533905029297, "step": 479} +{"info/global_step": 480, "train_info/time_within_train_step": 27.906373500823975, "step": 480} +{"train_info/time_between_train_steps": 0.005491971969604492, "step": 480} +{"info/global_step": 481, "train_info/time_within_train_step": 27.857980728149414, "step": 481} +{"train_info/time_between_train_steps": 0.009862899780273438, "step": 481} +{"info/global_step": 482, "train_info/time_within_train_step": 27.862734079360962, "step": 482} +{"train_info/time_between_train_steps": 0.005799055099487305, "step": 482} +{"info/global_step": 483, "train_info/time_within_train_step": 27.888301849365234, "step": 483} +{"train_info/time_between_train_steps": 0.005726814270019531, "step": 483} +{"info/global_step": 484, "train_info/time_within_train_step": 27.969315767288208, "step": 484} +{"train_info/time_between_train_steps": 0.009689807891845703, "step": 484} +{"info/global_step": 485, "train_info/time_within_train_step": 27.892069339752197, "step": 485} +{"train_info/time_between_train_steps": 0.006037473678588867, "step": 485} +{"info/global_step": 486, "train_info/time_within_train_step": 27.907841444015503, "step": 486} +{"train_info/time_between_train_steps": 0.0061147212982177734, "step": 486} +{"train_info/time_between_train_steps": 30.304569482803345, "step": 486} +{"info/global_step": 487, "train_info/time_within_train_step": 27.883514404296875, "step": 487} +{"train_info/time_between_train_steps": 0.008539438247680664, "step": 487} +{"info/global_step": 488, "train_info/time_within_train_step": 27.9749276638031, "step": 488} +{"train_info/time_between_train_steps": 0.005660295486450195, "step": 488} +{"info/global_step": 489, "train_info/time_within_train_step": 27.857323169708252, "step": 489} +{"train_info/time_between_train_steps": 0.005599260330200195, "step": 489} +{"info/global_step": 490, "train_info/time_within_train_step": 28.015337467193604, "step": 490} +{"train_info/time_between_train_steps": 0.010383129119873047, "step": 490} +{"info/global_step": 491, "train_info/time_within_train_step": 27.896780729293823, "step": 491} +{"train_info/time_between_train_steps": 0.0058765411376953125, "step": 491} +{"info/global_step": 492, "train_info/time_within_train_step": 27.929849863052368, "step": 492} +{"train_info/time_between_train_steps": 0.005841970443725586, "step": 492} +{"info/global_step": 493, "train_info/time_within_train_step": 27.86791443824768, "step": 493} +{"train_info/time_between_train_steps": 0.0059413909912109375, "step": 493} +{"info/global_step": 494, "train_info/time_within_train_step": 27.889002323150635, "step": 494} +{"train_info/time_between_train_steps": 0.0056493282318115234, "step": 494} +{"info/global_step": 495, "train_info/time_within_train_step": 27.799968004226685, "step": 495} +{"train_info/time_between_train_steps": 0.0055391788482666016, "step": 495} +{"info/global_step": 496, "train_info/time_within_train_step": 27.785148859024048, "step": 496} +{"train_info/time_between_train_steps": 0.005772829055786133, "step": 496} +{"info/global_step": 497, "train_info/time_within_train_step": 27.862079858779907, "step": 497} +{"train_info/time_between_train_steps": 0.011852264404296875, "step": 497} +{"info/global_step": 498, "train_info/time_within_train_step": 27.86294651031494, "step": 498} +{"train_info/time_between_train_steps": 0.005226612091064453, "step": 498} +{"info/global_step": 499, "train_info/time_within_train_step": 28.059932231903076, "step": 499} +{"train_info/time_between_train_steps": 0.006051301956176758, "step": 499} +{"info/global_step": 500, "train_info/time_within_train_step": 27.897966623306274, "step": 500} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740837756, "_runtime": 14562}, "step": 500} +{"logs": {"train/loss": 3.9169, "train/learning_rate": 0.00038888888888888887, "train/epoch": 18.01, "_timestamp": 1740837756, "_runtime": 14562}, "step": 500} +{"train_info/time_between_train_steps": 13.326775074005127, "step": 500} +{"info/global_step": 501, "train_info/time_within_train_step": 27.84434723854065, "step": 501} +{"train_info/time_between_train_steps": 0.006293058395385742, "step": 501} +{"info/global_step": 502, "train_info/time_within_train_step": 27.813929796218872, "step": 502} +{"train_info/time_between_train_steps": 0.01034235954284668, "step": 502} +{"info/global_step": 503, "train_info/time_within_train_step": 27.875284910202026, "step": 503} +{"train_info/time_between_train_steps": 0.005455970764160156, "step": 503} +{"info/global_step": 504, "train_info/time_within_train_step": 27.85174536705017, "step": 504} +{"train_info/time_between_train_steps": 0.0056421756744384766, "step": 504} +{"info/global_step": 505, "train_info/time_within_train_step": 27.882614374160767, "step": 505} +{"train_info/time_between_train_steps": 0.010477066040039062, "step": 505} +{"info/global_step": 506, "train_info/time_within_train_step": 27.85069704055786, "step": 506} +{"train_info/time_between_train_steps": 0.005661964416503906, "step": 506} +{"info/global_step": 507, "train_info/time_within_train_step": 27.85747003555298, "step": 507} +{"train_info/time_between_train_steps": 0.00557255744934082, "step": 507} +{"info/global_step": 508, "train_info/time_within_train_step": 27.857595920562744, "step": 508} +{"train_info/time_between_train_steps": 0.008129596710205078, "step": 508} +{"info/global_step": 509, "train_info/time_within_train_step": 27.88602924346924, "step": 509} +{"train_info/time_between_train_steps": 0.005643367767333984, "step": 509} +{"info/global_step": 510, "train_info/time_within_train_step": 27.889447450637817, "step": 510} +{"train_info/time_between_train_steps": 0.005939960479736328, "step": 510} +{"info/global_step": 511, "train_info/time_within_train_step": 27.87203335762024, "step": 511} +{"train_info/time_between_train_steps": 0.00604248046875, "step": 511} +{"info/global_step": 512, "train_info/time_within_train_step": 27.867092609405518, "step": 512} +{"train_info/time_between_train_steps": 0.00598454475402832, "step": 512} +{"info/global_step": 513, "train_info/time_within_train_step": 27.98174810409546, "step": 513} +{"train_info/time_between_train_steps": 0.010780572891235352, "step": 513} +{"train_info/time_between_train_steps": 30.354830980300903, "step": 513} +{"info/global_step": 514, "train_info/time_within_train_step": 27.786157369613647, "step": 514} +{"train_info/time_between_train_steps": 0.005467891693115234, "step": 514} +{"info/global_step": 515, "train_info/time_within_train_step": 28.008342504501343, "step": 515} +{"train_info/time_between_train_steps": 0.005567073822021484, "step": 515} +{"info/global_step": 516, "train_info/time_within_train_step": 27.849275827407837, "step": 516} +{"train_info/time_between_train_steps": 0.00577998161315918, "step": 516} +{"info/global_step": 517, "train_info/time_within_train_step": 28.02324604988098, "step": 517} +{"train_info/time_between_train_steps": 0.005659580230712891, "step": 517} +{"info/global_step": 518, "train_info/time_within_train_step": 27.903552770614624, "step": 518} +{"train_info/time_between_train_steps": 0.005721330642700195, "step": 518} +{"info/global_step": 519, "train_info/time_within_train_step": 28.02929663658142, "step": 519} +{"train_info/time_between_train_steps": 0.00665736198425293, "step": 519} +{"info/global_step": 520, "train_info/time_within_train_step": 27.870779275894165, "step": 520} +{"train_info/time_between_train_steps": 0.0055620670318603516, "step": 520} +{"info/global_step": 521, "train_info/time_within_train_step": 27.932773113250732, "step": 521} +{"train_info/time_between_train_steps": 0.005826473236083984, "step": 521} +{"info/global_step": 522, "train_info/time_within_train_step": 27.81373381614685, "step": 522} +{"train_info/time_between_train_steps": 0.005364656448364258, "step": 522} +{"info/global_step": 523, "train_info/time_within_train_step": 27.812828063964844, "step": 523} +{"train_info/time_between_train_steps": 0.00576019287109375, "step": 523} +{"info/global_step": 524, "train_info/time_within_train_step": 27.805013179779053, "step": 524} +{"train_info/time_between_train_steps": 0.005415678024291992, "step": 524} +{"info/global_step": 525, "train_info/time_within_train_step": 27.947207927703857, "step": 525} +{"train_info/time_between_train_steps": 0.0057032108306884766, "step": 525} +{"info/global_step": 526, "train_info/time_within_train_step": 27.84563899040222, "step": 526} +{"train_info/time_between_train_steps": 0.010590553283691406, "step": 526} +{"info/global_step": 527, "train_info/time_within_train_step": 27.836323976516724, "step": 527} +{"train_info/time_between_train_steps": 0.005244255065917969, "step": 527} +{"info/global_step": 528, "train_info/time_within_train_step": 27.8100528717041, "step": 528} +{"train_info/time_between_train_steps": 0.005736827850341797, "step": 528} +{"info/global_step": 529, "train_info/time_within_train_step": 27.972007036209106, "step": 529} +{"train_info/time_between_train_steps": 0.005393028259277344, "step": 529} +{"info/global_step": 530, "train_info/time_within_train_step": 27.835757970809937, "step": 530} +{"train_info/time_between_train_steps": 0.01031184196472168, "step": 530} +{"info/global_step": 531, "train_info/time_within_train_step": 27.83814001083374, "step": 531} +{"train_info/time_between_train_steps": 0.005504608154296875, "step": 531} +{"info/global_step": 532, "train_info/time_within_train_step": 27.83446168899536, "step": 532} +{"train_info/time_between_train_steps": 0.00840139389038086, "step": 532} +{"info/global_step": 533, "train_info/time_within_train_step": 27.850450038909912, "step": 533} +{"train_info/time_between_train_steps": 0.010431528091430664, "step": 533} +{"info/global_step": 534, "train_info/time_within_train_step": 27.870835304260254, "step": 534} +{"train_info/time_between_train_steps": 0.005669355392456055, "step": 534} +{"info/global_step": 535, "train_info/time_within_train_step": 27.94419765472412, "step": 535} +{"train_info/time_between_train_steps": 0.005730867385864258, "step": 535} +{"info/global_step": 536, "train_info/time_within_train_step": 27.81489086151123, "step": 536} +{"train_info/time_between_train_steps": 0.005614280700683594, "step": 536} +{"info/global_step": 537, "train_info/time_within_train_step": 27.83901619911194, "step": 537} +{"train_info/time_between_train_steps": 0.0070514678955078125, "step": 537} +{"info/global_step": 538, "train_info/time_within_train_step": 28.005216598510742, "step": 538} +{"train_info/time_between_train_steps": 0.007597684860229492, "step": 538} +{"info/global_step": 539, "train_info/time_within_train_step": 27.978904247283936, "step": 539} +{"train_info/time_between_train_steps": 0.007032871246337891, "step": 539} +{"info/global_step": 540, "train_info/time_within_train_step": 27.97546148300171, "step": 540} +{"train_info/time_between_train_steps": 0.007422924041748047, "step": 540} +{"train_info/time_between_train_steps": 30.481203079223633, "step": 540} +{"info/global_step": 541, "train_info/time_within_train_step": 27.851648569107056, "step": 541} +{"train_info/time_between_train_steps": 0.005636453628540039, "step": 541} +{"info/global_step": 542, "train_info/time_within_train_step": 28.004557847976685, "step": 542} +{"train_info/time_between_train_steps": 0.005846738815307617, "step": 542} +{"info/global_step": 543, "train_info/time_within_train_step": 27.844549894332886, "step": 543} +{"train_info/time_between_train_steps": 0.005733013153076172, "step": 543} +{"info/global_step": 544, "train_info/time_within_train_step": 28.022465229034424, "step": 544} +{"train_info/time_between_train_steps": 0.012246370315551758, "step": 544} +{"info/global_step": 545, "train_info/time_within_train_step": 27.860750675201416, "step": 545} +{"train_info/time_between_train_steps": 0.006277799606323242, "step": 545} +{"info/global_step": 546, "train_info/time_within_train_step": 27.950593948364258, "step": 546} +{"train_info/time_between_train_steps": 0.005921363830566406, "step": 546} +{"info/global_step": 547, "train_info/time_within_train_step": 27.900510549545288, "step": 547} +{"train_info/time_between_train_steps": 0.006187915802001953, "step": 547} +{"info/global_step": 548, "train_info/time_within_train_step": 27.92233443260193, "step": 548} +{"train_info/time_between_train_steps": 0.011373043060302734, "step": 548} +{"info/global_step": 549, "train_info/time_within_train_step": 27.83729338645935, "step": 549} +{"train_info/time_between_train_steps": 0.005559682846069336, "step": 549} +{"info/global_step": 550, "train_info/time_within_train_step": 27.782644987106323, "step": 550} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740839225, "_runtime": 16031}, "step": 550} +{"logs": {"train/loss": 3.8046, "train/learning_rate": 0.0003611111111111111, "train/epoch": 20.01, "_timestamp": 1740839225, "_runtime": 16031}, "step": 550} +{"train_info/time_between_train_steps": 0.009241819381713867, "step": 550} +{"info/global_step": 551, "train_info/time_within_train_step": 27.868516445159912, "step": 551} +{"train_info/time_between_train_steps": 0.009954214096069336, "step": 551} +{"info/global_step": 552, "train_info/time_within_train_step": 27.86543035507202, "step": 552} +{"train_info/time_between_train_steps": 0.0054264068603515625, "step": 552} +{"info/global_step": 553, "train_info/time_within_train_step": 27.83580708503723, "step": 553} +{"train_info/time_between_train_steps": 0.0056493282318115234, "step": 553} +{"info/global_step": 554, "train_info/time_within_train_step": 27.837483406066895, "step": 554} +{"train_info/time_between_train_steps": 0.005422353744506836, "step": 554} +{"info/global_step": 555, "train_info/time_within_train_step": 27.850187063217163, "step": 555} +{"train_info/time_between_train_steps": 0.00551915168762207, "step": 555} +{"info/global_step": 556, "train_info/time_within_train_step": 27.865020275115967, "step": 556} +{"train_info/time_between_train_steps": 0.0060367584228515625, "step": 556} +{"info/global_step": 557, "train_info/time_within_train_step": 27.895012855529785, "step": 557} +{"train_info/time_between_train_steps": 0.005463123321533203, "step": 557} +{"info/global_step": 558, "train_info/time_within_train_step": 27.86424684524536, "step": 558} +{"train_info/time_between_train_steps": 0.005751848220825195, "step": 558} +{"info/global_step": 559, "train_info/time_within_train_step": 27.924052715301514, "step": 559} +{"train_info/time_between_train_steps": 0.005620241165161133, "step": 559} +{"info/global_step": 560, "train_info/time_within_train_step": 27.913856744766235, "step": 560} +{"train_info/time_between_train_steps": 0.009475946426391602, "step": 560} +{"info/global_step": 561, "train_info/time_within_train_step": 27.930074214935303, "step": 561} +{"train_info/time_between_train_steps": 0.008089542388916016, "step": 561} +{"info/global_step": 562, "train_info/time_within_train_step": 27.84295916557312, "step": 562} +{"train_info/time_between_train_steps": 0.012874364852905273, "step": 562} +{"info/global_step": 563, "train_info/time_within_train_step": 27.877854824066162, "step": 563} +{"train_info/time_between_train_steps": 0.010251760482788086, "step": 563} +{"info/global_step": 564, "train_info/time_within_train_step": 27.857059001922607, "step": 564} +{"train_info/time_between_train_steps": 0.005609035491943359, "step": 564} +{"info/global_step": 565, "train_info/time_within_train_step": 27.85010528564453, "step": 565} +{"train_info/time_between_train_steps": 0.007952690124511719, "step": 565} +{"info/global_step": 566, "train_info/time_within_train_step": 27.843584775924683, "step": 566} +{"train_info/time_between_train_steps": 0.007379770278930664, "step": 566} +{"info/global_step": 567, "train_info/time_within_train_step": 27.867974758148193, "step": 567} +{"train_info/time_between_train_steps": 0.00632476806640625, "step": 567} +{"train_info/time_between_train_steps": 30.369191646575928, "step": 567} +{"info/global_step": 568, "train_info/time_within_train_step": 27.83904719352722, "step": 568} +{"train_info/time_between_train_steps": 0.009093046188354492, "step": 568} +{"info/global_step": 569, "train_info/time_within_train_step": 27.936599731445312, "step": 569} +{"train_info/time_between_train_steps": 0.005321502685546875, "step": 569} +{"info/global_step": 570, "train_info/time_within_train_step": 27.84259295463562, "step": 570} +{"train_info/time_between_train_steps": 0.005700349807739258, "step": 570} +{"info/global_step": 571, "train_info/time_within_train_step": 27.97428560256958, "step": 571} +{"train_info/time_between_train_steps": 0.005720376968383789, "step": 571} +{"info/global_step": 572, "train_info/time_within_train_step": 27.82793402671814, "step": 572} +{"train_info/time_between_train_steps": 0.00935983657836914, "step": 572} +{"info/global_step": 573, "train_info/time_within_train_step": 27.962982416152954, "step": 573} +{"train_info/time_between_train_steps": 0.005796194076538086, "step": 573} +{"info/global_step": 574, "train_info/time_within_train_step": 27.984302043914795, "step": 574} +{"train_info/time_between_train_steps": 0.0055768489837646484, "step": 574} +{"info/global_step": 575, "train_info/time_within_train_step": 28.027758836746216, "step": 575} +{"train_info/time_between_train_steps": 0.006185293197631836, "step": 575} +{"info/global_step": 576, "train_info/time_within_train_step": 27.848342895507812, "step": 576} +{"train_info/time_between_train_steps": 0.005630970001220703, "step": 576} +{"info/global_step": 577, "train_info/time_within_train_step": 27.81645917892456, "step": 577} +{"train_info/time_between_train_steps": 0.005562782287597656, "step": 577} +{"info/global_step": 578, "train_info/time_within_train_step": 27.850732564926147, "step": 578} +{"train_info/time_between_train_steps": 0.008723258972167969, "step": 578} +{"info/global_step": 579, "train_info/time_within_train_step": 27.808130264282227, "step": 579} +{"train_info/time_between_train_steps": 0.008706331253051758, "step": 579} +{"info/global_step": 580, "train_info/time_within_train_step": 27.83866047859192, "step": 580} +{"train_info/time_between_train_steps": 0.006955146789550781, "step": 580} +{"info/global_step": 581, "train_info/time_within_train_step": 27.92470693588257, "step": 581} +{"train_info/time_between_train_steps": 0.009737014770507812, "step": 581} +{"info/global_step": 582, "train_info/time_within_train_step": 27.85173010826111, "step": 582} +{"train_info/time_between_train_steps": 0.010151147842407227, "step": 582} +{"info/global_step": 583, "train_info/time_within_train_step": 27.881864309310913, "step": 583} +{"train_info/time_between_train_steps": 0.005517721176147461, "step": 583} +{"info/global_step": 584, "train_info/time_within_train_step": 27.812114477157593, "step": 584} +{"train_info/time_between_train_steps": 0.0054912567138671875, "step": 584} +{"info/global_step": 585, "train_info/time_within_train_step": 27.892736196517944, "step": 585} +{"train_info/time_between_train_steps": 0.005356550216674805, "step": 585} +{"info/global_step": 586, "train_info/time_within_train_step": 27.836791038513184, "step": 586} +{"train_info/time_between_train_steps": 0.0055999755859375, "step": 586} +{"info/global_step": 587, "train_info/time_within_train_step": 27.831902980804443, "step": 587} +{"train_info/time_between_train_steps": 0.0099639892578125, "step": 587} +{"info/global_step": 588, "train_info/time_within_train_step": 27.846962451934814, "step": 588} +{"train_info/time_between_train_steps": 0.00542759895324707, "step": 588} +{"info/global_step": 589, "train_info/time_within_train_step": 27.860331773757935, "step": 589} +{"train_info/time_between_train_steps": 0.00970149040222168, "step": 589} +{"info/global_step": 590, "train_info/time_within_train_step": 27.92347550392151, "step": 590} +{"train_info/time_between_train_steps": 0.0062999725341796875, "step": 590} +{"info/global_step": 591, "train_info/time_within_train_step": 27.826313734054565, "step": 591} +{"train_info/time_between_train_steps": 0.0057561397552490234, "step": 591} +{"info/global_step": 592, "train_info/time_within_train_step": 27.883646965026855, "step": 592} +{"train_info/time_between_train_steps": 0.005785703659057617, "step": 592} +{"info/global_step": 593, "train_info/time_within_train_step": 27.877159595489502, "step": 593} +{"train_info/time_between_train_steps": 0.008774757385253906, "step": 593} +{"info/global_step": 594, "train_info/time_within_train_step": 27.873844861984253, "step": 594} +{"train_info/time_between_train_steps": 0.006281852722167969, "step": 594} +{"train_info/time_between_train_steps": 30.866169452667236, "step": 594} +{"info/global_step": 595, "train_info/time_within_train_step": 27.863967895507812, "step": 595} +{"train_info/time_between_train_steps": 0.0058934688568115234, "step": 595} +{"info/global_step": 596, "train_info/time_within_train_step": 28.024685621261597, "step": 596} +{"train_info/time_between_train_steps": 0.00611567497253418, "step": 596} +{"info/global_step": 597, "train_info/time_within_train_step": 27.914451599121094, "step": 597} +{"train_info/time_between_train_steps": 0.007211923599243164, "step": 597} +{"info/global_step": 598, "train_info/time_within_train_step": 27.943758964538574, "step": 598} +{"train_info/time_between_train_steps": 0.005908489227294922, "step": 598} +{"info/global_step": 599, "train_info/time_within_train_step": 27.831801891326904, "step": 599} +{"train_info/time_between_train_steps": 0.005747556686401367, "step": 599} +{"info/global_step": 600, "train_info/time_within_train_step": 27.991815328598022, "step": 600} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740840681, "_runtime": 17487}, "step": 600} +{"logs": {"train/loss": 3.706, "train/learning_rate": 0.0003333333333333333, "train/epoch": 22.0, "_timestamp": 1740840681, "_runtime": 17487}, "step": 600} +{"train_info/time_between_train_steps": 15.396233558654785, "step": 600} +{"info/global_step": 601, "train_info/time_within_train_step": 28.06710958480835, "step": 601} +{"train_info/time_between_train_steps": 0.010864019393920898, "step": 601} +{"info/global_step": 602, "train_info/time_within_train_step": 27.916870832443237, "step": 602} +{"train_info/time_between_train_steps": 0.007213592529296875, "step": 602} +{"info/global_step": 603, "train_info/time_within_train_step": 27.88351321220398, "step": 603} +{"train_info/time_between_train_steps": 0.006333827972412109, "step": 603} +{"info/global_step": 604, "train_info/time_within_train_step": 27.9801824092865, "step": 604} +{"train_info/time_between_train_steps": 0.00617218017578125, "step": 604} +{"info/global_step": 605, "train_info/time_within_train_step": 27.895495176315308, "step": 605} +{"train_info/time_between_train_steps": 0.005813121795654297, "step": 605} +{"info/global_step": 606, "train_info/time_within_train_step": 27.908781051635742, "step": 606} +{"train_info/time_between_train_steps": 0.005840778350830078, "step": 606} +{"info/global_step": 607, "train_info/time_within_train_step": 27.865482807159424, "step": 607} +{"train_info/time_between_train_steps": 0.00608515739440918, "step": 607} +{"info/global_step": 608, "train_info/time_within_train_step": 27.8377206325531, "step": 608} +{"train_info/time_between_train_steps": 0.006588935852050781, "step": 608} +{"info/global_step": 609, "train_info/time_within_train_step": 27.856555223464966, "step": 609} +{"train_info/time_between_train_steps": 0.007977724075317383, "step": 609} +{"info/global_step": 610, "train_info/time_within_train_step": 27.864930629730225, "step": 610} +{"train_info/time_between_train_steps": 0.008066892623901367, "step": 610} +{"info/global_step": 611, "train_info/time_within_train_step": 27.860334157943726, "step": 611} +{"train_info/time_between_train_steps": 0.01156306266784668, "step": 611} +{"info/global_step": 612, "train_info/time_within_train_step": 27.86117172241211, "step": 612} +{"train_info/time_between_train_steps": 0.005742311477661133, "step": 612} +{"info/global_step": 613, "train_info/time_within_train_step": 27.84760093688965, "step": 613} +{"train_info/time_between_train_steps": 0.0058956146240234375, "step": 613} +{"info/global_step": 614, "train_info/time_within_train_step": 27.864999771118164, "step": 614} +{"train_info/time_between_train_steps": 0.007939577102661133, "step": 614} +{"info/global_step": 615, "train_info/time_within_train_step": 27.882610082626343, "step": 615} +{"train_info/time_between_train_steps": 0.010404586791992188, "step": 615} +{"info/global_step": 616, "train_info/time_within_train_step": 27.878270387649536, "step": 616} +{"train_info/time_between_train_steps": 0.005709409713745117, "step": 616} +{"info/global_step": 617, "train_info/time_within_train_step": 27.806825399398804, "step": 617} +{"train_info/time_between_train_steps": 0.005889415740966797, "step": 617} +{"info/global_step": 618, "train_info/time_within_train_step": 27.807262897491455, "step": 618} +{"train_info/time_between_train_steps": 0.00574183464050293, "step": 618} +{"info/global_step": 619, "train_info/time_within_train_step": 27.879461765289307, "step": 619} +{"train_info/time_between_train_steps": 0.007855415344238281, "step": 619} +{"info/global_step": 620, "train_info/time_within_train_step": 27.965074062347412, "step": 620} +{"train_info/time_between_train_steps": 0.006166696548461914, "step": 620} +{"info/global_step": 621, "train_info/time_within_train_step": 27.870923280715942, "step": 621} +{"train_info/time_between_train_steps": 0.006470680236816406, "step": 621} +{"train_info/time_between_train_steps": 30.09471821784973, "step": 621} +{"info/global_step": 622, "train_info/time_within_train_step": 27.859275817871094, "step": 622} +{"train_info/time_between_train_steps": 0.006463527679443359, "step": 622} +{"info/global_step": 623, "train_info/time_within_train_step": 27.96415138244629, "step": 623} +{"train_info/time_between_train_steps": 0.005462169647216797, "step": 623} +{"info/global_step": 624, "train_info/time_within_train_step": 27.8131046295166, "step": 624} +{"train_info/time_between_train_steps": 0.005998849868774414, "step": 624} +{"info/global_step": 625, "train_info/time_within_train_step": 27.913182258605957, "step": 625} +{"train_info/time_between_train_steps": 0.009515523910522461, "step": 625} +{"info/global_step": 626, "train_info/time_within_train_step": 27.84808588027954, "step": 626} +{"train_info/time_between_train_steps": 0.007153749465942383, "step": 626} +{"info/global_step": 627, "train_info/time_within_train_step": 28.00502848625183, "step": 627} +{"train_info/time_between_train_steps": 0.005777597427368164, "step": 627} +{"info/global_step": 628, "train_info/time_within_train_step": 27.89423966407776, "step": 628} +{"train_info/time_between_train_steps": 0.009156465530395508, "step": 628} +{"info/global_step": 629, "train_info/time_within_train_step": 27.96561074256897, "step": 629} +{"train_info/time_between_train_steps": 0.010503292083740234, "step": 629} +{"info/global_step": 630, "train_info/time_within_train_step": 27.827880382537842, "step": 630} +{"train_info/time_between_train_steps": 0.005637168884277344, "step": 630} +{"info/global_step": 631, "train_info/time_within_train_step": 27.80354070663452, "step": 631} +{"train_info/time_between_train_steps": 0.008019685745239258, "step": 631} +{"info/global_step": 632, "train_info/time_within_train_step": 27.831397533416748, "step": 632} +{"train_info/time_between_train_steps": 0.0053632259368896484, "step": 632} +{"info/global_step": 633, "train_info/time_within_train_step": 27.87848401069641, "step": 633} +{"train_info/time_between_train_steps": 0.006356239318847656, "step": 633} +{"info/global_step": 634, "train_info/time_within_train_step": 27.935900688171387, "step": 634} +{"train_info/time_between_train_steps": 0.005411624908447266, "step": 634} +{"info/global_step": 635, "train_info/time_within_train_step": 27.87170433998108, "step": 635} +{"train_info/time_between_train_steps": 0.009468317031860352, "step": 635} +{"info/global_step": 636, "train_info/time_within_train_step": 27.84793782234192, "step": 636} +{"train_info/time_between_train_steps": 0.005484580993652344, "step": 636} +{"info/global_step": 637, "train_info/time_within_train_step": 27.796725749969482, "step": 637} +{"train_info/time_between_train_steps": 0.005520343780517578, "step": 637} +{"info/global_step": 638, "train_info/time_within_train_step": 27.789829969406128, "step": 638} +{"train_info/time_between_train_steps": 0.01019906997680664, "step": 638} +{"info/global_step": 639, "train_info/time_within_train_step": 27.814900159835815, "step": 639} +{"train_info/time_between_train_steps": 0.00526118278503418, "step": 639} +{"info/global_step": 640, "train_info/time_within_train_step": 27.857258319854736, "step": 640} +{"train_info/time_between_train_steps": 0.005593299865722656, "step": 640} +{"info/global_step": 641, "train_info/time_within_train_step": 27.873435735702515, "step": 641} +{"train_info/time_between_train_steps": 0.01017904281616211, "step": 641} +{"info/global_step": 642, "train_info/time_within_train_step": 27.86690878868103, "step": 642} +{"train_info/time_between_train_steps": 0.005456447601318359, "step": 642} +{"info/global_step": 643, "train_info/time_within_train_step": 27.80998206138611, "step": 643} +{"train_info/time_between_train_steps": 0.0054323673248291016, "step": 643} +{"info/global_step": 644, "train_info/time_within_train_step": 27.784589052200317, "step": 644} +{"train_info/time_between_train_steps": 0.0054891109466552734, "step": 644} +{"info/global_step": 645, "train_info/time_within_train_step": 27.845566749572754, "step": 645} +{"train_info/time_between_train_steps": 0.005760669708251953, "step": 645} +{"info/global_step": 646, "train_info/time_within_train_step": 27.830265522003174, "step": 646} +{"train_info/time_between_train_steps": 0.005522727966308594, "step": 646} +{"info/global_step": 647, "train_info/time_within_train_step": 27.835288047790527, "step": 647} +{"train_info/time_between_train_steps": 0.006028890609741211, "step": 647} +{"info/global_step": 648, "train_info/time_within_train_step": 27.89777421951294, "step": 648} +{"train_info/time_between_train_steps": 0.008013010025024414, "step": 648} +{"train_info/time_between_train_steps": 30.509405851364136, "step": 648} +{"info/global_step": 649, "train_info/time_within_train_step": 27.960862636566162, "step": 649} +{"train_info/time_between_train_steps": 0.0054967403411865234, "step": 649} +{"info/global_step": 650, "train_info/time_within_train_step": 27.9853298664093, "step": 650} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740842152, "_runtime": 18958}, "step": 650} +{"logs": {"train/loss": 3.6192, "train/learning_rate": 0.00030555555555555555, "train/epoch": 24.0, "_timestamp": 1740842152, "_runtime": 18958}, "step": 650} +{"train_info/time_between_train_steps": 0.015804290771484375, "step": 650} +{"info/global_step": 651, "train_info/time_within_train_step": 27.846230030059814, "step": 651} +{"train_info/time_between_train_steps": 0.00545501708984375, "step": 651} +{"info/global_step": 652, "train_info/time_within_train_step": 28.121474504470825, "step": 652} +{"train_info/time_between_train_steps": 0.00894618034362793, "step": 652} +{"info/global_step": 653, "train_info/time_within_train_step": 27.910502672195435, "step": 653} +{"train_info/time_between_train_steps": 0.006152153015136719, "step": 653} +{"info/global_step": 654, "train_info/time_within_train_step": 28.03154444694519, "step": 654} +{"train_info/time_between_train_steps": 0.006017446517944336, "step": 654} +{"info/global_step": 655, "train_info/time_within_train_step": 27.897331714630127, "step": 655} +{"train_info/time_between_train_steps": 0.006819486618041992, "step": 655} +{"info/global_step": 656, "train_info/time_within_train_step": 27.932653665542603, "step": 656} +{"train_info/time_between_train_steps": 0.009726524353027344, "step": 656} +{"info/global_step": 657, "train_info/time_within_train_step": 27.877564191818237, "step": 657} +{"train_info/time_between_train_steps": 0.005797386169433594, "step": 657} +{"info/global_step": 658, "train_info/time_within_train_step": 27.8681001663208, "step": 658} +{"train_info/time_between_train_steps": 0.013153314590454102, "step": 658} +{"info/global_step": 659, "train_info/time_within_train_step": 27.86588454246521, "step": 659} +{"train_info/time_between_train_steps": 0.005904674530029297, "step": 659} +{"info/global_step": 660, "train_info/time_within_train_step": 27.848585844039917, "step": 660} +{"train_info/time_between_train_steps": 0.005739927291870117, "step": 660} +{"info/global_step": 661, "train_info/time_within_train_step": 27.846876859664917, "step": 661} +{"train_info/time_between_train_steps": 0.005579948425292969, "step": 661} +{"info/global_step": 662, "train_info/time_within_train_step": 27.854531288146973, "step": 662} +{"train_info/time_between_train_steps": 0.00599217414855957, "step": 662} +{"info/global_step": 663, "train_info/time_within_train_step": 27.825361013412476, "step": 663} +{"train_info/time_between_train_steps": 0.005855560302734375, "step": 663} +{"info/global_step": 664, "train_info/time_within_train_step": 27.850297212600708, "step": 664} +{"train_info/time_between_train_steps": 0.010530948638916016, "step": 664} +{"info/global_step": 665, "train_info/time_within_train_step": 27.974830389022827, "step": 665} +{"train_info/time_between_train_steps": 0.011312484741210938, "step": 665} +{"info/global_step": 666, "train_info/time_within_train_step": 27.878457069396973, "step": 666} +{"train_info/time_between_train_steps": 0.005690097808837891, "step": 666} +{"info/global_step": 667, "train_info/time_within_train_step": 27.863823413848877, "step": 667} +{"train_info/time_between_train_steps": 0.005992889404296875, "step": 667} +{"info/global_step": 668, "train_info/time_within_train_step": 27.89633297920227, "step": 668} +{"train_info/time_between_train_steps": 0.0058269500732421875, "step": 668} +{"info/global_step": 669, "train_info/time_within_train_step": 27.885395765304565, "step": 669} +{"train_info/time_between_train_steps": 0.008023262023925781, "step": 669} +{"info/global_step": 670, "train_info/time_within_train_step": 27.880375385284424, "step": 670} +{"train_info/time_between_train_steps": 0.006515026092529297, "step": 670} +{"info/global_step": 671, "train_info/time_within_train_step": 27.896501064300537, "step": 671} +{"train_info/time_between_train_steps": 0.00605010986328125, "step": 671} +{"info/global_step": 672, "train_info/time_within_train_step": 27.91500735282898, "step": 672} +{"train_info/time_between_train_steps": 0.005860805511474609, "step": 672} +{"info/global_step": 673, "train_info/time_within_train_step": 27.864070653915405, "step": 673} +{"train_info/time_between_train_steps": 0.005808830261230469, "step": 673} +{"info/global_step": 674, "train_info/time_within_train_step": 27.878456592559814, "step": 674} +{"train_info/time_between_train_steps": 0.006402015686035156, "step": 674} +{"info/global_step": 675, "train_info/time_within_train_step": 27.885738849639893, "step": 675} +{"train_info/time_between_train_steps": 0.006586313247680664, "step": 675} +{"train_info/time_between_train_steps": 30.430750131607056, "step": 675} +{"info/global_step": 676, "train_info/time_within_train_step": 27.866924285888672, "step": 676} +{"train_info/time_between_train_steps": 0.006910562515258789, "step": 676} +{"info/global_step": 677, "train_info/time_within_train_step": 28.032556772232056, "step": 677} +{"train_info/time_between_train_steps": 0.005759716033935547, "step": 677} +{"info/global_step": 678, "train_info/time_within_train_step": 27.821990251541138, "step": 678} +{"train_info/time_between_train_steps": 0.005783557891845703, "step": 678} +{"info/global_step": 679, "train_info/time_within_train_step": 28.068729639053345, "step": 679} +{"train_info/time_between_train_steps": 0.005565166473388672, "step": 679} +{"info/global_step": 680, "train_info/time_within_train_step": 27.8462495803833, "step": 680} +{"train_info/time_between_train_steps": 0.007920265197753906, "step": 680} +{"info/global_step": 681, "train_info/time_within_train_step": 28.282124042510986, "step": 681} +{"train_info/time_between_train_steps": 0.005806446075439453, "step": 681} +{"info/global_step": 682, "train_info/time_within_train_step": 27.88621711730957, "step": 682} +{"train_info/time_between_train_steps": 0.005989551544189453, "step": 682} +{"info/global_step": 683, "train_info/time_within_train_step": 28.064454078674316, "step": 683} +{"train_info/time_between_train_steps": 0.005563974380493164, "step": 683} +{"info/global_step": 684, "train_info/time_within_train_step": 27.882591724395752, "step": 684} +{"train_info/time_between_train_steps": 0.005559206008911133, "step": 684} +{"info/global_step": 685, "train_info/time_within_train_step": 27.807411432266235, "step": 685} +{"train_info/time_between_train_steps": 0.005626201629638672, "step": 685} +{"info/global_step": 686, "train_info/time_within_train_step": 27.86114764213562, "step": 686} +{"train_info/time_between_train_steps": 0.005454540252685547, "step": 686} +{"info/global_step": 687, "train_info/time_within_train_step": 27.81855273246765, "step": 687} +{"train_info/time_between_train_steps": 0.005463361740112305, "step": 687} +{"info/global_step": 688, "train_info/time_within_train_step": 27.788217544555664, "step": 688} +{"train_info/time_between_train_steps": 0.005451679229736328, "step": 688} +{"info/global_step": 689, "train_info/time_within_train_step": 27.838131427764893, "step": 689} +{"train_info/time_between_train_steps": 0.005377292633056641, "step": 689} +{"info/global_step": 690, "train_info/time_within_train_step": 27.870269060134888, "step": 690} +{"train_info/time_between_train_steps": 0.007775068283081055, "step": 690} +{"info/global_step": 691, "train_info/time_within_train_step": 27.863428354263306, "step": 691} +{"train_info/time_between_train_steps": 0.005684375762939453, "step": 691} +{"info/global_step": 692, "train_info/time_within_train_step": 27.8858380317688, "step": 692} +{"train_info/time_between_train_steps": 0.009353876113891602, "step": 692} +{"info/global_step": 693, "train_info/time_within_train_step": 27.87053108215332, "step": 693} +{"train_info/time_between_train_steps": 0.005512714385986328, "step": 693} +{"info/global_step": 694, "train_info/time_within_train_step": 27.838082551956177, "step": 694} +{"train_info/time_between_train_steps": 0.010472536087036133, "step": 694} +{"info/global_step": 695, "train_info/time_within_train_step": 28.067343950271606, "step": 695} +{"train_info/time_between_train_steps": 0.006440877914428711, "step": 695} +{"info/global_step": 696, "train_info/time_within_train_step": 27.941232681274414, "step": 696} +{"train_info/time_between_train_steps": 0.0055658817291259766, "step": 696} +{"info/global_step": 697, "train_info/time_within_train_step": 27.804091215133667, "step": 697} +{"train_info/time_between_train_steps": 0.007155179977416992, "step": 697} +{"info/global_step": 698, "train_info/time_within_train_step": 27.86318802833557, "step": 698} +{"train_info/time_between_train_steps": 0.005675792694091797, "step": 698} +{"info/global_step": 699, "train_info/time_within_train_step": 27.868014335632324, "step": 699} +{"train_info/time_between_train_steps": 0.005633115768432617, "step": 699} +{"info/global_step": 700, "train_info/time_within_train_step": 27.854140043258667, "step": 700} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740843577, "_runtime": 20383}, "step": 700} +{"logs": {"train/loss": 3.4774, "train/learning_rate": 0.0002777777777777778, "train/epoch": 25.02, "_timestamp": 1740843577, "_runtime": 20383}, "step": 700} +{"train_info/time_between_train_steps": 13.385041236877441, "step": 700} +{"info/global_step": 701, "train_info/time_within_train_step": 27.86581802368164, "step": 701} +{"train_info/time_between_train_steps": 0.006209850311279297, "step": 701} +{"info/global_step": 702, "train_info/time_within_train_step": 27.87184739112854, "step": 702} +{"train_info/time_between_train_steps": 0.006181955337524414, "step": 702} +{"train_info/time_between_train_steps": 29.879961252212524, "step": 702} +{"info/global_step": 703, "train_info/time_within_train_step": 27.851000547409058, "step": 703} +{"train_info/time_between_train_steps": 0.005379438400268555, "step": 703} +{"info/global_step": 704, "train_info/time_within_train_step": 27.92788863182068, "step": 704} +{"train_info/time_between_train_steps": 0.007518291473388672, "step": 704} +{"info/global_step": 705, "train_info/time_within_train_step": 27.818341970443726, "step": 705} +{"train_info/time_between_train_steps": 0.005292654037475586, "step": 705} +{"info/global_step": 706, "train_info/time_within_train_step": 27.914865016937256, "step": 706} +{"train_info/time_between_train_steps": 0.0055027008056640625, "step": 706} +{"info/global_step": 707, "train_info/time_within_train_step": 27.791539192199707, "step": 707} +{"train_info/time_between_train_steps": 0.005545616149902344, "step": 707} +{"info/global_step": 708, "train_info/time_within_train_step": 27.920127391815186, "step": 708} +{"train_info/time_between_train_steps": 0.005905628204345703, "step": 708} +{"info/global_step": 709, "train_info/time_within_train_step": 27.845271825790405, "step": 709} +{"train_info/time_between_train_steps": 0.006839752197265625, "step": 709} +{"info/global_step": 710, "train_info/time_within_train_step": 28.20642876625061, "step": 710} +{"train_info/time_between_train_steps": 0.00719904899597168, "step": 710} +{"info/global_step": 711, "train_info/time_within_train_step": 27.942796230316162, "step": 711} +{"train_info/time_between_train_steps": 0.0064618587493896484, "step": 711} +{"info/global_step": 712, "train_info/time_within_train_step": 27.880216360092163, "step": 712} +{"train_info/time_between_train_steps": 0.005315542221069336, "step": 712} +{"info/global_step": 713, "train_info/time_within_train_step": 27.88123106956482, "step": 713} +{"train_info/time_between_train_steps": 0.005743503570556641, "step": 713} +{"info/global_step": 714, "train_info/time_within_train_step": 27.84316849708557, "step": 714} +{"train_info/time_between_train_steps": 0.005388021469116211, "step": 714} +{"info/global_step": 715, "train_info/time_within_train_step": 27.815740823745728, "step": 715} +{"train_info/time_between_train_steps": 0.005829811096191406, "step": 715} +{"info/global_step": 716, "train_info/time_within_train_step": 27.807511806488037, "step": 716} +{"train_info/time_between_train_steps": 0.005693197250366211, "step": 716} +{"info/global_step": 717, "train_info/time_within_train_step": 27.830188989639282, "step": 717} +{"train_info/time_between_train_steps": 0.0063190460205078125, "step": 717} +{"info/global_step": 718, "train_info/time_within_train_step": 27.856396675109863, "step": 718} +{"train_info/time_between_train_steps": 0.010410308837890625, "step": 718} +{"info/global_step": 719, "train_info/time_within_train_step": 27.869321584701538, "step": 719} +{"train_info/time_between_train_steps": 0.005721330642700195, "step": 719} +{"info/global_step": 720, "train_info/time_within_train_step": 27.848347663879395, "step": 720} +{"train_info/time_between_train_steps": 0.010354280471801758, "step": 720} +{"info/global_step": 721, "train_info/time_within_train_step": 27.81459617614746, "step": 721} +{"train_info/time_between_train_steps": 0.0057430267333984375, "step": 721} +{"info/global_step": 722, "train_info/time_within_train_step": 27.86828112602234, "step": 722} +{"train_info/time_between_train_steps": 0.005928993225097656, "step": 722} +{"info/global_step": 723, "train_info/time_within_train_step": 27.86976718902588, "step": 723} +{"train_info/time_between_train_steps": 0.005457162857055664, "step": 723} +{"info/global_step": 724, "train_info/time_within_train_step": 27.826661586761475, "step": 724} +{"train_info/time_between_train_steps": 0.005770206451416016, "step": 724} +{"info/global_step": 725, "train_info/time_within_train_step": 27.962380170822144, "step": 725} +{"train_info/time_between_train_steps": 0.005262136459350586, "step": 725} +{"info/global_step": 726, "train_info/time_within_train_step": 27.854978561401367, "step": 726} +{"train_info/time_between_train_steps": 0.005641937255859375, "step": 726} +{"info/global_step": 727, "train_info/time_within_train_step": 27.88543725013733, "step": 727} +{"train_info/time_between_train_steps": 0.0065326690673828125, "step": 727} +{"info/global_step": 728, "train_info/time_within_train_step": 27.818047046661377, "step": 728} +{"train_info/time_between_train_steps": 0.005895853042602539, "step": 728} +{"info/global_step": 729, "train_info/time_within_train_step": 27.816906452178955, "step": 729} +{"train_info/time_between_train_steps": 0.006314992904663086, "step": 729} +{"train_info/time_between_train_steps": 29.961889505386353, "step": 729} +{"info/global_step": 730, "train_info/time_within_train_step": 27.869518518447876, "step": 730} +{"train_info/time_between_train_steps": 0.005135059356689453, "step": 730} +{"info/global_step": 731, "train_info/time_within_train_step": 27.955256700515747, "step": 731} +{"train_info/time_between_train_steps": 0.005533933639526367, "step": 731} +{"info/global_step": 732, "train_info/time_within_train_step": 27.848098754882812, "step": 732} +{"train_info/time_between_train_steps": 0.0057179927825927734, "step": 732} +{"info/global_step": 733, "train_info/time_within_train_step": 27.943501710891724, "step": 733} +{"train_info/time_between_train_steps": 0.009266853332519531, "step": 733} +{"info/global_step": 734, "train_info/time_within_train_step": 27.847459316253662, "step": 734} +{"train_info/time_between_train_steps": 0.005782365798950195, "step": 734} +{"info/global_step": 735, "train_info/time_within_train_step": 28.024093866348267, "step": 735} +{"train_info/time_between_train_steps": 0.0072209835052490234, "step": 735} +{"info/global_step": 736, "train_info/time_within_train_step": 27.848414659500122, "step": 736} +{"train_info/time_between_train_steps": 0.0059697628021240234, "step": 736} +{"info/global_step": 737, "train_info/time_within_train_step": 27.908924102783203, "step": 737} +{"train_info/time_between_train_steps": 0.005747556686401367, "step": 737} +{"info/global_step": 738, "train_info/time_within_train_step": 27.85844898223877, "step": 738} +{"train_info/time_between_train_steps": 0.012345075607299805, "step": 738} +{"info/global_step": 739, "train_info/time_within_train_step": 27.835562467575073, "step": 739} +{"train_info/time_between_train_steps": 0.0051648616790771484, "step": 739} +{"info/global_step": 740, "train_info/time_within_train_step": 27.956993341445923, "step": 740} +{"train_info/time_between_train_steps": 0.005357027053833008, "step": 740} +{"info/global_step": 741, "train_info/time_within_train_step": 27.86070728302002, "step": 741} +{"train_info/time_between_train_steps": 0.009363651275634766, "step": 741} +{"info/global_step": 742, "train_info/time_within_train_step": 27.852092504501343, "step": 742} +{"train_info/time_between_train_steps": 0.005480766296386719, "step": 742} +{"info/global_step": 743, "train_info/time_within_train_step": 27.811574459075928, "step": 743} +{"train_info/time_between_train_steps": 0.0054798126220703125, "step": 743} +{"info/global_step": 744, "train_info/time_within_train_step": 27.834659814834595, "step": 744} +{"train_info/time_between_train_steps": 0.005580902099609375, "step": 744} +{"info/global_step": 745, "train_info/time_within_train_step": 27.873377084732056, "step": 745} +{"train_info/time_between_train_steps": 0.005263328552246094, "step": 745} +{"info/global_step": 746, "train_info/time_within_train_step": 27.86015248298645, "step": 746} +{"train_info/time_between_train_steps": 0.005394697189331055, "step": 746} +{"info/global_step": 747, "train_info/time_within_train_step": 27.80195188522339, "step": 747} +{"train_info/time_between_train_steps": 0.0056552886962890625, "step": 747} +{"info/global_step": 748, "train_info/time_within_train_step": 27.821431159973145, "step": 748} +{"train_info/time_between_train_steps": 0.005414247512817383, "step": 748} +{"info/global_step": 749, "train_info/time_within_train_step": 27.798651933670044, "step": 749} +{"train_info/time_between_train_steps": 0.005332231521606445, "step": 749} +{"info/global_step": 750, "train_info/time_within_train_step": 27.93459725379944, "step": 750} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740845045, "_runtime": 21851}, "step": 750} +{"logs": {"train/loss": 3.4742, "train/learning_rate": 0.00025, "train/epoch": 27.02, "_timestamp": 1740845045, "_runtime": 21851}, "step": 750} +{"train_info/time_between_train_steps": 0.01610708236694336, "step": 750} +{"info/global_step": 751, "train_info/time_within_train_step": 27.817842721939087, "step": 751} +{"train_info/time_between_train_steps": 0.005716800689697266, "step": 751} +{"info/global_step": 752, "train_info/time_within_train_step": 27.887842655181885, "step": 752} +{"train_info/time_between_train_steps": 0.008172273635864258, "step": 752} +{"info/global_step": 753, "train_info/time_within_train_step": 27.837313652038574, "step": 753} +{"train_info/time_between_train_steps": 0.006654977798461914, "step": 753} +{"info/global_step": 754, "train_info/time_within_train_step": 27.875598907470703, "step": 754} +{"train_info/time_between_train_steps": 0.00548243522644043, "step": 754} +{"info/global_step": 755, "train_info/time_within_train_step": 27.973631381988525, "step": 755} +{"train_info/time_between_train_steps": 0.009785652160644531, "step": 755} +{"info/global_step": 756, "train_info/time_within_train_step": 28.048564672470093, "step": 756} +{"train_info/time_between_train_steps": 0.007142066955566406, "step": 756} +{"train_info/time_between_train_steps": 31.18745994567871, "step": 756} +{"info/global_step": 757, "train_info/time_within_train_step": 27.84940791130066, "step": 757} +{"train_info/time_between_train_steps": 0.010415792465209961, "step": 757} +{"info/global_step": 758, "train_info/time_within_train_step": 28.09257483482361, "step": 758} +{"train_info/time_between_train_steps": 0.0073146820068359375, "step": 758} +{"info/global_step": 759, "train_info/time_within_train_step": 27.90846085548401, "step": 759} +{"train_info/time_between_train_steps": 0.00824117660522461, "step": 759} +{"info/global_step": 760, "train_info/time_within_train_step": 28.03617525100708, "step": 760} +{"train_info/time_between_train_steps": 0.010481834411621094, "step": 760} +{"info/global_step": 761, "train_info/time_within_train_step": 27.843754053115845, "step": 761} +{"train_info/time_between_train_steps": 0.006067514419555664, "step": 761} +{"info/global_step": 762, "train_info/time_within_train_step": 28.058298110961914, "step": 762} +{"train_info/time_between_train_steps": 0.006216526031494141, "step": 762} +{"info/global_step": 763, "train_info/time_within_train_step": 27.902299642562866, "step": 763} +{"train_info/time_between_train_steps": 0.0058765411376953125, "step": 763} +{"info/global_step": 764, "train_info/time_within_train_step": 28.002859115600586, "step": 764} +{"train_info/time_between_train_steps": 0.0066301822662353516, "step": 764} +{"info/global_step": 765, "train_info/time_within_train_step": 27.903735160827637, "step": 765} +{"train_info/time_between_train_steps": 0.008172273635864258, "step": 765} +{"info/global_step": 766, "train_info/time_within_train_step": 27.89102029800415, "step": 766} +{"train_info/time_between_train_steps": 0.010329723358154297, "step": 766} +{"info/global_step": 767, "train_info/time_within_train_step": 27.889973878860474, "step": 767} +{"train_info/time_between_train_steps": 0.007848501205444336, "step": 767} +{"info/global_step": 768, "train_info/time_within_train_step": 27.906309127807617, "step": 768} +{"train_info/time_between_train_steps": 0.009578466415405273, "step": 768} +{"info/global_step": 769, "train_info/time_within_train_step": 27.903334379196167, "step": 769} +{"train_info/time_between_train_steps": 0.009097576141357422, "step": 769} +{"info/global_step": 770, "train_info/time_within_train_step": 27.99630880355835, "step": 770} +{"train_info/time_between_train_steps": 0.0062618255615234375, "step": 770} +{"info/global_step": 771, "train_info/time_within_train_step": 27.94390320777893, "step": 771} +{"train_info/time_between_train_steps": 0.005483388900756836, "step": 771} +{"info/global_step": 772, "train_info/time_within_train_step": 27.891854763031006, "step": 772} +{"train_info/time_between_train_steps": 0.005778789520263672, "step": 772} +{"info/global_step": 773, "train_info/time_within_train_step": 27.890225410461426, "step": 773} +{"train_info/time_between_train_steps": 0.005999088287353516, "step": 773} +{"info/global_step": 774, "train_info/time_within_train_step": 27.88193130493164, "step": 774} +{"train_info/time_between_train_steps": 0.00578618049621582, "step": 774} +{"info/global_step": 775, "train_info/time_within_train_step": 27.85020112991333, "step": 775} +{"train_info/time_between_train_steps": 0.005788087844848633, "step": 775} +{"info/global_step": 776, "train_info/time_within_train_step": 27.88096022605896, "step": 776} +{"train_info/time_between_train_steps": 0.00575709342956543, "step": 776} +{"info/global_step": 777, "train_info/time_within_train_step": 27.898780822753906, "step": 777} +{"train_info/time_between_train_steps": 0.005520820617675781, "step": 777} +{"info/global_step": 778, "train_info/time_within_train_step": 27.864516258239746, "step": 778} +{"train_info/time_between_train_steps": 0.013519763946533203, "step": 778} +{"info/global_step": 779, "train_info/time_within_train_step": 27.892046451568604, "step": 779} +{"train_info/time_between_train_steps": 0.00619816780090332, "step": 779} +{"info/global_step": 780, "train_info/time_within_train_step": 27.92564296722412, "step": 780} +{"train_info/time_between_train_steps": 0.006886005401611328, "step": 780} +{"info/global_step": 781, "train_info/time_within_train_step": 27.914897918701172, "step": 781} +{"train_info/time_between_train_steps": 0.007615327835083008, "step": 781} +{"info/global_step": 782, "train_info/time_within_train_step": 27.889331102371216, "step": 782} +{"train_info/time_between_train_steps": 0.0067713260650634766, "step": 782} +{"info/global_step": 783, "train_info/time_within_train_step": 27.93960428237915, "step": 783} +{"train_info/time_between_train_steps": 0.0071446895599365234, "step": 783} +{"train_info/time_between_train_steps": 29.935412406921387, "step": 783} +{"info/global_step": 784, "train_info/time_within_train_step": 27.83754277229309, "step": 784} +{"train_info/time_between_train_steps": 0.005106687545776367, "step": 784} +{"info/global_step": 785, "train_info/time_within_train_step": 28.071072816848755, "step": 785} +{"train_info/time_between_train_steps": 0.005331754684448242, "step": 785} +{"info/global_step": 786, "train_info/time_within_train_step": 27.82002902030945, "step": 786} +{"train_info/time_between_train_steps": 0.00952601432800293, "step": 786} +{"info/global_step": 787, "train_info/time_within_train_step": 27.99164605140686, "step": 787} +{"train_info/time_between_train_steps": 0.0057756900787353516, "step": 787} +{"info/global_step": 788, "train_info/time_within_train_step": 27.868584394454956, "step": 788} +{"train_info/time_between_train_steps": 0.005635738372802734, "step": 788} +{"info/global_step": 789, "train_info/time_within_train_step": 27.950015783309937, "step": 789} +{"train_info/time_between_train_steps": 0.005770444869995117, "step": 789} +{"info/global_step": 790, "train_info/time_within_train_step": 27.878870010375977, "step": 790} +{"train_info/time_between_train_steps": 0.011434078216552734, "step": 790} +{"info/global_step": 791, "train_info/time_within_train_step": 27.923210620880127, "step": 791} +{"train_info/time_between_train_steps": 0.005407094955444336, "step": 791} +{"info/global_step": 792, "train_info/time_within_train_step": 27.85324454307556, "step": 792} +{"train_info/time_between_train_steps": 0.005499601364135742, "step": 792} +{"info/global_step": 793, "train_info/time_within_train_step": 27.85214114189148, "step": 793} +{"train_info/time_between_train_steps": 0.007410764694213867, "step": 793} +{"info/global_step": 794, "train_info/time_within_train_step": 27.866066455841064, "step": 794} +{"train_info/time_between_train_steps": 0.0056035518646240234, "step": 794} +{"info/global_step": 795, "train_info/time_within_train_step": 27.808539628982544, "step": 795} +{"train_info/time_between_train_steps": 0.005415678024291992, "step": 795} +{"info/global_step": 796, "train_info/time_within_train_step": 27.867923974990845, "step": 796} +{"train_info/time_between_train_steps": 0.0061779022216796875, "step": 796} +{"info/global_step": 797, "train_info/time_within_train_step": 27.91459059715271, "step": 797} +{"train_info/time_between_train_steps": 0.008167505264282227, "step": 797} +{"info/global_step": 798, "train_info/time_within_train_step": 27.83493995666504, "step": 798} +{"train_info/time_between_train_steps": 0.0053863525390625, "step": 798} +{"info/global_step": 799, "train_info/time_within_train_step": 27.89142107963562, "step": 799} +{"train_info/time_between_train_steps": 0.005636692047119141, "step": 799} +{"info/global_step": 800, "train_info/time_within_train_step": 27.869879245758057, "step": 800} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740846502, "_runtime": 23308}, "step": 800} +{"logs": {"train/loss": 3.411, "train/learning_rate": 0.00022222222222222218, "train/epoch": 29.01, "_timestamp": 1740846502, "_runtime": 23308}, "step": 800} +{"train_info/time_between_train_steps": 13.632590293884277, "step": 800} +{"info/global_step": 801, "train_info/time_within_train_step": 27.944470167160034, "step": 801} +{"train_info/time_between_train_steps": 0.008341073989868164, "step": 801} +{"info/global_step": 802, "train_info/time_within_train_step": 27.84864592552185, "step": 802} +{"train_info/time_between_train_steps": 0.010160684585571289, "step": 802} +{"info/global_step": 803, "train_info/time_within_train_step": 27.81823706626892, "step": 803} +{"train_info/time_between_train_steps": 0.0056040287017822266, "step": 803} +{"info/global_step": 804, "train_info/time_within_train_step": 27.825165510177612, "step": 804} +{"train_info/time_between_train_steps": 0.007863521575927734, "step": 804} +{"info/global_step": 805, "train_info/time_within_train_step": 27.847384214401245, "step": 805} +{"train_info/time_between_train_steps": 0.007199525833129883, "step": 805} +{"info/global_step": 806, "train_info/time_within_train_step": 27.822221040725708, "step": 806} +{"train_info/time_between_train_steps": 0.0054073333740234375, "step": 806} +{"info/global_step": 807, "train_info/time_within_train_step": 27.86064863204956, "step": 807} +{"train_info/time_between_train_steps": 0.005377769470214844, "step": 807} +{"info/global_step": 808, "train_info/time_within_train_step": 27.897339582443237, "step": 808} +{"train_info/time_between_train_steps": 0.00710749626159668, "step": 808} +{"info/global_step": 809, "train_info/time_within_train_step": 27.89056086540222, "step": 809} +{"train_info/time_between_train_steps": 0.005780935287475586, "step": 809} +{"info/global_step": 810, "train_info/time_within_train_step": 27.896753311157227, "step": 810} +{"train_info/time_between_train_steps": 0.0063533782958984375, "step": 810} +{"train_info/time_between_train_steps": 30.002497673034668, "step": 810} +{"info/global_step": 811, "train_info/time_within_train_step": 27.833038806915283, "step": 811} +{"train_info/time_between_train_steps": 0.005513906478881836, "step": 811} +{"info/global_step": 812, "train_info/time_within_train_step": 27.972973823547363, "step": 812} +{"train_info/time_between_train_steps": 0.005467653274536133, "step": 812} +{"info/global_step": 813, "train_info/time_within_train_step": 27.88308596611023, "step": 813} +{"train_info/time_between_train_steps": 0.005348682403564453, "step": 813} +{"info/global_step": 814, "train_info/time_within_train_step": 28.01780652999878, "step": 814} +{"train_info/time_between_train_steps": 0.006265163421630859, "step": 814} +{"info/global_step": 815, "train_info/time_within_train_step": 27.94503402709961, "step": 815} +{"train_info/time_between_train_steps": 0.005680084228515625, "step": 815} +{"info/global_step": 816, "train_info/time_within_train_step": 27.986271619796753, "step": 816} +{"train_info/time_between_train_steps": 0.006465911865234375, "step": 816} +{"info/global_step": 817, "train_info/time_within_train_step": 27.84039044380188, "step": 817} +{"train_info/time_between_train_steps": 0.0086517333984375, "step": 817} +{"info/global_step": 818, "train_info/time_within_train_step": 27.9368999004364, "step": 818} +{"train_info/time_between_train_steps": 0.0055789947509765625, "step": 818} +{"info/global_step": 819, "train_info/time_within_train_step": 27.814674854278564, "step": 819} +{"train_info/time_between_train_steps": 0.005623579025268555, "step": 819} +{"info/global_step": 820, "train_info/time_within_train_step": 27.84053659439087, "step": 820} +{"train_info/time_between_train_steps": 0.005318880081176758, "step": 820} +{"info/global_step": 821, "train_info/time_within_train_step": 27.83417582511902, "step": 821} +{"train_info/time_between_train_steps": 0.005236625671386719, "step": 821} +{"info/global_step": 822, "train_info/time_within_train_step": 27.824065446853638, "step": 822} +{"train_info/time_between_train_steps": 0.005393028259277344, "step": 822} +{"info/global_step": 823, "train_info/time_within_train_step": 27.849738121032715, "step": 823} +{"train_info/time_between_train_steps": 0.005471944808959961, "step": 823} +{"info/global_step": 824, "train_info/time_within_train_step": 28.09470772743225, "step": 824} +{"train_info/time_between_train_steps": 0.005274772644042969, "step": 824} +{"info/global_step": 825, "train_info/time_within_train_step": 27.868917226791382, "step": 825} +{"train_info/time_between_train_steps": 0.006362199783325195, "step": 825} +{"info/global_step": 826, "train_info/time_within_train_step": 27.847087621688843, "step": 826} +{"train_info/time_between_train_steps": 0.007653951644897461, "step": 826} +{"info/global_step": 827, "train_info/time_within_train_step": 27.834003925323486, "step": 827} +{"train_info/time_between_train_steps": 0.005537748336791992, "step": 827} +{"info/global_step": 828, "train_info/time_within_train_step": 27.875441074371338, "step": 828} +{"train_info/time_between_train_steps": 0.006571531295776367, "step": 828} +{"info/global_step": 829, "train_info/time_within_train_step": 27.951153993606567, "step": 829} +{"train_info/time_between_train_steps": 0.006552696228027344, "step": 829} +{"info/global_step": 830, "train_info/time_within_train_step": 27.887980222702026, "step": 830} +{"train_info/time_between_train_steps": 0.0055272579193115234, "step": 830} +{"info/global_step": 831, "train_info/time_within_train_step": 27.983684062957764, "step": 831} +{"train_info/time_between_train_steps": 0.007896900177001953, "step": 831} +{"info/global_step": 832, "train_info/time_within_train_step": 27.795000791549683, "step": 832} +{"train_info/time_between_train_steps": 0.005428314208984375, "step": 832} +{"info/global_step": 833, "train_info/time_within_train_step": 27.824674367904663, "step": 833} +{"train_info/time_between_train_steps": 0.0051991939544677734, "step": 833} +{"info/global_step": 834, "train_info/time_within_train_step": 27.846629858016968, "step": 834} +{"train_info/time_between_train_steps": 0.005628824234008789, "step": 834} +{"info/global_step": 835, "train_info/time_within_train_step": 27.85027313232422, "step": 835} +{"train_info/time_between_train_steps": 0.005881071090698242, "step": 835} +{"info/global_step": 836, "train_info/time_within_train_step": 27.874098777770996, "step": 836} +{"train_info/time_between_train_steps": 0.006137847900390625, "step": 836} +{"info/global_step": 837, "train_info/time_within_train_step": 27.8704514503479, "step": 837} +{"train_info/time_between_train_steps": 0.005980253219604492, "step": 837} +{"train_info/time_between_train_steps": 30.212310314178467, "step": 837} +{"info/global_step": 838, "train_info/time_within_train_step": 27.839590072631836, "step": 838} +{"train_info/time_between_train_steps": 0.00644230842590332, "step": 838} +{"info/global_step": 839, "train_info/time_within_train_step": 27.932167768478394, "step": 839} +{"train_info/time_between_train_steps": 0.0051116943359375, "step": 839} +{"info/global_step": 840, "train_info/time_within_train_step": 27.786500215530396, "step": 840} +{"train_info/time_between_train_steps": 0.005320072174072266, "step": 840} +{"info/global_step": 841, "train_info/time_within_train_step": 27.960594177246094, "step": 841} +{"train_info/time_between_train_steps": 0.008157968521118164, "step": 841} +{"info/global_step": 842, "train_info/time_within_train_step": 27.945752382278442, "step": 842} +{"train_info/time_between_train_steps": 0.005875587463378906, "step": 842} +{"info/global_step": 843, "train_info/time_within_train_step": 28.019387245178223, "step": 843} +{"train_info/time_between_train_steps": 0.005794048309326172, "step": 843} +{"info/global_step": 844, "train_info/time_within_train_step": 27.88164973258972, "step": 844} +{"train_info/time_between_train_steps": 0.005819082260131836, "step": 844} +{"info/global_step": 845, "train_info/time_within_train_step": 28.169129848480225, "step": 845} +{"train_info/time_between_train_steps": 0.00835561752319336, "step": 845} +{"info/global_step": 846, "train_info/time_within_train_step": 27.895224809646606, "step": 846} +{"train_info/time_between_train_steps": 0.005563974380493164, "step": 846} +{"info/global_step": 847, "train_info/time_within_train_step": 27.836427688598633, "step": 847} +{"train_info/time_between_train_steps": 0.006301164627075195, "step": 847} +{"info/global_step": 848, "train_info/time_within_train_step": 27.78814125061035, "step": 848} +{"train_info/time_between_train_steps": 0.005513191223144531, "step": 848} +{"info/global_step": 849, "train_info/time_within_train_step": 27.79324984550476, "step": 849} +{"train_info/time_between_train_steps": 0.005435466766357422, "step": 849} +{"info/global_step": 850, "train_info/time_within_train_step": 27.79348111152649, "step": 850} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740847971, "_runtime": 24777}, "step": 850} +{"logs": {"train/loss": 3.3501, "train/learning_rate": 0.00019444444444444443, "train/epoch": 31.01, "_timestamp": 1740847971, "_runtime": 24777}, "step": 850} +{"train_info/time_between_train_steps": 0.009329795837402344, "step": 850} +{"info/global_step": 851, "train_info/time_within_train_step": 27.8274085521698, "step": 851} +{"train_info/time_between_train_steps": 0.005321979522705078, "step": 851} +{"info/global_step": 852, "train_info/time_within_train_step": 27.848379611968994, "step": 852} +{"train_info/time_between_train_steps": 0.005632877349853516, "step": 852} +{"info/global_step": 853, "train_info/time_within_train_step": 27.828104496002197, "step": 853} +{"train_info/time_between_train_steps": 0.00905466079711914, "step": 853} +{"info/global_step": 854, "train_info/time_within_train_step": 27.853922843933105, "step": 854} +{"train_info/time_between_train_steps": 0.008702754974365234, "step": 854} +{"info/global_step": 855, "train_info/time_within_train_step": 27.864034414291382, "step": 855} +{"train_info/time_between_train_steps": 0.005446195602416992, "step": 855} +{"info/global_step": 856, "train_info/time_within_train_step": 27.859248876571655, "step": 856} +{"train_info/time_between_train_steps": 0.0055561065673828125, "step": 856} +{"info/global_step": 857, "train_info/time_within_train_step": 27.856409072875977, "step": 857} +{"train_info/time_between_train_steps": 0.005521059036254883, "step": 857} +{"info/global_step": 858, "train_info/time_within_train_step": 27.85268259048462, "step": 858} +{"train_info/time_between_train_steps": 0.0054967403411865234, "step": 858} +{"info/global_step": 859, "train_info/time_within_train_step": 27.83344268798828, "step": 859} +{"train_info/time_between_train_steps": 0.01093745231628418, "step": 859} +{"info/global_step": 860, "train_info/time_within_train_step": 27.814224243164062, "step": 860} +{"train_info/time_between_train_steps": 0.00598454475402832, "step": 860} +{"info/global_step": 861, "train_info/time_within_train_step": 27.999088525772095, "step": 861} +{"train_info/time_between_train_steps": 0.005361795425415039, "step": 861} +{"info/global_step": 862, "train_info/time_within_train_step": 27.85371446609497, "step": 862} +{"train_info/time_between_train_steps": 0.005919456481933594, "step": 862} +{"info/global_step": 863, "train_info/time_within_train_step": 27.832655668258667, "step": 863} +{"train_info/time_between_train_steps": 0.005662202835083008, "step": 863} +{"info/global_step": 864, "train_info/time_within_train_step": 27.876856088638306, "step": 864} +{"train_info/time_between_train_steps": 0.006234645843505859, "step": 864} +{"train_info/time_between_train_steps": 30.20665454864502, "step": 864} +{"info/global_step": 865, "train_info/time_within_train_step": 27.81935691833496, "step": 865} +{"train_info/time_between_train_steps": 0.005307912826538086, "step": 865} +{"info/global_step": 866, "train_info/time_within_train_step": 27.972630262374878, "step": 866} +{"train_info/time_between_train_steps": 0.0056798458099365234, "step": 866} +{"info/global_step": 867, "train_info/time_within_train_step": 27.878029108047485, "step": 867} +{"train_info/time_between_train_steps": 0.00807809829711914, "step": 867} +{"info/global_step": 868, "train_info/time_within_train_step": 27.964282512664795, "step": 868} +{"train_info/time_between_train_steps": 0.005578279495239258, "step": 868} +{"info/global_step": 869, "train_info/time_within_train_step": 27.811034202575684, "step": 869} +{"train_info/time_between_train_steps": 0.00835561752319336, "step": 869} +{"info/global_step": 870, "train_info/time_within_train_step": 27.91804552078247, "step": 870} +{"train_info/time_between_train_steps": 0.008258342742919922, "step": 870} +{"info/global_step": 871, "train_info/time_within_train_step": 27.827407121658325, "step": 871} +{"train_info/time_between_train_steps": 0.0056726932525634766, "step": 871} +{"info/global_step": 872, "train_info/time_within_train_step": 27.92923069000244, "step": 872} +{"train_info/time_between_train_steps": 0.009870052337646484, "step": 872} +{"info/global_step": 873, "train_info/time_within_train_step": 27.910048723220825, "step": 873} +{"train_info/time_between_train_steps": 0.0056231021881103516, "step": 873} +{"info/global_step": 874, "train_info/time_within_train_step": 27.870726585388184, "step": 874} +{"train_info/time_between_train_steps": 0.005427837371826172, "step": 874} +{"info/global_step": 875, "train_info/time_within_train_step": 27.871641874313354, "step": 875} +{"train_info/time_between_train_steps": 0.0053806304931640625, "step": 875} +{"info/global_step": 876, "train_info/time_within_train_step": 27.96159267425537, "step": 876} +{"train_info/time_between_train_steps": 0.005473136901855469, "step": 876} +{"info/global_step": 877, "train_info/time_within_train_step": 27.84999132156372, "step": 877} +{"train_info/time_between_train_steps": 0.005327939987182617, "step": 877} +{"info/global_step": 878, "train_info/time_within_train_step": 27.78445601463318, "step": 878} +{"train_info/time_between_train_steps": 0.0053522586822509766, "step": 878} +{"info/global_step": 879, "train_info/time_within_train_step": 27.785872220993042, "step": 879} +{"train_info/time_between_train_steps": 0.005541801452636719, "step": 879} +{"info/global_step": 880, "train_info/time_within_train_step": 27.78122878074646, "step": 880} +{"train_info/time_between_train_steps": 0.0053899288177490234, "step": 880} +{"info/global_step": 881, "train_info/time_within_train_step": 27.85050106048584, "step": 881} +{"train_info/time_between_train_steps": 0.00973057746887207, "step": 881} +{"info/global_step": 882, "train_info/time_within_train_step": 27.86555242538452, "step": 882} +{"train_info/time_between_train_steps": 0.008922338485717773, "step": 882} +{"info/global_step": 883, "train_info/time_within_train_step": 27.867830276489258, "step": 883} +{"train_info/time_between_train_steps": 0.0055277347564697266, "step": 883} +{"info/global_step": 884, "train_info/time_within_train_step": 27.838948249816895, "step": 884} +{"train_info/time_between_train_steps": 0.0053730010986328125, "step": 884} +{"info/global_step": 885, "train_info/time_within_train_step": 27.851752281188965, "step": 885} +{"train_info/time_between_train_steps": 0.005502223968505859, "step": 885} +{"info/global_step": 886, "train_info/time_within_train_step": 27.835376739501953, "step": 886} +{"train_info/time_between_train_steps": 0.005656003952026367, "step": 886} +{"info/global_step": 887, "train_info/time_within_train_step": 27.792518854141235, "step": 887} +{"train_info/time_between_train_steps": 0.005277872085571289, "step": 887} +{"info/global_step": 888, "train_info/time_within_train_step": 27.83087944984436, "step": 888} +{"train_info/time_between_train_steps": 0.005615711212158203, "step": 888} +{"info/global_step": 889, "train_info/time_within_train_step": 27.853187084197998, "step": 889} +{"train_info/time_between_train_steps": 0.00536036491394043, "step": 889} +{"info/global_step": 890, "train_info/time_within_train_step": 27.877115726470947, "step": 890} +{"train_info/time_between_train_steps": 0.00929570198059082, "step": 890} +{"info/global_step": 891, "train_info/time_within_train_step": 27.93033456802368, "step": 891} +{"train_info/time_between_train_steps": 0.006345272064208984, "step": 891} +{"train_info/time_between_train_steps": 29.828296422958374, "step": 891} +{"info/global_step": 892, "train_info/time_within_train_step": 27.84209704399109, "step": 892} +{"train_info/time_between_train_steps": 0.0053539276123046875, "step": 892} +{"info/global_step": 893, "train_info/time_within_train_step": 27.959561109542847, "step": 893} +{"train_info/time_between_train_steps": 0.0052776336669921875, "step": 893} +{"info/global_step": 894, "train_info/time_within_train_step": 27.855016469955444, "step": 894} +{"train_info/time_between_train_steps": 0.005316734313964844, "step": 894} +{"info/global_step": 895, "train_info/time_within_train_step": 28.03976845741272, "step": 895} +{"train_info/time_between_train_steps": 0.005633115768432617, "step": 895} +{"info/global_step": 896, "train_info/time_within_train_step": 27.872852325439453, "step": 896} +{"train_info/time_between_train_steps": 0.005682468414306641, "step": 896} +{"info/global_step": 897, "train_info/time_within_train_step": 27.98095965385437, "step": 897} +{"train_info/time_between_train_steps": 0.005653858184814453, "step": 897} +{"info/global_step": 898, "train_info/time_within_train_step": 27.847965478897095, "step": 898} +{"train_info/time_between_train_steps": 0.010277986526489258, "step": 898} +{"info/global_step": 899, "train_info/time_within_train_step": 27.90568232536316, "step": 899} +{"train_info/time_between_train_steps": 0.005568265914916992, "step": 899} +{"info/global_step": 900, "train_info/time_within_train_step": 27.879136562347412, "step": 900} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740849425, "_runtime": 26231}, "step": 900} +{"logs": {"train/loss": 3.2943, "train/learning_rate": 0.00016666666666666666, "train/epoch": 33.01, "_timestamp": 1740849425, "_runtime": 26231}, "step": 900} +{"train_info/time_between_train_steps": 13.594642639160156, "step": 900} +{"info/global_step": 901, "train_info/time_within_train_step": 27.785354137420654, "step": 901} +{"train_info/time_between_train_steps": 0.005606174468994141, "step": 901} +{"info/global_step": 902, "train_info/time_within_train_step": 27.806861400604248, "step": 902} +{"train_info/time_between_train_steps": 0.005379199981689453, "step": 902} +{"info/global_step": 903, "train_info/time_within_train_step": 27.87341284751892, "step": 903} +{"train_info/time_between_train_steps": 0.00684666633605957, "step": 903} +{"info/global_step": 904, "train_info/time_within_train_step": 27.925127744674683, "step": 904} +{"train_info/time_between_train_steps": 0.006621122360229492, "step": 904} +{"info/global_step": 905, "train_info/time_within_train_step": 27.87781310081482, "step": 905} +{"train_info/time_between_train_steps": 0.00558781623840332, "step": 905} +{"info/global_step": 906, "train_info/time_within_train_step": 27.910998821258545, "step": 906} +{"train_info/time_between_train_steps": 0.0054836273193359375, "step": 906} +{"info/global_step": 907, "train_info/time_within_train_step": 27.776922464370728, "step": 907} +{"train_info/time_between_train_steps": 0.005484104156494141, "step": 907} +{"info/global_step": 908, "train_info/time_within_train_step": 27.787414073944092, "step": 908} +{"train_info/time_between_train_steps": 0.005537748336791992, "step": 908} +{"info/global_step": 909, "train_info/time_within_train_step": 27.873125553131104, "step": 909} +{"train_info/time_between_train_steps": 0.005519390106201172, "step": 909} +{"info/global_step": 910, "train_info/time_within_train_step": 27.868340730667114, "step": 910} +{"train_info/time_between_train_steps": 0.005478382110595703, "step": 910} +{"info/global_step": 911, "train_info/time_within_train_step": 27.854341983795166, "step": 911} +{"train_info/time_between_train_steps": 0.00591278076171875, "step": 911} +{"info/global_step": 912, "train_info/time_within_train_step": 27.832974672317505, "step": 912} +{"train_info/time_between_train_steps": 0.005524158477783203, "step": 912} +{"info/global_step": 913, "train_info/time_within_train_step": 27.87399125099182, "step": 913} +{"train_info/time_between_train_steps": 0.007984161376953125, "step": 913} +{"info/global_step": 914, "train_info/time_within_train_step": 27.816967725753784, "step": 914} +{"train_info/time_between_train_steps": 0.005620241165161133, "step": 914} +{"info/global_step": 915, "train_info/time_within_train_step": 27.975001335144043, "step": 915} +{"train_info/time_between_train_steps": 0.005608558654785156, "step": 915} +{"info/global_step": 916, "train_info/time_within_train_step": 27.859930276870728, "step": 916} +{"train_info/time_between_train_steps": 0.005791902542114258, "step": 916} +{"info/global_step": 917, "train_info/time_within_train_step": 27.885084867477417, "step": 917} +{"train_info/time_between_train_steps": 0.0059816837310791016, "step": 917} +{"info/global_step": 918, "train_info/time_within_train_step": 27.864380598068237, "step": 918} +{"train_info/time_between_train_steps": 0.005896568298339844, "step": 918} +{"train_info/time_between_train_steps": 30.08806300163269, "step": 918} +{"info/global_step": 919, "train_info/time_within_train_step": 27.84047532081604, "step": 919} +{"train_info/time_between_train_steps": 0.0053102970123291016, "step": 919} +{"info/global_step": 920, "train_info/time_within_train_step": 28.24191427230835, "step": 920} +{"train_info/time_between_train_steps": 0.00562739372253418, "step": 920} +{"info/global_step": 921, "train_info/time_within_train_step": 27.94611430168152, "step": 921} +{"train_info/time_between_train_steps": 0.0071794986724853516, "step": 921} +{"info/global_step": 922, "train_info/time_within_train_step": 28.055168628692627, "step": 922} +{"train_info/time_between_train_steps": 0.005667924880981445, "step": 922} +{"info/global_step": 923, "train_info/time_within_train_step": 27.876217365264893, "step": 923} +{"train_info/time_between_train_steps": 0.006641387939453125, "step": 923} +{"info/global_step": 924, "train_info/time_within_train_step": 28.073357343673706, "step": 924} +{"train_info/time_between_train_steps": 0.006000518798828125, "step": 924} +{"info/global_step": 925, "train_info/time_within_train_step": 27.82821011543274, "step": 925} +{"train_info/time_between_train_steps": 0.010487794876098633, "step": 925} +{"info/global_step": 926, "train_info/time_within_train_step": 27.866421222686768, "step": 926} +{"train_info/time_between_train_steps": 0.005535602569580078, "step": 926} +{"info/global_step": 927, "train_info/time_within_train_step": 27.87462830543518, "step": 927} +{"train_info/time_between_train_steps": 0.005656242370605469, "step": 927} +{"info/global_step": 928, "train_info/time_within_train_step": 27.866241216659546, "step": 928} +{"train_info/time_between_train_steps": 0.007809638977050781, "step": 928} +{"info/global_step": 929, "train_info/time_within_train_step": 27.86361575126648, "step": 929} +{"train_info/time_between_train_steps": 0.005572795867919922, "step": 929} +{"info/global_step": 930, "train_info/time_within_train_step": 27.865079164505005, "step": 930} +{"train_info/time_between_train_steps": 0.005558013916015625, "step": 930} +{"info/global_step": 931, "train_info/time_within_train_step": 27.912596940994263, "step": 931} +{"train_info/time_between_train_steps": 0.006883144378662109, "step": 931} +{"info/global_step": 932, "train_info/time_within_train_step": 27.825610399246216, "step": 932} +{"train_info/time_between_train_steps": 0.005259037017822266, "step": 932} +{"info/global_step": 933, "train_info/time_within_train_step": 27.792192935943604, "step": 933} +{"train_info/time_between_train_steps": 0.005543708801269531, "step": 933} +{"info/global_step": 934, "train_info/time_within_train_step": 27.842503547668457, "step": 934} +{"train_info/time_between_train_steps": 0.010088205337524414, "step": 934} +{"info/global_step": 935, "train_info/time_within_train_step": 27.91608762741089, "step": 935} +{"train_info/time_between_train_steps": 0.006273984909057617, "step": 935} +{"info/global_step": 936, "train_info/time_within_train_step": 27.961133241653442, "step": 936} +{"train_info/time_between_train_steps": 0.005648136138916016, "step": 936} +{"info/global_step": 937, "train_info/time_within_train_step": 27.852683067321777, "step": 937} +{"train_info/time_between_train_steps": 0.005982637405395508, "step": 937} +{"info/global_step": 938, "train_info/time_within_train_step": 27.890281915664673, "step": 938} +{"train_info/time_between_train_steps": 0.0055637359619140625, "step": 938} +{"info/global_step": 939, "train_info/time_within_train_step": 27.78810739517212, "step": 939} +{"train_info/time_between_train_steps": 0.005349636077880859, "step": 939} +{"info/global_step": 940, "train_info/time_within_train_step": 27.81560516357422, "step": 940} +{"train_info/time_between_train_steps": 0.005616903305053711, "step": 940} +{"info/global_step": 941, "train_info/time_within_train_step": 27.8749942779541, "step": 941} +{"train_info/time_between_train_steps": 0.005735635757446289, "step": 941} +{"info/global_step": 942, "train_info/time_within_train_step": 27.84960913658142, "step": 942} +{"train_info/time_between_train_steps": 0.007908105850219727, "step": 942} +{"info/global_step": 943, "train_info/time_within_train_step": 27.907367706298828, "step": 943} +{"train_info/time_between_train_steps": 0.007205486297607422, "step": 943} +{"info/global_step": 944, "train_info/time_within_train_step": 27.881389141082764, "step": 944} +{"train_info/time_between_train_steps": 0.007912635803222656, "step": 944} +{"info/global_step": 945, "train_info/time_within_train_step": 27.841171979904175, "step": 945} +{"train_info/time_between_train_steps": 0.011546611785888672, "step": 945} +{"train_info/time_between_train_steps": 30.235855102539062, "step": 945} +{"info/global_step": 946, "train_info/time_within_train_step": 27.834509134292603, "step": 946} +{"train_info/time_between_train_steps": 0.0053102970123291016, "step": 946} +{"info/global_step": 947, "train_info/time_within_train_step": 27.93798017501831, "step": 947} +{"train_info/time_between_train_steps": 0.009975910186767578, "step": 947} +{"info/global_step": 948, "train_info/time_within_train_step": 27.85160255432129, "step": 948} +{"train_info/time_between_train_steps": 0.005678892135620117, "step": 948} +{"info/global_step": 949, "train_info/time_within_train_step": 27.945444345474243, "step": 949} +{"train_info/time_between_train_steps": 0.005383014678955078, "step": 949} +{"info/global_step": 950, "train_info/time_within_train_step": 27.816812992095947, "step": 950} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740850893, "_runtime": 27699}, "step": 950} +{"logs": {"train/loss": 3.2435, "train/learning_rate": 0.0001388888888888889, "train/epoch": 35.0, "_timestamp": 1740850893, "_runtime": 27699}, "step": 950} +{"train_info/time_between_train_steps": 0.012539148330688477, "step": 950} +{"info/global_step": 951, "train_info/time_within_train_step": 28.213008165359497, "step": 951} +{"train_info/time_between_train_steps": 0.005849361419677734, "step": 951} +{"info/global_step": 952, "train_info/time_within_train_step": 27.844560861587524, "step": 952} +{"train_info/time_between_train_steps": 0.006222724914550781, "step": 952} +{"info/global_step": 953, "train_info/time_within_train_step": 27.901777982711792, "step": 953} +{"train_info/time_between_train_steps": 0.005450248718261719, "step": 953} +{"info/global_step": 954, "train_info/time_within_train_step": 27.829272747039795, "step": 954} +{"train_info/time_between_train_steps": 0.005466938018798828, "step": 954} +{"info/global_step": 955, "train_info/time_within_train_step": 27.76989459991455, "step": 955} +{"train_info/time_between_train_steps": 0.0051572322845458984, "step": 955} +{"info/global_step": 956, "train_info/time_within_train_step": 27.817236185073853, "step": 956} +{"train_info/time_between_train_steps": 0.005464076995849609, "step": 956} +{"info/global_step": 957, "train_info/time_within_train_step": 27.837953567504883, "step": 957} +{"train_info/time_between_train_steps": 0.005490541458129883, "step": 957} +{"info/global_step": 958, "train_info/time_within_train_step": 27.82840847969055, "step": 958} +{"train_info/time_between_train_steps": 0.00551295280456543, "step": 958} +{"info/global_step": 959, "train_info/time_within_train_step": 27.8504741191864, "step": 959} +{"train_info/time_between_train_steps": 0.005455493927001953, "step": 959} +{"info/global_step": 960, "train_info/time_within_train_step": 27.8455970287323, "step": 960} +{"train_info/time_between_train_steps": 0.005244016647338867, "step": 960} +{"info/global_step": 961, "train_info/time_within_train_step": 27.84359622001648, "step": 961} +{"train_info/time_between_train_steps": 0.005242109298706055, "step": 961} +{"info/global_step": 962, "train_info/time_within_train_step": 27.840276956558228, "step": 962} +{"train_info/time_between_train_steps": 0.0054302215576171875, "step": 962} +{"info/global_step": 963, "train_info/time_within_train_step": 27.85474967956543, "step": 963} +{"train_info/time_between_train_steps": 0.005485057830810547, "step": 963} +{"info/global_step": 964, "train_info/time_within_train_step": 27.84940481185913, "step": 964} +{"train_info/time_between_train_steps": 0.005455970764160156, "step": 964} +{"info/global_step": 965, "train_info/time_within_train_step": 27.793617963790894, "step": 965} +{"train_info/time_between_train_steps": 0.005644559860229492, "step": 965} +{"info/global_step": 966, "train_info/time_within_train_step": 27.793699502944946, "step": 966} +{"train_info/time_between_train_steps": 0.0052492618560791016, "step": 966} +{"info/global_step": 967, "train_info/time_within_train_step": 27.908201217651367, "step": 967} +{"train_info/time_between_train_steps": 0.005686759948730469, "step": 967} +{"info/global_step": 968, "train_info/time_within_train_step": 27.828054904937744, "step": 968} +{"train_info/time_between_train_steps": 0.005605459213256836, "step": 968} +{"info/global_step": 969, "train_info/time_within_train_step": 27.828906774520874, "step": 969} +{"train_info/time_between_train_steps": 0.008033514022827148, "step": 969} +{"info/global_step": 970, "train_info/time_within_train_step": 27.832195281982422, "step": 970} +{"train_info/time_between_train_steps": 0.005410432815551758, "step": 970} +{"info/global_step": 971, "train_info/time_within_train_step": 27.873929977416992, "step": 971} +{"train_info/time_between_train_steps": 0.007563352584838867, "step": 971} +{"info/global_step": 972, "train_info/time_within_train_step": 27.860097408294678, "step": 972} +{"train_info/time_between_train_steps": 0.006241559982299805, "step": 972} +{"train_info/time_between_train_steps": 30.24685311317444, "step": 972} +{"info/global_step": 973, "train_info/time_within_train_step": 27.8369300365448, "step": 973} +{"train_info/time_between_train_steps": 0.010146141052246094, "step": 973} +{"info/global_step": 974, "train_info/time_within_train_step": 27.988215923309326, "step": 974} +{"train_info/time_between_train_steps": 0.005430459976196289, "step": 974} +{"info/global_step": 975, "train_info/time_within_train_step": 27.897567987442017, "step": 975} +{"train_info/time_between_train_steps": 0.005642414093017578, "step": 975} +{"info/global_step": 976, "train_info/time_within_train_step": 28.09256935119629, "step": 976} +{"train_info/time_between_train_steps": 0.0056705474853515625, "step": 976} +{"info/global_step": 977, "train_info/time_within_train_step": 27.848443746566772, "step": 977} +{"train_info/time_between_train_steps": 0.005953311920166016, "step": 977} +{"info/global_step": 978, "train_info/time_within_train_step": 27.925939321517944, "step": 978} +{"train_info/time_between_train_steps": 0.010326385498046875, "step": 978} +{"info/global_step": 979, "train_info/time_within_train_step": 27.860217094421387, "step": 979} +{"train_info/time_between_train_steps": 0.005727529525756836, "step": 979} +{"info/global_step": 980, "train_info/time_within_train_step": 27.916947841644287, "step": 980} +{"train_info/time_between_train_steps": 0.012720346450805664, "step": 980} +{"info/global_step": 981, "train_info/time_within_train_step": 28.001051425933838, "step": 981} +{"train_info/time_between_train_steps": 0.005682468414306641, "step": 981} +{"info/global_step": 982, "train_info/time_within_train_step": 27.8027286529541, "step": 982} +{"train_info/time_between_train_steps": 0.005828142166137695, "step": 982} +{"info/global_step": 983, "train_info/time_within_train_step": 27.858275890350342, "step": 983} +{"train_info/time_between_train_steps": 0.00570225715637207, "step": 983} +{"info/global_step": 984, "train_info/time_within_train_step": 27.799275159835815, "step": 984} +{"train_info/time_between_train_steps": 0.005280733108520508, "step": 984} +{"info/global_step": 985, "train_info/time_within_train_step": 27.86628818511963, "step": 985} +{"train_info/time_between_train_steps": 0.0056247711181640625, "step": 985} +{"info/global_step": 986, "train_info/time_within_train_step": 27.881243467330933, "step": 986} +{"train_info/time_between_train_steps": 0.010731697082519531, "step": 986} +{"info/global_step": 987, "train_info/time_within_train_step": 27.86670756340027, "step": 987} +{"train_info/time_between_train_steps": 0.005743503570556641, "step": 987} +{"info/global_step": 988, "train_info/time_within_train_step": 27.864272832870483, "step": 988} +{"train_info/time_between_train_steps": 0.008594036102294922, "step": 988} +{"info/global_step": 989, "train_info/time_within_train_step": 27.837058305740356, "step": 989} +{"train_info/time_between_train_steps": 0.005663871765136719, "step": 989} +{"info/global_step": 990, "train_info/time_within_train_step": 27.860526084899902, "step": 990} +{"train_info/time_between_train_steps": 0.0057103633880615234, "step": 990} +{"info/global_step": 991, "train_info/time_within_train_step": 27.851246118545532, "step": 991} +{"train_info/time_between_train_steps": 0.010637521743774414, "step": 991} +{"info/global_step": 992, "train_info/time_within_train_step": 27.87400531768799, "step": 992} +{"train_info/time_between_train_steps": 0.005812406539916992, "step": 992} +{"info/global_step": 993, "train_info/time_within_train_step": 27.861785650253296, "step": 993} +{"train_info/time_between_train_steps": 0.005624294281005859, "step": 993} +{"info/global_step": 994, "train_info/time_within_train_step": 27.880800247192383, "step": 994} +{"train_info/time_between_train_steps": 0.009393930435180664, "step": 994} +{"info/global_step": 995, "train_info/time_within_train_step": 27.852824211120605, "step": 995} +{"train_info/time_between_train_steps": 0.0058383941650390625, "step": 995} +{"info/global_step": 996, "train_info/time_within_train_step": 27.8468074798584, "step": 996} +{"train_info/time_between_train_steps": 0.005615949630737305, "step": 996} +{"info/global_step": 997, "train_info/time_within_train_step": 27.945562601089478, "step": 997} +{"train_info/time_between_train_steps": 0.008113622665405273, "step": 997} +{"info/global_step": 998, "train_info/time_within_train_step": 27.87576389312744, "step": 998} +{"train_info/time_between_train_steps": 0.006328582763671875, "step": 998} +{"info/global_step": 999, "train_info/time_within_train_step": 27.886184453964233, "step": 999} +{"train_info/time_between_train_steps": 0.006242275238037109, "step": 999} +{"train_info/time_between_train_steps": 30.041179418563843, "step": 999} +{"info/global_step": 1000, "train_info/time_within_train_step": 27.8314106464386, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1740852348, "_runtime": 29154}, "step": 1000} +{"logs": {"train/loss": 3.1955, "train/learning_rate": 0.00011111111111111109, "train/epoch": 37.0, "_timestamp": 1740852348, "_runtime": 29154}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1740852352, "_runtime": 29158}, "step": 1000} +{"logs": {"eval/loss": 4.1568193435668945, "eval/runtime": 3.9982, "eval/samples_per_second": 50.773, "eval/steps_per_second": 3.251, "train/epoch": 37.0, "_timestamp": 1740852352, "_runtime": 29158}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1740852352, "_runtime": 29158}, "step": 1000} +{"logs": {"eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 4.1568193435668945, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 63.86805684825764, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 3.9982, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 50.773, "train/epoch": 37.0, "_timestamp": 1740852352, "_runtime": 29158}, "step": 1000} +{"train_info/time_between_train_steps": 19.857935905456543, "step": 1000} +{"info/global_step": 1001, "train_info/time_within_train_step": 28.048337936401367, "step": 1001} +{"train_info/time_between_train_steps": 0.005533933639526367, "step": 1001} +{"info/global_step": 1002, "train_info/time_within_train_step": 27.810971975326538, "step": 1002} +{"train_info/time_between_train_steps": 0.005565643310546875, "step": 1002} +{"info/global_step": 1003, "train_info/time_within_train_step": 27.928410530090332, "step": 1003} +{"train_info/time_between_train_steps": 0.0058705806732177734, "step": 1003} +{"info/global_step": 1004, "train_info/time_within_train_step": 27.78273606300354, "step": 1004} +{"train_info/time_between_train_steps": 0.005552053451538086, "step": 1004} +{"info/global_step": 1005, "train_info/time_within_train_step": 27.96913480758667, "step": 1005} +{"train_info/time_between_train_steps": 0.005640983581542969, "step": 1005} +{"info/global_step": 1006, "train_info/time_within_train_step": 27.782578945159912, "step": 1006} +{"train_info/time_between_train_steps": 0.005731105804443359, "step": 1006} +{"info/global_step": 1007, "train_info/time_within_train_step": 27.903281211853027, "step": 1007} +{"train_info/time_between_train_steps": 0.0057566165924072266, "step": 1007} +{"info/global_step": 1008, "train_info/time_within_train_step": 27.868484258651733, "step": 1008} +{"train_info/time_between_train_steps": 0.00548553466796875, "step": 1008} +{"info/global_step": 1009, "train_info/time_within_train_step": 27.835386514663696, "step": 1009} +{"train_info/time_between_train_steps": 0.006518840789794922, "step": 1009} +{"info/global_step": 1010, "train_info/time_within_train_step": 27.880090713500977, "step": 1010} +{"train_info/time_between_train_steps": 0.005716562271118164, "step": 1010} +{"info/global_step": 1011, "train_info/time_within_train_step": 27.876535892486572, "step": 1011} +{"train_info/time_between_train_steps": 0.006664752960205078, "step": 1011} +{"info/global_step": 1012, "train_info/time_within_train_step": 28.073477745056152, "step": 1012} +{"train_info/time_between_train_steps": 0.006374359130859375, "step": 1012} +{"info/global_step": 1013, "train_info/time_within_train_step": 27.958845615386963, "step": 1013} +{"train_info/time_between_train_steps": 0.005364418029785156, "step": 1013} +{"info/global_step": 1014, "train_info/time_within_train_step": 27.81444025039673, "step": 1014} +{"train_info/time_between_train_steps": 0.005893230438232422, "step": 1014} +{"info/global_step": 1015, "train_info/time_within_train_step": 27.90073013305664, "step": 1015} +{"train_info/time_between_train_steps": 0.011296987533569336, "step": 1015} +{"info/global_step": 1016, "train_info/time_within_train_step": 27.851481676101685, "step": 1016} +{"train_info/time_between_train_steps": 0.0055084228515625, "step": 1016} +{"info/global_step": 1017, "train_info/time_within_train_step": 27.92061161994934, "step": 1017} +{"train_info/time_between_train_steps": 0.006071805953979492, "step": 1017} +{"info/global_step": 1018, "train_info/time_within_train_step": 27.890007734298706, "step": 1018} +{"train_info/time_between_train_steps": 0.005858659744262695, "step": 1018} +{"info/global_step": 1019, "train_info/time_within_train_step": 27.818965435028076, "step": 1019} +{"train_info/time_between_train_steps": 0.0056264400482177734, "step": 1019} +{"info/global_step": 1020, "train_info/time_within_train_step": 27.890594959259033, "step": 1020} +{"train_info/time_between_train_steps": 0.011065006256103516, "step": 1020} +{"info/global_step": 1021, "train_info/time_within_train_step": 27.88448405265808, "step": 1021} +{"train_info/time_between_train_steps": 0.005992889404296875, "step": 1021} +{"info/global_step": 1022, "train_info/time_within_train_step": 27.865936040878296, "step": 1022} +{"train_info/time_between_train_steps": 0.005408048629760742, "step": 1022} +{"info/global_step": 1023, "train_info/time_within_train_step": 27.86331558227539, "step": 1023} +{"train_info/time_between_train_steps": 0.0059070587158203125, "step": 1023} +{"info/global_step": 1024, "train_info/time_within_train_step": 27.834094285964966, "step": 1024} +{"train_info/time_between_train_steps": 0.005561113357543945, "step": 1024} +{"info/global_step": 1025, "train_info/time_within_train_step": 27.87486481666565, "step": 1025} +{"train_info/time_between_train_steps": 0.006667137145996094, "step": 1025} +{"info/global_step": 1026, "train_info/time_within_train_step": 27.889334201812744, "step": 1026} +{"train_info/time_between_train_steps": 0.005911588668823242, "step": 1026} +{"train_info/time_between_train_steps": 30.24150562286377, "step": 1026} +{"info/global_step": 1027, "train_info/time_within_train_step": 27.85293698310852, "step": 1027} +{"train_info/time_between_train_steps": 0.005644798278808594, "step": 1027} +{"info/global_step": 1028, "train_info/time_within_train_step": 28.022974729537964, "step": 1028} +{"train_info/time_between_train_steps": 0.009018421173095703, "step": 1028} +{"info/global_step": 1029, "train_info/time_within_train_step": 27.944273948669434, "step": 1029} +{"train_info/time_between_train_steps": 0.011678695678710938, "step": 1029} +{"info/global_step": 1030, "train_info/time_within_train_step": 28.052159309387207, "step": 1030} +{"train_info/time_between_train_steps": 0.010064125061035156, "step": 1030} +{"info/global_step": 1031, "train_info/time_within_train_step": 27.86879849433899, "step": 1031} +{"train_info/time_between_train_steps": 0.0056879520416259766, "step": 1031} +{"info/global_step": 1032, "train_info/time_within_train_step": 28.024484634399414, "step": 1032} +{"train_info/time_between_train_steps": 0.005800008773803711, "step": 1032} +{"info/global_step": 1033, "train_info/time_within_train_step": 27.8246910572052, "step": 1033} +{"train_info/time_between_train_steps": 0.005835294723510742, "step": 1033} +{"info/global_step": 1034, "train_info/time_within_train_step": 27.910962104797363, "step": 1034} +{"train_info/time_between_train_steps": 0.0055310726165771484, "step": 1034} +{"info/global_step": 1035, "train_info/time_within_train_step": 27.885451316833496, "step": 1035} +{"train_info/time_between_train_steps": 0.005283832550048828, "step": 1035} +{"info/global_step": 1036, "train_info/time_within_train_step": 27.88440179824829, "step": 1036} +{"train_info/time_between_train_steps": 0.010124683380126953, "step": 1036} +{"info/global_step": 1037, "train_info/time_within_train_step": 27.883018732070923, "step": 1037} +{"train_info/time_between_train_steps": 0.008087158203125, "step": 1037} +{"info/global_step": 1038, "train_info/time_within_train_step": 27.836538791656494, "step": 1038} +{"train_info/time_between_train_steps": 0.006565093994140625, "step": 1038} +{"info/global_step": 1039, "train_info/time_within_train_step": 27.848361492156982, "step": 1039} +{"train_info/time_between_train_steps": 0.009941339492797852, "step": 1039} +{"info/global_step": 1040, "train_info/time_within_train_step": 27.87360453605652, "step": 1040} +{"train_info/time_between_train_steps": 0.009968996047973633, "step": 1040} +{"info/global_step": 1041, "train_info/time_within_train_step": 27.832289218902588, "step": 1041} +{"train_info/time_between_train_steps": 0.005595207214355469, "step": 1041} +{"info/global_step": 1042, "train_info/time_within_train_step": 27.95836591720581, "step": 1042} +{"train_info/time_between_train_steps": 0.005435466766357422, "step": 1042} +{"info/global_step": 1043, "train_info/time_within_train_step": 27.886661052703857, "step": 1043} +{"train_info/time_between_train_steps": 0.008063793182373047, "step": 1043} +{"info/global_step": 1044, "train_info/time_within_train_step": 27.85505986213684, "step": 1044} +{"train_info/time_between_train_steps": 0.005542278289794922, "step": 1044} +{"info/global_step": 1045, "train_info/time_within_train_step": 27.841280221939087, "step": 1045} +{"train_info/time_between_train_steps": 0.008148431777954102, "step": 1045} +{"info/global_step": 1046, "train_info/time_within_train_step": 27.8406023979187, "step": 1046} +{"train_info/time_between_train_steps": 0.00542902946472168, "step": 1046} +{"info/global_step": 1047, "train_info/time_within_train_step": 27.847841024398804, "step": 1047} +{"train_info/time_between_train_steps": 0.005570650100708008, "step": 1047} +{"info/global_step": 1048, "train_info/time_within_train_step": 27.788437366485596, "step": 1048} +{"train_info/time_between_train_steps": 0.005515336990356445, "step": 1048} +{"info/global_step": 1049, "train_info/time_within_train_step": 27.81999659538269, "step": 1049} +{"train_info/time_between_train_steps": 0.005390167236328125, "step": 1049} +{"info/global_step": 1050, "train_info/time_within_train_step": 27.83267045021057, "step": 1050} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1740853793, "_runtime": 30599}, "step": 1050} +{"logs": {"train/loss": 3.0973, "train/learning_rate": 8.333333333333333e-05, "train/epoch": 38.02, "_timestamp": 1740853792, "_runtime": 30598}, "step": 1050} +{"train_info/time_between_train_steps": 0.010538339614868164, "step": 1050} +{"info/global_step": 1051, "train_info/time_within_train_step": 27.90136408805847, "step": 1051} +{"train_info/time_between_train_steps": 0.005872964859008789, "step": 1051} +{"info/global_step": 1052, "train_info/time_within_train_step": 27.88286066055298, "step": 1052} +{"train_info/time_between_train_steps": 0.007204294204711914, "step": 1052} +{"info/global_step": 1053, "train_info/time_within_train_step": 27.87094473838806, "step": 1053} +{"train_info/time_between_train_steps": 0.010210037231445312, "step": 1053} +{"train_info/time_between_train_steps": 29.868587493896484, "step": 1053} +{"info/global_step": 1054, "train_info/time_within_train_step": 27.84761071205139, "step": 1054} +{"train_info/time_between_train_steps": 0.005989789962768555, "step": 1054} +{"info/global_step": 1055, "train_info/time_within_train_step": 27.981733083724976, "step": 1055} +{"train_info/time_between_train_steps": 0.005738496780395508, "step": 1055} +{"info/global_step": 1056, "train_info/time_within_train_step": 28.008524894714355, "step": 1056} +{"train_info/time_between_train_steps": 0.005827188491821289, "step": 1056} +{"info/global_step": 1057, "train_info/time_within_train_step": 27.918771266937256, "step": 1057} +{"train_info/time_between_train_steps": 0.0077936649322509766, "step": 1057} +{"info/global_step": 1058, "train_info/time_within_train_step": 27.835651636123657, "step": 1058} +{"train_info/time_between_train_steps": 0.008724212646484375, "step": 1058} +{"info/global_step": 1059, "train_info/time_within_train_step": 27.96886420249939, "step": 1059} +{"train_info/time_between_train_steps": 0.006765127182006836, "step": 1059} +{"info/global_step": 1060, "train_info/time_within_train_step": 27.818172693252563, "step": 1060} +{"train_info/time_between_train_steps": 0.0058214664459228516, "step": 1060} +{"info/global_step": 1061, "train_info/time_within_train_step": 27.99760675430298, "step": 1061} +{"train_info/time_between_train_steps": 0.0068149566650390625, "step": 1061} +{"info/global_step": 1062, "train_info/time_within_train_step": 27.85865831375122, "step": 1062} +{"train_info/time_between_train_steps": 0.005444765090942383, "step": 1062} +{"info/global_step": 1063, "train_info/time_within_train_step": 27.830352067947388, "step": 1063} +{"train_info/time_between_train_steps": 0.005296468734741211, "step": 1063} +{"info/global_step": 1064, "train_info/time_within_train_step": 27.85270118713379, "step": 1064} +{"train_info/time_between_train_steps": 0.0057260990142822266, "step": 1064} +{"info/global_step": 1065, "train_info/time_within_train_step": 27.802576303482056, "step": 1065} +{"train_info/time_between_train_steps": 0.005568981170654297, "step": 1065} +{"info/global_step": 1066, "train_info/time_within_train_step": 27.797496795654297, "step": 1066} +{"train_info/time_between_train_steps": 0.0060541629791259766, "step": 1066} +{"info/global_step": 1067, "train_info/time_within_train_step": 27.804856300354004, "step": 1067} +{"train_info/time_between_train_steps": 0.005390167236328125, "step": 1067} +{"info/global_step": 1068, "train_info/time_within_train_step": 27.84307599067688, "step": 1068} +{"train_info/time_between_train_steps": 0.005489349365234375, "step": 1068} +{"info/global_step": 1069, "train_info/time_within_train_step": 27.85728406906128, "step": 1069} +{"train_info/time_between_train_steps": 0.010254144668579102, "step": 1069} +{"info/global_step": 1070, "train_info/time_within_train_step": 27.869614601135254, "step": 1070} +{"train_info/time_between_train_steps": 0.008649826049804688, "step": 1070} +{"info/global_step": 1071, "train_info/time_within_train_step": 27.820591688156128, "step": 1071} +{"train_info/time_between_train_steps": 0.005562543869018555, "step": 1071} +{"info/global_step": 1072, "train_info/time_within_train_step": 27.8678240776062, "step": 1072} +{"train_info/time_between_train_steps": 0.005499124526977539, "step": 1072} +{"info/global_step": 1073, "train_info/time_within_train_step": 27.76865315437317, "step": 1073} +{"train_info/time_between_train_steps": 0.00551915168762207, "step": 1073} +{"info/global_step": 1074, "train_info/time_within_train_step": 27.80400824546814, "step": 1074} +{"train_info/time_between_train_steps": 0.007697105407714844, "step": 1074} +{"info/global_step": 1075, "train_info/time_within_train_step": 27.812304496765137, "step": 1075} +{"train_info/time_between_train_steps": 0.005601406097412109, "step": 1075} +{"info/global_step": 1076, "train_info/time_within_train_step": 27.771716117858887, "step": 1076} +{"train_info/time_between_train_steps": 0.005558490753173828, "step": 1076} +{"info/global_step": 1077, "train_info/time_within_train_step": 27.81406331062317, "step": 1077} +{"train_info/time_between_train_steps": 0.005661964416503906, "step": 1077} +{"info/global_step": 1078, "train_info/time_within_train_step": 27.802660942077637, "step": 1078} +{"train_info/time_between_train_steps": 0.009283304214477539, "step": 1078} +{"info/global_step": 1079, "train_info/time_within_train_step": 27.860602378845215, "step": 1079} +{"train_info/time_between_train_steps": 0.0060079097747802734, "step": 1079} +{"info/global_step": 1080, "train_info/time_within_train_step": 27.88377356529236, "step": 1080} +{"train_info/time_between_train_steps": 0.0059049129486083984, "step": 1080} +{"train_info/time_between_train_steps": 30.06297016143799, "step": 1080} +{"info/global_step": 1081, "train_info/time_within_train_step": 27.77727770805359, "step": 1081} +{"train_info/time_between_train_steps": 0.005252361297607422, "step": 1081} +{"info/global_step": 1082, "train_info/time_within_train_step": 27.920539379119873, "step": 1082} +{"train_info/time_between_train_steps": 0.005553722381591797, "step": 1082} +{"info/global_step": 1083, "train_info/time_within_train_step": 27.779698610305786, "step": 1083} +{"train_info/time_between_train_steps": 0.005488395690917969, "step": 1083} +{"info/global_step": 1084, "train_info/time_within_train_step": 27.894184589385986, "step": 1084} +{"train_info/time_between_train_steps": 0.005748271942138672, "step": 1084} +{"info/global_step": 1085, "train_info/time_within_train_step": 27.779906034469604, "step": 1085} +{"train_info/time_between_train_steps": 0.005606412887573242, "step": 1085} +{"info/global_step": 1086, "train_info/time_within_train_step": 27.897347927093506, "step": 1086} +{"train_info/time_between_train_steps": 0.005507230758666992, "step": 1086} +{"info/global_step": 1087, "train_info/time_within_train_step": 27.87580418586731, "step": 1087} +{"train_info/time_between_train_steps": 0.005385637283325195, "step": 1087} +{"info/global_step": 1088, "train_info/time_within_train_step": 27.834713459014893, "step": 1088} +{"train_info/time_between_train_steps": 0.00560450553894043, "step": 1088} +{"info/global_step": 1089, "train_info/time_within_train_step": 27.779847860336304, "step": 1089} +{"train_info/time_between_train_steps": 0.005244255065917969, "step": 1089} +{"info/global_step": 1090, "train_info/time_within_train_step": 27.77865695953369, "step": 1090} +{"train_info/time_between_train_steps": 0.005667209625244141, "step": 1090} +{"info/global_step": 1091, "train_info/time_within_train_step": 27.792031288146973, "step": 1091} +{"train_info/time_between_train_steps": 0.005181074142456055, "step": 1091} +{"info/global_step": 1092, "train_info/time_within_train_step": 27.764505863189697, "step": 1092} +{"train_info/time_between_train_steps": 0.0051844120025634766, "step": 1092} +{"info/global_step": 1093, "train_info/time_within_train_step": 27.76535439491272, "step": 1093} +{"train_info/time_between_train_steps": 0.005152225494384766, "step": 1093} +{"info/global_step": 1094, "train_info/time_within_train_step": 27.803674459457397, "step": 1094} +{"train_info/time_between_train_steps": 0.007837772369384766, "step": 1094} +{"info/global_step": 1095, "train_info/time_within_train_step": 27.80784296989441, "step": 1095} +{"train_info/time_between_train_steps": 0.00652003288269043, "step": 1095} +{"info/global_step": 1096, "train_info/time_within_train_step": 27.838348865509033, "step": 1096} +{"train_info/time_between_train_steps": 0.005803346633911133, "step": 1096} +{"info/global_step": 1097, "train_info/time_within_train_step": 27.79123044013977, "step": 1097} +{"train_info/time_between_train_steps": 0.00938558578491211, "step": 1097} +{"info/global_step": 1098, "train_info/time_within_train_step": 27.835744619369507, "step": 1098} +{"train_info/time_between_train_steps": 0.005692243576049805, "step": 1098} +{"info/global_step": 1099, "train_info/time_within_train_step": 27.842333793640137, "step": 1099} +{"train_info/time_between_train_steps": 0.0053026676177978516, "step": 1099} +{"info/global_step": 1100, "train_info/time_within_train_step": 27.85060739517212, "step": 1100} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1740855245, "_runtime": 32051}, "step": 1100} +{"logs": {"train/loss": 3.1214, "train/learning_rate": 5.5555555555555545e-05, "train/epoch": 40.02, "_timestamp": 1740855245, "_runtime": 32051}, "step": 1100} +{"train_info/time_between_train_steps": 13.409199476242065, "step": 1100} +{"info/global_step": 1101, "train_info/time_within_train_step": 27.8574640750885, "step": 1101} +{"train_info/time_between_train_steps": 0.008037328720092773, "step": 1101} +{"info/global_step": 1102, "train_info/time_within_train_step": 27.93852686882019, "step": 1102} +{"train_info/time_between_train_steps": 0.005505800247192383, "step": 1102} +{"info/global_step": 1103, "train_info/time_within_train_step": 27.83619236946106, "step": 1103} +{"train_info/time_between_train_steps": 0.005312919616699219, "step": 1103} +{"info/global_step": 1104, "train_info/time_within_train_step": 27.832651615142822, "step": 1104} +{"train_info/time_between_train_steps": 0.005477190017700195, "step": 1104} +{"info/global_step": 1105, "train_info/time_within_train_step": 27.854305505752563, "step": 1105} +{"train_info/time_between_train_steps": 0.0056416988372802734, "step": 1105} +{"info/global_step": 1106, "train_info/time_within_train_step": 27.81688690185547, "step": 1106} +{"train_info/time_between_train_steps": 0.006101369857788086, "step": 1106} +{"info/global_step": 1107, "train_info/time_within_train_step": 27.85888171195984, "step": 1107} +{"train_info/time_between_train_steps": 0.005850315093994141, "step": 1107} +{"train_info/time_between_train_steps": 30.239537000656128, "step": 1107} +{"info/global_step": 1108, "train_info/time_within_train_step": 27.873926401138306, "step": 1108} +{"train_info/time_between_train_steps": 0.005606889724731445, "step": 1108} +{"info/global_step": 1109, "train_info/time_within_train_step": 27.990721702575684, "step": 1109} +{"train_info/time_between_train_steps": 0.005495786666870117, "step": 1109} +{"info/global_step": 1110, "train_info/time_within_train_step": 27.85607624053955, "step": 1110} +{"train_info/time_between_train_steps": 0.007938623428344727, "step": 1110} +{"info/global_step": 1111, "train_info/time_within_train_step": 27.904019355773926, "step": 1111} +{"train_info/time_between_train_steps": 0.00583648681640625, "step": 1111} +{"info/global_step": 1112, "train_info/time_within_train_step": 27.835224866867065, "step": 1112} +{"train_info/time_between_train_steps": 0.011509180068969727, "step": 1112} +{"info/global_step": 1113, "train_info/time_within_train_step": 27.925931453704834, "step": 1113} +{"train_info/time_between_train_steps": 0.0057604312896728516, "step": 1113} +{"info/global_step": 1114, "train_info/time_within_train_step": 27.805736541748047, "step": 1114} +{"train_info/time_between_train_steps": 0.005459308624267578, "step": 1114} +{"info/global_step": 1115, "train_info/time_within_train_step": 27.9036283493042, "step": 1115} +{"train_info/time_between_train_steps": 0.010576725006103516, "step": 1115} +{"info/global_step": 1116, "train_info/time_within_train_step": 27.870234489440918, "step": 1116} +{"train_info/time_between_train_steps": 0.005341291427612305, "step": 1116} +{"info/global_step": 1117, "train_info/time_within_train_step": 27.959551334381104, "step": 1117} +{"train_info/time_between_train_steps": 0.005112886428833008, "step": 1117} +{"info/global_step": 1118, "train_info/time_within_train_step": 27.80239963531494, "step": 1118} +{"train_info/time_between_train_steps": 0.00538182258605957, "step": 1118} +{"info/global_step": 1119, "train_info/time_within_train_step": 27.821187019348145, "step": 1119} +{"train_info/time_between_train_steps": 0.005270957946777344, "step": 1119} +{"info/global_step": 1120, "train_info/time_within_train_step": 27.81440567970276, "step": 1120} +{"train_info/time_between_train_steps": 0.0053441524505615234, "step": 1120} +{"info/global_step": 1121, "train_info/time_within_train_step": 27.852421522140503, "step": 1121} +{"train_info/time_between_train_steps": 0.005316972732543945, "step": 1121} +{"info/global_step": 1122, "train_info/time_within_train_step": 27.850664138793945, "step": 1122} +{"train_info/time_between_train_steps": 0.005566596984863281, "step": 1122} +{"info/global_step": 1123, "train_info/time_within_train_step": 27.846206188201904, "step": 1123} +{"train_info/time_between_train_steps": 0.005703926086425781, "step": 1123} +{"info/global_step": 1124, "train_info/time_within_train_step": 27.840383291244507, "step": 1124} +{"train_info/time_between_train_steps": 0.005631446838378906, "step": 1124} +{"info/global_step": 1125, "train_info/time_within_train_step": 27.850632190704346, "step": 1125} +{"train_info/time_between_train_steps": 0.005398750305175781, "step": 1125} +{"info/global_step": 1126, "train_info/time_within_train_step": 27.80298900604248, "step": 1126} +{"train_info/time_between_train_steps": 0.005256175994873047, "step": 1126} +{"info/global_step": 1127, "train_info/time_within_train_step": 27.850841760635376, "step": 1127} +{"train_info/time_between_train_steps": 0.005307435989379883, "step": 1127} +{"info/global_step": 1128, "train_info/time_within_train_step": 27.832008838653564, "step": 1128} +{"train_info/time_between_train_steps": 0.005705118179321289, "step": 1128} +{"info/global_step": 1129, "train_info/time_within_train_step": 27.81386947631836, "step": 1129} +{"train_info/time_between_train_steps": 0.0053501129150390625, "step": 1129} +{"info/global_step": 1130, "train_info/time_within_train_step": 27.875792026519775, "step": 1130} +{"train_info/time_between_train_steps": 0.006457090377807617, "step": 1130} +{"info/global_step": 1131, "train_info/time_within_train_step": 27.855628967285156, "step": 1131} +{"train_info/time_between_train_steps": 0.00597834587097168, "step": 1131} +{"info/global_step": 1132, "train_info/time_within_train_step": 27.940870761871338, "step": 1132} +{"train_info/time_between_train_steps": 0.007845878601074219, "step": 1132} +{"info/global_step": 1133, "train_info/time_within_train_step": 27.95995855331421, "step": 1133} +{"train_info/time_between_train_steps": 0.005542278289794922, "step": 1133} +{"info/global_step": 1134, "train_info/time_within_train_step": 27.901565313339233, "step": 1134} +{"train_info/time_between_train_steps": 0.006130695343017578, "step": 1134} +{"train_info/time_between_train_steps": 29.916805267333984, "step": 1134} +{"info/global_step": 1135, "train_info/time_within_train_step": 27.92678737640381, "step": 1135} +{"train_info/time_between_train_steps": 0.007901191711425781, "step": 1135} +{"info/global_step": 1136, "train_info/time_within_train_step": 27.935197591781616, "step": 1136} +{"train_info/time_between_train_steps": 0.005316972732543945, "step": 1136} +{"info/global_step": 1137, "train_info/time_within_train_step": 27.853543043136597, "step": 1137} +{"train_info/time_between_train_steps": 0.010318994522094727, "step": 1137} +{"info/global_step": 1138, "train_info/time_within_train_step": 27.9877667427063, "step": 1138} +{"train_info/time_between_train_steps": 0.005608797073364258, "step": 1138} +{"info/global_step": 1139, "train_info/time_within_train_step": 27.821016311645508, "step": 1139} +{"train_info/time_between_train_steps": 0.00565648078918457, "step": 1139} +{"info/global_step": 1140, "train_info/time_within_train_step": 27.93944525718689, "step": 1140} +{"train_info/time_between_train_steps": 0.005791425704956055, "step": 1140} +{"info/global_step": 1141, "train_info/time_within_train_step": 27.865041732788086, "step": 1141} +{"train_info/time_between_train_steps": 0.006000995635986328, "step": 1141} +{"info/global_step": 1142, "train_info/time_within_train_step": 27.887163639068604, "step": 1142} +{"train_info/time_between_train_steps": 0.0057756900787353516, "step": 1142} +{"info/global_step": 1143, "train_info/time_within_train_step": 28.02720332145691, "step": 1143} +{"train_info/time_between_train_steps": 0.00540924072265625, "step": 1143} +{"info/global_step": 1144, "train_info/time_within_train_step": 27.80419898033142, "step": 1144} +{"train_info/time_between_train_steps": 0.005329132080078125, "step": 1144} +{"info/global_step": 1145, "train_info/time_within_train_step": 27.77880835533142, "step": 1145} +{"train_info/time_between_train_steps": 0.010830163955688477, "step": 1145} +{"info/global_step": 1146, "train_info/time_within_train_step": 27.816975831985474, "step": 1146} +{"train_info/time_between_train_steps": 0.005637407302856445, "step": 1146} +{"info/global_step": 1147, "train_info/time_within_train_step": 27.918798685073853, "step": 1147} +{"train_info/time_between_train_steps": 0.005360603332519531, "step": 1147} +{"info/global_step": 1148, "train_info/time_within_train_step": 27.830962896347046, "step": 1148} +{"train_info/time_between_train_steps": 0.005576133728027344, "step": 1148} +{"info/global_step": 1149, "train_info/time_within_train_step": 27.778759479522705, "step": 1149} +{"train_info/time_between_train_steps": 0.005391836166381836, "step": 1149} +{"info/global_step": 1150, "train_info/time_within_train_step": 27.781606674194336, "step": 1150} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1740856713, "_runtime": 33519}, "step": 1150} +{"logs": {"train/loss": 3.0907, "train/learning_rate": 2.7777777777777772e-05, "train/epoch": 42.01, "_timestamp": 1740856713, "_runtime": 33519}, "step": 1150} +{"train_info/time_between_train_steps": 0.009503364562988281, "step": 1150} +{"info/global_step": 1151, "train_info/time_within_train_step": 27.848128080368042, "step": 1151} +{"train_info/time_between_train_steps": 0.0062999725341796875, "step": 1151} +{"info/global_step": 1152, "train_info/time_within_train_step": 27.9895977973938, "step": 1152} +{"train_info/time_between_train_steps": 0.006481170654296875, "step": 1152} +{"info/global_step": 1153, "train_info/time_within_train_step": 27.892794609069824, "step": 1153} +{"train_info/time_between_train_steps": 0.005403995513916016, "step": 1153} +{"info/global_step": 1154, "train_info/time_within_train_step": 27.823796272277832, "step": 1154} +{"train_info/time_between_train_steps": 0.0057947635650634766, "step": 1154} +{"info/global_step": 1155, "train_info/time_within_train_step": 27.819885969161987, "step": 1155} +{"train_info/time_between_train_steps": 0.0057218074798583984, "step": 1155} +{"info/global_step": 1156, "train_info/time_within_train_step": 27.831120014190674, "step": 1156} +{"train_info/time_between_train_steps": 0.0073680877685546875, "step": 1156} +{"info/global_step": 1157, "train_info/time_within_train_step": 27.83669900894165, "step": 1157} +{"train_info/time_between_train_steps": 0.010606527328491211, "step": 1157} +{"info/global_step": 1158, "train_info/time_within_train_step": 27.85337209701538, "step": 1158} +{"train_info/time_between_train_steps": 0.006440877914428711, "step": 1158} +{"info/global_step": 1159, "train_info/time_within_train_step": 27.82765769958496, "step": 1159} +{"train_info/time_between_train_steps": 0.005640506744384766, "step": 1159} +{"info/global_step": 1160, "train_info/time_within_train_step": 27.839632511138916, "step": 1160} +{"train_info/time_between_train_steps": 0.009530305862426758, "step": 1160} +{"info/global_step": 1161, "train_info/time_within_train_step": 27.869648694992065, "step": 1161} +{"train_info/time_between_train_steps": 0.006421566009521484, "step": 1161} +{"train_info/time_between_train_steps": 30.00171446800232, "step": 1161} +{"info/global_step": 1162, "train_info/time_within_train_step": 27.9427387714386, "step": 1162} +{"train_info/time_between_train_steps": 0.00850224494934082, "step": 1162} +{"info/global_step": 1163, "train_info/time_within_train_step": 27.95356512069702, "step": 1163} +{"train_info/time_between_train_steps": 0.006925344467163086, "step": 1163} +{"info/global_step": 1164, "train_info/time_within_train_step": 27.81725788116455, "step": 1164} +{"train_info/time_between_train_steps": 0.005643367767333984, "step": 1164} +{"info/global_step": 1165, "train_info/time_within_train_step": 27.90282964706421, "step": 1165} +{"train_info/time_between_train_steps": 0.00567173957824707, "step": 1165} +{"info/global_step": 1166, "train_info/time_within_train_step": 27.813671350479126, "step": 1166} +{"train_info/time_between_train_steps": 0.005513191223144531, "step": 1166} +{"info/global_step": 1167, "train_info/time_within_train_step": 27.908657789230347, "step": 1167} +{"train_info/time_between_train_steps": 0.0053539276123046875, "step": 1167} +{"info/global_step": 1168, "train_info/time_within_train_step": 27.99225664138794, "step": 1168} +{"train_info/time_between_train_steps": 0.009872674942016602, "step": 1168} +{"info/global_step": 1169, "train_info/time_within_train_step": 28.122538566589355, "step": 1169} +{"train_info/time_between_train_steps": 0.007132053375244141, "step": 1169} +{"info/global_step": 1170, "train_info/time_within_train_step": 27.9572811126709, "step": 1170} +{"train_info/time_between_train_steps": 0.0059888362884521484, "step": 1170} +{"info/global_step": 1171, "train_info/time_within_train_step": 27.94282031059265, "step": 1171} +{"train_info/time_between_train_steps": 0.005509376525878906, "step": 1171} +{"info/global_step": 1172, "train_info/time_within_train_step": 27.85767960548401, "step": 1172} +{"train_info/time_between_train_steps": 0.006017208099365234, "step": 1172} +{"info/global_step": 1173, "train_info/time_within_train_step": 27.880156993865967, "step": 1173} +{"train_info/time_between_train_steps": 0.005746126174926758, "step": 1173} +{"info/global_step": 1174, "train_info/time_within_train_step": 27.85010600090027, "step": 1174} +{"train_info/time_between_train_steps": 0.005751848220825195, "step": 1174} +{"info/global_step": 1175, "train_info/time_within_train_step": 27.861918687820435, "step": 1175} +{"train_info/time_between_train_steps": 0.005242109298706055, "step": 1175} +{"info/global_step": 1176, "train_info/time_within_train_step": 27.898066520690918, "step": 1176} +{"train_info/time_between_train_steps": 0.010728597640991211, "step": 1176} +{"info/global_step": 1177, "train_info/time_within_train_step": 27.89689540863037, "step": 1177} +{"train_info/time_between_train_steps": 0.009749650955200195, "step": 1177} +{"info/global_step": 1178, "train_info/time_within_train_step": 28.01725959777832, "step": 1178} +{"train_info/time_between_train_steps": 0.0058481693267822266, "step": 1178} +{"info/global_step": 1179, "train_info/time_within_train_step": 27.908851861953735, "step": 1179} +{"train_info/time_between_train_steps": 0.0058443546295166016, "step": 1179} +{"info/global_step": 1180, "train_info/time_within_train_step": 27.925764322280884, "step": 1180} +{"train_info/time_between_train_steps": 0.0070645809173583984, "step": 1180} +{"info/global_step": 1181, "train_info/time_within_train_step": 27.844152212142944, "step": 1181} +{"train_info/time_between_train_steps": 0.005830287933349609, "step": 1181} +{"info/global_step": 1182, "train_info/time_within_train_step": 27.838812351226807, "step": 1182} +{"train_info/time_between_train_steps": 0.005843162536621094, "step": 1182} +{"info/global_step": 1183, "train_info/time_within_train_step": 27.845596313476562, "step": 1183} +{"train_info/time_between_train_steps": 0.005662202835083008, "step": 1183} +{"info/global_step": 1184, "train_info/time_within_train_step": 27.84494376182556, "step": 1184} +{"train_info/time_between_train_steps": 0.0056569576263427734, "step": 1184} +{"info/global_step": 1185, "train_info/time_within_train_step": 27.896172523498535, "step": 1185} +{"train_info/time_between_train_steps": 0.006241559982299805, "step": 1185} +{"info/global_step": 1186, "train_info/time_within_train_step": 27.84671902656555, "step": 1186} +{"train_info/time_between_train_steps": 0.005837440490722656, "step": 1186} +{"info/global_step": 1187, "train_info/time_within_train_step": 27.863677740097046, "step": 1187} +{"train_info/time_between_train_steps": 0.008666276931762695, "step": 1187} +{"info/global_step": 1188, "train_info/time_within_train_step": 27.980040550231934, "step": 1188} +{"train_info/time_between_train_steps": 0.011496782302856445, "step": 1188} +{"train_info/time_between_train_steps": 30.488991260528564, "step": 1188} +{"info/global_step": 1189, "train_info/time_within_train_step": 27.833794832229614, "step": 1189} +{"train_info/time_between_train_steps": 0.005473613739013672, "step": 1189} +{"info/global_step": 1190, "train_info/time_within_train_step": 27.96000075340271, "step": 1190} +{"train_info/time_between_train_steps": 0.007760047912597656, "step": 1190} +{"info/global_step": 1191, "train_info/time_within_train_step": 27.922945261001587, "step": 1191} +{"train_info/time_between_train_steps": 0.013758659362792969, "step": 1191} +{"info/global_step": 1192, "train_info/time_within_train_step": 28.172298908233643, "step": 1192} +{"train_info/time_between_train_steps": 0.0068204402923583984, "step": 1192} +{"info/global_step": 1193, "train_info/time_within_train_step": 27.88206171989441, "step": 1193} +{"train_info/time_between_train_steps": 0.005966901779174805, "step": 1193} +{"info/global_step": 1194, "train_info/time_within_train_step": 28.02836513519287, "step": 1194} +{"train_info/time_between_train_steps": 0.005765199661254883, "step": 1194} +{"info/global_step": 1195, "train_info/time_within_train_step": 27.822684049606323, "step": 1195} +{"train_info/time_between_train_steps": 0.006014347076416016, "step": 1195} +{"info/global_step": 1196, "train_info/time_within_train_step": 27.887505769729614, "step": 1196} +{"train_info/time_between_train_steps": 0.005860567092895508, "step": 1196} +{"info/global_step": 1197, "train_info/time_within_train_step": 27.812180995941162, "step": 1197} +{"train_info/time_between_train_steps": 0.005559206008911133, "step": 1197} +{"info/global_step": 1198, "train_info/time_within_train_step": 27.802488803863525, "step": 1198} +{"train_info/time_between_train_steps": 0.005672454833984375, "step": 1198} +{"info/global_step": 1199, "train_info/time_within_train_step": 27.790602445602417, "step": 1199} +{"train_info/time_between_train_steps": 0.0053539276123046875, "step": 1199} +{"info/global_step": 1200, "train_info/time_within_train_step": 27.823694705963135, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1740858168, "_runtime": 34974}, "step": 1200} +{"logs": {"train/loss": 3.0653, "train/learning_rate": 0.0, "train/epoch": 44.01, "_timestamp": 1740858168, "_runtime": 34974}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1740858183, "_runtime": 34989}, "step": 1200} +{"logs": {"train/train_runtime": 34990.2325, "train/train_samples_per_second": 17.559, "train/train_steps_per_second": 0.034, "train/total_flos": 3.32365555040256e+17, "train/train_loss": 4.02547514518102, "train/epoch": 44.01, "_timestamp": 1740858183, "_runtime": 34989}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4453125, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1740858192, "_runtime": 34998}, "step": 1200} +{"logs": {"eval/loss": 4.182457447052002, "eval/runtime": 3.987, "eval/samples_per_second": 50.915, "eval/steps_per_second": 3.261, "train/epoch": 44.01, "_timestamp": 1740858192, "_runtime": 34998}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4453125, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1740858192, "_runtime": 34998}, "step": 1200} +{"logs": {"eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 4.182457447052002, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 65.52668387276776, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 3.987, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 50.915, "train/epoch": 44.01, "_timestamp": 1740858192, "_runtime": 34998}, "step": 1200} diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/perturb_det_adj_np_num_en_EN_randinit_seed53.log b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/perturb_det_adj_np_num_en_EN_randinit_seed53.log new file mode 100755 index 0000000000000000000000000000000000000000..c123d5f0fd97d1bfdeb0f686c88868c60976c52c --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/perturb_det_adj_np_num_en_EN_randinit_seed53.log @@ -0,0 +1,121 @@ +|=>> 03/01 [10:58:59] - mistral - INFO :: Starting Run: perturb_det_adj_np_num_en_EN_randinit_seed53... +|=>> 03/01 [10:58:59] - mistral - INFO :: Setting Random Seed to 53! +|=>> 03/01 [10:58:59] - mistral - INFO :: Building Tokenize and Initializing `gpt2-small` via AutoModel/AutoConfig... +|=>> 03/01 [10:58:59] - mistral - INFO :: Using Configs For Model From: /scratch/xiulyang/multilingual-LM/mistral/conf/models/gpt2-small-EN.json ... +|=>> 03/01 [10:58:59] - mistral.models.auto - INFO :: Building Hugging Face GPT2Config from provided configs: {'activation_function': 'gelu_new', 'architectures': ['GPT2LMHeadModel'], 'attn_pdrop': 0.1, 'bos_token_id': 50256, 'embd_pdrop': 0.1, 'eos_token_id': 50256, 'initializer_range': 0.02, 'layer_norm_epsilon': 1e-05, 'model_type': 'gpt2', 'n_ctx': 1024, 'n_embd': 768, 'n_head': 12, 'n_inner': None, 'n_layer': 12, 'n_positions': 1024, 'reorder_and_upcast_attn': True, 'resid_pdrop': 0.1, 'scale_attn_by_inverse_layer_idx': True, 'scale_attn_weights': True, 'summary_activation': None, 'summary_first_dropout': 0.2, 'summary_proj_to_labels': True, 'summary_type': 'cls_index', 'summary_use_proj': True, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 1024}}, 'torch_dtype': 'float32', 'transformers_version': '4.35.2', 'use_cache': False, 'vocab_size': 50257} ... +|=>> 03/01 [10:58:59] - mistral.models.auto - INFO :: Fetching Hugging Face [Fast] AutoTokenizer for Model: `gpt2`... +|=>> 03/01 [10:58:59] - mistral.models.auto - INFO :: Using a Pretokenized Dataset +|=>> 03/01 [10:58:59] - mistral.models.auto - INFO :: Initializing Custom GPT-2 Model from Configuration: `gpt2`... +|=>> 03/01 [10:59:03] - mistral - INFO :: Setting Training Arguments from Quinfig... +|=>> 03/01 [10:59:03] - mistral.args.training - INFO :: Setting Gradient Accumulation Steps = `64` [BSZ: 512 World Size: 1 Device BSZ: 8] +|=>> 03/01 [10:59:03] - mistral - INFO :: Downloading and Preprocessing Dataset `/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py`... +|=>> 03/01 [10:59:04] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Generating examples from = /scratch/xiulyang/multilingual-LM/data/multilingual/multilingual_data_perturbed/perturb_det_adj_np_num_en/train +|=>> 03/01 [10:59:04] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Total sentences: 1025689 +|=>> 03/01 [10:59:04] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Loading pre-tokenized data +|=>> 03/01 [10:59:09] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 03/01 [10:59:09] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 03/01 [10:59:11] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 03/01 [10:59:14] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Generating examples from = /scratch/xiulyang/multilingual-LM/data/multilingual/multilingual_data_perturbed/perturb_det_adj_np_num_en/dev +|=>> 03/01 [10:59:14] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Total sentences: 11410 +|=>> 03/01 [10:59:14] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Loading pre-tokenized data +|=>> 03/01 [10:59:14] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 03/01 [10:59:14] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 03/01 [10:59:14] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 03/01 [10:59:15] - mistral.corpora.auto - INFO :: Building Tokenized Indexed Dataset for {dataset_id}/{dataset_name}... +|=>> 03/01 [10:59:15] - mistral.corpora.auto - INFO :: Building Indexed Dataset for train +|=>> 03/01 [10:59:44] - mistral.corpora.auto - INFO :: Building Indexed Dataset for validation +|=>> 03/01 [10:59:45] - mistral - INFO :: Initializing Model Trainer... +|=>> 03/01 [10:59:45] - mistral - INFO :: Training Arguments: TrainingArguments( +_n_gpu=1, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +bf16=False, +bf16_full_eval=False, +data_seed=53, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=IntervalStrategy.STEPS, +fp16=True, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +gradient_accumulation_steps=64, +gradient_checkpointing=False, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_model_id=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0006, +length_column_name=length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=logs, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=50, +logging_strategy=IntervalStrategy.STEPS, +lr_scheduler_type=SchedulerType.LINEAR, +max_grad_norm=1.0, +max_steps=1200, +metric_for_best_model=None, +mp_parameters=, +no_cuda=False, +num_train_epochs=3.0, +optim=OptimizerNames.ADAMW_HF, +output_dir=//scratch/xiulyang/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53, +overwrite_output_dir=False, +past_index=-1, +per_device_eval_batch_size=16, +per_device_train_batch_size=8, +prediction_loss_only=True, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +remove_unused_columns=True, +report_to=[], +resume_from_checkpoint=None, +run_name=perturb_det_adj_np_num_en_EN_randinit_seed53, +save_on_each_node=False, +save_steps=1000, +save_strategy=IntervalStrategy.STEPS, +save_total_limit=None, +seed=53, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=120, +weight_decay=0.1, +xpu_backend=None, +) +|=>> 03/01 [10:59:46] - mistral.core.callbacks - INFO :: Setting W&B Project: xiulin-yang-compling +|=>> 03/01 [10:59:53] - mistral - INFO :: Training... +|=>> 03/01 [10:59:53] - mistral.core.callbacks - INFO :: Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" +|=>> 03/01 [20:43:08] - mistral - INFO :: ...and that's all folks! +|=>> 03/01 [20:43:08] - mistral - INFO :: Running final evaluation... diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/pytorch_model.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..79606bdca066f0ffdf09d00decb7a0cc64406fbf --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b6b16cad85cae699e713528f709cb28175652190ccf93624f41b01a74ac113e +size 510396521 diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/special_tokens_map.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/tokenizer_config.json b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/training_args.bin b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..15b98514e767b3fc92f1a33b342cb1dd57904c39 --- /dev/null +++ b/multilingual_models/perturb_det_adj_np_num_en_EN_randinit/babylm_perturb_det_adj_np_num_en_EN_randinit_seed53/runs/perturb_det_adj_np_num_en_EN_randinit_seed53/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe85938b028b043a65c4e8b12b6788c64a3e8013a7acfbf6b8fb24e8dd9b106 +size 3183