diff --git a/checkpoint-0/config.json b/checkpoint-0/config.json new file mode 100755 index 0000000000000000000000000000000000000000..060b4e655747a8c6abaac05c63d6eacfe62a3ca1 --- /dev/null +++ b/checkpoint-0/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 1, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50262 +} diff --git a/checkpoint-0/pytorch_model.bin b/checkpoint-0/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..c5b163eff6a042091dfcba7e4e60a8d3fee703f7 --- /dev/null +++ b/checkpoint-0/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe75c57528c63ef4683e7a73acb6a67e0449f12b653f8a3260c030600a57ab96 +size 510411881 diff --git a/checkpoint-0/special_tokens_map.json b/checkpoint-0/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-0/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-0/tokenizer_config.json b/checkpoint-0/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-0/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-0/training_args.bin b/checkpoint-0/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..03b2288ee5b328686d8934e3fc4a6b925a9c3ede --- /dev/null +++ b/checkpoint-0/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfe69ffbf98648e171ef448900d45919fc613a0e3b6af7ac7ce7cdbdf004776 +size 3183 diff --git a/checkpoint-100/config.json b/checkpoint-100/config.json new file mode 100755 index 0000000000000000000000000000000000000000..060b4e655747a8c6abaac05c63d6eacfe62a3ca1 --- /dev/null +++ b/checkpoint-100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 1, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50262 +} diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..6307cf4524891c2e060bd9d87430beb76005bde5 --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:227ebe8ed31a70389a3473445c4ce13d1e422734462f8182b8c763f1577cbfcd +size 995634545 diff --git a/checkpoint-100/pytorch_model.bin b/checkpoint-100/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..5510d8bd0bc3ba83d665b696ae01eae1dac05928 --- /dev/null +++ b/checkpoint-100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6d061a2d3afd1208f4c3e4566ac8235204c401ad7ebbdd55feb04387b4dea1e +size 510411881 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..2422efd996fc819b362dc5f62de6b5cf4d75ac7f --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75b3deb6ab401c34ada1c2ab091e3828ec74cfb6b0856f549f83af013f72bc20 +size 14567 diff --git a/checkpoint-100/scaler.pt b/checkpoint-100/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..45cc4a33e17645cb0ed4a911b11c77cb2e7ce7f3 --- /dev/null +++ b/checkpoint-100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a3423b2fe42f204bc8fe2c666ff379f9fd753a0f13613064a5e71e86b519e8 +size 559 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..595eec9094e91b3eb24c2de88c461df2c22026ab --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e9b9d31d11c624d89b0c04ad496adf4b5addd3e703848d2583972c703e8da6 +size 623 diff --git a/checkpoint-100/special_tokens_map.json b/checkpoint-100/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-100/tokenizer_config.json b/checkpoint-100/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..8dcc454b0a59cb4a0e58ebbe40b41e59b6ade9f8 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,34 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0083333333333333, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9361, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2553, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7321, + "step": 100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.709337079808e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..03b2288ee5b328686d8934e3fc4a6b925a9c3ede --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfe69ffbf98648e171ef448900d45919fc613a0e3b6af7ac7ce7cdbdf004776 +size 3183 diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100755 index 0000000000000000000000000000000000000000..060b4e655747a8c6abaac05c63d6eacfe62a3ca1 --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 1, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50262 +} diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..f989a2667c475033ec61df1bb972d0b7ca790d0c --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cb1f4330dab013cfe28b4ccc1390ac750478b2f7fea1782bc889657c7e9a9ae +size 995634737 diff --git a/checkpoint-1000/pytorch_model.bin b/checkpoint-1000/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..88ae4ce377c9e280c350bc1931ba8f8d5b57195a --- /dev/null +++ b/checkpoint-1000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6df532d894c2dd7ac0600aa0bcd9d679b4577ccf5deb639ad777e3445747ab63 +size 510411881 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..4d7e2b2be4d2db792b27f03664a45f6e9cd937be --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82958bb0970c43b52c26207c9981b234785c376dc70cd764ab838124d3e330cf +size 14567 diff --git a/checkpoint-1000/scaler.pt b/checkpoint-1000/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..b04695d3a30e4bab2b78883d9c849c25c37ef7d7 --- /dev/null +++ b/checkpoint-1000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f810fc7b695697c440d8985f6042b4ba23a9e1027604c265718b518ca29f1b2b +size 559 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..2a616899591a36b66ac0bd1ceeb324087c181a2a --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0691206f4bd9ca409d6e7104087a4e0eb05df8f8f555a400f6ecc532edba52d8 +size 623 diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..312caf32ff8117c13948a48bed1efa6f64592523 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,158 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 33.00833333333333, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9361, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2553, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7321, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 6.4515, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.2916, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.103, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 5.7716, + "step": 300 + }, + { + "epoch": 11.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.4902, + "step": 350 + }, + { + "epoch": 13.01, + "learning_rate": 0.00044444444444444436, + "loss": 5.1844, + "step": 400 + }, + { + "epoch": 14.03, + "learning_rate": 0.00041666666666666664, + "loss": 4.8669, + "step": 450 + }, + { + "epoch": 16.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.6547, + "step": 500 + }, + { + "epoch": 18.01, + "learning_rate": 0.0003611111111111111, + "loss": 4.4311, + "step": 550 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.1923, + "step": 600 + }, + { + "epoch": 21.02, + "learning_rate": 0.00030555555555555555, + "loss": 4.0473, + "step": 650 + }, + { + "epoch": 23.01, + "learning_rate": 0.0002777777777777778, + "loss": 3.9047, + "step": 700 + }, + { + "epoch": 24.02, + "learning_rate": 0.00025, + "loss": 3.7631, + "step": 750 + }, + { + "epoch": 26.02, + "learning_rate": 0.00022222222222222218, + "loss": 3.7075, + "step": 800 + }, + { + "epoch": 28.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.6326, + "step": 850 + }, + { + "epoch": 29.02, + "learning_rate": 0.00016666666666666666, + "loss": 3.5357, + "step": 900 + }, + { + "epoch": 31.02, + "learning_rate": 0.0001388888888888889, + "loss": 3.5105, + "step": 950 + }, + { + "epoch": 33.01, + "learning_rate": 0.00011111111111111109, + "loss": 3.4606, + "step": 1000 + }, + { + "epoch": 33.01, + "eval_loss": 4.271039009094238, + "eval_runtime": 1.2465, + "eval_samples_per_second": 89.851, + "eval_steps_per_second": 5.616, + "step": 1000 + }, + { + "epoch": 33.01, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_loss": 4.271039009094238, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_ppl": 71.59598586766586, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_runtime": 1.2465, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 89.851, + "step": 1000 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.7127077470208e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..03b2288ee5b328686d8934e3fc4a6b925a9c3ede --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfe69ffbf98648e171ef448900d45919fc613a0e3b6af7ac7ce7cdbdf004776 +size 3183 diff --git a/checkpoint-1100/config.json b/checkpoint-1100/config.json new file mode 100755 index 0000000000000000000000000000000000000000..060b4e655747a8c6abaac05c63d6eacfe62a3ca1 --- /dev/null +++ b/checkpoint-1100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 1, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50262 +} diff --git a/checkpoint-1100/optimizer.pt b/checkpoint-1100/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..ecd5ed98b8f9e571e9fcb28dd73121459c812489 --- /dev/null +++ b/checkpoint-1100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae6e01773c2d82afb34eac36e4f450c5bb9c9fba7056c9d653875e52628fe0c9 +size 995634737 diff --git a/checkpoint-1100/pytorch_model.bin b/checkpoint-1100/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..7aaed3e27f77897e2c146e93d3f9963b60f40066 --- /dev/null +++ b/checkpoint-1100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffa3ef26fb999be44279b6c237c53ae6bd2d87d58c8d46acf0b08cf72e711499 +size 510411881 diff --git a/checkpoint-1100/rng_state.pth b/checkpoint-1100/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..14b6cf254f9e7d15d382376f81940090548c6e3b --- /dev/null +++ b/checkpoint-1100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f036abedbb6edc7b5e2255f8c7275fbf11e6e169582d2f99ec6d593949686204 +size 14567 diff --git a/checkpoint-1100/scaler.pt b/checkpoint-1100/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..37bf049fbd5fd721203bf0238edc8ff67dbd8f94 --- /dev/null +++ b/checkpoint-1100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb16c30b686aa43e110b0d33f9d46bf3127b7124542ca8dc34831233d4675a0 +size 559 diff --git a/checkpoint-1100/scheduler.pt b/checkpoint-1100/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..c158169ec0bb09620952d544fd4b4edea0cc9cf4 --- /dev/null +++ b/checkpoint-1100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1b7713e4bb40428f29080b7d08d4a52f779ac863737861e4724292b2cf6c59 +size 623 diff --git a/checkpoint-1100/special_tokens_map.json b/checkpoint-1100/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1100/tokenizer_config.json b/checkpoint-1100/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1100/trainer_state.json b/checkpoint-1100/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..9ce693b7c386a390085853db38eb9d84e0cbaf97 --- /dev/null +++ b/checkpoint-1100/trainer_state.json @@ -0,0 +1,170 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 36.016666666666666, + "global_step": 1100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9361, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2553, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7321, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 6.4515, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.2916, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.103, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 5.7716, + "step": 300 + }, + { + "epoch": 11.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.4902, + "step": 350 + }, + { + "epoch": 13.01, + "learning_rate": 0.00044444444444444436, + "loss": 5.1844, + "step": 400 + }, + { + "epoch": 14.03, + "learning_rate": 0.00041666666666666664, + "loss": 4.8669, + "step": 450 + }, + { + "epoch": 16.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.6547, + "step": 500 + }, + { + "epoch": 18.01, + "learning_rate": 0.0003611111111111111, + "loss": 4.4311, + "step": 550 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.1923, + "step": 600 + }, + { + "epoch": 21.02, + "learning_rate": 0.00030555555555555555, + "loss": 4.0473, + "step": 650 + }, + { + "epoch": 23.01, + "learning_rate": 0.0002777777777777778, + "loss": 3.9047, + "step": 700 + }, + { + "epoch": 24.02, + "learning_rate": 0.00025, + "loss": 3.7631, + "step": 750 + }, + { + "epoch": 26.02, + "learning_rate": 0.00022222222222222218, + "loss": 3.7075, + "step": 800 + }, + { + "epoch": 28.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.6326, + "step": 850 + }, + { + "epoch": 29.02, + "learning_rate": 0.00016666666666666666, + "loss": 3.5357, + "step": 900 + }, + { + "epoch": 31.02, + "learning_rate": 0.0001388888888888889, + "loss": 3.5105, + "step": 950 + }, + { + "epoch": 33.01, + "learning_rate": 0.00011111111111111109, + "loss": 3.4606, + "step": 1000 + }, + { + "epoch": 33.01, + "eval_loss": 4.271039009094238, + "eval_runtime": 1.2465, + "eval_samples_per_second": 89.851, + "eval_steps_per_second": 5.616, + "step": 1000 + }, + { + "epoch": 33.01, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_loss": 4.271039009094238, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_ppl": 71.59598586766586, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_runtime": 1.2465, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 89.851, + "step": 1000 + }, + { + "epoch": 34.02, + "learning_rate": 8.333333333333333e-05, + "loss": 3.3886, + "step": 1050 + }, + { + "epoch": 36.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 3.3823, + "step": 1100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.9836414550016e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1100/training_args.bin b/checkpoint-1100/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..03b2288ee5b328686d8934e3fc4a6b925a9c3ede --- /dev/null +++ b/checkpoint-1100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfe69ffbf98648e171ef448900d45919fc613a0e3b6af7ac7ce7cdbdf004776 +size 3183 diff --git a/checkpoint-1200/config.json b/checkpoint-1200/config.json new file mode 100755 index 0000000000000000000000000000000000000000..060b4e655747a8c6abaac05c63d6eacfe62a3ca1 --- /dev/null +++ b/checkpoint-1200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 1, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50262 +} diff --git a/checkpoint-1200/optimizer.pt b/checkpoint-1200/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..4a3e8213d5ce5e4f00a25c7cda0be696c35dab3b --- /dev/null +++ b/checkpoint-1200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57ac5edc5ebfb6c58cee3a1af12031c52d9adf2ffb3e024123ef20cda6c3e495 +size 995634737 diff --git a/checkpoint-1200/pytorch_model.bin b/checkpoint-1200/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..b73e49eab805485975a501262650b193710009d8 --- /dev/null +++ b/checkpoint-1200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1b7bf158fcfaa731923bdf157800985d71dd4d6884003787ada360b6b876232 +size 510411881 diff --git a/checkpoint-1200/rng_state.pth b/checkpoint-1200/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..f98e0f91e2e93ece1ea7e290d218abadc52acfde --- /dev/null +++ b/checkpoint-1200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67d3a4eae33f2102dc4dc73cc5ff443d7518dd694bf94b8c45aa3dad80b482c7 +size 14567 diff --git a/checkpoint-1200/scaler.pt b/checkpoint-1200/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..8953dddccbefc4703c09dcda27d83c15add2bade --- /dev/null +++ b/checkpoint-1200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c7277eaca0850ae3e9b6790b3d002d820169cce0671185e672c28c8ae8e056 +size 559 diff --git a/checkpoint-1200/scheduler.pt b/checkpoint-1200/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..310d39c17fc616a9c83286ed00f0f4cefba9f5df --- /dev/null +++ b/checkpoint-1200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:935a8fb09a6e9698d9894853b05e181b3f56098deaaecddde08e55f06bf000c4 +size 623 diff --git a/checkpoint-1200/special_tokens_map.json b/checkpoint-1200/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1200/tokenizer_config.json b/checkpoint-1200/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1200/trainer_state.json b/checkpoint-1200/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..723a39bee09047ef17b8f360b73a09da5497004f --- /dev/null +++ b/checkpoint-1200/trainer_state.json @@ -0,0 +1,182 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 39.025, + "global_step": 1200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9361, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2553, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7321, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 6.4515, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.2916, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.103, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 5.7716, + "step": 300 + }, + { + "epoch": 11.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.4902, + "step": 350 + }, + { + "epoch": 13.01, + "learning_rate": 0.00044444444444444436, + "loss": 5.1844, + "step": 400 + }, + { + "epoch": 14.03, + "learning_rate": 0.00041666666666666664, + "loss": 4.8669, + "step": 450 + }, + { + "epoch": 16.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.6547, + "step": 500 + }, + { + "epoch": 18.01, + "learning_rate": 0.0003611111111111111, + "loss": 4.4311, + "step": 550 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.1923, + "step": 600 + }, + { + "epoch": 21.02, + "learning_rate": 0.00030555555555555555, + "loss": 4.0473, + "step": 650 + }, + { + "epoch": 23.01, + "learning_rate": 0.0002777777777777778, + "loss": 3.9047, + "step": 700 + }, + { + "epoch": 24.02, + "learning_rate": 0.00025, + "loss": 3.7631, + "step": 750 + }, + { + "epoch": 26.02, + "learning_rate": 0.00022222222222222218, + "loss": 3.7075, + "step": 800 + }, + { + "epoch": 28.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.6326, + "step": 850 + }, + { + "epoch": 29.02, + "learning_rate": 0.00016666666666666666, + "loss": 3.5357, + "step": 900 + }, + { + "epoch": 31.02, + "learning_rate": 0.0001388888888888889, + "loss": 3.5105, + "step": 950 + }, + { + "epoch": 33.01, + "learning_rate": 0.00011111111111111109, + "loss": 3.4606, + "step": 1000 + }, + { + "epoch": 33.01, + "eval_loss": 4.271039009094238, + "eval_runtime": 1.2465, + "eval_samples_per_second": 89.851, + "eval_steps_per_second": 5.616, + "step": 1000 + }, + { + "epoch": 33.01, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_loss": 4.271039009094238, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_ppl": 71.59598586766586, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_runtime": 1.2465, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 89.851, + "step": 1000 + }, + { + "epoch": 34.02, + "learning_rate": 8.333333333333333e-05, + "loss": 3.3886, + "step": 1050 + }, + { + "epoch": 36.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 3.3823, + "step": 1100 + }, + { + "epoch": 38.01, + "learning_rate": 2.7777777777777772e-05, + "loss": 3.3532, + "step": 1150 + }, + { + "epoch": 39.02, + "learning_rate": 0.0, + "loss": 3.305, + "step": 1200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 3.2545751629824e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1200/training_args.bin b/checkpoint-1200/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..03b2288ee5b328686d8934e3fc4a6b925a9c3ede --- /dev/null +++ b/checkpoint-1200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfe69ffbf98648e171ef448900d45919fc613a0e3b6af7ac7ce7cdbdf004776 +size 3183 diff --git a/checkpoint-200/config.json b/checkpoint-200/config.json new file mode 100755 index 0000000000000000000000000000000000000000..060b4e655747a8c6abaac05c63d6eacfe62a3ca1 --- /dev/null +++ b/checkpoint-200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 1, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50262 +} diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..53d2353f25b79d9930267645676f4e9f553fc14c --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2cc090d9201ae81cb399217cf63316dd01922cb7ad321e48106411133568b61 +size 995634545 diff --git a/checkpoint-200/pytorch_model.bin b/checkpoint-200/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..de518801eabb833c19f0358e688a7c66fa3020fc --- /dev/null +++ b/checkpoint-200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f405e20f1f54d911402196b841dbaa56c1011cd0556ed8e6a97b6a26fb37347 +size 510411881 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..888f7b73fe0b9db63c3cee4c1e38703e9fe8f48c --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:993b141be81f10c60640e3df393e433d1a2ae0ea46083e8469e9e467510ccd0d +size 14567 diff --git a/checkpoint-200/scaler.pt b/checkpoint-200/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..365b52ebf376498237a843f6d7332e5a49b14902 --- /dev/null +++ b/checkpoint-200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb6982c29cd162f49aeb531674acf574eccd46a8f556bec596040d7c3b95200a +size 559 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..36cb8da739c80a63971a62f06f781c40ac0fceb2 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a992625ada6d884e508ff9392d16738b4a4163147f8fcbf9f46be82ecae9888 +size 623 diff --git a/checkpoint-200/special_tokens_map.json b/checkpoint-200/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-200/tokenizer_config.json b/checkpoint-200/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..1063368154655c16436268467b01f44b4f0da015 --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,46 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.016666666666667, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9361, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2553, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7321, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 6.4515, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.2916, + "step": 200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 5.418674159616e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..03b2288ee5b328686d8934e3fc4a6b925a9c3ede --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfe69ffbf98648e171ef448900d45919fc613a0e3b6af7ac7ce7cdbdf004776 +size 3183 diff --git a/checkpoint-300/config.json b/checkpoint-300/config.json new file mode 100755 index 0000000000000000000000000000000000000000..060b4e655747a8c6abaac05c63d6eacfe62a3ca1 --- /dev/null +++ b/checkpoint-300/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 1, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50262 +} diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..b1c64de1ca820d91c25f2fd9c99e8f5b7b63da78 --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1efc9c959f3af6ee1a1180e6559a4b0b539a1c8d5795210618feff3ed174afd +size 995634737 diff --git a/checkpoint-300/pytorch_model.bin b/checkpoint-300/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..624882ceecc413091761e1621a5f1b95cce01dbd --- /dev/null +++ b/checkpoint-300/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32bf807ccd56e82113a4bddb1a50e9388b65ba8a7bd57ed3f56805f6ff054fcf +size 510411881 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..9684c997f8000f4732417a06287c5ff07b7b8df6 --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a648f543fc357be1d32dc8ea81d84963dd941e03a8ab639fc57e7dad629032b +size 14567 diff --git a/checkpoint-300/scaler.pt b/checkpoint-300/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..01066cf4761ea9d2f7962f5181762f7b08690b79 --- /dev/null +++ b/checkpoint-300/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0967b9f865f16344c55f5ccc3cf7d6e8e97ca61dda304e931ca6bad130f48dd1 +size 559 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..681987f21e79cd4685aac32a5e6b74341e25d936 --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1db899b266916f792a0898ceb27a87eaf76647f10c29cc0c13ce22f12a12efd +size 623 diff --git a/checkpoint-300/special_tokens_map.json b/checkpoint-300/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-300/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-300/tokenizer_config.json b/checkpoint-300/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-300/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..6736a503baa5863b2b74d9f6b3a5c1248f30a49d --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,58 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.025, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9361, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2553, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7321, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 6.4515, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.2916, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.103, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 5.7716, + "step": 300 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 8.128011239424e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..03b2288ee5b328686d8934e3fc4a6b925a9c3ede --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfe69ffbf98648e171ef448900d45919fc613a0e3b6af7ac7ce7cdbdf004776 +size 3183 diff --git a/checkpoint-400/config.json b/checkpoint-400/config.json new file mode 100755 index 0000000000000000000000000000000000000000..060b4e655747a8c6abaac05c63d6eacfe62a3ca1 --- /dev/null +++ b/checkpoint-400/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 1, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50262 +} diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..a3319b14148e1b62e106902a752638b1c8e80b16 --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:788c6e9112a45f8d4fb61989260df94a0690b75f215a086b9369beae2dc172be +size 995634737 diff --git a/checkpoint-400/pytorch_model.bin b/checkpoint-400/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..cfa747e727221b2d7e5bfe756b5342fe6fabce74 --- /dev/null +++ b/checkpoint-400/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60aa41596e1d5b5bf974084d287c64cf88f25e9ccb42437dd9e05058af1c7643 +size 510411881 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..7672476c18d28bf676b765446024205a037bc7a4 --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e89428aa956f1eee85b600838254c64926c8df4a5ba5f342125169a9044e9462 +size 14567 diff --git a/checkpoint-400/scaler.pt b/checkpoint-400/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..9c7aef4199e98d81152810b661dbaffc01963383 --- /dev/null +++ b/checkpoint-400/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476e510c8ea7edbd2b51d1e76a4e037820a5639381c0d8b5d32dafa492795a1e +size 559 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..967673cfc91836d239beb5a7ede06992232ba309 --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db087d678047b5c346bbb8511936612c1fdf223c6fd70321e97369bc31ed76a8 +size 623 diff --git a/checkpoint-400/special_tokens_map.json b/checkpoint-400/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-400/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-400/tokenizer_config.json b/checkpoint-400/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-400/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..ac46c68c7342c883532229abc5eac2031319f02c --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,70 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 13.008333333333333, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9361, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2553, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7321, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 6.4515, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.2916, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.103, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 5.7716, + "step": 300 + }, + { + "epoch": 11.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.4902, + "step": 350 + }, + { + "epoch": 13.01, + "learning_rate": 0.00044444444444444436, + "loss": 5.1844, + "step": 400 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.0848583876608e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..03b2288ee5b328686d8934e3fc4a6b925a9c3ede --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfe69ffbf98648e171ef448900d45919fc613a0e3b6af7ac7ce7cdbdf004776 +size 3183 diff --git a/checkpoint-500/config.json b/checkpoint-500/config.json new file mode 100755 index 0000000000000000000000000000000000000000..060b4e655747a8c6abaac05c63d6eacfe62a3ca1 --- /dev/null +++ b/checkpoint-500/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 1, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50262 +} diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..c366ca63486d305e93b4219a1741061c7b6b2ecb --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae8293a9a62df1514d9e9e54221f1df45bfaba974f7a97a22e5c79d251b26a22 +size 995634737 diff --git a/checkpoint-500/pytorch_model.bin b/checkpoint-500/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..a9c0a46fe3718c147c226790c02de24180f08c8c --- /dev/null +++ b/checkpoint-500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0541068c5e683c62734703b55ab754fcdbf031e6ebc3d906002d1896e7f86754 +size 510411881 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..eca80361e7d60e726f13ced015e37d00770f9d59 --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02e237e2a24b231b41d94fa91cae4541c9920c6fb2ce9b65801e51cd5a886061 +size 14567 diff --git a/checkpoint-500/scaler.pt b/checkpoint-500/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..e8b96c9c2837f2c95b1d07d4fc3f245f9ad1ef62 --- /dev/null +++ b/checkpoint-500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa4c7be44c959599b8b43bb9bc3371e9e4e5bbc5758b3ab5afcccfda3e72e67 +size 559 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..77b958181a5b47b022b96b38a6207f274a1b6604 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026fae4a90d56c24de94b10dfa7a75b6ba4e43bd5c1a3fdb2d77356b81cd6f8a +size 623 diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..250e84c8ead2b7036f6ecd3e9341e82ee1ed4b67 --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,82 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 16.016666666666666, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9361, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2553, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7321, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 6.4515, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.2916, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.103, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 5.7716, + "step": 300 + }, + { + "epoch": 11.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.4902, + "step": 350 + }, + { + "epoch": 13.01, + "learning_rate": 0.00044444444444444436, + "loss": 5.1844, + "step": 400 + }, + { + "epoch": 14.03, + "learning_rate": 0.00041666666666666664, + "loss": 4.8669, + "step": 450 + }, + { + "epoch": 16.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.6547, + "step": 500 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.3557920956416e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..03b2288ee5b328686d8934e3fc4a6b925a9c3ede --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfe69ffbf98648e171ef448900d45919fc613a0e3b6af7ac7ce7cdbdf004776 +size 3183 diff --git a/checkpoint-600/config.json b/checkpoint-600/config.json new file mode 100755 index 0000000000000000000000000000000000000000..060b4e655747a8c6abaac05c63d6eacfe62a3ca1 --- /dev/null +++ b/checkpoint-600/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 1, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50262 +} diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..703274cb03c21130863ae0915cacfb338ca2bca9 --- /dev/null +++ b/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ece4b2c96a17b544fb81f453ffa8c26d461af768615e90f4929776002113af2 +size 995634737 diff --git a/checkpoint-600/pytorch_model.bin b/checkpoint-600/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..82f3e7fcd81e436b07ac3f3b78d9ff07e450f394 --- /dev/null +++ b/checkpoint-600/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab744acc087031a908e27ab8f3812d762379c84e08c1ab52ab66949242b0ae20 +size 510411881 diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..fe648c314f682ff8089a763146e0d4c179b771f3 --- /dev/null +++ b/checkpoint-600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21da583a770b159980f9b81e686c2a65060d70578685cebd14d346d323e03193 +size 14567 diff --git a/checkpoint-600/scaler.pt b/checkpoint-600/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..f95b3e36da01561ec333a83ee8419ad225633e06 --- /dev/null +++ b/checkpoint-600/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8415b86bbce347c0df306b84a695add049c2a3b2d0b6f4dda3bf036d341150 +size 559 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..62e5359badd619e7dad2c51d98fa8043d9948f0b --- /dev/null +++ b/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700940432b1c2117248896e2ce5a58d93c051d92ea97707f74d76bf1ef24deee +size 623 diff --git a/checkpoint-600/special_tokens_map.json b/checkpoint-600/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-600/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-600/tokenizer_config.json b/checkpoint-600/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-600/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..5ed197210a5eab7ccce2d9b2dedbd8b82a77f85c --- /dev/null +++ b/checkpoint-600/trainer_state.json @@ -0,0 +1,94 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 19.025, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9361, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2553, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7321, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 6.4515, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.2916, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.103, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 5.7716, + "step": 300 + }, + { + "epoch": 11.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.4902, + "step": 350 + }, + { + "epoch": 13.01, + "learning_rate": 0.00044444444444444436, + "loss": 5.1844, + "step": 400 + }, + { + "epoch": 14.03, + "learning_rate": 0.00041666666666666664, + "loss": 4.8669, + "step": 450 + }, + { + "epoch": 16.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.6547, + "step": 500 + }, + { + "epoch": 18.01, + "learning_rate": 0.0003611111111111111, + "loss": 4.4311, + "step": 550 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.1923, + "step": 600 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.6267258036224e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..03b2288ee5b328686d8934e3fc4a6b925a9c3ede --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfe69ffbf98648e171ef448900d45919fc613a0e3b6af7ac7ce7cdbdf004776 +size 3183 diff --git a/checkpoint-700/config.json b/checkpoint-700/config.json new file mode 100755 index 0000000000000000000000000000000000000000..060b4e655747a8c6abaac05c63d6eacfe62a3ca1 --- /dev/null +++ b/checkpoint-700/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 1, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50262 +} diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..389ec1b4801e4a8af5749483c8a784a3e55ad35c --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:508fda792863420a0b3ca8bd3457d65e06e007e3926dec4d17ebd38e45954d15 +size 995634737 diff --git a/checkpoint-700/pytorch_model.bin b/checkpoint-700/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..3df9d61687b65f0530ac7bfd68726788e57653e4 --- /dev/null +++ b/checkpoint-700/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b276af9a14ac4432ddb743086c1126e3f940b1e61a8a350b56239f93002036 +size 510411881 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..0561ed82d15c04f859b0aa96893cc915ea014e41 --- /dev/null +++ b/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d99ac0a5aa8c5f7e3fd3b9a862020097240df57808ccfbde51c14d5cfb452ce +size 14567 diff --git a/checkpoint-700/scaler.pt b/checkpoint-700/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..12f2b8ec834e54a2bd7cfdd0e07b0c6e125b6490 --- /dev/null +++ b/checkpoint-700/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb213daf5cce18a5f92167ca14da9df084d907f2b9796efc4666630f312b58c +size 559 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..42408b6253265af34ae78746144fbba9316e0d7e --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d4910fd408e002ebeff50d62bfb043dcae5ef658777d0c3ee4a3bbb515ec15 +size 623 diff --git a/checkpoint-700/special_tokens_map.json b/checkpoint-700/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-700/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-700/tokenizer_config.json b/checkpoint-700/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-700/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..bebcc1b81b47e7c1c2441e44620e8476f4d173e1 --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,106 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 23.008333333333333, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9361, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2553, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7321, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 6.4515, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.2916, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.103, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 5.7716, + "step": 300 + }, + { + "epoch": 11.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.4902, + "step": 350 + }, + { + "epoch": 13.01, + "learning_rate": 0.00044444444444444436, + "loss": 5.1844, + "step": 400 + }, + { + "epoch": 14.03, + "learning_rate": 0.00041666666666666664, + "loss": 4.8669, + "step": 450 + }, + { + "epoch": 16.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.6547, + "step": 500 + }, + { + "epoch": 18.01, + "learning_rate": 0.0003611111111111111, + "loss": 4.4311, + "step": 550 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.1923, + "step": 600 + }, + { + "epoch": 21.02, + "learning_rate": 0.00030555555555555555, + "loss": 4.0473, + "step": 650 + }, + { + "epoch": 23.01, + "learning_rate": 0.0002777777777777778, + "loss": 3.9047, + "step": 700 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.8987830673408e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..03b2288ee5b328686d8934e3fc4a6b925a9c3ede --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfe69ffbf98648e171ef448900d45919fc613a0e3b6af7ac7ce7cdbdf004776 +size 3183 diff --git a/checkpoint-800/config.json b/checkpoint-800/config.json new file mode 100755 index 0000000000000000000000000000000000000000..060b4e655747a8c6abaac05c63d6eacfe62a3ca1 --- /dev/null +++ b/checkpoint-800/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 1, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50262 +} diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..79c77cf942b64f85649dc8d033b7429ac24b3e62 --- /dev/null +++ b/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71fa0a9c138390899e11d92d0a27f9c16c86e61877cb1cf3104e2b07ef3b5395 +size 995634737 diff --git a/checkpoint-800/pytorch_model.bin b/checkpoint-800/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..d9ef4976dd5c2c9bd6478ac939f44899e2f1de8c --- /dev/null +++ b/checkpoint-800/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64a214dc3e67236cd1a3a4e568f414d7db6d8db37d542eda584f15f6fe4630dd +size 510411881 diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..ec1566b87ec7d4c30be8ae7f28870dbd3b5875a8 --- /dev/null +++ b/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:678a687eaa0ef6d5c8aaf3de3f0c8da5eea4a5c54908dfd0bef193e54218dfd7 +size 14567 diff --git a/checkpoint-800/scaler.pt b/checkpoint-800/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..b3c73372264156b02df8dada2192ee3c96dd5fc4 --- /dev/null +++ b/checkpoint-800/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2074cdcefbaa0a39f736d6b0f7bf018c350d49e85648bc8accc4f756ad816e +size 559 diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..75713dbce3771306ca00343ecc497c4f19a01d03 --- /dev/null +++ b/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27fb255c84833fb6ab5d93679cb236a569de9a1c4f805f72a2f60a2bc7c7499 +size 623 diff --git a/checkpoint-800/special_tokens_map.json b/checkpoint-800/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-800/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-800/tokenizer_config.json b/checkpoint-800/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-800/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..6d897167b0d362d3d84d39c0fcdd5942dd413170 --- /dev/null +++ b/checkpoint-800/trainer_state.json @@ -0,0 +1,118 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 26.016666666666666, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9361, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2553, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7321, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 6.4515, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.2916, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.103, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 5.7716, + "step": 300 + }, + { + "epoch": 11.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.4902, + "step": 350 + }, + { + "epoch": 13.01, + "learning_rate": 0.00044444444444444436, + "loss": 5.1844, + "step": 400 + }, + { + "epoch": 14.03, + "learning_rate": 0.00041666666666666664, + "loss": 4.8669, + "step": 450 + }, + { + "epoch": 16.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.6547, + "step": 500 + }, + { + "epoch": 18.01, + "learning_rate": 0.0003611111111111111, + "loss": 4.4311, + "step": 550 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.1923, + "step": 600 + }, + { + "epoch": 21.02, + "learning_rate": 0.00030555555555555555, + "loss": 4.0473, + "step": 650 + }, + { + "epoch": 23.01, + "learning_rate": 0.0002777777777777778, + "loss": 3.9047, + "step": 700 + }, + { + "epoch": 24.02, + "learning_rate": 0.00025, + "loss": 3.7631, + "step": 750 + }, + { + "epoch": 26.02, + "learning_rate": 0.00022222222222222218, + "loss": 3.7075, + "step": 800 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.1697167753216e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..03b2288ee5b328686d8934e3fc4a6b925a9c3ede --- /dev/null +++ b/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfe69ffbf98648e171ef448900d45919fc613a0e3b6af7ac7ce7cdbdf004776 +size 3183 diff --git a/checkpoint-900/config.json b/checkpoint-900/config.json new file mode 100755 index 0000000000000000000000000000000000000000..060b4e655747a8c6abaac05c63d6eacfe62a3ca1 --- /dev/null +++ b/checkpoint-900/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 1, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50262 +} diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..d65d70742ebde370d872633d48c9016bdae07cb8 --- /dev/null +++ b/checkpoint-900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ff135cb21e9fef2c49c90e7799bdb2b7e904f8ff7fd29b0520924ac01fc2c80 +size 995634737 diff --git a/checkpoint-900/pytorch_model.bin b/checkpoint-900/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..65b24c714d475edfd7422543d1158ae9ad960c23 --- /dev/null +++ b/checkpoint-900/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2854b1920055baec5bcedfa48bd87af7d407928d4740ba46089ce72885dad281 +size 510411881 diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..0ece0d4edfe0522bb483d170e0ae3b18deadb5f3 --- /dev/null +++ b/checkpoint-900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c88a3cdb324334640dff50e6031c32dba40ca0ccb1cc9d368a79d2df45294ad +size 14567 diff --git a/checkpoint-900/scaler.pt b/checkpoint-900/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..be54cb13c777bc6feccb478ff218e7e21fad482a --- /dev/null +++ b/checkpoint-900/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8695f57df923e22b943b0b0f2b9cc7007008e80b53ccee275b3a35963fe67e9 +size 559 diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..539d7c83ea252818dc9cbffac08cf340bb05a454 --- /dev/null +++ b/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cb09e5db72772a15094286e93cbb61d745d9b63863703cf53da0bcb9827821 +size 623 diff --git a/checkpoint-900/special_tokens_map.json b/checkpoint-900/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-900/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-900/tokenizer_config.json b/checkpoint-900/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-900/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..310f12d32e191e522d10bc1fe868f718c0f7fd8d --- /dev/null +++ b/checkpoint-900/trainer_state.json @@ -0,0 +1,130 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 29.025, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.9361, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2553, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7321, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 6.4515, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.2916, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.103, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 5.7716, + "step": 300 + }, + { + "epoch": 11.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.4902, + "step": 350 + }, + { + "epoch": 13.01, + "learning_rate": 0.00044444444444444436, + "loss": 5.1844, + "step": 400 + }, + { + "epoch": 14.03, + "learning_rate": 0.00041666666666666664, + "loss": 4.8669, + "step": 450 + }, + { + "epoch": 16.02, + "learning_rate": 0.00038888888888888887, + "loss": 4.6547, + "step": 500 + }, + { + "epoch": 18.01, + "learning_rate": 0.0003611111111111111, + "loss": 4.4311, + "step": 550 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.1923, + "step": 600 + }, + { + "epoch": 21.02, + "learning_rate": 0.00030555555555555555, + "loss": 4.0473, + "step": 650 + }, + { + "epoch": 23.01, + "learning_rate": 0.0002777777777777778, + "loss": 3.9047, + "step": 700 + }, + { + "epoch": 24.02, + "learning_rate": 0.00025, + "loss": 3.7631, + "step": 750 + }, + { + "epoch": 26.02, + "learning_rate": 0.00022222222222222218, + "loss": 3.7075, + "step": 800 + }, + { + "epoch": 28.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.6326, + "step": 850 + }, + { + "epoch": 29.02, + "learning_rate": 0.00016666666666666666, + "loss": 3.5357, + "step": 900 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.4406504833024e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..03b2288ee5b328686d8934e3fc4a6b925a9c3ede --- /dev/null +++ b/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfe69ffbf98648e171ef448900d45919fc613a0e3b6af7ac7ce7cdbdf004776 +size 3183 diff --git a/config.json b/config.json new file mode 100755 index 0000000000000000000000000000000000000000..060b4e655747a8c6abaac05c63d6eacfe62a3ca1 --- /dev/null +++ b/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 1, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50262 +} diff --git a/metrics.json b/metrics.json new file mode 100755 index 0000000000000000000000000000000000000000..2e8320b705abbfe19cc20692a221f82ff2cbaaa0 --- /dev/null +++ b/metrics.json @@ -0,0 +1,2500 @@ +{"num_parameters": 124443648, "trainable_parameters": 124443648, "step": 0} +{"train_info/time_between_train_steps": 8.624183654785156, "step": 0} +{"info/global_step": 1, "train_info/time_within_train_step": 20.614469289779663, "step": 1} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 19761.5078125, "train_info/memory_reserved": 22624.0, "train_info/memory_max_reserved": 22624.0, "_timestamp": 1737366065, "_runtime": 60}, "step": 1} +{"logs": {"train/loss": 10.9361, "train/learning_rate": 4.9999999999999996e-06, "train/epoch": 0.0, "_timestamp": 1737366065, "_runtime": 60}, "step": 1} +{"train_info/time_between_train_steps": 0.07497382164001465, "step": 1} +{"info/global_step": 2, "train_info/time_within_train_step": 13.089757442474365, "step": 2} +{"train_info/time_between_train_steps": 0.0061795711517333984, "step": 2} +{"info/global_step": 3, "train_info/time_within_train_step": 13.011180877685547, "step": 3} +{"train_info/time_between_train_steps": 0.0061876773834228516, "step": 3} +{"info/global_step": 4, "train_info/time_within_train_step": 13.080620288848877, "step": 4} +{"train_info/time_between_train_steps": 0.00603938102722168, "step": 4} +{"info/global_step": 5, "train_info/time_within_train_step": 12.98549485206604, "step": 5} +{"train_info/time_between_train_steps": 0.006004810333251953, "step": 5} +{"info/global_step": 6, "train_info/time_within_train_step": 13.120012760162354, "step": 6} +{"train_info/time_between_train_steps": 0.005980730056762695, "step": 6} +{"info/global_step": 7, "train_info/time_within_train_step": 13.021527767181396, "step": 7} +{"train_info/time_between_train_steps": 0.005632162094116211, "step": 7} +{"info/global_step": 8, "train_info/time_within_train_step": 13.127670049667358, "step": 8} +{"train_info/time_between_train_steps": 0.005761384963989258, "step": 8} +{"info/global_step": 9, "train_info/time_within_train_step": 12.99111557006836, "step": 9} +{"train_info/time_between_train_steps": 0.006056308746337891, "step": 9} +{"info/global_step": 10, "train_info/time_within_train_step": 13.087145805358887, "step": 10} +{"train_info/time_between_train_steps": 0.005677461624145508, "step": 10} +{"info/global_step": 11, "train_info/time_within_train_step": 13.030017614364624, "step": 11} +{"train_info/time_between_train_steps": 0.005667448043823242, "step": 11} +{"info/global_step": 12, "train_info/time_within_train_step": 13.0115807056427, "step": 12} +{"train_info/time_between_train_steps": 0.005311727523803711, "step": 12} +{"info/global_step": 13, "train_info/time_within_train_step": 12.984360218048096, "step": 13} +{"train_info/time_between_train_steps": 0.005504131317138672, "step": 13} +{"info/global_step": 14, "train_info/time_within_train_step": 12.991422414779663, "step": 14} +{"train_info/time_between_train_steps": 0.005570650100708008, "step": 14} +{"info/global_step": 15, "train_info/time_within_train_step": 12.986536979675293, "step": 15} +{"train_info/time_between_train_steps": 0.0056476593017578125, "step": 15} +{"info/global_step": 16, "train_info/time_within_train_step": 13.203462600708008, "step": 16} +{"train_info/time_between_train_steps": 0.005703449249267578, "step": 16} +{"info/global_step": 17, "train_info/time_within_train_step": 12.986371517181396, "step": 17} +{"train_info/time_between_train_steps": 0.005903482437133789, "step": 17} +{"info/global_step": 18, "train_info/time_within_train_step": 12.982122659683228, "step": 18} +{"train_info/time_between_train_steps": 0.005009174346923828, "step": 18} +{"info/global_step": 19, "train_info/time_within_train_step": 13.01166558265686, "step": 19} +{"train_info/time_between_train_steps": 0.005098104476928711, "step": 19} +{"info/global_step": 20, "train_info/time_within_train_step": 12.998291969299316, "step": 20} +{"train_info/time_between_train_steps": 0.005018711090087891, "step": 20} +{"info/global_step": 21, "train_info/time_within_train_step": 12.989188194274902, "step": 21} +{"train_info/time_between_train_steps": 0.0052492618560791016, "step": 21} +{"info/global_step": 22, "train_info/time_within_train_step": 12.986661434173584, "step": 22} +{"train_info/time_between_train_steps": 0.00525355339050293, "step": 22} +{"info/global_step": 23, "train_info/time_within_train_step": 12.973515510559082, "step": 23} +{"train_info/time_between_train_steps": 0.004584312438964844, "step": 23} +{"info/global_step": 24, "train_info/time_within_train_step": 13.047977209091187, "step": 24} +{"train_info/time_between_train_steps": 0.004925966262817383, "step": 24} +{"info/global_step": 25, "train_info/time_within_train_step": 12.97696852684021, "step": 25} +{"train_info/time_between_train_steps": 0.0057926177978515625, "step": 25} +{"info/global_step": 26, "train_info/time_within_train_step": 12.991503238677979, "step": 26} +{"train_info/time_between_train_steps": 0.005011320114135742, "step": 26} +{"info/global_step": 27, "train_info/time_within_train_step": 12.981616258621216, "step": 27} +{"train_info/time_between_train_steps": 0.005195140838623047, "step": 27} +{"info/global_step": 28, "train_info/time_within_train_step": 13.006869554519653, "step": 28} +{"train_info/time_between_train_steps": 0.0056552886962890625, "step": 28} +{"info/global_step": 29, "train_info/time_within_train_step": 12.990041255950928, "step": 29} +{"train_info/time_between_train_steps": 0.006022214889526367, "step": 29} +{"info/global_step": 30, "train_info/time_within_train_step": 13.0190908908844, "step": 30} +{"train_info/time_between_train_steps": 0.006674528121948242, "step": 30} +{"train_info/time_between_train_steps": 7.983334302902222, "step": 30} +{"info/global_step": 31, "train_info/time_within_train_step": 13.034127950668335, "step": 31} +{"train_info/time_between_train_steps": 0.005341291427612305, "step": 31} +{"info/global_step": 32, "train_info/time_within_train_step": 13.150924444198608, "step": 32} +{"train_info/time_between_train_steps": 0.005483150482177734, "step": 32} +{"info/global_step": 33, "train_info/time_within_train_step": 12.963136672973633, "step": 33} +{"train_info/time_between_train_steps": 0.004976749420166016, "step": 33} +{"info/global_step": 34, "train_info/time_within_train_step": 13.087929248809814, "step": 34} +{"train_info/time_between_train_steps": 0.005232572555541992, "step": 34} +{"info/global_step": 35, "train_info/time_within_train_step": 12.985700368881226, "step": 35} +{"train_info/time_between_train_steps": 0.006004810333251953, "step": 35} +{"info/global_step": 36, "train_info/time_within_train_step": 13.082498788833618, "step": 36} +{"train_info/time_between_train_steps": 0.005254030227661133, "step": 36} +{"info/global_step": 37, "train_info/time_within_train_step": 13.059250593185425, "step": 37} +{"train_info/time_between_train_steps": 0.005518674850463867, "step": 37} +{"info/global_step": 38, "train_info/time_within_train_step": 13.067533731460571, "step": 38} +{"train_info/time_between_train_steps": 0.005269289016723633, "step": 38} +{"info/global_step": 39, "train_info/time_within_train_step": 13.011971950531006, "step": 39} +{"train_info/time_between_train_steps": 0.005588054656982422, "step": 39} +{"info/global_step": 40, "train_info/time_within_train_step": 13.055757761001587, "step": 40} +{"train_info/time_between_train_steps": 0.005520343780517578, "step": 40} +{"info/global_step": 41, "train_info/time_within_train_step": 13.000918626785278, "step": 41} +{"train_info/time_between_train_steps": 0.00554203987121582, "step": 41} +{"info/global_step": 42, "train_info/time_within_train_step": 12.96752667427063, "step": 42} +{"train_info/time_between_train_steps": 0.005194664001464844, "step": 42} +{"info/global_step": 43, "train_info/time_within_train_step": 12.98318338394165, "step": 43} +{"train_info/time_between_train_steps": 0.005406618118286133, "step": 43} +{"info/global_step": 44, "train_info/time_within_train_step": 12.962690830230713, "step": 44} +{"train_info/time_between_train_steps": 0.005079746246337891, "step": 44} +{"info/global_step": 45, "train_info/time_within_train_step": 12.964958429336548, "step": 45} +{"train_info/time_between_train_steps": 0.0054013729095458984, "step": 45} +{"info/global_step": 46, "train_info/time_within_train_step": 12.982802152633667, "step": 46} +{"train_info/time_between_train_steps": 0.005320072174072266, "step": 46} +{"info/global_step": 47, "train_info/time_within_train_step": 13.055700302124023, "step": 47} +{"train_info/time_between_train_steps": 0.005585908889770508, "step": 47} +{"info/global_step": 48, "train_info/time_within_train_step": 13.017975568771362, "step": 48} +{"train_info/time_between_train_steps": 0.004712820053100586, "step": 48} +{"info/global_step": 49, "train_info/time_within_train_step": 12.972121000289917, "step": 49} +{"train_info/time_between_train_steps": 0.005066871643066406, "step": 49} +{"info/global_step": 50, "train_info/time_within_train_step": 12.989566564559937, "step": 50} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737366713, "_runtime": 708}, "step": 50} +{"logs": {"train/loss": 8.2553, "train/learning_rate": 0.00025, "train/epoch": 1.02, "_timestamp": 1737366713, "_runtime": 708}, "step": 50} +{"train_info/time_between_train_steps": 0.09197211265563965, "step": 50} +{"info/global_step": 51, "train_info/time_within_train_step": 13.05133056640625, "step": 51} +{"train_info/time_between_train_steps": 0.005122184753417969, "step": 51} +{"info/global_step": 52, "train_info/time_within_train_step": 12.983853340148926, "step": 52} +{"train_info/time_between_train_steps": 0.005301475524902344, "step": 52} +{"info/global_step": 53, "train_info/time_within_train_step": 13.007314443588257, "step": 53} +{"train_info/time_between_train_steps": 0.005457162857055664, "step": 53} +{"info/global_step": 54, "train_info/time_within_train_step": 12.985655546188354, "step": 54} +{"train_info/time_between_train_steps": 0.005654573440551758, "step": 54} +{"info/global_step": 55, "train_info/time_within_train_step": 12.985962867736816, "step": 55} +{"train_info/time_between_train_steps": 0.005125284194946289, "step": 55} +{"info/global_step": 56, "train_info/time_within_train_step": 12.988164901733398, "step": 56} +{"train_info/time_between_train_steps": 0.005770206451416016, "step": 56} +{"info/global_step": 57, "train_info/time_within_train_step": 13.021440505981445, "step": 57} +{"train_info/time_between_train_steps": 0.005779743194580078, "step": 57} +{"info/global_step": 58, "train_info/time_within_train_step": 12.990066051483154, "step": 58} +{"train_info/time_between_train_steps": 0.005614519119262695, "step": 58} +{"info/global_step": 59, "train_info/time_within_train_step": 13.006086826324463, "step": 59} +{"train_info/time_between_train_steps": 0.0059661865234375, "step": 59} +{"info/global_step": 60, "train_info/time_within_train_step": 13.060399532318115, "step": 60} +{"train_info/time_between_train_steps": 0.0059697628021240234, "step": 60} +{"train_info/time_between_train_steps": 7.950210332870483, "step": 60} +{"info/global_step": 61, "train_info/time_within_train_step": 12.97272515296936, "step": 61} +{"train_info/time_between_train_steps": 0.0054073333740234375, "step": 61} +{"info/global_step": 62, "train_info/time_within_train_step": 13.17457628250122, "step": 62} +{"train_info/time_between_train_steps": 0.005917072296142578, "step": 62} +{"info/global_step": 63, "train_info/time_within_train_step": 12.978674173355103, "step": 63} +{"train_info/time_between_train_steps": 0.0051305294036865234, "step": 63} +{"info/global_step": 64, "train_info/time_within_train_step": 13.079577684402466, "step": 64} +{"train_info/time_between_train_steps": 0.005265712738037109, "step": 64} +{"info/global_step": 65, "train_info/time_within_train_step": 12.979259490966797, "step": 65} +{"train_info/time_between_train_steps": 0.0050868988037109375, "step": 65} +{"info/global_step": 66, "train_info/time_within_train_step": 13.093592882156372, "step": 66} +{"train_info/time_between_train_steps": 0.005781412124633789, "step": 66} +{"info/global_step": 67, "train_info/time_within_train_step": 13.012487411499023, "step": 67} +{"train_info/time_between_train_steps": 0.0059299468994140625, "step": 67} +{"info/global_step": 68, "train_info/time_within_train_step": 13.140727758407593, "step": 68} +{"train_info/time_between_train_steps": 0.005791425704956055, "step": 68} +{"info/global_step": 69, "train_info/time_within_train_step": 12.995261192321777, "step": 69} +{"train_info/time_between_train_steps": 0.005829572677612305, "step": 69} +{"info/global_step": 70, "train_info/time_within_train_step": 13.065617799758911, "step": 70} +{"train_info/time_between_train_steps": 0.005663871765136719, "step": 70} +{"info/global_step": 71, "train_info/time_within_train_step": 12.999446392059326, "step": 71} +{"train_info/time_between_train_steps": 0.004969120025634766, "step": 71} +{"info/global_step": 72, "train_info/time_within_train_step": 12.973588228225708, "step": 72} +{"train_info/time_between_train_steps": 0.005454540252685547, "step": 72} +{"info/global_step": 73, "train_info/time_within_train_step": 13.029716730117798, "step": 73} +{"train_info/time_between_train_steps": 0.004878520965576172, "step": 73} +{"info/global_step": 74, "train_info/time_within_train_step": 12.97395658493042, "step": 74} +{"train_info/time_between_train_steps": 0.0051538944244384766, "step": 74} +{"info/global_step": 75, "train_info/time_within_train_step": 12.989373207092285, "step": 75} +{"train_info/time_between_train_steps": 0.00531458854675293, "step": 75} +{"info/global_step": 76, "train_info/time_within_train_step": 12.982717037200928, "step": 76} +{"train_info/time_between_train_steps": 0.005158424377441406, "step": 76} +{"info/global_step": 77, "train_info/time_within_train_step": 12.987114667892456, "step": 77} +{"train_info/time_between_train_steps": 0.005163431167602539, "step": 77} +{"info/global_step": 78, "train_info/time_within_train_step": 13.087860584259033, "step": 78} +{"train_info/time_between_train_steps": 0.005201101303100586, "step": 78} +{"info/global_step": 79, "train_info/time_within_train_step": 12.98122239112854, "step": 79} +{"train_info/time_between_train_steps": 0.005162954330444336, "step": 79} +{"info/global_step": 80, "train_info/time_within_train_step": 12.996147632598877, "step": 80} +{"train_info/time_between_train_steps": 0.00561833381652832, "step": 80} +{"info/global_step": 81, "train_info/time_within_train_step": 12.984272003173828, "step": 81} +{"train_info/time_between_train_steps": 0.00509953498840332, "step": 81} +{"info/global_step": 82, "train_info/time_within_train_step": 12.99940276145935, "step": 82} +{"train_info/time_between_train_steps": 0.005492210388183594, "step": 82} +{"info/global_step": 83, "train_info/time_within_train_step": 12.982632160186768, "step": 83} +{"train_info/time_between_train_steps": 0.004968166351318359, "step": 83} +{"info/global_step": 84, "train_info/time_within_train_step": 12.982250213623047, "step": 84} +{"train_info/time_between_train_steps": 0.005354881286621094, "step": 84} +{"info/global_step": 85, "train_info/time_within_train_step": 12.99566912651062, "step": 85} +{"train_info/time_between_train_steps": 0.005209207534790039, "step": 85} +{"info/global_step": 86, "train_info/time_within_train_step": 13.00606107711792, "step": 86} +{"train_info/time_between_train_steps": 0.005563497543334961, "step": 86} +{"info/global_step": 87, "train_info/time_within_train_step": 13.037090301513672, "step": 87} +{"train_info/time_between_train_steps": 0.005427837371826172, "step": 87} +{"info/global_step": 88, "train_info/time_within_train_step": 13.005820751190186, "step": 88} +{"train_info/time_between_train_steps": 0.005858659744262695, "step": 88} +{"info/global_step": 89, "train_info/time_within_train_step": 13.007030248641968, "step": 89} +{"train_info/time_between_train_steps": 0.006066799163818359, "step": 89} +{"info/global_step": 90, "train_info/time_within_train_step": 13.030298471450806, "step": 90} +{"train_info/time_between_train_steps": 0.005869626998901367, "step": 90} +{"train_info/time_between_train_steps": 8.127737998962402, "step": 90} +{"info/global_step": 91, "train_info/time_within_train_step": 13.055898427963257, "step": 91} +{"train_info/time_between_train_steps": 0.0053288936614990234, "step": 91} +{"info/global_step": 92, "train_info/time_within_train_step": 13.050235986709595, "step": 92} +{"train_info/time_between_train_steps": 0.005445003509521484, "step": 92} +{"info/global_step": 93, "train_info/time_within_train_step": 13.076831817626953, "step": 93} +{"train_info/time_between_train_steps": 0.005295276641845703, "step": 93} +{"info/global_step": 94, "train_info/time_within_train_step": 13.103455543518066, "step": 94} +{"train_info/time_between_train_steps": 0.0052814483642578125, "step": 94} +{"info/global_step": 95, "train_info/time_within_train_step": 13.027986764907837, "step": 95} +{"train_info/time_between_train_steps": 0.005186319351196289, "step": 95} +{"info/global_step": 96, "train_info/time_within_train_step": 13.091334104537964, "step": 96} +{"train_info/time_between_train_steps": 0.0047953128814697266, "step": 96} +{"info/global_step": 97, "train_info/time_within_train_step": 13.021955251693726, "step": 97} +{"train_info/time_between_train_steps": 0.0056569576263427734, "step": 97} +{"info/global_step": 98, "train_info/time_within_train_step": 13.089469909667969, "step": 98} +{"train_info/time_between_train_steps": 0.005995512008666992, "step": 98} +{"info/global_step": 99, "train_info/time_within_train_step": 12.998111486434937, "step": 99} +{"train_info/time_between_train_steps": 0.005044698715209961, "step": 99} +{"info/global_step": 100, "train_info/time_within_train_step": 13.128441333770752, "step": 100} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737367383, "_runtime": 1378}, "step": 100} +{"logs": {"train/loss": 6.7321, "train/learning_rate": 0.0005, "train/epoch": 3.01, "_timestamp": 1737367383, "_runtime": 1378}, "step": 100} +{"train_info/time_between_train_steps": 565.9451339244843, "step": 100} +{"info/global_step": 101, "train_info/time_within_train_step": 12.919581651687622, "step": 101} +{"train_info/time_between_train_steps": 0.004878044128417969, "step": 101} +{"info/global_step": 102, "train_info/time_within_train_step": 14.617851257324219, "step": 102} +{"train_info/time_between_train_steps": 0.005745649337768555, "step": 102} +{"info/global_step": 103, "train_info/time_within_train_step": 12.9154052734375, "step": 103} +{"train_info/time_between_train_steps": 0.005548238754272461, "step": 103} +{"info/global_step": 104, "train_info/time_within_train_step": 12.940033674240112, "step": 104} +{"train_info/time_between_train_steps": 0.005527019500732422, "step": 104} +{"info/global_step": 105, "train_info/time_within_train_step": 12.98922348022461, "step": 105} +{"train_info/time_between_train_steps": 0.005494356155395508, "step": 105} +{"info/global_step": 106, "train_info/time_within_train_step": 12.961042881011963, "step": 106} +{"train_info/time_between_train_steps": 0.005633354187011719, "step": 106} +{"info/global_step": 107, "train_info/time_within_train_step": 12.9746572971344, "step": 107} +{"train_info/time_between_train_steps": 0.0054056644439697266, "step": 107} +{"info/global_step": 108, "train_info/time_within_train_step": 12.971816062927246, "step": 108} +{"train_info/time_between_train_steps": 0.0057981014251708984, "step": 108} +{"info/global_step": 109, "train_info/time_within_train_step": 13.056921005249023, "step": 109} +{"train_info/time_between_train_steps": 0.0053653717041015625, "step": 109} +{"info/global_step": 110, "train_info/time_within_train_step": 12.972769021987915, "step": 110} +{"train_info/time_between_train_steps": 0.005492687225341797, "step": 110} +{"info/global_step": 111, "train_info/time_within_train_step": 12.972801208496094, "step": 111} +{"train_info/time_between_train_steps": 0.0056073665618896484, "step": 111} +{"info/global_step": 112, "train_info/time_within_train_step": 12.977509260177612, "step": 112} +{"train_info/time_between_train_steps": 0.005464792251586914, "step": 112} +{"info/global_step": 113, "train_info/time_within_train_step": 12.97839903831482, "step": 113} +{"train_info/time_between_train_steps": 0.005625724792480469, "step": 113} +{"info/global_step": 114, "train_info/time_within_train_step": 13.024255514144897, "step": 114} +{"train_info/time_between_train_steps": 0.0048983097076416016, "step": 114} +{"info/global_step": 115, "train_info/time_within_train_step": 12.978435277938843, "step": 115} +{"train_info/time_between_train_steps": 0.0055162906646728516, "step": 115} +{"info/global_step": 116, "train_info/time_within_train_step": 13.002750396728516, "step": 116} +{"train_info/time_between_train_steps": 0.005728244781494141, "step": 116} +{"info/global_step": 117, "train_info/time_within_train_step": 12.994593143463135, "step": 117} +{"train_info/time_between_train_steps": 0.005875349044799805, "step": 117} +{"info/global_step": 118, "train_info/time_within_train_step": 13.045655250549316, "step": 118} +{"train_info/time_between_train_steps": 0.005808353424072266, "step": 118} +{"info/global_step": 119, "train_info/time_within_train_step": 13.004816055297852, "step": 119} +{"train_info/time_between_train_steps": 0.005723237991333008, "step": 119} +{"info/global_step": 120, "train_info/time_within_train_step": 13.02737021446228, "step": 120} +{"train_info/time_between_train_steps": 0.0058176517486572266, "step": 120} +{"train_info/time_between_train_steps": 8.290944337844849, "step": 120} +{"info/global_step": 121, "train_info/time_within_train_step": 12.966958999633789, "step": 121} +{"train_info/time_between_train_steps": 0.005134105682373047, "step": 121} +{"info/global_step": 122, "train_info/time_within_train_step": 13.144871950149536, "step": 122} +{"train_info/time_between_train_steps": 0.0045795440673828125, "step": 122} +{"info/global_step": 123, "train_info/time_within_train_step": 12.979240894317627, "step": 123} +{"train_info/time_between_train_steps": 0.005409955978393555, "step": 123} +{"info/global_step": 124, "train_info/time_within_train_step": 13.19912576675415, "step": 124} +{"train_info/time_between_train_steps": 0.0059356689453125, "step": 124} +{"info/global_step": 125, "train_info/time_within_train_step": 13.014389991760254, "step": 125} +{"train_info/time_between_train_steps": 0.005291938781738281, "step": 125} +{"info/global_step": 126, "train_info/time_within_train_step": 13.088328838348389, "step": 126} +{"train_info/time_between_train_steps": 0.005796909332275391, "step": 126} +{"info/global_step": 127, "train_info/time_within_train_step": 13.03750991821289, "step": 127} +{"train_info/time_between_train_steps": 0.005707263946533203, "step": 127} +{"info/global_step": 128, "train_info/time_within_train_step": 13.080901861190796, "step": 128} +{"train_info/time_between_train_steps": 0.005495786666870117, "step": 128} +{"info/global_step": 129, "train_info/time_within_train_step": 12.993460178375244, "step": 129} +{"train_info/time_between_train_steps": 0.00562596321105957, "step": 129} +{"info/global_step": 130, "train_info/time_within_train_step": 13.069301128387451, "step": 130} +{"train_info/time_between_train_steps": 0.005337715148925781, "step": 130} +{"info/global_step": 131, "train_info/time_within_train_step": 13.012453556060791, "step": 131} +{"train_info/time_between_train_steps": 0.005695819854736328, "step": 131} +{"info/global_step": 132, "train_info/time_within_train_step": 12.999943494796753, "step": 132} +{"train_info/time_between_train_steps": 0.005067110061645508, "step": 132} +{"info/global_step": 133, "train_info/time_within_train_step": 12.9832603931427, "step": 133} +{"train_info/time_between_train_steps": 0.005544900894165039, "step": 133} +{"info/global_step": 134, "train_info/time_within_train_step": 12.994736909866333, "step": 134} +{"train_info/time_between_train_steps": 0.005572319030761719, "step": 134} +{"info/global_step": 135, "train_info/time_within_train_step": 12.990219831466675, "step": 135} +{"train_info/time_between_train_steps": 0.0052297115325927734, "step": 135} +{"info/global_step": 136, "train_info/time_within_train_step": 13.019121646881104, "step": 136} +{"train_info/time_between_train_steps": 0.005273342132568359, "step": 136} +{"info/global_step": 137, "train_info/time_within_train_step": 13.040011167526245, "step": 137} +{"train_info/time_between_train_steps": 0.0055027008056640625, "step": 137} +{"info/global_step": 138, "train_info/time_within_train_step": 12.996039628982544, "step": 138} +{"train_info/time_between_train_steps": 0.005109071731567383, "step": 138} +{"info/global_step": 139, "train_info/time_within_train_step": 13.07114839553833, "step": 139} +{"train_info/time_between_train_steps": 0.005527973175048828, "step": 139} +{"info/global_step": 140, "train_info/time_within_train_step": 12.991205930709839, "step": 140} +{"train_info/time_between_train_steps": 0.00511622428894043, "step": 140} +{"info/global_step": 141, "train_info/time_within_train_step": 13.127486228942871, "step": 141} +{"train_info/time_between_train_steps": 0.005437135696411133, "step": 141} +{"info/global_step": 142, "train_info/time_within_train_step": 12.988895654678345, "step": 142} +{"train_info/time_between_train_steps": 0.005745649337768555, "step": 142} +{"info/global_step": 143, "train_info/time_within_train_step": 12.995866537094116, "step": 143} +{"train_info/time_between_train_steps": 0.005035400390625, "step": 143} +{"info/global_step": 144, "train_info/time_within_train_step": 12.98541522026062, "step": 144} +{"train_info/time_between_train_steps": 0.0053997039794921875, "step": 144} +{"info/global_step": 145, "train_info/time_within_train_step": 13.013671159744263, "step": 145} +{"train_info/time_between_train_steps": 0.0053446292877197266, "step": 145} +{"info/global_step": 146, "train_info/time_within_train_step": 12.991801738739014, "step": 146} +{"train_info/time_between_train_steps": 0.005378007888793945, "step": 146} +{"info/global_step": 147, "train_info/time_within_train_step": 12.988093137741089, "step": 147} +{"train_info/time_between_train_steps": 0.005304813385009766, "step": 147} +{"info/global_step": 148, "train_info/time_within_train_step": 13.007078409194946, "step": 148} +{"train_info/time_between_train_steps": 0.00580906867980957, "step": 148} +{"info/global_step": 149, "train_info/time_within_train_step": 13.007384538650513, "step": 149} +{"train_info/time_between_train_steps": 0.006349802017211914, "step": 149} +{"info/global_step": 150, "train_info/time_within_train_step": 13.070388555526733, "step": 150} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737368615, "_runtime": 2610}, "step": 150} +{"logs": {"train/loss": 6.4515, "train/learning_rate": 0.0005833333333333333, "train/epoch": 4.03, "_timestamp": 1737368615, "_runtime": 2610}, "step": 150} +{"train_info/time_between_train_steps": 0.06970620155334473, "step": 150} +{"train_info/time_between_train_steps": 7.951460599899292, "step": 150} +{"info/global_step": 151, "train_info/time_within_train_step": 12.971789598464966, "step": 151} +{"train_info/time_between_train_steps": 0.005320549011230469, "step": 151} +{"info/global_step": 152, "train_info/time_within_train_step": 13.110633611679077, "step": 152} +{"train_info/time_between_train_steps": 0.005372524261474609, "step": 152} +{"info/global_step": 153, "train_info/time_within_train_step": 12.994184732437134, "step": 153} +{"train_info/time_between_train_steps": 0.005872011184692383, "step": 153} +{"info/global_step": 154, "train_info/time_within_train_step": 13.10961103439331, "step": 154} +{"train_info/time_between_train_steps": 0.005899190902709961, "step": 154} +{"info/global_step": 155, "train_info/time_within_train_step": 13.094335079193115, "step": 155} +{"train_info/time_between_train_steps": 0.0057370662689208984, "step": 155} +{"info/global_step": 156, "train_info/time_within_train_step": 13.091546058654785, "step": 156} +{"train_info/time_between_train_steps": 0.005523204803466797, "step": 156} +{"info/global_step": 157, "train_info/time_within_train_step": 13.002914905548096, "step": 157} +{"train_info/time_between_train_steps": 0.0058405399322509766, "step": 157} +{"info/global_step": 158, "train_info/time_within_train_step": 13.129955291748047, "step": 158} +{"train_info/time_between_train_steps": 0.005862236022949219, "step": 158} +{"info/global_step": 159, "train_info/time_within_train_step": 13.012829303741455, "step": 159} +{"train_info/time_between_train_steps": 0.0063495635986328125, "step": 159} +{"info/global_step": 160, "train_info/time_within_train_step": 13.068385124206543, "step": 160} +{"train_info/time_between_train_steps": 0.0055425167083740234, "step": 160} +{"info/global_step": 161, "train_info/time_within_train_step": 13.003636360168457, "step": 161} +{"train_info/time_between_train_steps": 0.0054607391357421875, "step": 161} +{"info/global_step": 162, "train_info/time_within_train_step": 12.981167793273926, "step": 162} +{"train_info/time_between_train_steps": 0.005489826202392578, "step": 162} +{"info/global_step": 163, "train_info/time_within_train_step": 13.059258222579956, "step": 163} +{"train_info/time_between_train_steps": 0.005342960357666016, "step": 163} +{"info/global_step": 164, "train_info/time_within_train_step": 12.991625785827637, "step": 164} +{"train_info/time_between_train_steps": 0.005707979202270508, "step": 164} +{"info/global_step": 165, "train_info/time_within_train_step": 13.007741451263428, "step": 165} +{"train_info/time_between_train_steps": 0.005553483963012695, "step": 165} +{"info/global_step": 166, "train_info/time_within_train_step": 12.99505090713501, "step": 166} +{"train_info/time_between_train_steps": 0.005392313003540039, "step": 166} +{"info/global_step": 167, "train_info/time_within_train_step": 13.005310773849487, "step": 167} +{"train_info/time_between_train_steps": 0.005881071090698242, "step": 167} +{"info/global_step": 168, "train_info/time_within_train_step": 12.987081527709961, "step": 168} +{"train_info/time_between_train_steps": 0.005504608154296875, "step": 168} +{"info/global_step": 169, "train_info/time_within_train_step": 12.986178636550903, "step": 169} +{"train_info/time_between_train_steps": 0.005095005035400391, "step": 169} +{"info/global_step": 170, "train_info/time_within_train_step": 13.074824571609497, "step": 170} +{"train_info/time_between_train_steps": 0.005159139633178711, "step": 170} +{"info/global_step": 171, "train_info/time_within_train_step": 12.994283676147461, "step": 171} +{"train_info/time_between_train_steps": 0.005816221237182617, "step": 171} +{"info/global_step": 172, "train_info/time_within_train_step": 13.010058641433716, "step": 172} +{"train_info/time_between_train_steps": 0.00547337532043457, "step": 172} +{"info/global_step": 173, "train_info/time_within_train_step": 12.995841026306152, "step": 173} +{"train_info/time_between_train_steps": 0.0050356388092041016, "step": 173} +{"info/global_step": 174, "train_info/time_within_train_step": 12.990756750106812, "step": 174} +{"train_info/time_between_train_steps": 0.005582332611083984, "step": 174} +{"info/global_step": 175, "train_info/time_within_train_step": 12.990664720535278, "step": 175} +{"train_info/time_between_train_steps": 0.005774974822998047, "step": 175} +{"info/global_step": 176, "train_info/time_within_train_step": 12.992318868637085, "step": 176} +{"train_info/time_between_train_steps": 0.005532264709472656, "step": 176} +{"info/global_step": 177, "train_info/time_within_train_step": 13.06185507774353, "step": 177} +{"train_info/time_between_train_steps": 0.005957126617431641, "step": 177} +{"info/global_step": 178, "train_info/time_within_train_step": 13.000255107879639, "step": 178} +{"train_info/time_between_train_steps": 0.005742311477661133, "step": 178} +{"info/global_step": 179, "train_info/time_within_train_step": 13.016698837280273, "step": 179} +{"train_info/time_between_train_steps": 0.006262540817260742, "step": 179} +{"info/global_step": 180, "train_info/time_within_train_step": 13.023514032363892, "step": 180} +{"train_info/time_between_train_steps": 0.006226778030395508, "step": 180} +{"train_info/time_between_train_steps": 7.960669040679932, "step": 180} +{"info/global_step": 181, "train_info/time_within_train_step": 13.017351627349854, "step": 181} +{"train_info/time_between_train_steps": 0.0050656795501708984, "step": 181} +{"info/global_step": 182, "train_info/time_within_train_step": 13.070550918579102, "step": 182} +{"train_info/time_between_train_steps": 0.005537271499633789, "step": 182} +{"info/global_step": 183, "train_info/time_within_train_step": 12.993330955505371, "step": 183} +{"train_info/time_between_train_steps": 0.00550079345703125, "step": 183} +{"info/global_step": 184, "train_info/time_within_train_step": 13.102940082550049, "step": 184} +{"train_info/time_between_train_steps": 0.004895210266113281, "step": 184} +{"info/global_step": 185, "train_info/time_within_train_step": 13.086539030075073, "step": 185} +{"train_info/time_between_train_steps": 0.0053462982177734375, "step": 185} +{"info/global_step": 186, "train_info/time_within_train_step": 13.121919631958008, "step": 186} +{"train_info/time_between_train_steps": 0.00576019287109375, "step": 186} +{"info/global_step": 187, "train_info/time_within_train_step": 13.007355690002441, "step": 187} +{"train_info/time_between_train_steps": 0.005657196044921875, "step": 187} +{"info/global_step": 188, "train_info/time_within_train_step": 13.115716695785522, "step": 188} +{"train_info/time_between_train_steps": 0.005731821060180664, "step": 188} +{"info/global_step": 189, "train_info/time_within_train_step": 12.991848707199097, "step": 189} +{"train_info/time_between_train_steps": 0.0051724910736083984, "step": 189} +{"info/global_step": 190, "train_info/time_within_train_step": 13.05286455154419, "step": 190} +{"train_info/time_between_train_steps": 0.0051746368408203125, "step": 190} +{"info/global_step": 191, "train_info/time_within_train_step": 13.005582809448242, "step": 191} +{"train_info/time_between_train_steps": 0.0051822662353515625, "step": 191} +{"info/global_step": 192, "train_info/time_within_train_step": 13.001729011535645, "step": 192} +{"train_info/time_between_train_steps": 0.0049839019775390625, "step": 192} +{"info/global_step": 193, "train_info/time_within_train_step": 12.992319345474243, "step": 193} +{"train_info/time_between_train_steps": 0.005089282989501953, "step": 193} +{"info/global_step": 194, "train_info/time_within_train_step": 12.988303899765015, "step": 194} +{"train_info/time_between_train_steps": 0.004837512969970703, "step": 194} +{"info/global_step": 195, "train_info/time_within_train_step": 13.08985948562622, "step": 195} +{"train_info/time_between_train_steps": 0.005519866943359375, "step": 195} +{"info/global_step": 196, "train_info/time_within_train_step": 12.991483926773071, "step": 196} +{"train_info/time_between_train_steps": 0.005254030227661133, "step": 196} +{"info/global_step": 197, "train_info/time_within_train_step": 13.004175186157227, "step": 197} +{"train_info/time_between_train_steps": 0.00547480583190918, "step": 197} +{"info/global_step": 198, "train_info/time_within_train_step": 13.008556127548218, "step": 198} +{"train_info/time_between_train_steps": 0.00513768196105957, "step": 198} +{"info/global_step": 199, "train_info/time_within_train_step": 13.031446695327759, "step": 199} +{"train_info/time_between_train_steps": 0.005575418472290039, "step": 199} +{"info/global_step": 200, "train_info/time_within_train_step": 12.996391773223877, "step": 200} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737369285, "_runtime": 3280}, "step": 200} +{"logs": {"train/loss": 6.2916, "train/learning_rate": 0.0005555555555555556, "train/epoch": 6.02, "_timestamp": 1737369285, "_runtime": 3280}, "step": 200} +{"train_info/time_between_train_steps": 216.06449127197266, "step": 200} +{"info/global_step": 201, "train_info/time_within_train_step": 12.999178409576416, "step": 201} +{"train_info/time_between_train_steps": 0.004697322845458984, "step": 201} +{"info/global_step": 202, "train_info/time_within_train_step": 12.937870979309082, "step": 202} +{"train_info/time_between_train_steps": 0.004740715026855469, "step": 202} +{"info/global_step": 203, "train_info/time_within_train_step": 13.309951305389404, "step": 203} +{"train_info/time_between_train_steps": 0.004713773727416992, "step": 203} +{"info/global_step": 204, "train_info/time_within_train_step": 12.965944290161133, "step": 204} +{"train_info/time_between_train_steps": 0.0046765804290771484, "step": 204} +{"info/global_step": 205, "train_info/time_within_train_step": 16.18946933746338, "step": 205} +{"train_info/time_between_train_steps": 0.0047113895416259766, "step": 205} +{"info/global_step": 206, "train_info/time_within_train_step": 12.96935510635376, "step": 206} +{"train_info/time_between_train_steps": 0.004526376724243164, "step": 206} +{"info/global_step": 207, "train_info/time_within_train_step": 13.772683143615723, "step": 207} +{"train_info/time_between_train_steps": 0.004841327667236328, "step": 207} +{"info/global_step": 208, "train_info/time_within_train_step": 12.992496013641357, "step": 208} +{"train_info/time_between_train_steps": 0.005028724670410156, "step": 208} +{"info/global_step": 209, "train_info/time_within_train_step": 15.212963819503784, "step": 209} +{"train_info/time_between_train_steps": 0.005914926528930664, "step": 209} +{"info/global_step": 210, "train_info/time_within_train_step": 13.02016282081604, "step": 210} +{"train_info/time_between_train_steps": 0.005714893341064453, "step": 210} +{"train_info/time_between_train_steps": 11.26638150215149, "step": 210} +{"info/global_step": 211, "train_info/time_within_train_step": 12.966145992279053, "step": 211} +{"train_info/time_between_train_steps": 0.0051653385162353516, "step": 211} +{"info/global_step": 212, "train_info/time_within_train_step": 13.066962480545044, "step": 212} +{"train_info/time_between_train_steps": 0.005629062652587891, "step": 212} +{"info/global_step": 213, "train_info/time_within_train_step": 13.0100257396698, "step": 213} +{"train_info/time_between_train_steps": 0.005154848098754883, "step": 213} +{"info/global_step": 214, "train_info/time_within_train_step": 13.132535219192505, "step": 214} +{"train_info/time_between_train_steps": 0.005667209625244141, "step": 214} +{"info/global_step": 215, "train_info/time_within_train_step": 13.044028282165527, "step": 215} +{"train_info/time_between_train_steps": 0.005724906921386719, "step": 215} +{"info/global_step": 216, "train_info/time_within_train_step": 13.23125958442688, "step": 216} +{"train_info/time_between_train_steps": 0.005763053894042969, "step": 216} +{"info/global_step": 217, "train_info/time_within_train_step": 13.069036722183228, "step": 217} +{"train_info/time_between_train_steps": 0.005552053451538086, "step": 217} +{"info/global_step": 218, "train_info/time_within_train_step": 13.120737552642822, "step": 218} +{"train_info/time_between_train_steps": 0.005311489105224609, "step": 218} +{"info/global_step": 219, "train_info/time_within_train_step": 13.025755643844604, "step": 219} +{"train_info/time_between_train_steps": 0.005152225494384766, "step": 219} +{"info/global_step": 220, "train_info/time_within_train_step": 13.08237338066101, "step": 220} +{"train_info/time_between_train_steps": 0.00518035888671875, "step": 220} +{"info/global_step": 221, "train_info/time_within_train_step": 13.11032485961914, "step": 221} +{"train_info/time_between_train_steps": 0.005625724792480469, "step": 221} +{"info/global_step": 222, "train_info/time_within_train_step": 13.057191133499146, "step": 222} +{"train_info/time_between_train_steps": 0.005018949508666992, "step": 222} +{"info/global_step": 223, "train_info/time_within_train_step": 13.015811443328857, "step": 223} +{"train_info/time_between_train_steps": 0.005670785903930664, "step": 223} +{"info/global_step": 224, "train_info/time_within_train_step": 13.030818223953247, "step": 224} +{"train_info/time_between_train_steps": 0.004942417144775391, "step": 224} +{"info/global_step": 225, "train_info/time_within_train_step": 13.016941785812378, "step": 225} +{"train_info/time_between_train_steps": 0.0052356719970703125, "step": 225} +{"info/global_step": 226, "train_info/time_within_train_step": 13.026323556900024, "step": 226} +{"train_info/time_between_train_steps": 0.005025148391723633, "step": 226} +{"info/global_step": 227, "train_info/time_within_train_step": 13.026201248168945, "step": 227} +{"train_info/time_between_train_steps": 0.005073070526123047, "step": 227} +{"info/global_step": 228, "train_info/time_within_train_step": 13.027500629425049, "step": 228} +{"train_info/time_between_train_steps": 0.005605220794677734, "step": 228} +{"info/global_step": 229, "train_info/time_within_train_step": 13.039543390274048, "step": 229} +{"train_info/time_between_train_steps": 0.005251884460449219, "step": 229} +{"info/global_step": 230, "train_info/time_within_train_step": 13.034894466400146, "step": 230} +{"train_info/time_between_train_steps": 0.00506591796875, "step": 230} +{"info/global_step": 231, "train_info/time_within_train_step": 13.028553247451782, "step": 231} +{"train_info/time_between_train_steps": 0.005353212356567383, "step": 231} +{"info/global_step": 232, "train_info/time_within_train_step": 13.106256246566772, "step": 232} +{"train_info/time_between_train_steps": 0.005743265151977539, "step": 232} +{"info/global_step": 233, "train_info/time_within_train_step": 13.036792755126953, "step": 233} +{"train_info/time_between_train_steps": 0.004992246627807617, "step": 233} +{"info/global_step": 234, "train_info/time_within_train_step": 13.03120732307434, "step": 234} +{"train_info/time_between_train_steps": 0.004832744598388672, "step": 234} +{"info/global_step": 235, "train_info/time_within_train_step": 13.022146224975586, "step": 235} +{"train_info/time_between_train_steps": 0.005273342132568359, "step": 235} +{"info/global_step": 236, "train_info/time_within_train_step": 13.039364099502563, "step": 236} +{"train_info/time_between_train_steps": 0.005538463592529297, "step": 236} +{"info/global_step": 237, "train_info/time_within_train_step": 13.030722379684448, "step": 237} +{"train_info/time_between_train_steps": 0.005631923675537109, "step": 237} +{"info/global_step": 238, "train_info/time_within_train_step": 13.043956279754639, "step": 238} +{"train_info/time_between_train_steps": 0.0059969425201416016, "step": 238} +{"info/global_step": 239, "train_info/time_within_train_step": 13.06052541732788, "step": 239} +{"train_info/time_between_train_steps": 0.005672931671142578, "step": 239} +{"info/global_step": 240, "train_info/time_within_train_step": 13.07365369796753, "step": 240} +{"train_info/time_between_train_steps": 0.006412029266357422, "step": 240} +{"train_info/time_between_train_steps": 8.141467571258545, "step": 240} +{"info/global_step": 241, "train_info/time_within_train_step": 13.00973391532898, "step": 241} +{"train_info/time_between_train_steps": 0.004816293716430664, "step": 241} +{"info/global_step": 242, "train_info/time_within_train_step": 13.128451824188232, "step": 242} +{"train_info/time_between_train_steps": 0.005539894104003906, "step": 242} +{"info/global_step": 243, "train_info/time_within_train_step": 13.032074689865112, "step": 243} +{"train_info/time_between_train_steps": 0.005282163619995117, "step": 243} +{"info/global_step": 244, "train_info/time_within_train_step": 13.150158882141113, "step": 244} +{"train_info/time_between_train_steps": 0.005488157272338867, "step": 244} +{"info/global_step": 245, "train_info/time_within_train_step": 13.020349502563477, "step": 245} +{"train_info/time_between_train_steps": 0.0050411224365234375, "step": 245} +{"info/global_step": 246, "train_info/time_within_train_step": 13.10966444015503, "step": 246} +{"train_info/time_between_train_steps": 0.005339622497558594, "step": 246} +{"info/global_step": 247, "train_info/time_within_train_step": 13.104676723480225, "step": 247} +{"train_info/time_between_train_steps": 0.005556344985961914, "step": 247} +{"info/global_step": 248, "train_info/time_within_train_step": 13.12857985496521, "step": 248} +{"train_info/time_between_train_steps": 0.004965543746948242, "step": 248} +{"info/global_step": 249, "train_info/time_within_train_step": 13.03079867362976, "step": 249} +{"train_info/time_between_train_steps": 0.005349397659301758, "step": 249} +{"info/global_step": 250, "train_info/time_within_train_step": 13.098365068435669, "step": 250} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737370194, "_runtime": 4189}, "step": 250} +{"logs": {"train/loss": 6.103, "train/learning_rate": 0.0005277777777777777, "train/epoch": 8.01, "_timestamp": 1737370194, "_runtime": 4189}, "step": 250} +{"train_info/time_between_train_steps": 0.05100727081298828, "step": 250} +{"info/global_step": 251, "train_info/time_within_train_step": 13.046417236328125, "step": 251} +{"train_info/time_between_train_steps": 0.004950284957885742, "step": 251} +{"info/global_step": 252, "train_info/time_within_train_step": 13.020256757736206, "step": 252} +{"train_info/time_between_train_steps": 0.004655122756958008, "step": 252} +{"info/global_step": 253, "train_info/time_within_train_step": 13.037938356399536, "step": 253} +{"train_info/time_between_train_steps": 0.004940032958984375, "step": 253} +{"info/global_step": 254, "train_info/time_within_train_step": 13.018118858337402, "step": 254} +{"train_info/time_between_train_steps": 0.0054149627685546875, "step": 254} +{"info/global_step": 255, "train_info/time_within_train_step": 13.032602548599243, "step": 255} +{"train_info/time_between_train_steps": 0.005206108093261719, "step": 255} +{"info/global_step": 256, "train_info/time_within_train_step": 13.017081499099731, "step": 256} +{"train_info/time_between_train_steps": 0.004747152328491211, "step": 256} +{"info/global_step": 257, "train_info/time_within_train_step": 13.079606294631958, "step": 257} +{"train_info/time_between_train_steps": 0.005398988723754883, "step": 257} +{"info/global_step": 258, "train_info/time_within_train_step": 13.025081157684326, "step": 258} +{"train_info/time_between_train_steps": 0.00521540641784668, "step": 258} +{"info/global_step": 259, "train_info/time_within_train_step": 13.023725509643555, "step": 259} +{"train_info/time_between_train_steps": 0.004969596862792969, "step": 259} +{"info/global_step": 260, "train_info/time_within_train_step": 13.021947860717773, "step": 260} +{"train_info/time_between_train_steps": 0.005064249038696289, "step": 260} +{"info/global_step": 261, "train_info/time_within_train_step": 13.020145654678345, "step": 261} +{"train_info/time_between_train_steps": 0.0052144527435302734, "step": 261} +{"info/global_step": 262, "train_info/time_within_train_step": 13.081105470657349, "step": 262} +{"train_info/time_between_train_steps": 0.005410432815551758, "step": 262} +{"info/global_step": 263, "train_info/time_within_train_step": 13.112056970596313, "step": 263} +{"train_info/time_between_train_steps": 0.005570650100708008, "step": 263} +{"info/global_step": 264, "train_info/time_within_train_step": 13.042046308517456, "step": 264} +{"train_info/time_between_train_steps": 0.004674434661865234, "step": 264} +{"info/global_step": 265, "train_info/time_within_train_step": 13.021575927734375, "step": 265} +{"train_info/time_between_train_steps": 0.0053637027740478516, "step": 265} +{"info/global_step": 266, "train_info/time_within_train_step": 13.037530183792114, "step": 266} +{"train_info/time_between_train_steps": 0.005776166915893555, "step": 266} +{"info/global_step": 267, "train_info/time_within_train_step": 13.052862644195557, "step": 267} +{"train_info/time_between_train_steps": 0.0052220821380615234, "step": 267} +{"info/global_step": 268, "train_info/time_within_train_step": 13.031085729598999, "step": 268} +{"train_info/time_between_train_steps": 0.005225419998168945, "step": 268} +{"info/global_step": 269, "train_info/time_within_train_step": 13.038822174072266, "step": 269} +{"train_info/time_between_train_steps": 0.0064029693603515625, "step": 269} +{"info/global_step": 270, "train_info/time_within_train_step": 13.073322534561157, "step": 270} +{"train_info/time_between_train_steps": 0.0061380863189697266, "step": 270} +{"train_info/time_between_train_steps": 7.928157806396484, "step": 270} +{"info/global_step": 271, "train_info/time_within_train_step": 13.017914772033691, "step": 271} +{"train_info/time_between_train_steps": 0.004653215408325195, "step": 271} +{"info/global_step": 272, "train_info/time_within_train_step": 13.102973461151123, "step": 272} +{"train_info/time_between_train_steps": 0.005328178405761719, "step": 272} +{"info/global_step": 273, "train_info/time_within_train_step": 13.035438299179077, "step": 273} +{"train_info/time_between_train_steps": 0.005491733551025391, "step": 273} +{"info/global_step": 274, "train_info/time_within_train_step": 13.102757453918457, "step": 274} +{"train_info/time_between_train_steps": 0.00568079948425293, "step": 274} +{"info/global_step": 275, "train_info/time_within_train_step": 13.081061601638794, "step": 275} +{"train_info/time_between_train_steps": 0.004873514175415039, "step": 275} +{"info/global_step": 276, "train_info/time_within_train_step": 13.111940145492554, "step": 276} +{"train_info/time_between_train_steps": 0.005003690719604492, "step": 276} +{"info/global_step": 277, "train_info/time_within_train_step": 13.01906442642212, "step": 277} +{"train_info/time_between_train_steps": 0.005707263946533203, "step": 277} +{"info/global_step": 278, "train_info/time_within_train_step": 13.240543127059937, "step": 278} +{"train_info/time_between_train_steps": 0.005614280700683594, "step": 278} +{"info/global_step": 279, "train_info/time_within_train_step": 13.055559158325195, "step": 279} +{"train_info/time_between_train_steps": 0.0055425167083740234, "step": 279} +{"info/global_step": 280, "train_info/time_within_train_step": 13.105509757995605, "step": 280} +{"train_info/time_between_train_steps": 0.005628824234008789, "step": 280} +{"info/global_step": 281, "train_info/time_within_train_step": 13.039863586425781, "step": 281} +{"train_info/time_between_train_steps": 0.005616664886474609, "step": 281} +{"info/global_step": 282, "train_info/time_within_train_step": 13.025969505310059, "step": 282} +{"train_info/time_between_train_steps": 0.004988193511962891, "step": 282} +{"info/global_step": 283, "train_info/time_within_train_step": 13.017066955566406, "step": 283} +{"train_info/time_between_train_steps": 0.0054094791412353516, "step": 283} +{"info/global_step": 284, "train_info/time_within_train_step": 13.070582151412964, "step": 284} +{"train_info/time_between_train_steps": 0.005329132080078125, "step": 284} +{"info/global_step": 285, "train_info/time_within_train_step": 13.015590906143188, "step": 285} +{"train_info/time_between_train_steps": 0.005038261413574219, "step": 285} +{"info/global_step": 286, "train_info/time_within_train_step": 13.01773452758789, "step": 286} +{"train_info/time_between_train_steps": 0.005246162414550781, "step": 286} +{"info/global_step": 287, "train_info/time_within_train_step": 13.028293371200562, "step": 287} +{"train_info/time_between_train_steps": 0.005589485168457031, "step": 287} +{"info/global_step": 288, "train_info/time_within_train_step": 13.032323837280273, "step": 288} +{"train_info/time_between_train_steps": 0.0051076412200927734, "step": 288} +{"info/global_step": 289, "train_info/time_within_train_step": 13.114336967468262, "step": 289} +{"train_info/time_between_train_steps": 0.00545811653137207, "step": 289} +{"info/global_step": 290, "train_info/time_within_train_step": 13.02259373664856, "step": 290} +{"train_info/time_between_train_steps": 0.004776954650878906, "step": 290} +{"info/global_step": 291, "train_info/time_within_train_step": 13.02916169166565, "step": 291} +{"train_info/time_between_train_steps": 0.005666017532348633, "step": 291} +{"info/global_step": 292, "train_info/time_within_train_step": 13.020936489105225, "step": 292} +{"train_info/time_between_train_steps": 0.005105495452880859, "step": 292} +{"info/global_step": 293, "train_info/time_within_train_step": 13.069409847259521, "step": 293} +{"train_info/time_between_train_steps": 0.005625724792480469, "step": 293} +{"info/global_step": 294, "train_info/time_within_train_step": 13.099306106567383, "step": 294} +{"train_info/time_between_train_steps": 0.005514621734619141, "step": 294} +{"info/global_step": 295, "train_info/time_within_train_step": 13.026169776916504, "step": 295} +{"train_info/time_between_train_steps": 0.005964040756225586, "step": 295} +{"info/global_step": 296, "train_info/time_within_train_step": 13.024280071258545, "step": 296} +{"train_info/time_between_train_steps": 0.005601644515991211, "step": 296} +{"info/global_step": 297, "train_info/time_within_train_step": 13.036840677261353, "step": 297} +{"train_info/time_between_train_steps": 0.005303144454956055, "step": 297} +{"info/global_step": 298, "train_info/time_within_train_step": 13.045310735702515, "step": 298} +{"train_info/time_between_train_steps": 0.005815744400024414, "step": 298} +{"info/global_step": 299, "train_info/time_within_train_step": 13.049351453781128, "step": 299} +{"train_info/time_between_train_steps": 0.006030559539794922, "step": 299} +{"info/global_step": 300, "train_info/time_within_train_step": 13.068712711334229, "step": 300} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737370856, "_runtime": 4851}, "step": 300} +{"logs": {"train/loss": 5.7716, "train/learning_rate": 0.0005, "train/epoch": 9.03, "_timestamp": 1737370856, "_runtime": 4851}, "step": 300} +{"train_info/time_between_train_steps": 138.19194793701172, "step": 300} +{"train_info/time_between_train_steps": 156.2590250968933, "step": 300} +{"info/global_step": 301, "train_info/time_within_train_step": 12.94970440864563, "step": 301} +{"train_info/time_between_train_steps": 0.004408359527587891, "step": 301} +{"info/global_step": 302, "train_info/time_within_train_step": 15.762218236923218, "step": 302} +{"train_info/time_between_train_steps": 0.005353689193725586, "step": 302} +{"info/global_step": 303, "train_info/time_within_train_step": 12.989729404449463, "step": 303} +{"train_info/time_between_train_steps": 0.00480341911315918, "step": 303} +{"info/global_step": 304, "train_info/time_within_train_step": 23.68998694419861, "step": 304} +{"train_info/time_between_train_steps": 0.004789590835571289, "step": 304} +{"info/global_step": 305, "train_info/time_within_train_step": 15.9179048538208, "step": 305} +{"train_info/time_between_train_steps": 0.005036830902099609, "step": 305} +{"info/global_step": 306, "train_info/time_within_train_step": 22.978731393814087, "step": 306} +{"train_info/time_between_train_steps": 0.005358219146728516, "step": 306} +{"info/global_step": 307, "train_info/time_within_train_step": 12.962855339050293, "step": 307} +{"train_info/time_between_train_steps": 0.005369901657104492, "step": 307} +{"info/global_step": 308, "train_info/time_within_train_step": 18.29473304748535, "step": 308} +{"train_info/time_between_train_steps": 0.005129098892211914, "step": 308} +{"info/global_step": 309, "train_info/time_within_train_step": 20.02053737640381, "step": 309} +{"train_info/time_between_train_steps": 0.0047190189361572266, "step": 309} +{"info/global_step": 310, "train_info/time_within_train_step": 13.022072076797485, "step": 310} +{"train_info/time_between_train_steps": 0.004611015319824219, "step": 310} +{"info/global_step": 311, "train_info/time_within_train_step": 14.783024311065674, "step": 311} +{"train_info/time_between_train_steps": 0.004908084869384766, "step": 311} +{"info/global_step": 312, "train_info/time_within_train_step": 16.094151258468628, "step": 312} +{"train_info/time_between_train_steps": 0.005052328109741211, "step": 312} +{"info/global_step": 313, "train_info/time_within_train_step": 12.965043067932129, "step": 313} +{"train_info/time_between_train_steps": 0.004599094390869141, "step": 313} +{"info/global_step": 314, "train_info/time_within_train_step": 12.975759029388428, "step": 314} +{"train_info/time_between_train_steps": 0.004529714584350586, "step": 314} +{"info/global_step": 315, "train_info/time_within_train_step": 15.223715782165527, "step": 315} +{"train_info/time_between_train_steps": 0.004632711410522461, "step": 315} +{"info/global_step": 316, "train_info/time_within_train_step": 19.443419694900513, "step": 316} +{"train_info/time_between_train_steps": 0.00480198860168457, "step": 316} +{"info/global_step": 317, "train_info/time_within_train_step": 12.967532634735107, "step": 317} +{"train_info/time_between_train_steps": 0.004694700241088867, "step": 317} +{"info/global_step": 318, "train_info/time_within_train_step": 14.888317584991455, "step": 318} +{"train_info/time_between_train_steps": 0.004789590835571289, "step": 318} +{"info/global_step": 319, "train_info/time_within_train_step": 18.759884119033813, "step": 319} +{"train_info/time_between_train_steps": 0.0046002864837646484, "step": 319} +{"info/global_step": 320, "train_info/time_within_train_step": 12.964306592941284, "step": 320} +{"train_info/time_between_train_steps": 0.004582405090332031, "step": 320} +{"info/global_step": 321, "train_info/time_within_train_step": 12.97156310081482, "step": 321} +{"train_info/time_between_train_steps": 0.004616975784301758, "step": 321} +{"info/global_step": 322, "train_info/time_within_train_step": 14.622868299484253, "step": 322} +{"train_info/time_between_train_steps": 0.0049974918365478516, "step": 322} +{"info/global_step": 323, "train_info/time_within_train_step": 16.43463659286499, "step": 323} +{"train_info/time_between_train_steps": 0.004576206207275391, "step": 323} +{"info/global_step": 324, "train_info/time_within_train_step": 13.041512489318848, "step": 324} +{"train_info/time_between_train_steps": 0.004918098449707031, "step": 324} +{"info/global_step": 325, "train_info/time_within_train_step": 13.221356391906738, "step": 325} +{"train_info/time_between_train_steps": 0.004629373550415039, "step": 325} +{"info/global_step": 326, "train_info/time_within_train_step": 13.563167572021484, "step": 326} +{"train_info/time_between_train_steps": 0.004488945007324219, "step": 326} +{"info/global_step": 327, "train_info/time_within_train_step": 17.353657722473145, "step": 327} +{"train_info/time_between_train_steps": 0.004749298095703125, "step": 327} +{"info/global_step": 328, "train_info/time_within_train_step": 12.991068124771118, "step": 328} +{"train_info/time_between_train_steps": 0.0048847198486328125, "step": 328} +{"info/global_step": 329, "train_info/time_within_train_step": 13.528398990631104, "step": 329} +{"train_info/time_between_train_steps": 0.00591731071472168, "step": 329} +{"info/global_step": 330, "train_info/time_within_train_step": 15.228824138641357, "step": 330} +{"train_info/time_between_train_steps": 0.005729198455810547, "step": 330} +{"train_info/time_between_train_steps": 16.557408094406128, "step": 330} +{"info/global_step": 331, "train_info/time_within_train_step": 12.94352126121521, "step": 331} +{"train_info/time_between_train_steps": 0.004399776458740234, "step": 331} +{"info/global_step": 332, "train_info/time_within_train_step": 14.295925855636597, "step": 332} +{"train_info/time_between_train_steps": 0.0051538944244384766, "step": 332} +{"info/global_step": 333, "train_info/time_within_train_step": 13.321551084518433, "step": 333} +{"train_info/time_between_train_steps": 0.005744218826293945, "step": 333} +{"info/global_step": 334, "train_info/time_within_train_step": 13.116651058197021, "step": 334} +{"train_info/time_between_train_steps": 0.004854679107666016, "step": 334} +{"info/global_step": 335, "train_info/time_within_train_step": 12.97708535194397, "step": 335} +{"train_info/time_between_train_steps": 0.005235195159912109, "step": 335} +{"info/global_step": 336, "train_info/time_within_train_step": 13.076179504394531, "step": 336} +{"train_info/time_between_train_steps": 0.005068778991699219, "step": 336} +{"info/global_step": 337, "train_info/time_within_train_step": 12.986365556716919, "step": 337} +{"train_info/time_between_train_steps": 0.005644083023071289, "step": 337} +{"info/global_step": 338, "train_info/time_within_train_step": 13.082489490509033, "step": 338} +{"train_info/time_between_train_steps": 0.005285739898681641, "step": 338} +{"info/global_step": 339, "train_info/time_within_train_step": 12.990310192108154, "step": 339} +{"train_info/time_between_train_steps": 0.004772186279296875, "step": 339} +{"info/global_step": 340, "train_info/time_within_train_step": 13.154244661331177, "step": 340} +{"train_info/time_between_train_steps": 0.0053899288177490234, "step": 340} +{"info/global_step": 341, "train_info/time_within_train_step": 13.009250164031982, "step": 341} +{"train_info/time_between_train_steps": 0.004957914352416992, "step": 341} +{"info/global_step": 342, "train_info/time_within_train_step": 13.033153533935547, "step": 342} +{"train_info/time_between_train_steps": 0.0052335262298583984, "step": 342} +{"info/global_step": 343, "train_info/time_within_train_step": 13.026583433151245, "step": 343} +{"train_info/time_between_train_steps": 0.0046405792236328125, "step": 343} +{"info/global_step": 344, "train_info/time_within_train_step": 12.980142593383789, "step": 344} +{"train_info/time_between_train_steps": 0.005399942398071289, "step": 344} +{"info/global_step": 345, "train_info/time_within_train_step": 12.982312202453613, "step": 345} +{"train_info/time_between_train_steps": 0.004960298538208008, "step": 345} +{"info/global_step": 346, "train_info/time_within_train_step": 12.986929178237915, "step": 346} +{"train_info/time_between_train_steps": 0.005062103271484375, "step": 346} +{"info/global_step": 347, "train_info/time_within_train_step": 12.99320673942566, "step": 347} +{"train_info/time_between_train_steps": 0.0053479671478271484, "step": 347} +{"info/global_step": 348, "train_info/time_within_train_step": 12.986138343811035, "step": 348} +{"train_info/time_between_train_steps": 0.004930973052978516, "step": 348} +{"info/global_step": 349, "train_info/time_within_train_step": 12.985005378723145, "step": 349} +{"train_info/time_between_train_steps": 0.004794120788574219, "step": 349} +{"info/global_step": 350, "train_info/time_within_train_step": 12.987370014190674, "step": 350} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737371830, "_runtime": 5825}, "step": 350} +{"logs": {"train/loss": 5.4902, "train/learning_rate": 0.00047222222222222224, "train/epoch": 11.02, "_timestamp": 1737371830, "_runtime": 5825}, "step": 350} +{"train_info/time_between_train_steps": 0.049218177795410156, "step": 350} +{"info/global_step": 351, "train_info/time_within_train_step": 12.990253686904907, "step": 351} +{"train_info/time_between_train_steps": 0.005306720733642578, "step": 351} +{"info/global_step": 352, "train_info/time_within_train_step": 13.054157257080078, "step": 352} +{"train_info/time_between_train_steps": 0.005323171615600586, "step": 352} +{"info/global_step": 353, "train_info/time_within_train_step": 12.985799789428711, "step": 353} +{"train_info/time_between_train_steps": 0.0055332183837890625, "step": 353} +{"info/global_step": 354, "train_info/time_within_train_step": 12.985289812088013, "step": 354} +{"train_info/time_between_train_steps": 0.005300998687744141, "step": 354} +{"info/global_step": 355, "train_info/time_within_train_step": 13.069297790527344, "step": 355} +{"train_info/time_between_train_steps": 0.0052564144134521484, "step": 355} +{"info/global_step": 356, "train_info/time_within_train_step": 13.008344173431396, "step": 356} +{"train_info/time_between_train_steps": 0.004790782928466797, "step": 356} +{"info/global_step": 357, "train_info/time_within_train_step": 12.993494510650635, "step": 357} +{"train_info/time_between_train_steps": 0.005689859390258789, "step": 357} +{"info/global_step": 358, "train_info/time_within_train_step": 12.996747493743896, "step": 358} +{"train_info/time_between_train_steps": 0.005336284637451172, "step": 358} +{"info/global_step": 359, "train_info/time_within_train_step": 13.000792980194092, "step": 359} +{"train_info/time_between_train_steps": 0.0051615238189697266, "step": 359} +{"info/global_step": 360, "train_info/time_within_train_step": 13.02516484260559, "step": 360} +{"train_info/time_between_train_steps": 0.006612300872802734, "step": 360} +{"train_info/time_between_train_steps": 7.84567928314209, "step": 360} +{"info/global_step": 361, "train_info/time_within_train_step": 13.01272463798523, "step": 361} +{"train_info/time_between_train_steps": 0.004769563674926758, "step": 361} +{"info/global_step": 362, "train_info/time_within_train_step": 13.109805583953857, "step": 362} +{"train_info/time_between_train_steps": 0.005562543869018555, "step": 362} +{"info/global_step": 363, "train_info/time_within_train_step": 13.009887933731079, "step": 363} +{"train_info/time_between_train_steps": 0.004981279373168945, "step": 363} +{"info/global_step": 364, "train_info/time_within_train_step": 13.090589046478271, "step": 364} +{"train_info/time_between_train_steps": 0.005750417709350586, "step": 364} +{"info/global_step": 365, "train_info/time_within_train_step": 13.006772518157959, "step": 365} +{"train_info/time_between_train_steps": 0.00576019287109375, "step": 365} +{"info/global_step": 366, "train_info/time_within_train_step": 13.095198392868042, "step": 366} +{"train_info/time_between_train_steps": 0.0053119659423828125, "step": 366} +{"info/global_step": 367, "train_info/time_within_train_step": 13.001522779464722, "step": 367} +{"train_info/time_between_train_steps": 0.005767345428466797, "step": 367} +{"info/global_step": 368, "train_info/time_within_train_step": 13.145532131195068, "step": 368} +{"train_info/time_between_train_steps": 0.005481243133544922, "step": 368} +{"info/global_step": 369, "train_info/time_within_train_step": 13.00127911567688, "step": 369} +{"train_info/time_between_train_steps": 0.005165815353393555, "step": 369} +{"info/global_step": 370, "train_info/time_within_train_step": 13.158264398574829, "step": 370} +{"train_info/time_between_train_steps": 0.005521535873413086, "step": 370} +{"info/global_step": 371, "train_info/time_within_train_step": 13.002920627593994, "step": 371} +{"train_info/time_between_train_steps": 0.005243062973022461, "step": 371} +{"info/global_step": 372, "train_info/time_within_train_step": 12.997447967529297, "step": 372} +{"train_info/time_between_train_steps": 0.005243539810180664, "step": 372} +{"info/global_step": 373, "train_info/time_within_train_step": 12.997123718261719, "step": 373} +{"train_info/time_between_train_steps": 0.0052642822265625, "step": 373} +{"info/global_step": 374, "train_info/time_within_train_step": 13.03249716758728, "step": 374} +{"train_info/time_between_train_steps": 0.005861520767211914, "step": 374} +{"info/global_step": 375, "train_info/time_within_train_step": 12.995284795761108, "step": 375} +{"train_info/time_between_train_steps": 0.0046994686126708984, "step": 375} +{"info/global_step": 376, "train_info/time_within_train_step": 12.986523151397705, "step": 376} +{"train_info/time_between_train_steps": 0.005097389221191406, "step": 376} +{"info/global_step": 377, "train_info/time_within_train_step": 13.00222396850586, "step": 377} +{"train_info/time_between_train_steps": 0.005674839019775391, "step": 377} +{"info/global_step": 378, "train_info/time_within_train_step": 12.990314245223999, "step": 378} +{"train_info/time_between_train_steps": 0.0049381256103515625, "step": 378} +{"info/global_step": 379, "train_info/time_within_train_step": 13.011643886566162, "step": 379} +{"train_info/time_between_train_steps": 0.005528926849365234, "step": 379} +{"info/global_step": 380, "train_info/time_within_train_step": 12.993587970733643, "step": 380} +{"train_info/time_between_train_steps": 0.004990816116333008, "step": 380} +{"info/global_step": 381, "train_info/time_within_train_step": 12.992846727371216, "step": 381} +{"train_info/time_between_train_steps": 0.00553584098815918, "step": 381} +{"info/global_step": 382, "train_info/time_within_train_step": 12.994763851165771, "step": 382} +{"train_info/time_between_train_steps": 0.005518674850463867, "step": 382} +{"info/global_step": 383, "train_info/time_within_train_step": 12.992778062820435, "step": 383} +{"train_info/time_between_train_steps": 0.004981040954589844, "step": 383} +{"info/global_step": 384, "train_info/time_within_train_step": 13.067376852035522, "step": 384} +{"train_info/time_between_train_steps": 0.004952430725097656, "step": 384} +{"info/global_step": 385, "train_info/time_within_train_step": 12.99320673942566, "step": 385} +{"train_info/time_between_train_steps": 0.005270719528198242, "step": 385} +{"info/global_step": 386, "train_info/time_within_train_step": 13.069751024246216, "step": 386} +{"train_info/time_between_train_steps": 0.0055084228515625, "step": 386} +{"info/global_step": 387, "train_info/time_within_train_step": 12.998335361480713, "step": 387} +{"train_info/time_between_train_steps": 0.004808187484741211, "step": 387} +{"info/global_step": 388, "train_info/time_within_train_step": 13.00928258895874, "step": 388} +{"train_info/time_between_train_steps": 0.005011796951293945, "step": 388} +{"info/global_step": 389, "train_info/time_within_train_step": 13.008450984954834, "step": 389} +{"train_info/time_between_train_steps": 0.005659580230712891, "step": 389} +{"info/global_step": 390, "train_info/time_within_train_step": 13.039733171463013, "step": 390} +{"train_info/time_between_train_steps": 0.006268978118896484, "step": 390} +{"train_info/time_between_train_steps": 7.96291184425354, "step": 390} +{"info/global_step": 391, "train_info/time_within_train_step": 12.97931456565857, "step": 391} +{"train_info/time_between_train_steps": 0.00484919548034668, "step": 391} +{"info/global_step": 392, "train_info/time_within_train_step": 13.108490467071533, "step": 392} +{"train_info/time_between_train_steps": 0.004777431488037109, "step": 392} +{"info/global_step": 393, "train_info/time_within_train_step": 12.983208179473877, "step": 393} +{"train_info/time_between_train_steps": 0.004852771759033203, "step": 393} +{"info/global_step": 394, "train_info/time_within_train_step": 13.071646451950073, "step": 394} +{"train_info/time_between_train_steps": 0.005591154098510742, "step": 394} +{"info/global_step": 395, "train_info/time_within_train_step": 13.053341388702393, "step": 395} +{"train_info/time_between_train_steps": 0.0056993961334228516, "step": 395} +{"info/global_step": 396, "train_info/time_within_train_step": 13.101495265960693, "step": 396} +{"train_info/time_between_train_steps": 0.0059299468994140625, "step": 396} +{"info/global_step": 397, "train_info/time_within_train_step": 13.00861120223999, "step": 397} +{"train_info/time_between_train_steps": 0.005182027816772461, "step": 397} +{"info/global_step": 398, "train_info/time_within_train_step": 13.0863516330719, "step": 398} +{"train_info/time_between_train_steps": 0.005074977874755859, "step": 398} +{"info/global_step": 399, "train_info/time_within_train_step": 12.990166425704956, "step": 399} +{"train_info/time_between_train_steps": 0.005543231964111328, "step": 399} +{"info/global_step": 400, "train_info/time_within_train_step": 13.077728509902954, "step": 400} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737372499, "_runtime": 6494}, "step": 400} +{"logs": {"train/loss": 5.1844, "train/learning_rate": 0.00044444444444444436, "train/epoch": 13.01, "_timestamp": 1737372499, "_runtime": 6494}, "step": 400} +{"train_info/time_between_train_steps": 83.42349123954773, "step": 400} +{"info/global_step": 401, "train_info/time_within_train_step": 13.053611755371094, "step": 401} +{"train_info/time_between_train_steps": 0.005263805389404297, "step": 401} +{"info/global_step": 402, "train_info/time_within_train_step": 12.941367387771606, "step": 402} +{"train_info/time_between_train_steps": 0.005149126052856445, "step": 402} +{"info/global_step": 403, "train_info/time_within_train_step": 12.947810649871826, "step": 403} +{"train_info/time_between_train_steps": 0.0052263736724853516, "step": 403} +{"info/global_step": 404, "train_info/time_within_train_step": 12.96599555015564, "step": 404} +{"train_info/time_between_train_steps": 0.005551815032958984, "step": 404} +{"info/global_step": 405, "train_info/time_within_train_step": 12.95776081085205, "step": 405} +{"train_info/time_between_train_steps": 0.005056142807006836, "step": 405} +{"info/global_step": 406, "train_info/time_within_train_step": 12.986453771591187, "step": 406} +{"train_info/time_between_train_steps": 0.005521297454833984, "step": 406} +{"info/global_step": 407, "train_info/time_within_train_step": 12.98525333404541, "step": 407} +{"train_info/time_between_train_steps": 0.0047566890716552734, "step": 407} +{"info/global_step": 408, "train_info/time_within_train_step": 12.985404014587402, "step": 408} +{"train_info/time_between_train_steps": 0.004862546920776367, "step": 408} +{"info/global_step": 409, "train_info/time_within_train_step": 12.994638442993164, "step": 409} +{"train_info/time_between_train_steps": 0.0050275325775146484, "step": 409} +{"info/global_step": 410, "train_info/time_within_train_step": 12.984718084335327, "step": 410} +{"train_info/time_between_train_steps": 0.004973173141479492, "step": 410} +{"info/global_step": 411, "train_info/time_within_train_step": 12.990387916564941, "step": 411} +{"train_info/time_between_train_steps": 0.005400657653808594, "step": 411} +{"info/global_step": 412, "train_info/time_within_train_step": 12.991845607757568, "step": 412} +{"train_info/time_between_train_steps": 0.004752159118652344, "step": 412} +{"info/global_step": 413, "train_info/time_within_train_step": 13.00450348854065, "step": 413} +{"train_info/time_between_train_steps": 0.005213260650634766, "step": 413} +{"info/global_step": 414, "train_info/time_within_train_step": 12.99396276473999, "step": 414} +{"train_info/time_between_train_steps": 0.0053861141204833984, "step": 414} +{"info/global_step": 415, "train_info/time_within_train_step": 13.006838083267212, "step": 415} +{"train_info/time_between_train_steps": 0.005327701568603516, "step": 415} +{"info/global_step": 416, "train_info/time_within_train_step": 12.993655443191528, "step": 416} +{"train_info/time_between_train_steps": 0.005345821380615234, "step": 416} +{"info/global_step": 417, "train_info/time_within_train_step": 13.089865446090698, "step": 417} +{"train_info/time_between_train_steps": 0.005090475082397461, "step": 417} +{"info/global_step": 418, "train_info/time_within_train_step": 13.003398895263672, "step": 418} +{"train_info/time_between_train_steps": 0.006065845489501953, "step": 418} +{"info/global_step": 419, "train_info/time_within_train_step": 13.012922048568726, "step": 419} +{"train_info/time_between_train_steps": 0.0062580108642578125, "step": 419} +{"info/global_step": 420, "train_info/time_within_train_step": 13.032811164855957, "step": 420} +{"train_info/time_between_train_steps": 0.0062673091888427734, "step": 420} +{"train_info/time_between_train_steps": 8.02306866645813, "step": 420} +{"info/global_step": 421, "train_info/time_within_train_step": 12.988563060760498, "step": 421} +{"train_info/time_between_train_steps": 0.005570173263549805, "step": 421} +{"info/global_step": 422, "train_info/time_within_train_step": 13.09588885307312, "step": 422} +{"train_info/time_between_train_steps": 0.005513429641723633, "step": 422} +{"info/global_step": 423, "train_info/time_within_train_step": 13.002959251403809, "step": 423} +{"train_info/time_between_train_steps": 0.00562596321105957, "step": 423} +{"info/global_step": 424, "train_info/time_within_train_step": 13.101467609405518, "step": 424} +{"train_info/time_between_train_steps": 0.005222797393798828, "step": 424} +{"info/global_step": 425, "train_info/time_within_train_step": 12.995129346847534, "step": 425} +{"train_info/time_between_train_steps": 0.005254268646240234, "step": 425} +{"info/global_step": 426, "train_info/time_within_train_step": 13.342885255813599, "step": 426} +{"train_info/time_between_train_steps": 0.0048296451568603516, "step": 426} +{"info/global_step": 427, "train_info/time_within_train_step": 12.999758005142212, "step": 427} +{"train_info/time_between_train_steps": 0.00536799430847168, "step": 427} +{"info/global_step": 428, "train_info/time_within_train_step": 13.119301557540894, "step": 428} +{"train_info/time_between_train_steps": 0.005148410797119141, "step": 428} +{"info/global_step": 429, "train_info/time_within_train_step": 13.521798849105835, "step": 429} +{"train_info/time_between_train_steps": 0.005076408386230469, "step": 429} +{"info/global_step": 430, "train_info/time_within_train_step": 17.29166865348816, "step": 430} +{"train_info/time_between_train_steps": 0.00460505485534668, "step": 430} +{"info/global_step": 431, "train_info/time_within_train_step": 12.97112774848938, "step": 431} +{"train_info/time_between_train_steps": 0.005181074142456055, "step": 431} +{"info/global_step": 432, "train_info/time_within_train_step": 13.032966136932373, "step": 432} +{"train_info/time_between_train_steps": 0.005377531051635742, "step": 432} +{"info/global_step": 433, "train_info/time_within_train_step": 13.087759971618652, "step": 433} +{"train_info/time_between_train_steps": 0.004343509674072266, "step": 433} +{"info/global_step": 434, "train_info/time_within_train_step": 13.377941370010376, "step": 434} +{"train_info/time_between_train_steps": 0.004906654357910156, "step": 434} +{"info/global_step": 435, "train_info/time_within_train_step": 13.19054388999939, "step": 435} +{"train_info/time_between_train_steps": 0.005460500717163086, "step": 435} +{"info/global_step": 436, "train_info/time_within_train_step": 13.163052797317505, "step": 436} +{"train_info/time_between_train_steps": 0.004758119583129883, "step": 436} +{"info/global_step": 437, "train_info/time_within_train_step": 13.26543378829956, "step": 437} +{"train_info/time_between_train_steps": 0.005133152008056641, "step": 437} +{"info/global_step": 438, "train_info/time_within_train_step": 13.036325931549072, "step": 438} +{"train_info/time_between_train_steps": 0.0047969818115234375, "step": 438} +{"info/global_step": 439, "train_info/time_within_train_step": 13.01809811592102, "step": 439} +{"train_info/time_between_train_steps": 0.005045175552368164, "step": 439} +{"info/global_step": 440, "train_info/time_within_train_step": 13.012151002883911, "step": 440} +{"train_info/time_between_train_steps": 0.0048542022705078125, "step": 440} +{"info/global_step": 441, "train_info/time_within_train_step": 12.998007535934448, "step": 441} +{"train_info/time_between_train_steps": 0.005870819091796875, "step": 441} +{"info/global_step": 442, "train_info/time_within_train_step": 12.992005825042725, "step": 442} +{"train_info/time_between_train_steps": 0.005585193634033203, "step": 442} +{"info/global_step": 443, "train_info/time_within_train_step": 13.001285076141357, "step": 443} +{"train_info/time_between_train_steps": 0.005602598190307617, "step": 443} +{"info/global_step": 444, "train_info/time_within_train_step": 13.020833015441895, "step": 444} +{"train_info/time_between_train_steps": 0.004467010498046875, "step": 444} +{"info/global_step": 445, "train_info/time_within_train_step": 13.001766204833984, "step": 445} +{"train_info/time_between_train_steps": 0.005715370178222656, "step": 445} +{"info/global_step": 446, "train_info/time_within_train_step": 13.018539667129517, "step": 446} +{"train_info/time_between_train_steps": 0.0056688785552978516, "step": 446} +{"info/global_step": 447, "train_info/time_within_train_step": 12.993725538253784, "step": 447} +{"train_info/time_between_train_steps": 0.005747318267822266, "step": 447} +{"info/global_step": 448, "train_info/time_within_train_step": 13.078865051269531, "step": 448} +{"train_info/time_between_train_steps": 0.0056149959564208984, "step": 448} +{"info/global_step": 449, "train_info/time_within_train_step": 13.009451866149902, "step": 449} +{"train_info/time_between_train_steps": 0.005211353302001953, "step": 449} +{"info/global_step": 450, "train_info/time_within_train_step": 13.026419639587402, "step": 450} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737373261, "_runtime": 7256}, "step": 450} +{"logs": {"train/loss": 4.8669, "train/learning_rate": 0.00041666666666666664, "train/epoch": 14.03, "_timestamp": 1737373261, "_runtime": 7256}, "step": 450} +{"train_info/time_between_train_steps": 0.03581857681274414, "step": 450} +{"train_info/time_between_train_steps": 7.847150802612305, "step": 450} +{"info/global_step": 451, "train_info/time_within_train_step": 12.995656490325928, "step": 451} +{"train_info/time_between_train_steps": 0.0053021907806396484, "step": 451} +{"info/global_step": 452, "train_info/time_within_train_step": 13.113374948501587, "step": 452} +{"train_info/time_between_train_steps": 0.004856109619140625, "step": 452} +{"info/global_step": 453, "train_info/time_within_train_step": 12.99965786933899, "step": 453} +{"train_info/time_between_train_steps": 0.005332469940185547, "step": 453} +{"info/global_step": 454, "train_info/time_within_train_step": 13.119893074035645, "step": 454} +{"train_info/time_between_train_steps": 0.004919290542602539, "step": 454} +{"info/global_step": 455, "train_info/time_within_train_step": 13.006332397460938, "step": 455} +{"train_info/time_between_train_steps": 0.0060422420501708984, "step": 455} +{"info/global_step": 456, "train_info/time_within_train_step": 13.129873275756836, "step": 456} +{"train_info/time_between_train_steps": 0.005319118499755859, "step": 456} +{"info/global_step": 457, "train_info/time_within_train_step": 13.013748168945312, "step": 457} +{"train_info/time_between_train_steps": 0.0050127506256103516, "step": 457} +{"info/global_step": 458, "train_info/time_within_train_step": 13.141806602478027, "step": 458} +{"train_info/time_between_train_steps": 0.005263090133666992, "step": 458} +{"info/global_step": 459, "train_info/time_within_train_step": 12.996234655380249, "step": 459} +{"train_info/time_between_train_steps": 0.0057179927825927734, "step": 459} +{"info/global_step": 460, "train_info/time_within_train_step": 13.04891300201416, "step": 460} +{"train_info/time_between_train_steps": 0.0053637027740478516, "step": 460} +{"info/global_step": 461, "train_info/time_within_train_step": 13.072077751159668, "step": 461} +{"train_info/time_between_train_steps": 0.005705118179321289, "step": 461} +{"info/global_step": 462, "train_info/time_within_train_step": 12.983926773071289, "step": 462} +{"train_info/time_between_train_steps": 0.005449056625366211, "step": 462} +{"info/global_step": 463, "train_info/time_within_train_step": 13.057533740997314, "step": 463} +{"train_info/time_between_train_steps": 0.005469560623168945, "step": 463} +{"info/global_step": 464, "train_info/time_within_train_step": 12.994399070739746, "step": 464} +{"train_info/time_between_train_steps": 0.005594015121459961, "step": 464} +{"info/global_step": 465, "train_info/time_within_train_step": 12.987491846084595, "step": 465} +{"train_info/time_between_train_steps": 0.005505561828613281, "step": 465} +{"info/global_step": 466, "train_info/time_within_train_step": 12.995104551315308, "step": 466} +{"train_info/time_between_train_steps": 0.005379676818847656, "step": 466} +{"info/global_step": 467, "train_info/time_within_train_step": 12.984456062316895, "step": 467} +{"train_info/time_between_train_steps": 0.0048563480377197266, "step": 467} +{"info/global_step": 468, "train_info/time_within_train_step": 12.987407922744751, "step": 468} +{"train_info/time_between_train_steps": 0.004601240158081055, "step": 468} +{"info/global_step": 469, "train_info/time_within_train_step": 12.986046314239502, "step": 469} +{"train_info/time_between_train_steps": 0.004995107650756836, "step": 469} +{"info/global_step": 470, "train_info/time_within_train_step": 12.994079113006592, "step": 470} +{"train_info/time_between_train_steps": 0.004983186721801758, "step": 470} +{"info/global_step": 471, "train_info/time_within_train_step": 13.021398782730103, "step": 471} +{"train_info/time_between_train_steps": 0.0051310062408447266, "step": 471} +{"info/global_step": 472, "train_info/time_within_train_step": 12.987656593322754, "step": 472} +{"train_info/time_between_train_steps": 0.0049974918365478516, "step": 472} +{"info/global_step": 473, "train_info/time_within_train_step": 12.991929769515991, "step": 473} +{"train_info/time_between_train_steps": 0.005432605743408203, "step": 473} +{"info/global_step": 474, "train_info/time_within_train_step": 13.011579990386963, "step": 474} +{"train_info/time_between_train_steps": 0.0057337284088134766, "step": 474} +{"info/global_step": 475, "train_info/time_within_train_step": 13.033477783203125, "step": 475} +{"train_info/time_between_train_steps": 0.005838871002197266, "step": 475} +{"info/global_step": 476, "train_info/time_within_train_step": 12.99491548538208, "step": 476} +{"train_info/time_between_train_steps": 0.004866838455200195, "step": 476} +{"info/global_step": 477, "train_info/time_within_train_step": 12.999471187591553, "step": 477} +{"train_info/time_between_train_steps": 0.00484776496887207, "step": 477} +{"info/global_step": 478, "train_info/time_within_train_step": 13.00838589668274, "step": 478} +{"train_info/time_between_train_steps": 0.005290985107421875, "step": 478} +{"info/global_step": 479, "train_info/time_within_train_step": 13.082292556762695, "step": 479} +{"train_info/time_between_train_steps": 0.0055844783782958984, "step": 479} +{"info/global_step": 480, "train_info/time_within_train_step": 13.094617366790771, "step": 480} +{"train_info/time_between_train_steps": 0.00519871711730957, "step": 480} +{"train_info/time_between_train_steps": 7.918237209320068, "step": 480} +{"info/global_step": 481, "train_info/time_within_train_step": 12.981727123260498, "step": 481} +{"train_info/time_between_train_steps": 0.004809141159057617, "step": 481} +{"info/global_step": 482, "train_info/time_within_train_step": 13.069182634353638, "step": 482} +{"train_info/time_between_train_steps": 0.004492282867431641, "step": 482} +{"info/global_step": 483, "train_info/time_within_train_step": 12.999330997467041, "step": 483} +{"train_info/time_between_train_steps": 0.0046443939208984375, "step": 483} +{"info/global_step": 484, "train_info/time_within_train_step": 13.065222263336182, "step": 484} +{"train_info/time_between_train_steps": 0.00499415397644043, "step": 484} +{"info/global_step": 485, "train_info/time_within_train_step": 12.986253499984741, "step": 485} +{"train_info/time_between_train_steps": 0.005747795104980469, "step": 485} +{"info/global_step": 486, "train_info/time_within_train_step": 13.100454807281494, "step": 486} +{"train_info/time_between_train_steps": 0.005835533142089844, "step": 486} +{"info/global_step": 487, "train_info/time_within_train_step": 13.008774995803833, "step": 487} +{"train_info/time_between_train_steps": 0.005491971969604492, "step": 487} +{"info/global_step": 488, "train_info/time_within_train_step": 13.093940734863281, "step": 488} +{"train_info/time_between_train_steps": 0.0055081844329833984, "step": 488} +{"info/global_step": 489, "train_info/time_within_train_step": 13.006871938705444, "step": 489} +{"train_info/time_between_train_steps": 0.00550389289855957, "step": 489} +{"info/global_step": 490, "train_info/time_within_train_step": 13.047722816467285, "step": 490} +{"train_info/time_between_train_steps": 0.005024433135986328, "step": 490} +{"info/global_step": 491, "train_info/time_within_train_step": 13.017558813095093, "step": 491} +{"train_info/time_between_train_steps": 0.005541563034057617, "step": 491} +{"info/global_step": 492, "train_info/time_within_train_step": 12.982073307037354, "step": 492} +{"train_info/time_between_train_steps": 0.005624532699584961, "step": 492} +{"info/global_step": 493, "train_info/time_within_train_step": 13.057304382324219, "step": 493} +{"train_info/time_between_train_steps": 0.005300760269165039, "step": 493} +{"info/global_step": 494, "train_info/time_within_train_step": 13.074121713638306, "step": 494} +{"train_info/time_between_train_steps": 0.005330085754394531, "step": 494} +{"info/global_step": 495, "train_info/time_within_train_step": 12.987142562866211, "step": 495} +{"train_info/time_between_train_steps": 0.00522613525390625, "step": 495} +{"info/global_step": 496, "train_info/time_within_train_step": 12.996517181396484, "step": 496} +{"train_info/time_between_train_steps": 0.0052912235260009766, "step": 496} +{"info/global_step": 497, "train_info/time_within_train_step": 13.000553131103516, "step": 497} +{"train_info/time_between_train_steps": 0.004935741424560547, "step": 497} +{"info/global_step": 498, "train_info/time_within_train_step": 12.984910249710083, "step": 498} +{"train_info/time_between_train_steps": 0.005040407180786133, "step": 498} +{"info/global_step": 499, "train_info/time_within_train_step": 12.990410566329956, "step": 499} +{"train_info/time_between_train_steps": 0.0049283504486083984, "step": 499} +{"info/global_step": 500, "train_info/time_within_train_step": 12.985008001327515, "step": 500} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737373930, "_runtime": 7925}, "step": 500} +{"logs": {"train/loss": 4.6547, "train/learning_rate": 0.00038888888888888887, "train/epoch": 16.02, "_timestamp": 1737373930, "_runtime": 7925}, "step": 500} +{"train_info/time_between_train_steps": 97.53018021583557, "step": 500} +{"info/global_step": 501, "train_info/time_within_train_step": 12.915252923965454, "step": 501} +{"train_info/time_between_train_steps": 0.005362510681152344, "step": 501} +{"info/global_step": 502, "train_info/time_within_train_step": 13.696547269821167, "step": 502} +{"train_info/time_between_train_steps": 0.0054035186767578125, "step": 502} +{"info/global_step": 503, "train_info/time_within_train_step": 12.92484712600708, "step": 503} +{"train_info/time_between_train_steps": 0.0050201416015625, "step": 503} +{"info/global_step": 504, "train_info/time_within_train_step": 12.946463823318481, "step": 504} +{"train_info/time_between_train_steps": 0.005521535873413086, "step": 504} +{"info/global_step": 505, "train_info/time_within_train_step": 12.955503463745117, "step": 505} +{"train_info/time_between_train_steps": 0.005360603332519531, "step": 505} +{"info/global_step": 506, "train_info/time_within_train_step": 12.982856750488281, "step": 506} +{"train_info/time_between_train_steps": 0.005351543426513672, "step": 506} +{"info/global_step": 507, "train_info/time_within_train_step": 12.969617366790771, "step": 507} +{"train_info/time_between_train_steps": 0.004774808883666992, "step": 507} +{"info/global_step": 508, "train_info/time_within_train_step": 12.998238563537598, "step": 508} +{"train_info/time_between_train_steps": 0.005455970764160156, "step": 508} +{"info/global_step": 509, "train_info/time_within_train_step": 13.07513976097107, "step": 509} +{"train_info/time_between_train_steps": 0.006236076354980469, "step": 509} +{"info/global_step": 510, "train_info/time_within_train_step": 13.01600432395935, "step": 510} +{"train_info/time_between_train_steps": 0.0057489871978759766, "step": 510} +{"train_info/time_between_train_steps": 8.036958694458008, "step": 510} +{"info/global_step": 511, "train_info/time_within_train_step": 13.032197713851929, "step": 511} +{"train_info/time_between_train_steps": 0.005416154861450195, "step": 511} +{"info/global_step": 512, "train_info/time_within_train_step": 13.124595642089844, "step": 512} +{"train_info/time_between_train_steps": 0.004828453063964844, "step": 512} +{"info/global_step": 513, "train_info/time_within_train_step": 12.985609531402588, "step": 513} +{"train_info/time_between_train_steps": 0.005444526672363281, "step": 513} +{"info/global_step": 514, "train_info/time_within_train_step": 13.092640161514282, "step": 514} +{"train_info/time_between_train_steps": 0.005423307418823242, "step": 514} +{"info/global_step": 515, "train_info/time_within_train_step": 12.99545407295227, "step": 515} +{"train_info/time_between_train_steps": 0.005663156509399414, "step": 515} +{"info/global_step": 516, "train_info/time_within_train_step": 13.093101978302002, "step": 516} +{"train_info/time_between_train_steps": 0.0052678585052490234, "step": 516} +{"info/global_step": 517, "train_info/time_within_train_step": 12.994301557540894, "step": 517} +{"train_info/time_between_train_steps": 0.005181074142456055, "step": 517} +{"info/global_step": 518, "train_info/time_within_train_step": 13.080430030822754, "step": 518} +{"train_info/time_between_train_steps": 0.0048139095306396484, "step": 518} +{"info/global_step": 519, "train_info/time_within_train_step": 12.998217821121216, "step": 519} +{"train_info/time_between_train_steps": 0.005313873291015625, "step": 519} +{"info/global_step": 520, "train_info/time_within_train_step": 13.076409578323364, "step": 520} +{"train_info/time_between_train_steps": 0.005126953125, "step": 520} +{"info/global_step": 521, "train_info/time_within_train_step": 13.033825635910034, "step": 521} +{"train_info/time_between_train_steps": 0.0053462982177734375, "step": 521} +{"info/global_step": 522, "train_info/time_within_train_step": 12.989888191223145, "step": 522} +{"train_info/time_between_train_steps": 0.0049896240234375, "step": 522} +{"info/global_step": 523, "train_info/time_within_train_step": 12.978394985198975, "step": 523} +{"train_info/time_between_train_steps": 0.004996776580810547, "step": 523} +{"info/global_step": 524, "train_info/time_within_train_step": 12.985146284103394, "step": 524} +{"train_info/time_between_train_steps": 0.005032777786254883, "step": 524} +{"info/global_step": 525, "train_info/time_within_train_step": 13.07094669342041, "step": 525} +{"train_info/time_between_train_steps": 0.005675077438354492, "step": 525} +{"info/global_step": 526, "train_info/time_within_train_step": 13.054770708084106, "step": 526} +{"train_info/time_between_train_steps": 0.005609989166259766, "step": 526} +{"info/global_step": 527, "train_info/time_within_train_step": 12.981393337249756, "step": 527} +{"train_info/time_between_train_steps": 0.0047528743743896484, "step": 527} +{"info/global_step": 528, "train_info/time_within_train_step": 12.9872887134552, "step": 528} +{"train_info/time_between_train_steps": 0.005091428756713867, "step": 528} +{"info/global_step": 529, "train_info/time_within_train_step": 13.102906703948975, "step": 529} +{"train_info/time_between_train_steps": 0.005090951919555664, "step": 529} +{"info/global_step": 530, "train_info/time_within_train_step": 17.12221884727478, "step": 530} +{"train_info/time_between_train_steps": 0.004839420318603516, "step": 530} +{"info/global_step": 531, "train_info/time_within_train_step": 12.976338863372803, "step": 531} +{"train_info/time_between_train_steps": 0.005479097366333008, "step": 531} +{"info/global_step": 532, "train_info/time_within_train_step": 12.969711542129517, "step": 532} +{"train_info/time_between_train_steps": 0.004672050476074219, "step": 532} +{"info/global_step": 533, "train_info/time_within_train_step": 13.644166946411133, "step": 533} +{"train_info/time_between_train_steps": 0.004583835601806641, "step": 533} +{"info/global_step": 534, "train_info/time_within_train_step": 17.181379079818726, "step": 534} +{"train_info/time_between_train_steps": 0.004693508148193359, "step": 534} +{"info/global_step": 535, "train_info/time_within_train_step": 12.96168065071106, "step": 535} +{"train_info/time_between_train_steps": 0.004950761795043945, "step": 535} +{"info/global_step": 536, "train_info/time_within_train_step": 14.84526801109314, "step": 536} +{"train_info/time_between_train_steps": 0.00507044792175293, "step": 536} +{"info/global_step": 537, "train_info/time_within_train_step": 21.450788021087646, "step": 537} +{"train_info/time_between_train_steps": 0.006265878677368164, "step": 537} +{"info/global_step": 538, "train_info/time_within_train_step": 12.940238952636719, "step": 538} +{"train_info/time_between_train_steps": 0.005219936370849609, "step": 538} +{"info/global_step": 539, "train_info/time_within_train_step": 14.290538549423218, "step": 539} +{"train_info/time_between_train_steps": 0.005461931228637695, "step": 539} +{"info/global_step": 540, "train_info/time_within_train_step": 17.587923049926758, "step": 540} +{"train_info/time_between_train_steps": 0.0058939456939697266, "step": 540} +{"train_info/time_between_train_steps": 13.585054874420166, "step": 540} +{"info/global_step": 541, "train_info/time_within_train_step": 12.967292308807373, "step": 541} +{"train_info/time_between_train_steps": 0.005413055419921875, "step": 541} +{"info/global_step": 542, "train_info/time_within_train_step": 15.193622589111328, "step": 542} +{"train_info/time_between_train_steps": 0.005766153335571289, "step": 542} +{"info/global_step": 543, "train_info/time_within_train_step": 18.728405952453613, "step": 543} +{"train_info/time_between_train_steps": 0.0053844451904296875, "step": 543} +{"info/global_step": 544, "train_info/time_within_train_step": 13.208436489105225, "step": 544} +{"train_info/time_between_train_steps": 0.005153656005859375, "step": 544} +{"info/global_step": 545, "train_info/time_within_train_step": 14.08393406867981, "step": 545} +{"train_info/time_between_train_steps": 0.0058574676513671875, "step": 545} +{"info/global_step": 546, "train_info/time_within_train_step": 16.75355815887451, "step": 546} +{"train_info/time_between_train_steps": 0.0053098201751708984, "step": 546} +{"info/global_step": 547, "train_info/time_within_train_step": 13.655303239822388, "step": 547} +{"train_info/time_between_train_steps": 0.005446672439575195, "step": 547} +{"info/global_step": 548, "train_info/time_within_train_step": 13.169913530349731, "step": 548} +{"train_info/time_between_train_steps": 0.0060367584228515625, "step": 548} +{"info/global_step": 549, "train_info/time_within_train_step": 14.518375635147095, "step": 549} +{"train_info/time_between_train_steps": 0.005568265914916992, "step": 549} +{"info/global_step": 550, "train_info/time_within_train_step": 17.286051511764526, "step": 550} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737374797, "_runtime": 8792}, "step": 550} +{"logs": {"train/loss": 4.4311, "train/learning_rate": 0.0003611111111111111, "train/epoch": 18.01, "_timestamp": 1737374797, "_runtime": 8792}, "step": 550} +{"train_info/time_between_train_steps": 2.547647714614868, "step": 550} +{"info/global_step": 551, "train_info/time_within_train_step": 13.06673526763916, "step": 551} +{"train_info/time_between_train_steps": 0.004912614822387695, "step": 551} +{"info/global_step": 552, "train_info/time_within_train_step": 13.155240058898926, "step": 552} +{"train_info/time_between_train_steps": 0.005418539047241211, "step": 552} +{"info/global_step": 553, "train_info/time_within_train_step": 14.030142068862915, "step": 553} +{"train_info/time_between_train_steps": 0.0048787593841552734, "step": 553} +{"info/global_step": 554, "train_info/time_within_train_step": 16.357003688812256, "step": 554} +{"train_info/time_between_train_steps": 0.004910469055175781, "step": 554} +{"info/global_step": 555, "train_info/time_within_train_step": 13.136779308319092, "step": 555} +{"train_info/time_between_train_steps": 0.004988193511962891, "step": 555} +{"info/global_step": 556, "train_info/time_within_train_step": 13.225025415420532, "step": 556} +{"train_info/time_between_train_steps": 0.004828929901123047, "step": 556} +{"info/global_step": 557, "train_info/time_within_train_step": 13.009939193725586, "step": 557} +{"train_info/time_between_train_steps": 0.005198955535888672, "step": 557} +{"info/global_step": 558, "train_info/time_within_train_step": 13.078250169754028, "step": 558} +{"train_info/time_between_train_steps": 0.005789041519165039, "step": 558} +{"info/global_step": 559, "train_info/time_within_train_step": 13.019439935684204, "step": 559} +{"train_info/time_between_train_steps": 0.005597591400146484, "step": 559} +{"info/global_step": 560, "train_info/time_within_train_step": 13.020514965057373, "step": 560} +{"train_info/time_between_train_steps": 0.00540471076965332, "step": 560} +{"info/global_step": 561, "train_info/time_within_train_step": 13.026920557022095, "step": 561} +{"train_info/time_between_train_steps": 0.005550861358642578, "step": 561} +{"info/global_step": 562, "train_info/time_within_train_step": 13.037400960922241, "step": 562} +{"train_info/time_between_train_steps": 0.005548238754272461, "step": 562} +{"info/global_step": 563, "train_info/time_within_train_step": 13.022163391113281, "step": 563} +{"train_info/time_between_train_steps": 0.004797220230102539, "step": 563} +{"info/global_step": 564, "train_info/time_within_train_step": 13.017566680908203, "step": 564} +{"train_info/time_between_train_steps": 0.005644321441650391, "step": 564} +{"info/global_step": 565, "train_info/time_within_train_step": 12.99001693725586, "step": 565} +{"train_info/time_between_train_steps": 0.00561070442199707, "step": 565} +{"info/global_step": 566, "train_info/time_within_train_step": 13.048974514007568, "step": 566} +{"train_info/time_between_train_steps": 0.0054645538330078125, "step": 566} +{"info/global_step": 567, "train_info/time_within_train_step": 13.072593688964844, "step": 567} +{"train_info/time_between_train_steps": 0.005608558654785156, "step": 567} +{"info/global_step": 568, "train_info/time_within_train_step": 12.997562170028687, "step": 568} +{"train_info/time_between_train_steps": 0.006187915802001953, "step": 568} +{"info/global_step": 569, "train_info/time_within_train_step": 13.00961709022522, "step": 569} +{"train_info/time_between_train_steps": 0.006405353546142578, "step": 569} +{"info/global_step": 570, "train_info/time_within_train_step": 13.029850721359253, "step": 570} +{"train_info/time_between_train_steps": 0.006330728530883789, "step": 570} +{"train_info/time_between_train_steps": 7.962343692779541, "step": 570} +{"info/global_step": 571, "train_info/time_within_train_step": 13.111313581466675, "step": 571} +{"train_info/time_between_train_steps": 0.005220174789428711, "step": 571} +{"info/global_step": 572, "train_info/time_within_train_step": 13.10316252708435, "step": 572} +{"train_info/time_between_train_steps": 0.005541086196899414, "step": 572} +{"info/global_step": 573, "train_info/time_within_train_step": 13.000708103179932, "step": 573} +{"train_info/time_between_train_steps": 0.005983829498291016, "step": 573} +{"info/global_step": 574, "train_info/time_within_train_step": 13.098920583724976, "step": 574} +{"train_info/time_between_train_steps": 0.0051305294036865234, "step": 574} +{"info/global_step": 575, "train_info/time_within_train_step": 12.994088649749756, "step": 575} +{"train_info/time_between_train_steps": 0.0055768489837646484, "step": 575} +{"info/global_step": 576, "train_info/time_within_train_step": 13.162222862243652, "step": 576} +{"train_info/time_between_train_steps": 0.005991458892822266, "step": 576} +{"info/global_step": 577, "train_info/time_within_train_step": 12.994445323944092, "step": 577} +{"train_info/time_between_train_steps": 0.005740165710449219, "step": 577} +{"info/global_step": 578, "train_info/time_within_train_step": 13.085466384887695, "step": 578} +{"train_info/time_between_train_steps": 0.005560398101806641, "step": 578} +{"info/global_step": 579, "train_info/time_within_train_step": 13.027226448059082, "step": 579} +{"train_info/time_between_train_steps": 0.005631208419799805, "step": 579} +{"info/global_step": 580, "train_info/time_within_train_step": 13.100844144821167, "step": 580} +{"train_info/time_between_train_steps": 0.005814075469970703, "step": 580} +{"info/global_step": 581, "train_info/time_within_train_step": 13.006496906280518, "step": 581} +{"train_info/time_between_train_steps": 0.005601167678833008, "step": 581} +{"info/global_step": 582, "train_info/time_within_train_step": 12.997182130813599, "step": 582} +{"train_info/time_between_train_steps": 0.004999399185180664, "step": 582} +{"info/global_step": 583, "train_info/time_within_train_step": 12.98202633857727, "step": 583} +{"train_info/time_between_train_steps": 0.004383087158203125, "step": 583} +{"info/global_step": 584, "train_info/time_within_train_step": 12.992326498031616, "step": 584} +{"train_info/time_between_train_steps": 0.005409717559814453, "step": 584} +{"info/global_step": 585, "train_info/time_within_train_step": 13.048929691314697, "step": 585} +{"train_info/time_between_train_steps": 0.005085945129394531, "step": 585} +{"info/global_step": 586, "train_info/time_within_train_step": 13.062762260437012, "step": 586} +{"train_info/time_between_train_steps": 0.005423069000244141, "step": 586} +{"info/global_step": 587, "train_info/time_within_train_step": 12.982868432998657, "step": 587} +{"train_info/time_between_train_steps": 0.004999399185180664, "step": 587} +{"info/global_step": 588, "train_info/time_within_train_step": 12.988957643508911, "step": 588} +{"train_info/time_between_train_steps": 0.005769968032836914, "step": 588} +{"info/global_step": 589, "train_info/time_within_train_step": 13.012678146362305, "step": 589} +{"train_info/time_between_train_steps": 0.0051860809326171875, "step": 589} +{"info/global_step": 590, "train_info/time_within_train_step": 13.048701286315918, "step": 590} +{"train_info/time_between_train_steps": 0.005048274993896484, "step": 590} +{"info/global_step": 591, "train_info/time_within_train_step": 12.990013122558594, "step": 591} +{"train_info/time_between_train_steps": 0.005559206008911133, "step": 591} +{"info/global_step": 592, "train_info/time_within_train_step": 13.00559663772583, "step": 592} +{"train_info/time_between_train_steps": 0.005397796630859375, "step": 592} +{"info/global_step": 593, "train_info/time_within_train_step": 12.98529577255249, "step": 593} +{"train_info/time_between_train_steps": 0.005293607711791992, "step": 593} +{"info/global_step": 594, "train_info/time_within_train_step": 13.013144969940186, "step": 594} +{"train_info/time_between_train_steps": 0.0055429935455322266, "step": 594} +{"info/global_step": 595, "train_info/time_within_train_step": 12.990709781646729, "step": 595} +{"train_info/time_between_train_steps": 0.005054950714111328, "step": 595} +{"info/global_step": 596, "train_info/time_within_train_step": 12.993837833404541, "step": 596} +{"train_info/time_between_train_steps": 0.005783796310424805, "step": 596} +{"info/global_step": 597, "train_info/time_within_train_step": 13.020291328430176, "step": 597} +{"train_info/time_between_train_steps": 0.005605459213256836, "step": 597} +{"info/global_step": 598, "train_info/time_within_train_step": 13.017490863800049, "step": 598} +{"train_info/time_between_train_steps": 0.00586700439453125, "step": 598} +{"info/global_step": 599, "train_info/time_within_train_step": 13.07261037826538, "step": 599} +{"train_info/time_between_train_steps": 0.005965709686279297, "step": 599} +{"info/global_step": 600, "train_info/time_within_train_step": 13.029133081436157, "step": 600} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737375475, "_runtime": 9470}, "step": 600} +{"logs": {"train/loss": 4.1923, "train/learning_rate": 0.0003333333333333333, "train/epoch": 19.02, "_timestamp": 1737375475, "_runtime": 9470}, "step": 600} +{"train_info/time_between_train_steps": 97.56535935401917, "step": 600} +{"train_info/time_between_train_steps": 108.24602913856506, "step": 600} +{"info/global_step": 601, "train_info/time_within_train_step": 13.001835346221924, "step": 601} +{"train_info/time_between_train_steps": 0.00551152229309082, "step": 601} +{"info/global_step": 602, "train_info/time_within_train_step": 13.356348037719727, "step": 602} +{"train_info/time_between_train_steps": 0.005728006362915039, "step": 602} +{"info/global_step": 603, "train_info/time_within_train_step": 12.961588382720947, "step": 603} +{"train_info/time_between_train_steps": 0.005625009536743164, "step": 603} +{"info/global_step": 604, "train_info/time_within_train_step": 13.069425106048584, "step": 604} +{"train_info/time_between_train_steps": 0.0060575008392333984, "step": 604} +{"info/global_step": 605, "train_info/time_within_train_step": 13.0053391456604, "step": 605} +{"train_info/time_between_train_steps": 0.005615234375, "step": 605} +{"info/global_step": 606, "train_info/time_within_train_step": 13.059074878692627, "step": 606} +{"train_info/time_between_train_steps": 0.005198240280151367, "step": 606} +{"info/global_step": 607, "train_info/time_within_train_step": 12.977851390838623, "step": 607} +{"train_info/time_between_train_steps": 0.0051059722900390625, "step": 607} +{"info/global_step": 608, "train_info/time_within_train_step": 13.104573011398315, "step": 608} +{"train_info/time_between_train_steps": 0.005587100982666016, "step": 608} +{"info/global_step": 609, "train_info/time_within_train_step": 13.011024236679077, "step": 609} +{"train_info/time_between_train_steps": 0.005802154541015625, "step": 609} +{"info/global_step": 610, "train_info/time_within_train_step": 13.062531232833862, "step": 610} +{"train_info/time_between_train_steps": 0.005597114562988281, "step": 610} +{"info/global_step": 611, "train_info/time_within_train_step": 13.000076532363892, "step": 611} +{"train_info/time_between_train_steps": 0.005077838897705078, "step": 611} +{"info/global_step": 612, "train_info/time_within_train_step": 12.977179527282715, "step": 612} +{"train_info/time_between_train_steps": 0.005410671234130859, "step": 612} +{"info/global_step": 613, "train_info/time_within_train_step": 13.022434711456299, "step": 613} +{"train_info/time_between_train_steps": 0.004920244216918945, "step": 613} +{"info/global_step": 614, "train_info/time_within_train_step": 12.980807304382324, "step": 614} +{"train_info/time_between_train_steps": 0.0052645206451416016, "step": 614} +{"info/global_step": 615, "train_info/time_within_train_step": 12.988696336746216, "step": 615} +{"train_info/time_between_train_steps": 0.0053234100341796875, "step": 615} +{"info/global_step": 616, "train_info/time_within_train_step": 12.991041898727417, "step": 616} +{"train_info/time_between_train_steps": 0.00504612922668457, "step": 616} +{"info/global_step": 617, "train_info/time_within_train_step": 13.061331272125244, "step": 617} +{"train_info/time_between_train_steps": 0.004868984222412109, "step": 617} +{"info/global_step": 618, "train_info/time_within_train_step": 13.00107741355896, "step": 618} +{"train_info/time_between_train_steps": 0.005110263824462891, "step": 618} +{"info/global_step": 619, "train_info/time_within_train_step": 12.981475353240967, "step": 619} +{"train_info/time_between_train_steps": 0.005275726318359375, "step": 619} +{"info/global_step": 620, "train_info/time_within_train_step": 12.983936786651611, "step": 620} +{"train_info/time_between_train_steps": 0.004935026168823242, "step": 620} +{"info/global_step": 621, "train_info/time_within_train_step": 13.014482259750366, "step": 621} +{"train_info/time_between_train_steps": 0.005663394927978516, "step": 621} +{"info/global_step": 622, "train_info/time_within_train_step": 13.006995439529419, "step": 622} +{"train_info/time_between_train_steps": 0.0046117305755615234, "step": 622} +{"info/global_step": 623, "train_info/time_within_train_step": 12.986441373825073, "step": 623} +{"train_info/time_between_train_steps": 0.005567312240600586, "step": 623} +{"info/global_step": 624, "train_info/time_within_train_step": 12.989973545074463, "step": 624} +{"train_info/time_between_train_steps": 0.004620552062988281, "step": 624} +{"info/global_step": 625, "train_info/time_within_train_step": 12.994464874267578, "step": 625} +{"train_info/time_between_train_steps": 0.005372047424316406, "step": 625} +{"info/global_step": 626, "train_info/time_within_train_step": 12.99227237701416, "step": 626} +{"train_info/time_between_train_steps": 0.0049135684967041016, "step": 626} +{"info/global_step": 627, "train_info/time_within_train_step": 13.013827323913574, "step": 627} +{"train_info/time_between_train_steps": 0.004840850830078125, "step": 627} +{"info/global_step": 628, "train_info/time_within_train_step": 12.9962158203125, "step": 628} +{"train_info/time_between_train_steps": 0.0054628849029541016, "step": 628} +{"info/global_step": 629, "train_info/time_within_train_step": 13.012884616851807, "step": 629} +{"train_info/time_between_train_steps": 0.006136894226074219, "step": 629} +{"info/global_step": 630, "train_info/time_within_train_step": 13.011823177337646, "step": 630} +{"train_info/time_between_train_steps": 0.00632023811340332, "step": 630} +{"train_info/time_between_train_steps": 7.752305507659912, "step": 630} +{"info/global_step": 631, "train_info/time_within_train_step": 13.03921914100647, "step": 631} +{"train_info/time_between_train_steps": 0.005753517150878906, "step": 631} +{"info/global_step": 632, "train_info/time_within_train_step": 13.136014461517334, "step": 632} +{"train_info/time_between_train_steps": 0.004981517791748047, "step": 632} +{"info/global_step": 633, "train_info/time_within_train_step": 12.982481956481934, "step": 633} +{"train_info/time_between_train_steps": 0.005448579788208008, "step": 633} +{"info/global_step": 634, "train_info/time_within_train_step": 13.808477640151978, "step": 634} +{"train_info/time_between_train_steps": 0.004820823669433594, "step": 634} +{"info/global_step": 635, "train_info/time_within_train_step": 14.977145433425903, "step": 635} +{"train_info/time_between_train_steps": 0.005156517028808594, "step": 635} +{"info/global_step": 636, "train_info/time_within_train_step": 13.385010004043579, "step": 636} +{"train_info/time_between_train_steps": 0.0051386356353759766, "step": 636} +{"info/global_step": 637, "train_info/time_within_train_step": 12.999300479888916, "step": 637} +{"train_info/time_between_train_steps": 0.005875825881958008, "step": 637} +{"info/global_step": 638, "train_info/time_within_train_step": 13.107390642166138, "step": 638} +{"train_info/time_between_train_steps": 0.004797458648681641, "step": 638} +{"info/global_step": 639, "train_info/time_within_train_step": 13.020979642868042, "step": 639} +{"train_info/time_between_train_steps": 0.006104230880737305, "step": 639} +{"info/global_step": 640, "train_info/time_within_train_step": 13.110273599624634, "step": 640} +{"train_info/time_between_train_steps": 0.005135059356689453, "step": 640} +{"info/global_step": 641, "train_info/time_within_train_step": 12.996052980422974, "step": 641} +{"train_info/time_between_train_steps": 0.004997730255126953, "step": 641} +{"info/global_step": 642, "train_info/time_within_train_step": 12.989577531814575, "step": 642} +{"train_info/time_between_train_steps": 0.005152225494384766, "step": 642} +{"info/global_step": 643, "train_info/time_within_train_step": 13.0021390914917, "step": 643} +{"train_info/time_between_train_steps": 0.005422115325927734, "step": 643} +{"info/global_step": 644, "train_info/time_within_train_step": 12.999656200408936, "step": 644} +{"train_info/time_between_train_steps": 0.0051882266998291016, "step": 644} +{"info/global_step": 645, "train_info/time_within_train_step": 12.993528366088867, "step": 645} +{"train_info/time_between_train_steps": 0.005743503570556641, "step": 645} +{"info/global_step": 646, "train_info/time_within_train_step": 12.98902702331543, "step": 646} +{"train_info/time_between_train_steps": 0.004585742950439453, "step": 646} +{"info/global_step": 647, "train_info/time_within_train_step": 12.988132953643799, "step": 647} +{"train_info/time_between_train_steps": 0.005047798156738281, "step": 647} +{"info/global_step": 648, "train_info/time_within_train_step": 13.082112789154053, "step": 648} +{"train_info/time_between_train_steps": 0.004570960998535156, "step": 648} +{"info/global_step": 649, "train_info/time_within_train_step": 13.015982389450073, "step": 649} +{"train_info/time_between_train_steps": 0.0057828426361083984, "step": 649} +{"info/global_step": 650, "train_info/time_within_train_step": 13.061199426651001, "step": 650} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737376255, "_runtime": 10250}, "step": 650} +{"logs": {"train/loss": 4.0473, "train/learning_rate": 0.00030555555555555555, "train/epoch": 21.02, "_timestamp": 1737376255, "_runtime": 10250}, "step": 650} +{"train_info/time_between_train_steps": 0.03436851501464844, "step": 650} +{"info/global_step": 651, "train_info/time_within_train_step": 13.099277973175049, "step": 651} +{"train_info/time_between_train_steps": 0.005098819732666016, "step": 651} +{"info/global_step": 652, "train_info/time_within_train_step": 13.798037767410278, "step": 652} +{"train_info/time_between_train_steps": 0.00516200065612793, "step": 652} +{"info/global_step": 653, "train_info/time_within_train_step": 17.600136756896973, "step": 653} +{"train_info/time_between_train_steps": 0.005268573760986328, "step": 653} +{"info/global_step": 654, "train_info/time_within_train_step": 13.057849884033203, "step": 654} +{"train_info/time_between_train_steps": 0.005158185958862305, "step": 654} +{"info/global_step": 655, "train_info/time_within_train_step": 13.07043719291687, "step": 655} +{"train_info/time_between_train_steps": 0.005290985107421875, "step": 655} +{"info/global_step": 656, "train_info/time_within_train_step": 15.098655462265015, "step": 656} +{"train_info/time_between_train_steps": 0.005166053771972656, "step": 656} +{"info/global_step": 657, "train_info/time_within_train_step": 21.021065950393677, "step": 657} +{"train_info/time_between_train_steps": 0.00556492805480957, "step": 657} +{"info/global_step": 658, "train_info/time_within_train_step": 13.03427243232727, "step": 658} +{"train_info/time_between_train_steps": 0.005241870880126953, "step": 658} +{"info/global_step": 659, "train_info/time_within_train_step": 14.410000085830688, "step": 659} +{"train_info/time_between_train_steps": 0.005934238433837891, "step": 659} +{"info/global_step": 660, "train_info/time_within_train_step": 17.223656177520752, "step": 660} +{"train_info/time_between_train_steps": 0.00554203987121582, "step": 660} +{"train_info/time_between_train_steps": 11.179700374603271, "step": 660} +{"info/global_step": 661, "train_info/time_within_train_step": 13.090837717056274, "step": 661} +{"train_info/time_between_train_steps": 0.006028413772583008, "step": 661} +{"info/global_step": 662, "train_info/time_within_train_step": 14.764509677886963, "step": 662} +{"train_info/time_between_train_steps": 0.005414009094238281, "step": 662} +{"info/global_step": 663, "train_info/time_within_train_step": 16.398781776428223, "step": 663} +{"train_info/time_between_train_steps": 0.006229877471923828, "step": 663} +{"info/global_step": 664, "train_info/time_within_train_step": 13.188380002975464, "step": 664} +{"train_info/time_between_train_steps": 0.005819797515869141, "step": 664} +{"info/global_step": 665, "train_info/time_within_train_step": 13.03195858001709, "step": 665} +{"train_info/time_between_train_steps": 0.0057222843170166016, "step": 665} +{"info/global_step": 666, "train_info/time_within_train_step": 13.266746520996094, "step": 666} +{"train_info/time_between_train_steps": 0.006081104278564453, "step": 666} +{"info/global_step": 667, "train_info/time_within_train_step": 13.066682577133179, "step": 667} +{"train_info/time_between_train_steps": 0.006077766418457031, "step": 667} +{"info/global_step": 668, "train_info/time_within_train_step": 13.237242460250854, "step": 668} +{"train_info/time_between_train_steps": 0.006984233856201172, "step": 668} +{"info/global_step": 669, "train_info/time_within_train_step": 13.070184707641602, "step": 669} +{"train_info/time_between_train_steps": 0.0061380863189697266, "step": 669} +{"info/global_step": 670, "train_info/time_within_train_step": 13.306966066360474, "step": 670} +{"train_info/time_between_train_steps": 0.005936145782470703, "step": 670} +{"info/global_step": 671, "train_info/time_within_train_step": 13.075278520584106, "step": 671} +{"train_info/time_between_train_steps": 0.005800962448120117, "step": 671} +{"info/global_step": 672, "train_info/time_within_train_step": 13.046024084091187, "step": 672} +{"train_info/time_between_train_steps": 0.0051305294036865234, "step": 672} +{"info/global_step": 673, "train_info/time_within_train_step": 13.03504490852356, "step": 673} +{"train_info/time_between_train_steps": 0.005486011505126953, "step": 673} +{"info/global_step": 674, "train_info/time_within_train_step": 13.032551765441895, "step": 674} +{"train_info/time_between_train_steps": 0.006028652191162109, "step": 674} +{"info/global_step": 675, "train_info/time_within_train_step": 13.038336753845215, "step": 675} +{"train_info/time_between_train_steps": 0.005198240280151367, "step": 675} +{"info/global_step": 676, "train_info/time_within_train_step": 13.058257579803467, "step": 676} +{"train_info/time_between_train_steps": 0.006044149398803711, "step": 676} +{"info/global_step": 677, "train_info/time_within_train_step": 13.071940422058105, "step": 677} +{"train_info/time_between_train_steps": 0.0058672428131103516, "step": 677} +{"info/global_step": 678, "train_info/time_within_train_step": 13.038136720657349, "step": 678} +{"train_info/time_between_train_steps": 0.005595684051513672, "step": 678} +{"info/global_step": 679, "train_info/time_within_train_step": 13.11994194984436, "step": 679} +{"train_info/time_between_train_steps": 0.0057353973388671875, "step": 679} +{"info/global_step": 680, "train_info/time_within_train_step": 13.042614698410034, "step": 680} +{"train_info/time_between_train_steps": 0.005408763885498047, "step": 680} +{"info/global_step": 681, "train_info/time_within_train_step": 13.064972162246704, "step": 681} +{"train_info/time_between_train_steps": 0.0052030086517333984, "step": 681} +{"info/global_step": 682, "train_info/time_within_train_step": 13.068347692489624, "step": 682} +{"train_info/time_between_train_steps": 0.005577802658081055, "step": 682} +{"info/global_step": 683, "train_info/time_within_train_step": 13.035770177841187, "step": 683} +{"train_info/time_between_train_steps": 0.004847526550292969, "step": 683} +{"info/global_step": 684, "train_info/time_within_train_step": 13.041262149810791, "step": 684} +{"train_info/time_between_train_steps": 0.004858732223510742, "step": 684} +{"info/global_step": 685, "train_info/time_within_train_step": 13.0538649559021, "step": 685} +{"train_info/time_between_train_steps": 0.005852699279785156, "step": 685} +{"info/global_step": 686, "train_info/time_within_train_step": 13.070004940032959, "step": 686} +{"train_info/time_between_train_steps": 0.0057637691497802734, "step": 686} +{"info/global_step": 687, "train_info/time_within_train_step": 13.040592670440674, "step": 687} +{"train_info/time_between_train_steps": 0.005203962326049805, "step": 687} +{"info/global_step": 688, "train_info/time_within_train_step": 13.0485999584198, "step": 688} +{"train_info/time_between_train_steps": 0.005971193313598633, "step": 688} +{"info/global_step": 689, "train_info/time_within_train_step": 13.061453580856323, "step": 689} +{"train_info/time_between_train_steps": 0.006682395935058594, "step": 689} +{"info/global_step": 690, "train_info/time_within_train_step": 13.111522674560547, "step": 690} +{"train_info/time_between_train_steps": 0.00576329231262207, "step": 690} +{"train_info/time_between_train_steps": 8.030303716659546, "step": 690} +{"info/global_step": 691, "train_info/time_within_train_step": 13.018132209777832, "step": 691} +{"train_info/time_between_train_steps": 0.0053369998931884766, "step": 691} +{"info/global_step": 692, "train_info/time_within_train_step": 13.131333112716675, "step": 692} +{"train_info/time_between_train_steps": 0.005774736404418945, "step": 692} +{"info/global_step": 693, "train_info/time_within_train_step": 13.038434267044067, "step": 693} +{"train_info/time_between_train_steps": 0.0050427913665771484, "step": 693} +{"info/global_step": 694, "train_info/time_within_train_step": 13.204737663269043, "step": 694} +{"train_info/time_between_train_steps": 0.005367755889892578, "step": 694} +{"info/global_step": 695, "train_info/time_within_train_step": 13.066641330718994, "step": 695} +{"train_info/time_between_train_steps": 0.005555868148803711, "step": 695} +{"info/global_step": 696, "train_info/time_within_train_step": 13.148389101028442, "step": 696} +{"train_info/time_between_train_steps": 0.0054416656494140625, "step": 696} +{"info/global_step": 697, "train_info/time_within_train_step": 13.046189546585083, "step": 697} +{"train_info/time_between_train_steps": 0.005805015563964844, "step": 697} +{"info/global_step": 698, "train_info/time_within_train_step": 13.122875452041626, "step": 698} +{"train_info/time_between_train_steps": 0.005231618881225586, "step": 698} +{"info/global_step": 699, "train_info/time_within_train_step": 13.04174280166626, "step": 699} +{"train_info/time_between_train_steps": 0.0057413578033447266, "step": 699} +{"info/global_step": 700, "train_info/time_within_train_step": 13.09583830833435, "step": 700} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737376974, "_runtime": 10969}, "step": 700} +{"logs": {"train/loss": 3.9047, "train/learning_rate": 0.0002777777777777778, "train/epoch": 23.01, "_timestamp": 1737376974, "_runtime": 10969}, "step": 700} +{"train_info/time_between_train_steps": 64.1513340473175, "step": 700} +{"info/global_step": 701, "train_info/time_within_train_step": 12.996925115585327, "step": 701} +{"train_info/time_between_train_steps": 0.005220174789428711, "step": 701} +{"info/global_step": 702, "train_info/time_within_train_step": 13.940896272659302, "step": 702} +{"train_info/time_between_train_steps": 0.005545854568481445, "step": 702} +{"info/global_step": 703, "train_info/time_within_train_step": 14.805267810821533, "step": 703} +{"train_info/time_between_train_steps": 0.004706144332885742, "step": 703} +{"info/global_step": 704, "train_info/time_within_train_step": 12.985496997833252, "step": 704} +{"train_info/time_between_train_steps": 0.0050585269927978516, "step": 704} +{"info/global_step": 705, "train_info/time_within_train_step": 12.993814945220947, "step": 705} +{"train_info/time_between_train_steps": 0.005860567092895508, "step": 705} +{"info/global_step": 706, "train_info/time_within_train_step": 13.002261877059937, "step": 706} +{"train_info/time_between_train_steps": 0.005485057830810547, "step": 706} +{"info/global_step": 707, "train_info/time_within_train_step": 13.050413608551025, "step": 707} +{"train_info/time_between_train_steps": 0.005090236663818359, "step": 707} +{"info/global_step": 708, "train_info/time_within_train_step": 13.00141429901123, "step": 708} +{"train_info/time_between_train_steps": 0.005116462707519531, "step": 708} +{"info/global_step": 709, "train_info/time_within_train_step": 13.016120195388794, "step": 709} +{"train_info/time_between_train_steps": 0.0058765411376953125, "step": 709} +{"info/global_step": 710, "train_info/time_within_train_step": 13.099091053009033, "step": 710} +{"train_info/time_between_train_steps": 0.005083560943603516, "step": 710} +{"info/global_step": 711, "train_info/time_within_train_step": 13.024950981140137, "step": 711} +{"train_info/time_between_train_steps": 0.0050237178802490234, "step": 711} +{"info/global_step": 712, "train_info/time_within_train_step": 13.014950037002563, "step": 712} +{"train_info/time_between_train_steps": 0.005014896392822266, "step": 712} +{"info/global_step": 713, "train_info/time_within_train_step": 13.022314310073853, "step": 713} +{"train_info/time_between_train_steps": 0.0055425167083740234, "step": 713} +{"info/global_step": 714, "train_info/time_within_train_step": 13.03071641921997, "step": 714} +{"train_info/time_between_train_steps": 0.00551295280456543, "step": 714} +{"info/global_step": 715, "train_info/time_within_train_step": 13.02112340927124, "step": 715} +{"train_info/time_between_train_steps": 0.005537509918212891, "step": 715} +{"info/global_step": 716, "train_info/time_within_train_step": 13.055681705474854, "step": 716} +{"train_info/time_between_train_steps": 0.0056056976318359375, "step": 716} +{"info/global_step": 717, "train_info/time_within_train_step": 13.029381036758423, "step": 717} +{"train_info/time_between_train_steps": 0.005065202713012695, "step": 717} +{"info/global_step": 718, "train_info/time_within_train_step": 13.023218154907227, "step": 718} +{"train_info/time_between_train_steps": 0.0059092044830322266, "step": 718} +{"info/global_step": 719, "train_info/time_within_train_step": 13.041162252426147, "step": 719} +{"train_info/time_between_train_steps": 0.0062792301177978516, "step": 719} +{"info/global_step": 720, "train_info/time_within_train_step": 13.069238185882568, "step": 720} +{"train_info/time_between_train_steps": 0.006291866302490234, "step": 720} +{"train_info/time_between_train_steps": 7.7757978439331055, "step": 720} +{"info/global_step": 721, "train_info/time_within_train_step": 13.015496015548706, "step": 721} +{"train_info/time_between_train_steps": 0.004467010498046875, "step": 721} +{"info/global_step": 722, "train_info/time_within_train_step": 13.103568077087402, "step": 722} +{"train_info/time_between_train_steps": 0.004784107208251953, "step": 722} +{"info/global_step": 723, "train_info/time_within_train_step": 13.0208580493927, "step": 723} +{"train_info/time_between_train_steps": 0.005542278289794922, "step": 723} +{"info/global_step": 724, "train_info/time_within_train_step": 13.159507751464844, "step": 724} +{"train_info/time_between_train_steps": 0.005711078643798828, "step": 724} +{"info/global_step": 725, "train_info/time_within_train_step": 13.131777048110962, "step": 725} +{"train_info/time_between_train_steps": 0.005639791488647461, "step": 725} +{"info/global_step": 726, "train_info/time_within_train_step": 13.12767481803894, "step": 726} +{"train_info/time_between_train_steps": 0.0051310062408447266, "step": 726} +{"info/global_step": 727, "train_info/time_within_train_step": 13.029257535934448, "step": 727} +{"train_info/time_between_train_steps": 0.0049021244049072266, "step": 727} +{"info/global_step": 728, "train_info/time_within_train_step": 13.126893043518066, "step": 728} +{"train_info/time_between_train_steps": 0.005562782287597656, "step": 728} +{"info/global_step": 729, "train_info/time_within_train_step": 13.022771835327148, "step": 729} +{"train_info/time_between_train_steps": 0.005052089691162109, "step": 729} +{"info/global_step": 730, "train_info/time_within_train_step": 13.123117446899414, "step": 730} +{"train_info/time_between_train_steps": 0.005715131759643555, "step": 730} +{"info/global_step": 731, "train_info/time_within_train_step": 13.055967807769775, "step": 731} +{"train_info/time_between_train_steps": 0.005131721496582031, "step": 731} +{"info/global_step": 732, "train_info/time_within_train_step": 13.026001214981079, "step": 732} +{"train_info/time_between_train_steps": 0.004919290542602539, "step": 732} +{"info/global_step": 733, "train_info/time_within_train_step": 13.01391053199768, "step": 733} +{"train_info/time_between_train_steps": 0.005682229995727539, "step": 733} +{"info/global_step": 734, "train_info/time_within_train_step": 13.618720531463623, "step": 734} +{"train_info/time_between_train_steps": 0.005525112152099609, "step": 734} +{"info/global_step": 735, "train_info/time_within_train_step": 13.040150165557861, "step": 735} +{"train_info/time_between_train_steps": 0.005045890808105469, "step": 735} +{"info/global_step": 736, "train_info/time_within_train_step": 13.040189027786255, "step": 736} +{"train_info/time_between_train_steps": 0.005318403244018555, "step": 736} +{"info/global_step": 737, "train_info/time_within_train_step": 13.028489351272583, "step": 737} +{"train_info/time_between_train_steps": 0.006199359893798828, "step": 737} +{"info/global_step": 738, "train_info/time_within_train_step": 13.01301622390747, "step": 738} +{"train_info/time_between_train_steps": 0.005455493927001953, "step": 738} +{"info/global_step": 739, "train_info/time_within_train_step": 13.025327444076538, "step": 739} +{"train_info/time_between_train_steps": 0.005467891693115234, "step": 739} +{"info/global_step": 740, "train_info/time_within_train_step": 13.093577861785889, "step": 740} +{"train_info/time_between_train_steps": 0.0056188106536865234, "step": 740} +{"info/global_step": 741, "train_info/time_within_train_step": 13.025347709655762, "step": 741} +{"train_info/time_between_train_steps": 0.005754232406616211, "step": 741} +{"info/global_step": 742, "train_info/time_within_train_step": 13.026169776916504, "step": 742} +{"train_info/time_between_train_steps": 0.005640983581542969, "step": 742} +{"info/global_step": 743, "train_info/time_within_train_step": 13.042688131332397, "step": 743} +{"train_info/time_between_train_steps": 0.00541996955871582, "step": 743} +{"info/global_step": 744, "train_info/time_within_train_step": 13.018393754959106, "step": 744} +{"train_info/time_between_train_steps": 0.0052030086517333984, "step": 744} +{"info/global_step": 745, "train_info/time_within_train_step": 13.012619972229004, "step": 745} +{"train_info/time_between_train_steps": 0.004864692687988281, "step": 745} +{"info/global_step": 746, "train_info/time_within_train_step": 13.037690162658691, "step": 746} +{"train_info/time_between_train_steps": 0.005691051483154297, "step": 746} +{"info/global_step": 747, "train_info/time_within_train_step": 13.053096055984497, "step": 747} +{"train_info/time_between_train_steps": 0.00590205192565918, "step": 747} +{"info/global_step": 748, "train_info/time_within_train_step": 13.058194160461426, "step": 748} +{"train_info/time_between_train_steps": 0.005820751190185547, "step": 748} +{"info/global_step": 749, "train_info/time_within_train_step": 13.05649733543396, "step": 749} +{"train_info/time_between_train_steps": 0.005698680877685547, "step": 749} +{"info/global_step": 750, "train_info/time_within_train_step": 13.068169832229614, "step": 750} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737377710, "_runtime": 11705}, "step": 750} +{"logs": {"train/loss": 3.7631, "train/learning_rate": 0.00025, "train/epoch": 24.02, "_timestamp": 1737377710, "_runtime": 11705}, "step": 750} +{"train_info/time_between_train_steps": 0.07217931747436523, "step": 750} +{"train_info/time_between_train_steps": 7.8975605964660645, "step": 750} +{"info/global_step": 751, "train_info/time_within_train_step": 13.006887674331665, "step": 751} +{"train_info/time_between_train_steps": 0.005469560623168945, "step": 751} +{"info/global_step": 752, "train_info/time_within_train_step": 13.132763147354126, "step": 752} +{"train_info/time_between_train_steps": 0.004843473434448242, "step": 752} +{"info/global_step": 753, "train_info/time_within_train_step": 13.020084619522095, "step": 753} +{"train_info/time_between_train_steps": 0.005345582962036133, "step": 753} +{"info/global_step": 754, "train_info/time_within_train_step": 13.136696815490723, "step": 754} +{"train_info/time_between_train_steps": 0.0058171749114990234, "step": 754} +{"info/global_step": 755, "train_info/time_within_train_step": 13.026458263397217, "step": 755} +{"train_info/time_between_train_steps": 0.005215883255004883, "step": 755} +{"info/global_step": 756, "train_info/time_within_train_step": 13.22672963142395, "step": 756} +{"train_info/time_between_train_steps": 0.005454301834106445, "step": 756} +{"info/global_step": 757, "train_info/time_within_train_step": 13.049198865890503, "step": 757} +{"train_info/time_between_train_steps": 0.005177497863769531, "step": 757} +{"info/global_step": 758, "train_info/time_within_train_step": 13.144203424453735, "step": 758} +{"train_info/time_between_train_steps": 0.005582571029663086, "step": 758} +{"info/global_step": 759, "train_info/time_within_train_step": 13.98815393447876, "step": 759} +{"train_info/time_between_train_steps": 0.005174398422241211, "step": 759} +{"info/global_step": 760, "train_info/time_within_train_step": 14.554157972335815, "step": 760} +{"train_info/time_between_train_steps": 0.00557398796081543, "step": 760} +{"info/global_step": 761, "train_info/time_within_train_step": 13.038557291030884, "step": 761} +{"train_info/time_between_train_steps": 0.005791902542114258, "step": 761} +{"info/global_step": 762, "train_info/time_within_train_step": 13.018309831619263, "step": 762} +{"train_info/time_between_train_steps": 0.004929542541503906, "step": 762} +{"info/global_step": 763, "train_info/time_within_train_step": 14.318722248077393, "step": 763} +{"train_info/time_between_train_steps": 0.0048542022705078125, "step": 763} +{"info/global_step": 764, "train_info/time_within_train_step": 17.981542825698853, "step": 764} +{"train_info/time_between_train_steps": 0.004841327667236328, "step": 764} +{"info/global_step": 765, "train_info/time_within_train_step": 13.020484685897827, "step": 765} +{"train_info/time_between_train_steps": 0.0048406124114990234, "step": 765} +{"info/global_step": 766, "train_info/time_within_train_step": 12.989160299301147, "step": 766} +{"train_info/time_between_train_steps": 0.005423069000244141, "step": 766} +{"info/global_step": 767, "train_info/time_within_train_step": 14.769330739974976, "step": 767} +{"train_info/time_between_train_steps": 0.004799842834472656, "step": 767} +{"info/global_step": 768, "train_info/time_within_train_step": 24.837547302246094, "step": 768} +{"train_info/time_between_train_steps": 0.005187511444091797, "step": 768} +{"info/global_step": 769, "train_info/time_within_train_step": 12.982853889465332, "step": 769} +{"train_info/time_between_train_steps": 0.004880189895629883, "step": 769} +{"info/global_step": 770, "train_info/time_within_train_step": 17.371551275253296, "step": 770} +{"train_info/time_between_train_steps": 0.0048828125, "step": 770} +{"info/global_step": 771, "train_info/time_within_train_step": 25.17817711830139, "step": 771} +{"train_info/time_between_train_steps": 0.0045964717864990234, "step": 771} +{"info/global_step": 772, "train_info/time_within_train_step": 12.977105617523193, "step": 772} +{"train_info/time_between_train_steps": 0.004690885543823242, "step": 772} +{"info/global_step": 773, "train_info/time_within_train_step": 14.545385599136353, "step": 773} +{"train_info/time_between_train_steps": 0.005135774612426758, "step": 773} +{"info/global_step": 774, "train_info/time_within_train_step": 18.07632303237915, "step": 774} +{"train_info/time_between_train_steps": 0.0048367977142333984, "step": 774} +{"info/global_step": 775, "train_info/time_within_train_step": 12.99076223373413, "step": 775} +{"train_info/time_between_train_steps": 0.004615306854248047, "step": 775} +{"info/global_step": 776, "train_info/time_within_train_step": 12.9894380569458, "step": 776} +{"train_info/time_between_train_steps": 0.004767417907714844, "step": 776} +{"info/global_step": 777, "train_info/time_within_train_step": 15.580138683319092, "step": 777} +{"train_info/time_between_train_steps": 0.004963874816894531, "step": 777} +{"info/global_step": 778, "train_info/time_within_train_step": 15.688518762588501, "step": 778} +{"train_info/time_between_train_steps": 0.005362749099731445, "step": 778} +{"info/global_step": 779, "train_info/time_within_train_step": 13.015007495880127, "step": 779} +{"train_info/time_between_train_steps": 0.005426883697509766, "step": 779} +{"info/global_step": 780, "train_info/time_within_train_step": 13.41140365600586, "step": 780} +{"train_info/time_between_train_steps": 0.005662679672241211, "step": 780} +{"train_info/time_between_train_steps": 12.85236406326294, "step": 780} +{"info/global_step": 781, "train_info/time_within_train_step": 12.98040509223938, "step": 781} +{"train_info/time_between_train_steps": 0.005225181579589844, "step": 781} +{"info/global_step": 782, "train_info/time_within_train_step": 13.07252812385559, "step": 782} +{"train_info/time_between_train_steps": 0.005500078201293945, "step": 782} +{"info/global_step": 783, "train_info/time_within_train_step": 12.99971342086792, "step": 783} +{"train_info/time_between_train_steps": 0.005661964416503906, "step": 783} +{"info/global_step": 784, "train_info/time_within_train_step": 13.115437984466553, "step": 784} +{"train_info/time_between_train_steps": 0.005576610565185547, "step": 784} +{"info/global_step": 785, "train_info/time_within_train_step": 13.019968509674072, "step": 785} +{"train_info/time_between_train_steps": 0.005820035934448242, "step": 785} +{"info/global_step": 786, "train_info/time_within_train_step": 13.212112665176392, "step": 786} +{"train_info/time_between_train_steps": 0.0054607391357421875, "step": 786} +{"info/global_step": 787, "train_info/time_within_train_step": 13.069997549057007, "step": 787} +{"train_info/time_between_train_steps": 0.005267143249511719, "step": 787} +{"info/global_step": 788, "train_info/time_within_train_step": 13.110558986663818, "step": 788} +{"train_info/time_between_train_steps": 0.0056378841400146484, "step": 788} +{"info/global_step": 789, "train_info/time_within_train_step": 13.031593799591064, "step": 789} +{"train_info/time_between_train_steps": 0.0053691864013671875, "step": 789} +{"info/global_step": 790, "train_info/time_within_train_step": 13.098177433013916, "step": 790} +{"train_info/time_between_train_steps": 0.0057582855224609375, "step": 790} +{"info/global_step": 791, "train_info/time_within_train_step": 13.062936544418335, "step": 791} +{"train_info/time_between_train_steps": 0.005717754364013672, "step": 791} +{"info/global_step": 792, "train_info/time_within_train_step": 13.008624792098999, "step": 792} +{"train_info/time_between_train_steps": 0.005178928375244141, "step": 792} +{"info/global_step": 793, "train_info/time_within_train_step": 13.021642684936523, "step": 793} +{"train_info/time_between_train_steps": 0.005406856536865234, "step": 793} +{"info/global_step": 794, "train_info/time_within_train_step": 13.027524948120117, "step": 794} +{"train_info/time_between_train_steps": 0.005185842514038086, "step": 794} +{"info/global_step": 795, "train_info/time_within_train_step": 13.019142389297485, "step": 795} +{"train_info/time_between_train_steps": 0.004904985427856445, "step": 795} +{"info/global_step": 796, "train_info/time_within_train_step": 13.01145076751709, "step": 796} +{"train_info/time_between_train_steps": 0.005643606185913086, "step": 796} +{"info/global_step": 797, "train_info/time_within_train_step": 13.015268087387085, "step": 797} +{"train_info/time_between_train_steps": 0.005611896514892578, "step": 797} +{"info/global_step": 798, "train_info/time_within_train_step": 13.01565670967102, "step": 798} +{"train_info/time_between_train_steps": 0.005548715591430664, "step": 798} +{"info/global_step": 799, "train_info/time_within_train_step": 13.032884120941162, "step": 799} +{"train_info/time_between_train_steps": 0.005481719970703125, "step": 799} +{"info/global_step": 800, "train_info/time_within_train_step": 13.068138360977173, "step": 800} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737378473, "_runtime": 12468}, "step": 800} +{"logs": {"train/loss": 3.7075, "train/learning_rate": 0.00022222222222222218, "train/epoch": 26.02, "_timestamp": 1737378473, "_runtime": 12468}, "step": 800} +{"train_info/time_between_train_steps": 104.04005742073059, "step": 800} +{"info/global_step": 801, "train_info/time_within_train_step": 14.976124048233032, "step": 801} +{"train_info/time_between_train_steps": 0.0047969818115234375, "step": 801} +{"info/global_step": 802, "train_info/time_within_train_step": 13.05647087097168, "step": 802} +{"train_info/time_between_train_steps": 0.0050008296966552734, "step": 802} +{"info/global_step": 803, "train_info/time_within_train_step": 13.507124662399292, "step": 803} +{"train_info/time_between_train_steps": 0.005304574966430664, "step": 803} +{"info/global_step": 804, "train_info/time_within_train_step": 12.987210273742676, "step": 804} +{"train_info/time_between_train_steps": 0.0054738521575927734, "step": 804} +{"info/global_step": 805, "train_info/time_within_train_step": 13.04529619216919, "step": 805} +{"train_info/time_between_train_steps": 0.004929542541503906, "step": 805} +{"info/global_step": 806, "train_info/time_within_train_step": 12.992074251174927, "step": 806} +{"train_info/time_between_train_steps": 0.005488872528076172, "step": 806} +{"info/global_step": 807, "train_info/time_within_train_step": 13.01206374168396, "step": 807} +{"train_info/time_between_train_steps": 0.005219459533691406, "step": 807} +{"info/global_step": 808, "train_info/time_within_train_step": 13.015626668930054, "step": 808} +{"train_info/time_between_train_steps": 0.005750417709350586, "step": 808} +{"info/global_step": 809, "train_info/time_within_train_step": 13.02626085281372, "step": 809} +{"train_info/time_between_train_steps": 0.006180286407470703, "step": 809} +{"info/global_step": 810, "train_info/time_within_train_step": 13.070327520370483, "step": 810} +{"train_info/time_between_train_steps": 0.0054357051849365234, "step": 810} +{"train_info/time_between_train_steps": 8.104780912399292, "step": 810} +{"info/global_step": 811, "train_info/time_within_train_step": 12.963130235671997, "step": 811} +{"train_info/time_between_train_steps": 0.005083322525024414, "step": 811} +{"info/global_step": 812, "train_info/time_within_train_step": 13.087512493133545, "step": 812} +{"train_info/time_between_train_steps": 0.0053310394287109375, "step": 812} +{"info/global_step": 813, "train_info/time_within_train_step": 13.059963941574097, "step": 813} +{"train_info/time_between_train_steps": 0.0052187442779541016, "step": 813} +{"info/global_step": 814, "train_info/time_within_train_step": 13.122923135757446, "step": 814} +{"train_info/time_between_train_steps": 0.005319356918334961, "step": 814} +{"info/global_step": 815, "train_info/time_within_train_step": 12.99011492729187, "step": 815} +{"train_info/time_between_train_steps": 0.0051157474517822266, "step": 815} +{"info/global_step": 816, "train_info/time_within_train_step": 13.087156057357788, "step": 816} +{"train_info/time_between_train_steps": 0.0050356388092041016, "step": 816} +{"info/global_step": 817, "train_info/time_within_train_step": 13.072556018829346, "step": 817} +{"train_info/time_between_train_steps": 0.005244255065917969, "step": 817} +{"info/global_step": 818, "train_info/time_within_train_step": 13.107254266738892, "step": 818} +{"train_info/time_between_train_steps": 0.005467414855957031, "step": 818} +{"info/global_step": 819, "train_info/time_within_train_step": 12.986628293991089, "step": 819} +{"train_info/time_between_train_steps": 0.005409717559814453, "step": 819} +{"info/global_step": 820, "train_info/time_within_train_step": 13.196943998336792, "step": 820} +{"train_info/time_between_train_steps": 0.005675315856933594, "step": 820} +{"info/global_step": 821, "train_info/time_within_train_step": 13.013270139694214, "step": 821} +{"train_info/time_between_train_steps": 0.00498509407043457, "step": 821} +{"info/global_step": 822, "train_info/time_within_train_step": 12.991003513336182, "step": 822} +{"train_info/time_between_train_steps": 0.0053424835205078125, "step": 822} +{"info/global_step": 823, "train_info/time_within_train_step": 13.005790710449219, "step": 823} +{"train_info/time_between_train_steps": 0.0055730342864990234, "step": 823} +{"info/global_step": 824, "train_info/time_within_train_step": 12.995970010757446, "step": 824} +{"train_info/time_between_train_steps": 0.0050313472747802734, "step": 824} +{"info/global_step": 825, "train_info/time_within_train_step": 12.991931438446045, "step": 825} +{"train_info/time_between_train_steps": 0.005223512649536133, "step": 825} +{"info/global_step": 826, "train_info/time_within_train_step": 12.993844032287598, "step": 826} +{"train_info/time_between_train_steps": 0.00529789924621582, "step": 826} +{"info/global_step": 827, "train_info/time_within_train_step": 13.013662815093994, "step": 827} +{"train_info/time_between_train_steps": 0.005787849426269531, "step": 827} +{"info/global_step": 828, "train_info/time_within_train_step": 12.996577739715576, "step": 828} +{"train_info/time_between_train_steps": 0.005330324172973633, "step": 828} +{"info/global_step": 829, "train_info/time_within_train_step": 12.996402263641357, "step": 829} +{"train_info/time_between_train_steps": 0.004890918731689453, "step": 829} +{"info/global_step": 830, "train_info/time_within_train_step": 13.004582643508911, "step": 830} +{"train_info/time_between_train_steps": 0.0053920745849609375, "step": 830} +{"info/global_step": 831, "train_info/time_within_train_step": 17.97073531150818, "step": 831} +{"train_info/time_between_train_steps": 0.004546642303466797, "step": 831} +{"info/global_step": 832, "train_info/time_within_train_step": 12.977284908294678, "step": 832} +{"train_info/time_between_train_steps": 0.0046977996826171875, "step": 832} +{"info/global_step": 833, "train_info/time_within_train_step": 13.03803300857544, "step": 833} +{"train_info/time_between_train_steps": 0.005457878112792969, "step": 833} +{"info/global_step": 834, "train_info/time_within_train_step": 17.818655490875244, "step": 834} +{"train_info/time_between_train_steps": 0.004740238189697266, "step": 834} +{"info/global_step": 835, "train_info/time_within_train_step": 16.547306537628174, "step": 835} +{"train_info/time_between_train_steps": 0.004760265350341797, "step": 835} +{"info/global_step": 836, "train_info/time_within_train_step": 12.97019910812378, "step": 836} +{"train_info/time_between_train_steps": 0.0050275325775146484, "step": 836} +{"info/global_step": 837, "train_info/time_within_train_step": 13.409333229064941, "step": 837} +{"train_info/time_between_train_steps": 0.00521087646484375, "step": 837} +{"info/global_step": 838, "train_info/time_within_train_step": 12.978322267532349, "step": 838} +{"train_info/time_between_train_steps": 0.006067037582397461, "step": 838} +{"info/global_step": 839, "train_info/time_within_train_step": 13.010533571243286, "step": 839} +{"train_info/time_between_train_steps": 0.006243228912353516, "step": 839} +{"info/global_step": 840, "train_info/time_within_train_step": 13.018042802810669, "step": 840} +{"train_info/time_between_train_steps": 0.00623321533203125, "step": 840} +{"train_info/time_between_train_steps": 7.742202281951904, "step": 840} +{"info/global_step": 841, "train_info/time_within_train_step": 12.955927848815918, "step": 841} +{"train_info/time_between_train_steps": 0.004967927932739258, "step": 841} +{"info/global_step": 842, "train_info/time_within_train_step": 13.071014165878296, "step": 842} +{"train_info/time_between_train_steps": 0.004935741424560547, "step": 842} +{"info/global_step": 843, "train_info/time_within_train_step": 13.052449226379395, "step": 843} +{"train_info/time_between_train_steps": 0.005728006362915039, "step": 843} +{"info/global_step": 844, "train_info/time_within_train_step": 13.119825839996338, "step": 844} +{"train_info/time_between_train_steps": 0.0052225589752197266, "step": 844} +{"info/global_step": 845, "train_info/time_within_train_step": 13.008897066116333, "step": 845} +{"train_info/time_between_train_steps": 0.005749225616455078, "step": 845} +{"info/global_step": 846, "train_info/time_within_train_step": 13.105140686035156, "step": 846} +{"train_info/time_between_train_steps": 0.004935503005981445, "step": 846} +{"info/global_step": 847, "train_info/time_within_train_step": 13.021302700042725, "step": 847} +{"train_info/time_between_train_steps": 0.005076408386230469, "step": 847} +{"info/global_step": 848, "train_info/time_within_train_step": 13.166959524154663, "step": 848} +{"train_info/time_between_train_steps": 0.005465507507324219, "step": 848} +{"info/global_step": 849, "train_info/time_within_train_step": 13.008014440536499, "step": 849} +{"train_info/time_between_train_steps": 0.004785776138305664, "step": 849} +{"info/global_step": 850, "train_info/time_within_train_step": 13.06976056098938, "step": 850} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737379278, "_runtime": 13273}, "step": 850} +{"logs": {"train/loss": 3.6326, "train/learning_rate": 0.00019444444444444443, "train/epoch": 28.01, "_timestamp": 1737379278, "_runtime": 13273}, "step": 850} +{"train_info/time_between_train_steps": 0.09793972969055176, "step": 850} +{"info/global_step": 851, "train_info/time_within_train_step": 13.047325611114502, "step": 851} +{"train_info/time_between_train_steps": 0.005675077438354492, "step": 851} +{"info/global_step": 852, "train_info/time_within_train_step": 12.998951196670532, "step": 852} +{"train_info/time_between_train_steps": 0.004910469055175781, "step": 852} +{"info/global_step": 853, "train_info/time_within_train_step": 12.999556541442871, "step": 853} +{"train_info/time_between_train_steps": 0.005083799362182617, "step": 853} +{"info/global_step": 854, "train_info/time_within_train_step": 13.016993999481201, "step": 854} +{"train_info/time_between_train_steps": 0.0053827762603759766, "step": 854} +{"info/global_step": 855, "train_info/time_within_train_step": 14.12927794456482, "step": 855} +{"train_info/time_between_train_steps": 0.005544900894165039, "step": 855} +{"info/global_step": 856, "train_info/time_within_train_step": 18.927215814590454, "step": 856} +{"train_info/time_between_train_steps": 0.005062103271484375, "step": 856} +{"info/global_step": 857, "train_info/time_within_train_step": 12.974202394485474, "step": 857} +{"train_info/time_between_train_steps": 0.004990816116333008, "step": 857} +{"info/global_step": 858, "train_info/time_within_train_step": 14.84046220779419, "step": 858} +{"train_info/time_between_train_steps": 0.0052089691162109375, "step": 858} +{"info/global_step": 859, "train_info/time_within_train_step": 19.868870735168457, "step": 859} +{"train_info/time_between_train_steps": 0.005003690719604492, "step": 859} +{"info/global_step": 860, "train_info/time_within_train_step": 12.958912134170532, "step": 860} +{"train_info/time_between_train_steps": 0.004863262176513672, "step": 860} +{"info/global_step": 861, "train_info/time_within_train_step": 12.953283548355103, "step": 861} +{"train_info/time_between_train_steps": 0.005440950393676758, "step": 861} +{"info/global_step": 862, "train_info/time_within_train_step": 12.998624563217163, "step": 862} +{"train_info/time_between_train_steps": 0.0049517154693603516, "step": 862} +{"info/global_step": 863, "train_info/time_within_train_step": 12.995371341705322, "step": 863} +{"train_info/time_between_train_steps": 0.004851341247558594, "step": 863} +{"info/global_step": 864, "train_info/time_within_train_step": 13.07816457748413, "step": 864} +{"train_info/time_between_train_steps": 0.0056455135345458984, "step": 864} +{"info/global_step": 865, "train_info/time_within_train_step": 12.978121280670166, "step": 865} +{"train_info/time_between_train_steps": 0.004862308502197266, "step": 865} +{"info/global_step": 866, "train_info/time_within_train_step": 15.333416938781738, "step": 866} +{"train_info/time_between_train_steps": 0.005713701248168945, "step": 866} +{"info/global_step": 867, "train_info/time_within_train_step": 22.018741369247437, "step": 867} +{"train_info/time_between_train_steps": 0.005173444747924805, "step": 867} +{"info/global_step": 868, "train_info/time_within_train_step": 12.96665620803833, "step": 868} +{"train_info/time_between_train_steps": 0.005368709564208984, "step": 868} +{"info/global_step": 869, "train_info/time_within_train_step": 12.981268644332886, "step": 869} +{"train_info/time_between_train_steps": 0.00576019287109375, "step": 869} +{"info/global_step": 870, "train_info/time_within_train_step": 20.194130897521973, "step": 870} +{"train_info/time_between_train_steps": 0.005994081497192383, "step": 870} +{"train_info/time_between_train_steps": 29.290184020996094, "step": 870} +{"info/global_step": 871, "train_info/time_within_train_step": 16.5028338432312, "step": 871} +{"train_info/time_between_train_steps": 0.004796028137207031, "step": 871} +{"info/global_step": 872, "train_info/time_within_train_step": 16.53736686706543, "step": 872} +{"train_info/time_between_train_steps": 0.004581451416015625, "step": 872} +{"info/global_step": 873, "train_info/time_within_train_step": 12.937049627304077, "step": 873} +{"train_info/time_between_train_steps": 0.005746126174926758, "step": 873} +{"info/global_step": 874, "train_info/time_within_train_step": 13.036332130432129, "step": 874} +{"train_info/time_between_train_steps": 0.006243705749511719, "step": 874} +{"info/global_step": 875, "train_info/time_within_train_step": 20.252922773361206, "step": 875} +{"train_info/time_between_train_steps": 0.00489497184753418, "step": 875} +{"info/global_step": 876, "train_info/time_within_train_step": 17.55943202972412, "step": 876} +{"train_info/time_between_train_steps": 0.004997730255126953, "step": 876} +{"info/global_step": 877, "train_info/time_within_train_step": 12.944718360900879, "step": 877} +{"train_info/time_between_train_steps": 0.005136966705322266, "step": 877} +{"info/global_step": 878, "train_info/time_within_train_step": 13.847899675369263, "step": 878} +{"train_info/time_between_train_steps": 0.004711151123046875, "step": 878} +{"info/global_step": 879, "train_info/time_within_train_step": 18.379008769989014, "step": 879} +{"train_info/time_between_train_steps": 0.0046999454498291016, "step": 879} +{"info/global_step": 880, "train_info/time_within_train_step": 12.998568534851074, "step": 880} +{"train_info/time_between_train_steps": 0.005343437194824219, "step": 880} +{"info/global_step": 881, "train_info/time_within_train_step": 12.984831809997559, "step": 881} +{"train_info/time_between_train_steps": 0.0045435428619384766, "step": 881} +{"info/global_step": 882, "train_info/time_within_train_step": 17.436264753341675, "step": 882} +{"train_info/time_between_train_steps": 0.004925966262817383, "step": 882} +{"info/global_step": 883, "train_info/time_within_train_step": 14.252572059631348, "step": 883} +{"train_info/time_between_train_steps": 0.004922389984130859, "step": 883} +{"info/global_step": 884, "train_info/time_within_train_step": 12.940516233444214, "step": 884} +{"train_info/time_between_train_steps": 0.0050182342529296875, "step": 884} +{"info/global_step": 885, "train_info/time_within_train_step": 13.224451541900635, "step": 885} +{"train_info/time_between_train_steps": 0.004608154296875, "step": 885} +{"info/global_step": 886, "train_info/time_within_train_step": 14.941817283630371, "step": 886} +{"train_info/time_between_train_steps": 0.004807233810424805, "step": 886} +{"info/global_step": 887, "train_info/time_within_train_step": 13.405850648880005, "step": 887} +{"train_info/time_between_train_steps": 0.004706144332885742, "step": 887} +{"info/global_step": 888, "train_info/time_within_train_step": 12.969630241394043, "step": 888} +{"train_info/time_between_train_steps": 0.004746675491333008, "step": 888} +{"info/global_step": 889, "train_info/time_within_train_step": 13.122013330459595, "step": 889} +{"train_info/time_between_train_steps": 0.00494384765625, "step": 889} +{"info/global_step": 890, "train_info/time_within_train_step": 13.198813915252686, "step": 890} +{"train_info/time_between_train_steps": 0.004918813705444336, "step": 890} +{"info/global_step": 891, "train_info/time_within_train_step": 12.965839624404907, "step": 891} +{"train_info/time_between_train_steps": 0.004578828811645508, "step": 891} +{"info/global_step": 892, "train_info/time_within_train_step": 12.978394985198975, "step": 892} +{"train_info/time_between_train_steps": 0.004869222640991211, "step": 892} +{"info/global_step": 893, "train_info/time_within_train_step": 12.976624250411987, "step": 893} +{"train_info/time_between_train_steps": 0.004752635955810547, "step": 893} +{"info/global_step": 894, "train_info/time_within_train_step": 12.979640245437622, "step": 894} +{"train_info/time_between_train_steps": 0.004961490631103516, "step": 894} +{"info/global_step": 895, "train_info/time_within_train_step": 13.066974878311157, "step": 895} +{"train_info/time_between_train_steps": 0.005345344543457031, "step": 895} +{"info/global_step": 896, "train_info/time_within_train_step": 12.9967200756073, "step": 896} +{"train_info/time_between_train_steps": 0.005043745040893555, "step": 896} +{"info/global_step": 897, "train_info/time_within_train_step": 12.98847484588623, "step": 897} +{"train_info/time_between_train_steps": 0.005612850189208984, "step": 897} +{"info/global_step": 898, "train_info/time_within_train_step": 12.990020513534546, "step": 898} +{"train_info/time_between_train_steps": 0.0055658817291259766, "step": 898} +{"info/global_step": 899, "train_info/time_within_train_step": 13.023046493530273, "step": 899} +{"train_info/time_between_train_steps": 0.005481719970703125, "step": 899} +{"info/global_step": 900, "train_info/time_within_train_step": 13.0292489528656, "step": 900} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737380088, "_runtime": 14083}, "step": 900} +{"logs": {"train/loss": 3.5357, "train/learning_rate": 0.00016666666666666666, "train/epoch": 29.02, "_timestamp": 1737380088, "_runtime": 14083}, "step": 900} +{"train_info/time_between_train_steps": 97.5184178352356, "step": 900} +{"train_info/time_between_train_steps": 108.79406595230103, "step": 900} +{"info/global_step": 901, "train_info/time_within_train_step": 15.209330558776855, "step": 901} +{"train_info/time_between_train_steps": 0.005406856536865234, "step": 901} +{"info/global_step": 902, "train_info/time_within_train_step": 13.019289016723633, "step": 902} +{"train_info/time_between_train_steps": 0.005003929138183594, "step": 902} +{"info/global_step": 903, "train_info/time_within_train_step": 12.93882942199707, "step": 903} +{"train_info/time_between_train_steps": 0.005110740661621094, "step": 903} +{"info/global_step": 904, "train_info/time_within_train_step": 13.05979609489441, "step": 904} +{"train_info/time_between_train_steps": 0.005387783050537109, "step": 904} +{"info/global_step": 905, "train_info/time_within_train_step": 12.959559917449951, "step": 905} +{"train_info/time_between_train_steps": 0.004556894302368164, "step": 905} +{"info/global_step": 906, "train_info/time_within_train_step": 13.04895281791687, "step": 906} +{"train_info/time_between_train_steps": 0.00475001335144043, "step": 906} +{"info/global_step": 907, "train_info/time_within_train_step": 12.967908382415771, "step": 907} +{"train_info/time_between_train_steps": 0.0048618316650390625, "step": 907} +{"info/global_step": 908, "train_info/time_within_train_step": 13.06679105758667, "step": 908} +{"train_info/time_between_train_steps": 0.004555225372314453, "step": 908} +{"info/global_step": 909, "train_info/time_within_train_step": 12.992424249649048, "step": 909} +{"train_info/time_between_train_steps": 0.0060727596282958984, "step": 909} +{"info/global_step": 910, "train_info/time_within_train_step": 13.124534368515015, "step": 910} +{"train_info/time_between_train_steps": 0.005280256271362305, "step": 910} +{"info/global_step": 911, "train_info/time_within_train_step": 12.996397495269775, "step": 911} +{"train_info/time_between_train_steps": 0.004651546478271484, "step": 911} +{"info/global_step": 912, "train_info/time_within_train_step": 12.981605291366577, "step": 912} +{"train_info/time_between_train_steps": 0.004569292068481445, "step": 912} +{"info/global_step": 913, "train_info/time_within_train_step": 12.9762544631958, "step": 913} +{"train_info/time_between_train_steps": 0.005220174789428711, "step": 913} +{"info/global_step": 914, "train_info/time_within_train_step": 12.999034643173218, "step": 914} +{"train_info/time_between_train_steps": 0.0047016143798828125, "step": 914} +{"info/global_step": 915, "train_info/time_within_train_step": 12.978872060775757, "step": 915} +{"train_info/time_between_train_steps": 0.004557132720947266, "step": 915} +{"info/global_step": 916, "train_info/time_within_train_step": 12.977731466293335, "step": 916} +{"train_info/time_between_train_steps": 0.005190372467041016, "step": 916} +{"info/global_step": 917, "train_info/time_within_train_step": 12.99418330192566, "step": 917} +{"train_info/time_between_train_steps": 0.0049664974212646484, "step": 917} +{"info/global_step": 918, "train_info/time_within_train_step": 12.989026546478271, "step": 918} +{"train_info/time_between_train_steps": 0.004972696304321289, "step": 918} +{"info/global_step": 919, "train_info/time_within_train_step": 12.979138374328613, "step": 919} +{"train_info/time_between_train_steps": 0.005524158477783203, "step": 919} +{"info/global_step": 920, "train_info/time_within_train_step": 12.98080039024353, "step": 920} +{"train_info/time_between_train_steps": 0.0046231746673583984, "step": 920} +{"info/global_step": 921, "train_info/time_within_train_step": 12.97853970527649, "step": 921} +{"train_info/time_between_train_steps": 0.005130767822265625, "step": 921} +{"info/global_step": 922, "train_info/time_within_train_step": 12.98583984375, "step": 922} +{"train_info/time_between_train_steps": 0.005378007888793945, "step": 922} +{"info/global_step": 923, "train_info/time_within_train_step": 13.00447964668274, "step": 923} +{"train_info/time_between_train_steps": 0.005758523941040039, "step": 923} +{"info/global_step": 924, "train_info/time_within_train_step": 12.99502944946289, "step": 924} +{"train_info/time_between_train_steps": 0.0054340362548828125, "step": 924} +{"info/global_step": 925, "train_info/time_within_train_step": 13.0593581199646, "step": 925} +{"train_info/time_between_train_steps": 0.005410909652709961, "step": 925} +{"info/global_step": 926, "train_info/time_within_train_step": 12.984760522842407, "step": 926} +{"train_info/time_between_train_steps": 0.005249977111816406, "step": 926} +{"info/global_step": 927, "train_info/time_within_train_step": 12.986696481704712, "step": 927} +{"train_info/time_between_train_steps": 0.005579233169555664, "step": 927} +{"info/global_step": 928, "train_info/time_within_train_step": 13.098809003829956, "step": 928} +{"train_info/time_between_train_steps": 0.0049550533294677734, "step": 928} +{"info/global_step": 929, "train_info/time_within_train_step": 12.994952917098999, "step": 929} +{"train_info/time_between_train_steps": 0.0054547786712646484, "step": 929} +{"info/global_step": 930, "train_info/time_within_train_step": 13.00856065750122, "step": 930} +{"train_info/time_between_train_steps": 0.005033254623413086, "step": 930} +{"train_info/time_between_train_steps": 7.760239124298096, "step": 930} +{"info/global_step": 931, "train_info/time_within_train_step": 15.871581315994263, "step": 931} +{"train_info/time_between_train_steps": 0.005221366882324219, "step": 931} +{"info/global_step": 932, "train_info/time_within_train_step": 14.886690616607666, "step": 932} +{"train_info/time_between_train_steps": 0.005044221878051758, "step": 932} +{"info/global_step": 933, "train_info/time_within_train_step": 12.984921216964722, "step": 933} +{"train_info/time_between_train_steps": 0.0050199031829833984, "step": 933} +{"info/global_step": 934, "train_info/time_within_train_step": 13.412532806396484, "step": 934} +{"train_info/time_between_train_steps": 0.005299568176269531, "step": 934} +{"info/global_step": 935, "train_info/time_within_train_step": 18.430609226226807, "step": 935} +{"train_info/time_between_train_steps": 0.004746437072753906, "step": 935} +{"info/global_step": 936, "train_info/time_within_train_step": 13.094284534454346, "step": 936} +{"train_info/time_between_train_steps": 0.0049474239349365234, "step": 936} +{"info/global_step": 937, "train_info/time_within_train_step": 12.983896493911743, "step": 937} +{"train_info/time_between_train_steps": 0.004945278167724609, "step": 937} +{"info/global_step": 938, "train_info/time_within_train_step": 13.091728925704956, "step": 938} +{"train_info/time_between_train_steps": 0.0049283504486083984, "step": 938} +{"info/global_step": 939, "train_info/time_within_train_step": 12.993985891342163, "step": 939} +{"train_info/time_between_train_steps": 0.005033731460571289, "step": 939} +{"info/global_step": 940, "train_info/time_within_train_step": 13.077574253082275, "step": 940} +{"train_info/time_between_train_steps": 0.005440950393676758, "step": 940} +{"info/global_step": 941, "train_info/time_within_train_step": 13.087146997451782, "step": 941} +{"train_info/time_between_train_steps": 0.005228996276855469, "step": 941} +{"info/global_step": 942, "train_info/time_within_train_step": 12.989482641220093, "step": 942} +{"train_info/time_between_train_steps": 0.005139350891113281, "step": 942} +{"info/global_step": 943, "train_info/time_within_train_step": 12.998308658599854, "step": 943} +{"train_info/time_between_train_steps": 0.004790067672729492, "step": 943} +{"info/global_step": 944, "train_info/time_within_train_step": 13.012748718261719, "step": 944} +{"train_info/time_between_train_steps": 0.005132913589477539, "step": 944} +{"info/global_step": 945, "train_info/time_within_train_step": 13.035101652145386, "step": 945} +{"train_info/time_between_train_steps": 0.005374431610107422, "step": 945} +{"info/global_step": 946, "train_info/time_within_train_step": 13.003156900405884, "step": 946} +{"train_info/time_between_train_steps": 0.005410909652709961, "step": 946} +{"info/global_step": 947, "train_info/time_within_train_step": 13.001580238342285, "step": 947} +{"train_info/time_between_train_steps": 0.005656719207763672, "step": 947} +{"info/global_step": 948, "train_info/time_within_train_step": 12.991198778152466, "step": 948} +{"train_info/time_between_train_steps": 0.005516767501831055, "step": 948} +{"info/global_step": 949, "train_info/time_within_train_step": 13.004507780075073, "step": 949} +{"train_info/time_between_train_steps": 0.0054285526275634766, "step": 949} +{"info/global_step": 950, "train_info/time_within_train_step": 13.004866600036621, "step": 950} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737380880, "_runtime": 14875}, "step": 950} +{"logs": {"train/loss": 3.5105, "train/learning_rate": 0.0001388888888888889, "train/epoch": 31.02, "_timestamp": 1737380880, "_runtime": 14875}, "step": 950} +{"train_info/time_between_train_steps": 0.07033419609069824, "step": 950} +{"info/global_step": 951, "train_info/time_within_train_step": 13.019597291946411, "step": 951} +{"train_info/time_between_train_steps": 0.004663944244384766, "step": 951} +{"info/global_step": 952, "train_info/time_within_train_step": 13.022769212722778, "step": 952} +{"train_info/time_between_train_steps": 0.004503726959228516, "step": 952} +{"info/global_step": 953, "train_info/time_within_train_step": 13.019172668457031, "step": 953} +{"train_info/time_between_train_steps": 0.005099773406982422, "step": 953} +{"info/global_step": 954, "train_info/time_within_train_step": 13.024963855743408, "step": 954} +{"train_info/time_between_train_steps": 0.005053281784057617, "step": 954} +{"info/global_step": 955, "train_info/time_within_train_step": 13.024720191955566, "step": 955} +{"train_info/time_between_train_steps": 0.0049169063568115234, "step": 955} +{"info/global_step": 956, "train_info/time_within_train_step": 13.110880374908447, "step": 956} +{"train_info/time_between_train_steps": 0.005030393600463867, "step": 956} +{"info/global_step": 957, "train_info/time_within_train_step": 15.448240280151367, "step": 957} +{"train_info/time_between_train_steps": 0.004950046539306641, "step": 957} +{"info/global_step": 958, "train_info/time_within_train_step": 17.64100980758667, "step": 958} +{"train_info/time_between_train_steps": 0.005597591400146484, "step": 958} +{"info/global_step": 959, "train_info/time_within_train_step": 13.013387441635132, "step": 959} +{"train_info/time_between_train_steps": 0.005404233932495117, "step": 959} +{"info/global_step": 960, "train_info/time_within_train_step": 13.03057312965393, "step": 960} +{"train_info/time_between_train_steps": 0.0064296722412109375, "step": 960} +{"train_info/time_between_train_steps": 7.8790364265441895, "step": 960} +{"info/global_step": 961, "train_info/time_within_train_step": 13.00424313545227, "step": 961} +{"train_info/time_between_train_steps": 0.005298614501953125, "step": 961} +{"info/global_step": 962, "train_info/time_within_train_step": 13.139286756515503, "step": 962} +{"train_info/time_between_train_steps": 0.005266666412353516, "step": 962} +{"info/global_step": 963, "train_info/time_within_train_step": 13.010072231292725, "step": 963} +{"train_info/time_between_train_steps": 0.005111217498779297, "step": 963} +{"info/global_step": 964, "train_info/time_within_train_step": 13.134515047073364, "step": 964} +{"train_info/time_between_train_steps": 0.005750417709350586, "step": 964} +{"info/global_step": 965, "train_info/time_within_train_step": 13.012860774993896, "step": 965} +{"train_info/time_between_train_steps": 0.0055620670318603516, "step": 965} +{"info/global_step": 966, "train_info/time_within_train_step": 13.489717245101929, "step": 966} +{"train_info/time_between_train_steps": 0.005533933639526367, "step": 966} +{"info/global_step": 967, "train_info/time_within_train_step": 13.029857873916626, "step": 967} +{"train_info/time_between_train_steps": 0.004834651947021484, "step": 967} +{"info/global_step": 968, "train_info/time_within_train_step": 13.117643594741821, "step": 968} +{"train_info/time_between_train_steps": 0.005114078521728516, "step": 968} +{"info/global_step": 969, "train_info/time_within_train_step": 13.889352083206177, "step": 969} +{"train_info/time_between_train_steps": 0.005484580993652344, "step": 969} +{"info/global_step": 970, "train_info/time_within_train_step": 16.80661630630493, "step": 970} +{"train_info/time_between_train_steps": 0.004736423492431641, "step": 970} +{"info/global_step": 971, "train_info/time_within_train_step": 13.097126722335815, "step": 971} +{"train_info/time_between_train_steps": 0.0051403045654296875, "step": 971} +{"info/global_step": 972, "train_info/time_within_train_step": 12.989434957504272, "step": 972} +{"train_info/time_between_train_steps": 0.0047910213470458984, "step": 972} +{"info/global_step": 973, "train_info/time_within_train_step": 13.008618116378784, "step": 973} +{"train_info/time_between_train_steps": 0.004900693893432617, "step": 973} +{"info/global_step": 974, "train_info/time_within_train_step": 13.021658897399902, "step": 974} +{"train_info/time_between_train_steps": 0.004973649978637695, "step": 974} +{"info/global_step": 975, "train_info/time_within_train_step": 13.02013874053955, "step": 975} +{"train_info/time_between_train_steps": 0.00568389892578125, "step": 975} +{"info/global_step": 976, "train_info/time_within_train_step": 13.013357639312744, "step": 976} +{"train_info/time_between_train_steps": 0.005705118179321289, "step": 976} +{"info/global_step": 977, "train_info/time_within_train_step": 13.010288715362549, "step": 977} +{"train_info/time_between_train_steps": 0.004911899566650391, "step": 977} +{"info/global_step": 978, "train_info/time_within_train_step": 13.01778793334961, "step": 978} +{"train_info/time_between_train_steps": 0.004681825637817383, "step": 978} +{"info/global_step": 979, "train_info/time_within_train_step": 13.050071716308594, "step": 979} +{"train_info/time_between_train_steps": 0.0045604705810546875, "step": 979} +{"info/global_step": 980, "train_info/time_within_train_step": 13.012672424316406, "step": 980} +{"train_info/time_between_train_steps": 0.0048351287841796875, "step": 980} +{"info/global_step": 981, "train_info/time_within_train_step": 13.011839151382446, "step": 981} +{"train_info/time_between_train_steps": 0.004598379135131836, "step": 981} +{"info/global_step": 982, "train_info/time_within_train_step": 13.017557382583618, "step": 982} +{"train_info/time_between_train_steps": 0.005310535430908203, "step": 982} +{"info/global_step": 983, "train_info/time_within_train_step": 15.07542109489441, "step": 983} +{"train_info/time_between_train_steps": 0.005362033843994141, "step": 983} +{"info/global_step": 984, "train_info/time_within_train_step": 14.620221138000488, "step": 984} +{"train_info/time_between_train_steps": 0.0046749114990234375, "step": 984} +{"info/global_step": 985, "train_info/time_within_train_step": 12.99897313117981, "step": 985} +{"train_info/time_between_train_steps": 0.004975318908691406, "step": 985} +{"info/global_step": 986, "train_info/time_within_train_step": 13.00103211402893, "step": 986} +{"train_info/time_between_train_steps": 0.004671335220336914, "step": 986} +{"info/global_step": 987, "train_info/time_within_train_step": 15.702970027923584, "step": 987} +{"train_info/time_between_train_steps": 0.005192279815673828, "step": 987} +{"info/global_step": 988, "train_info/time_within_train_step": 13.951901912689209, "step": 988} +{"train_info/time_between_train_steps": 0.0049517154693603516, "step": 988} +{"info/global_step": 989, "train_info/time_within_train_step": 13.005828857421875, "step": 989} +{"train_info/time_between_train_steps": 0.005350351333618164, "step": 989} +{"info/global_step": 990, "train_info/time_within_train_step": 13.030831098556519, "step": 990} +{"train_info/time_between_train_steps": 0.0053539276123046875, "step": 990} +{"train_info/time_between_train_steps": 10.482110977172852, "step": 990} +{"info/global_step": 991, "train_info/time_within_train_step": 24.62568163871765, "step": 991} +{"train_info/time_between_train_steps": 0.005822896957397461, "step": 991} +{"info/global_step": 992, "train_info/time_within_train_step": 13.091625213623047, "step": 992} +{"train_info/time_between_train_steps": 0.0051860809326171875, "step": 992} +{"info/global_step": 993, "train_info/time_within_train_step": 12.99634075164795, "step": 993} +{"train_info/time_between_train_steps": 0.0052337646484375, "step": 993} +{"info/global_step": 994, "train_info/time_within_train_step": 13.093665838241577, "step": 994} +{"train_info/time_between_train_steps": 0.005414724349975586, "step": 994} +{"info/global_step": 995, "train_info/time_within_train_step": 13.007099390029907, "step": 995} +{"train_info/time_between_train_steps": 0.004776954650878906, "step": 995} +{"info/global_step": 996, "train_info/time_within_train_step": 13.09564208984375, "step": 996} +{"train_info/time_between_train_steps": 0.00486302375793457, "step": 996} +{"info/global_step": 997, "train_info/time_within_train_step": 13.00138545036316, "step": 997} +{"train_info/time_between_train_steps": 0.0050699710845947266, "step": 997} +{"info/global_step": 998, "train_info/time_within_train_step": 13.111112594604492, "step": 998} +{"train_info/time_between_train_steps": 0.0050144195556640625, "step": 998} +{"info/global_step": 999, "train_info/time_within_train_step": 13.032111644744873, "step": 999} +{"train_info/time_between_train_steps": 0.005561351776123047, "step": 999} +{"info/global_step": 1000, "train_info/time_within_train_step": 13.083123445510864, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1737381600, "_runtime": 15595}, "step": 1000} +{"logs": {"train/loss": 3.4606, "train/learning_rate": 0.00011111111111111109, "train/epoch": 33.01, "_timestamp": 1737381600, "_runtime": 15595}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 27338.0, "train_info/memory_max_reserved": 27338.0, "_timestamp": 1737381601, "_runtime": 15596}, "step": 1000} +{"logs": {"eval/loss": 4.271039009094238, "eval/runtime": 1.2465, "eval/samples_per_second": 89.851, "eval/steps_per_second": 5.616, "train/epoch": 33.01, "_timestamp": 1737381601, "_runtime": 15596}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 27338.0, "train_info/memory_max_reserved": 27338.0, "_timestamp": 1737381601, "_runtime": 15596}, "step": 1000} +{"logs": {"eval//scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_loss": 4.271039009094238, "eval//scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_ppl": 71.59598586766586, "eval//scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_runtime": 1.2465, "eval//scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 89.851, "train/epoch": 33.01, "_timestamp": 1737381601, "_runtime": 15596}, "step": 1000} +{"train_info/time_between_train_steps": 55.89421486854553, "step": 1000} +{"info/global_step": 1001, "train_info/time_within_train_step": 12.96484375, "step": 1001} +{"train_info/time_between_train_steps": 0.004584789276123047, "step": 1001} +{"info/global_step": 1002, "train_info/time_within_train_step": 13.022959470748901, "step": 1002} +{"train_info/time_between_train_steps": 0.0045299530029296875, "step": 1002} +{"info/global_step": 1003, "train_info/time_within_train_step": 13.543765783309937, "step": 1003} +{"train_info/time_between_train_steps": 0.0049855709075927734, "step": 1003} +{"info/global_step": 1004, "train_info/time_within_train_step": 17.560782432556152, "step": 1004} +{"train_info/time_between_train_steps": 0.005259037017822266, "step": 1004} +{"info/global_step": 1005, "train_info/time_within_train_step": 13.167454719543457, "step": 1005} +{"train_info/time_between_train_steps": 0.0059051513671875, "step": 1005} +{"info/global_step": 1006, "train_info/time_within_train_step": 12.977365016937256, "step": 1006} +{"train_info/time_between_train_steps": 0.005530834197998047, "step": 1006} +{"info/global_step": 1007, "train_info/time_within_train_step": 12.984801292419434, "step": 1007} +{"train_info/time_between_train_steps": 0.004853248596191406, "step": 1007} +{"info/global_step": 1008, "train_info/time_within_train_step": 12.996977806091309, "step": 1008} +{"train_info/time_between_train_steps": 0.005154132843017578, "step": 1008} +{"info/global_step": 1009, "train_info/time_within_train_step": 12.99088978767395, "step": 1009} +{"train_info/time_between_train_steps": 0.004900455474853516, "step": 1009} +{"info/global_step": 1010, "train_info/time_within_train_step": 12.992008447647095, "step": 1010} +{"train_info/time_between_train_steps": 0.004723072052001953, "step": 1010} +{"info/global_step": 1011, "train_info/time_within_train_step": 12.966505289077759, "step": 1011} +{"train_info/time_between_train_steps": 0.0050754547119140625, "step": 1011} +{"info/global_step": 1012, "train_info/time_within_train_step": 12.965208530426025, "step": 1012} +{"train_info/time_between_train_steps": 0.004634380340576172, "step": 1012} +{"info/global_step": 1013, "train_info/time_within_train_step": 12.990440368652344, "step": 1013} +{"train_info/time_between_train_steps": 0.005256175994873047, "step": 1013} +{"info/global_step": 1014, "train_info/time_within_train_step": 12.983600854873657, "step": 1014} +{"train_info/time_between_train_steps": 0.0053272247314453125, "step": 1014} +{"info/global_step": 1015, "train_info/time_within_train_step": 12.977890014648438, "step": 1015} +{"train_info/time_between_train_steps": 0.004677295684814453, "step": 1015} +{"info/global_step": 1016, "train_info/time_within_train_step": 12.98127794265747, "step": 1016} +{"train_info/time_between_train_steps": 0.004837989807128906, "step": 1016} +{"info/global_step": 1017, "train_info/time_within_train_step": 12.994198322296143, "step": 1017} +{"train_info/time_between_train_steps": 0.004790544509887695, "step": 1017} +{"info/global_step": 1018, "train_info/time_within_train_step": 13.071579217910767, "step": 1018} +{"train_info/time_between_train_steps": 0.0051822662353515625, "step": 1018} +{"info/global_step": 1019, "train_info/time_within_train_step": 13.001445531845093, "step": 1019} +{"train_info/time_between_train_steps": 0.005575895309448242, "step": 1019} +{"info/global_step": 1020, "train_info/time_within_train_step": 13.016724586486816, "step": 1020} +{"train_info/time_between_train_steps": 0.005597114562988281, "step": 1020} +{"train_info/time_between_train_steps": 7.751669645309448, "step": 1020} +{"info/global_step": 1021, "train_info/time_within_train_step": 12.982275247573853, "step": 1021} +{"train_info/time_between_train_steps": 0.005219459533691406, "step": 1021} +{"info/global_step": 1022, "train_info/time_within_train_step": 13.12811803817749, "step": 1022} +{"train_info/time_between_train_steps": 0.00478363037109375, "step": 1022} +{"info/global_step": 1023, "train_info/time_within_train_step": 12.985982179641724, "step": 1023} +{"train_info/time_between_train_steps": 0.005207061767578125, "step": 1023} +{"info/global_step": 1024, "train_info/time_within_train_step": 13.08373498916626, "step": 1024} +{"train_info/time_between_train_steps": 0.0052945613861083984, "step": 1024} +{"info/global_step": 1025, "train_info/time_within_train_step": 12.985385417938232, "step": 1025} +{"train_info/time_between_train_steps": 0.00497889518737793, "step": 1025} +{"info/global_step": 1026, "train_info/time_within_train_step": 13.079392671585083, "step": 1026} +{"train_info/time_between_train_steps": 0.00567936897277832, "step": 1026} +{"info/global_step": 1027, "train_info/time_within_train_step": 13.000275373458862, "step": 1027} +{"train_info/time_between_train_steps": 0.00591278076171875, "step": 1027} +{"info/global_step": 1028, "train_info/time_within_train_step": 13.108968734741211, "step": 1028} +{"train_info/time_between_train_steps": 0.005148887634277344, "step": 1028} +{"info/global_step": 1029, "train_info/time_within_train_step": 13.006683826446533, "step": 1029} +{"train_info/time_between_train_steps": 0.004996776580810547, "step": 1029} +{"info/global_step": 1030, "train_info/time_within_train_step": 13.060031175613403, "step": 1030} +{"train_info/time_between_train_steps": 0.005805492401123047, "step": 1030} +{"info/global_step": 1031, "train_info/time_within_train_step": 13.012592315673828, "step": 1031} +{"train_info/time_between_train_steps": 0.004908323287963867, "step": 1031} +{"info/global_step": 1032, "train_info/time_within_train_step": 12.974350214004517, "step": 1032} +{"train_info/time_between_train_steps": 0.004758596420288086, "step": 1032} +{"info/global_step": 1033, "train_info/time_within_train_step": 13.042951107025146, "step": 1033} +{"train_info/time_between_train_steps": 0.0050432682037353516, "step": 1033} +{"info/global_step": 1034, "train_info/time_within_train_step": 21.085678339004517, "step": 1034} +{"train_info/time_between_train_steps": 0.0048334598541259766, "step": 1034} +{"info/global_step": 1035, "train_info/time_within_train_step": 16.308227062225342, "step": 1035} +{"train_info/time_between_train_steps": 0.0046198368072509766, "step": 1035} +{"info/global_step": 1036, "train_info/time_within_train_step": 12.947733402252197, "step": 1036} +{"train_info/time_between_train_steps": 0.005056858062744141, "step": 1036} +{"info/global_step": 1037, "train_info/time_within_train_step": 18.66360878944397, "step": 1037} +{"train_info/time_between_train_steps": 0.004712343215942383, "step": 1037} +{"info/global_step": 1038, "train_info/time_within_train_step": 15.538410425186157, "step": 1038} +{"train_info/time_between_train_steps": 0.004992008209228516, "step": 1038} +{"info/global_step": 1039, "train_info/time_within_train_step": 12.939581632614136, "step": 1039} +{"train_info/time_between_train_steps": 0.005254030227661133, "step": 1039} +{"info/global_step": 1040, "train_info/time_within_train_step": 19.958616256713867, "step": 1040} +{"train_info/time_between_train_steps": 0.004706621170043945, "step": 1040} +{"info/global_step": 1041, "train_info/time_within_train_step": 14.92493224143982, "step": 1041} +{"train_info/time_between_train_steps": 0.0056056976318359375, "step": 1041} +{"info/global_step": 1042, "train_info/time_within_train_step": 12.953864812850952, "step": 1042} +{"train_info/time_between_train_steps": 0.004671812057495117, "step": 1042} +{"info/global_step": 1043, "train_info/time_within_train_step": 12.950650691986084, "step": 1043} +{"train_info/time_between_train_steps": 0.0050389766693115234, "step": 1043} +{"info/global_step": 1044, "train_info/time_within_train_step": 12.973092079162598, "step": 1044} +{"train_info/time_between_train_steps": 0.004967689514160156, "step": 1044} +{"info/global_step": 1045, "train_info/time_within_train_step": 12.971765041351318, "step": 1045} +{"train_info/time_between_train_steps": 0.005218029022216797, "step": 1045} +{"info/global_step": 1046, "train_info/time_within_train_step": 12.968783378601074, "step": 1046} +{"train_info/time_between_train_steps": 0.0050466060638427734, "step": 1046} +{"info/global_step": 1047, "train_info/time_within_train_step": 12.971156120300293, "step": 1047} +{"train_info/time_between_train_steps": 0.004831075668334961, "step": 1047} +{"info/global_step": 1048, "train_info/time_within_train_step": 12.985141515731812, "step": 1048} +{"train_info/time_between_train_steps": 0.005770206451416016, "step": 1048} +{"info/global_step": 1049, "train_info/time_within_train_step": 13.120660066604614, "step": 1049} +{"train_info/time_between_train_steps": 0.006305694580078125, "step": 1049} +{"info/global_step": 1050, "train_info/time_within_train_step": 13.060141324996948, "step": 1050} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 27338.0, "train_info/memory_max_reserved": 27338.0, "_timestamp": 1737382381, "_runtime": 16376}, "step": 1050} +{"logs": {"train/loss": 3.3886, "train/learning_rate": 8.333333333333333e-05, "train/epoch": 34.02, "_timestamp": 1737382381, "_runtime": 16376}, "step": 1050} +{"train_info/time_between_train_steps": 0.18546581268310547, "step": 1050} +{"train_info/time_between_train_steps": 8.128305673599243, "step": 1050} +{"info/global_step": 1051, "train_info/time_within_train_step": 12.95780873298645, "step": 1051} +{"train_info/time_between_train_steps": 0.0048220157623291016, "step": 1051} +{"info/global_step": 1052, "train_info/time_within_train_step": 13.049619197845459, "step": 1052} +{"train_info/time_between_train_steps": 0.005459785461425781, "step": 1052} +{"info/global_step": 1053, "train_info/time_within_train_step": 12.97144103050232, "step": 1053} +{"train_info/time_between_train_steps": 0.004984855651855469, "step": 1053} +{"info/global_step": 1054, "train_info/time_within_train_step": 13.06489610671997, "step": 1054} +{"train_info/time_between_train_steps": 0.005433082580566406, "step": 1054} +{"info/global_step": 1055, "train_info/time_within_train_step": 12.981934547424316, "step": 1055} +{"train_info/time_between_train_steps": 0.005464315414428711, "step": 1055} +{"info/global_step": 1056, "train_info/time_within_train_step": 13.101092100143433, "step": 1056} +{"train_info/time_between_train_steps": 0.00539708137512207, "step": 1056} +{"info/global_step": 1057, "train_info/time_within_train_step": 12.996011018753052, "step": 1057} +{"train_info/time_between_train_steps": 0.005300998687744141, "step": 1057} +{"info/global_step": 1058, "train_info/time_within_train_step": 16.004194498062134, "step": 1058} +{"train_info/time_between_train_steps": 0.005306720733642578, "step": 1058} +{"info/global_step": 1059, "train_info/time_within_train_step": 14.578592538833618, "step": 1059} +{"train_info/time_between_train_steps": 0.005830287933349609, "step": 1059} +{"info/global_step": 1060, "train_info/time_within_train_step": 13.690372705459595, "step": 1060} +{"train_info/time_between_train_steps": 0.004980564117431641, "step": 1060} +{"info/global_step": 1061, "train_info/time_within_train_step": 16.725980043411255, "step": 1061} +{"train_info/time_between_train_steps": 0.004992246627807617, "step": 1061} +{"info/global_step": 1062, "train_info/time_within_train_step": 14.336603879928589, "step": 1062} +{"train_info/time_between_train_steps": 0.004722118377685547, "step": 1062} +{"info/global_step": 1063, "train_info/time_within_train_step": 14.369404792785645, "step": 1063} +{"train_info/time_between_train_steps": 0.00497746467590332, "step": 1063} +{"info/global_step": 1064, "train_info/time_within_train_step": 25.803964376449585, "step": 1064} +{"train_info/time_between_train_steps": 0.005387783050537109, "step": 1064} +{"info/global_step": 1065, "train_info/time_within_train_step": 12.914358854293823, "step": 1065} +{"train_info/time_between_train_steps": 0.004831790924072266, "step": 1065} +{"info/global_step": 1066, "train_info/time_within_train_step": 21.491650581359863, "step": 1066} +{"train_info/time_between_train_steps": 0.005334377288818359, "step": 1066} +{"info/global_step": 1067, "train_info/time_within_train_step": 15.60078501701355, "step": 1067} +{"train_info/time_between_train_steps": 0.004937648773193359, "step": 1067} +{"info/global_step": 1068, "train_info/time_within_train_step": 12.9308762550354, "step": 1068} +{"train_info/time_between_train_steps": 0.005174875259399414, "step": 1068} +{"info/global_step": 1069, "train_info/time_within_train_step": 15.833050966262817, "step": 1069} +{"train_info/time_between_train_steps": 0.0048389434814453125, "step": 1069} +{"info/global_step": 1070, "train_info/time_within_train_step": 15.55846095085144, "step": 1070} +{"train_info/time_between_train_steps": 0.005086660385131836, "step": 1070} +{"info/global_step": 1071, "train_info/time_within_train_step": 13.642351388931274, "step": 1071} +{"train_info/time_between_train_steps": 0.004740238189697266, "step": 1071} +{"info/global_step": 1072, "train_info/time_within_train_step": 14.059087991714478, "step": 1072} +{"train_info/time_between_train_steps": 0.004862785339355469, "step": 1072} +{"info/global_step": 1073, "train_info/time_within_train_step": 17.44878339767456, "step": 1073} +{"train_info/time_between_train_steps": 0.0047223567962646484, "step": 1073} +{"info/global_step": 1074, "train_info/time_within_train_step": 13.874286890029907, "step": 1074} +{"train_info/time_between_train_steps": 0.005437135696411133, "step": 1074} +{"info/global_step": 1075, "train_info/time_within_train_step": 12.951531648635864, "step": 1075} +{"train_info/time_between_train_steps": 0.005896091461181641, "step": 1075} +{"info/global_step": 1076, "train_info/time_within_train_step": 12.97058391571045, "step": 1076} +{"train_info/time_between_train_steps": 0.0052585601806640625, "step": 1076} +{"info/global_step": 1077, "train_info/time_within_train_step": 12.972520112991333, "step": 1077} +{"train_info/time_between_train_steps": 0.004862308502197266, "step": 1077} +{"info/global_step": 1078, "train_info/time_within_train_step": 12.982154607772827, "step": 1078} +{"train_info/time_between_train_steps": 0.0060007572174072266, "step": 1078} +{"info/global_step": 1079, "train_info/time_within_train_step": 13.068527936935425, "step": 1079} +{"train_info/time_between_train_steps": 0.006001472473144531, "step": 1079} +{"info/global_step": 1080, "train_info/time_within_train_step": 13.001272916793823, "step": 1080} +{"train_info/time_between_train_steps": 0.005095243453979492, "step": 1080} +{"train_info/time_between_train_steps": 8.044975519180298, "step": 1080} +{"info/global_step": 1081, "train_info/time_within_train_step": 12.98448133468628, "step": 1081} +{"train_info/time_between_train_steps": 0.00543975830078125, "step": 1081} +{"info/global_step": 1082, "train_info/time_within_train_step": 13.04606819152832, "step": 1082} +{"train_info/time_between_train_steps": 0.004942417144775391, "step": 1082} +{"info/global_step": 1083, "train_info/time_within_train_step": 12.981835842132568, "step": 1083} +{"train_info/time_between_train_steps": 0.005556821823120117, "step": 1083} +{"info/global_step": 1084, "train_info/time_within_train_step": 13.096299409866333, "step": 1084} +{"train_info/time_between_train_steps": 0.005454063415527344, "step": 1084} +{"info/global_step": 1085, "train_info/time_within_train_step": 12.997427940368652, "step": 1085} +{"train_info/time_between_train_steps": 0.00524592399597168, "step": 1085} +{"info/global_step": 1086, "train_info/time_within_train_step": 13.097858905792236, "step": 1086} +{"train_info/time_between_train_steps": 0.005638837814331055, "step": 1086} +{"info/global_step": 1087, "train_info/time_within_train_step": 12.994800090789795, "step": 1087} +{"train_info/time_between_train_steps": 0.005225658416748047, "step": 1087} +{"info/global_step": 1088, "train_info/time_within_train_step": 13.081128597259521, "step": 1088} +{"train_info/time_between_train_steps": 0.00497126579284668, "step": 1088} +{"info/global_step": 1089, "train_info/time_within_train_step": 13.25584077835083, "step": 1089} +{"train_info/time_between_train_steps": 0.005167484283447266, "step": 1089} +{"info/global_step": 1090, "train_info/time_within_train_step": 19.116396188735962, "step": 1090} +{"train_info/time_between_train_steps": 0.005078792572021484, "step": 1090} +{"info/global_step": 1091, "train_info/time_within_train_step": 15.1242835521698, "step": 1091} +{"train_info/time_between_train_steps": 0.005192756652832031, "step": 1091} +{"info/global_step": 1092, "train_info/time_within_train_step": 12.952292442321777, "step": 1092} +{"train_info/time_between_train_steps": 0.00477290153503418, "step": 1092} +{"info/global_step": 1093, "train_info/time_within_train_step": 20.640093088150024, "step": 1093} +{"train_info/time_between_train_steps": 0.004723548889160156, "step": 1093} +{"info/global_step": 1094, "train_info/time_within_train_step": 13.466550588607788, "step": 1094} +{"train_info/time_between_train_steps": 0.004817962646484375, "step": 1094} +{"info/global_step": 1095, "train_info/time_within_train_step": 13.76700210571289, "step": 1095} +{"train_info/time_between_train_steps": 0.004946231842041016, "step": 1095} +{"info/global_step": 1096, "train_info/time_within_train_step": 15.660110712051392, "step": 1096} +{"train_info/time_between_train_steps": 0.005036830902099609, "step": 1096} +{"info/global_step": 1097, "train_info/time_within_train_step": 18.048476219177246, "step": 1097} +{"train_info/time_between_train_steps": 0.00484013557434082, "step": 1097} +{"info/global_step": 1098, "train_info/time_within_train_step": 15.59214735031128, "step": 1098} +{"train_info/time_between_train_steps": 0.004673957824707031, "step": 1098} +{"info/global_step": 1099, "train_info/time_within_train_step": 14.092544078826904, "step": 1099} +{"train_info/time_between_train_steps": 0.004839420318603516, "step": 1099} +{"info/global_step": 1100, "train_info/time_within_train_step": 18.238890886306763, "step": 1100} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 27338.0, "train_info/memory_max_reserved": 27338.0, "_timestamp": 1737383218, "_runtime": 17213}, "step": 1100} +{"logs": {"train/loss": 3.3823, "train/learning_rate": 5.5555555555555545e-05, "train/epoch": 36.02, "_timestamp": 1737383218, "_runtime": 17213}, "step": 1100} +{"train_info/time_between_train_steps": 355.32918667793274, "step": 1100} +{"info/global_step": 1101, "train_info/time_within_train_step": 14.82668948173523, "step": 1101} +{"train_info/time_between_train_steps": 0.0050733089447021484, "step": 1101} +{"info/global_step": 1102, "train_info/time_within_train_step": 13.981108665466309, "step": 1102} +{"train_info/time_between_train_steps": 0.0050506591796875, "step": 1102} +{"info/global_step": 1103, "train_info/time_within_train_step": 12.918333053588867, "step": 1103} +{"train_info/time_between_train_steps": 0.00459742546081543, "step": 1103} +{"info/global_step": 1104, "train_info/time_within_train_step": 12.946370124816895, "step": 1104} +{"train_info/time_between_train_steps": 0.005541324615478516, "step": 1104} +{"info/global_step": 1105, "train_info/time_within_train_step": 12.94343614578247, "step": 1105} +{"train_info/time_between_train_steps": 0.0053479671478271484, "step": 1105} +{"info/global_step": 1106, "train_info/time_within_train_step": 12.960786581039429, "step": 1106} +{"train_info/time_between_train_steps": 0.004911184310913086, "step": 1106} +{"info/global_step": 1107, "train_info/time_within_train_step": 12.981526136398315, "step": 1107} +{"train_info/time_between_train_steps": 0.0055887699127197266, "step": 1107} +{"info/global_step": 1108, "train_info/time_within_train_step": 13.003767728805542, "step": 1108} +{"train_info/time_between_train_steps": 0.005801200866699219, "step": 1108} +{"info/global_step": 1109, "train_info/time_within_train_step": 12.994712352752686, "step": 1109} +{"train_info/time_between_train_steps": 0.006066322326660156, "step": 1109} +{"info/global_step": 1110, "train_info/time_within_train_step": 13.094972848892212, "step": 1110} +{"train_info/time_between_train_steps": 0.006064653396606445, "step": 1110} +{"train_info/time_between_train_steps": 7.7253193855285645, "step": 1110} +{"info/global_step": 1111, "train_info/time_within_train_step": 12.961437463760376, "step": 1111} +{"train_info/time_between_train_steps": 0.004663705825805664, "step": 1111} +{"info/global_step": 1112, "train_info/time_within_train_step": 13.092648267745972, "step": 1112} +{"train_info/time_between_train_steps": 0.004751682281494141, "step": 1112} +{"info/global_step": 1113, "train_info/time_within_train_step": 12.999999761581421, "step": 1113} +{"train_info/time_between_train_steps": 0.005230426788330078, "step": 1113} +{"info/global_step": 1114, "train_info/time_within_train_step": 13.081169128417969, "step": 1114} +{"train_info/time_between_train_steps": 0.00537109375, "step": 1114} +{"info/global_step": 1115, "train_info/time_within_train_step": 12.99896502494812, "step": 1115} +{"train_info/time_between_train_steps": 0.004854440689086914, "step": 1115} +{"info/global_step": 1116, "train_info/time_within_train_step": 13.087678670883179, "step": 1116} +{"train_info/time_between_train_steps": 0.004931926727294922, "step": 1116} +{"info/global_step": 1117, "train_info/time_within_train_step": 13.004485845565796, "step": 1117} +{"train_info/time_between_train_steps": 0.005622386932373047, "step": 1117} +{"info/global_step": 1118, "train_info/time_within_train_step": 13.102959632873535, "step": 1118} +{"train_info/time_between_train_steps": 0.005547285079956055, "step": 1118} +{"info/global_step": 1119, "train_info/time_within_train_step": 13.002318859100342, "step": 1119} +{"train_info/time_between_train_steps": 0.005200624465942383, "step": 1119} +{"info/global_step": 1120, "train_info/time_within_train_step": 13.073040008544922, "step": 1120} +{"train_info/time_between_train_steps": 0.005202054977416992, "step": 1120} +{"info/global_step": 1121, "train_info/time_within_train_step": 13.036006927490234, "step": 1121} +{"train_info/time_between_train_steps": 0.00482630729675293, "step": 1121} +{"info/global_step": 1122, "train_info/time_within_train_step": 12.9869863986969, "step": 1122} +{"train_info/time_between_train_steps": 0.005491733551025391, "step": 1122} +{"info/global_step": 1123, "train_info/time_within_train_step": 12.991819620132446, "step": 1123} +{"train_info/time_between_train_steps": 0.004653215408325195, "step": 1123} +{"info/global_step": 1124, "train_info/time_within_train_step": 14.67367696762085, "step": 1124} +{"train_info/time_between_train_steps": 0.0049991607666015625, "step": 1124} +{"info/global_step": 1125, "train_info/time_within_train_step": 13.241225719451904, "step": 1125} +{"train_info/time_between_train_steps": 0.005008220672607422, "step": 1125} +{"info/global_step": 1126, "train_info/time_within_train_step": 15.568839311599731, "step": 1126} +{"train_info/time_between_train_steps": 0.004808902740478516, "step": 1126} +{"info/global_step": 1127, "train_info/time_within_train_step": 12.960343837738037, "step": 1127} +{"train_info/time_between_train_steps": 0.0047070980072021484, "step": 1127} +{"info/global_step": 1128, "train_info/time_within_train_step": 13.605014562606812, "step": 1128} +{"train_info/time_between_train_steps": 0.005112409591674805, "step": 1128} +{"info/global_step": 1129, "train_info/time_within_train_step": 16.861393928527832, "step": 1129} +{"train_info/time_between_train_steps": 0.004675388336181641, "step": 1129} +{"info/global_step": 1130, "train_info/time_within_train_step": 12.960307359695435, "step": 1130} +{"train_info/time_between_train_steps": 0.005484580993652344, "step": 1130} +{"info/global_step": 1131, "train_info/time_within_train_step": 12.978284358978271, "step": 1131} +{"train_info/time_between_train_steps": 0.005544900894165039, "step": 1131} +{"info/global_step": 1132, "train_info/time_within_train_step": 12.986331224441528, "step": 1132} +{"train_info/time_between_train_steps": 0.005130290985107422, "step": 1132} +{"info/global_step": 1133, "train_info/time_within_train_step": 12.979499578475952, "step": 1133} +{"train_info/time_between_train_steps": 0.004739284515380859, "step": 1133} +{"info/global_step": 1134, "train_info/time_within_train_step": 12.992958307266235, "step": 1134} +{"train_info/time_between_train_steps": 0.0050334930419921875, "step": 1134} +{"info/global_step": 1135, "train_info/time_within_train_step": 12.982861518859863, "step": 1135} +{"train_info/time_between_train_steps": 0.005418062210083008, "step": 1135} +{"info/global_step": 1136, "train_info/time_within_train_step": 12.98556900024414, "step": 1136} +{"train_info/time_between_train_steps": 0.005045652389526367, "step": 1136} +{"info/global_step": 1137, "train_info/time_within_train_step": 12.984040975570679, "step": 1137} +{"train_info/time_between_train_steps": 0.00559544563293457, "step": 1137} +{"info/global_step": 1138, "train_info/time_within_train_step": 12.98617959022522, "step": 1138} +{"train_info/time_between_train_steps": 0.005900382995605469, "step": 1138} +{"info/global_step": 1139, "train_info/time_within_train_step": 13.004401206970215, "step": 1139} +{"train_info/time_between_train_steps": 0.005541563034057617, "step": 1139} +{"info/global_step": 1140, "train_info/time_within_train_step": 13.01514720916748, "step": 1140} +{"train_info/time_between_train_steps": 0.005773305892944336, "step": 1140} +{"train_info/time_between_train_steps": 7.876269340515137, "step": 1140} +{"info/global_step": 1141, "train_info/time_within_train_step": 13.0572030544281, "step": 1141} +{"train_info/time_between_train_steps": 0.0054721832275390625, "step": 1141} +{"info/global_step": 1142, "train_info/time_within_train_step": 13.092036247253418, "step": 1142} +{"train_info/time_between_train_steps": 0.00480198860168457, "step": 1142} +{"info/global_step": 1143, "train_info/time_within_train_step": 12.987952709197998, "step": 1143} +{"train_info/time_between_train_steps": 0.00471043586730957, "step": 1143} +{"info/global_step": 1144, "train_info/time_within_train_step": 13.08034896850586, "step": 1144} +{"train_info/time_between_train_steps": 0.005674839019775391, "step": 1144} +{"info/global_step": 1145, "train_info/time_within_train_step": 12.98876690864563, "step": 1145} +{"train_info/time_between_train_steps": 0.005095481872558594, "step": 1145} +{"info/global_step": 1146, "train_info/time_within_train_step": 13.074775457382202, "step": 1146} +{"train_info/time_between_train_steps": 0.0046079158782958984, "step": 1146} +{"info/global_step": 1147, "train_info/time_within_train_step": 12.993247747421265, "step": 1147} +{"train_info/time_between_train_steps": 0.00523829460144043, "step": 1147} +{"info/global_step": 1148, "train_info/time_within_train_step": 13.082497119903564, "step": 1148} +{"train_info/time_between_train_steps": 0.005722761154174805, "step": 1148} +{"info/global_step": 1149, "train_info/time_within_train_step": 14.341227769851685, "step": 1149} +{"train_info/time_between_train_steps": 0.0060422420501708984, "step": 1149} +{"info/global_step": 1150, "train_info/time_within_train_step": 13.044586896896362, "step": 1150} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 27338.0, "train_info/memory_max_reserved": 27338.0, "_timestamp": 1737384276, "_runtime": 18271}, "step": 1150} +{"logs": {"train/loss": 3.3532, "train/learning_rate": 2.7777777777777772e-05, "train/epoch": 38.01, "_timestamp": 1737384276, "_runtime": 18271}, "step": 1150} +{"train_info/time_between_train_steps": 1.8579754829406738, "step": 1150} +{"info/global_step": 1151, "train_info/time_within_train_step": 16.844013452529907, "step": 1151} +{"train_info/time_between_train_steps": 0.004640817642211914, "step": 1151} +{"info/global_step": 1152, "train_info/time_within_train_step": 13.19623589515686, "step": 1152} +{"train_info/time_between_train_steps": 0.005116462707519531, "step": 1152} +{"info/global_step": 1153, "train_info/time_within_train_step": 13.442881345748901, "step": 1153} +{"train_info/time_between_train_steps": 0.005115032196044922, "step": 1153} +{"info/global_step": 1154, "train_info/time_within_train_step": 12.964217185974121, "step": 1154} +{"train_info/time_between_train_steps": 0.0046961307525634766, "step": 1154} +{"info/global_step": 1155, "train_info/time_within_train_step": 12.983532905578613, "step": 1155} +{"train_info/time_between_train_steps": 0.004640340805053711, "step": 1155} +{"info/global_step": 1156, "train_info/time_within_train_step": 14.613216876983643, "step": 1156} +{"train_info/time_between_train_steps": 0.005103588104248047, "step": 1156} +{"info/global_step": 1157, "train_info/time_within_train_step": 14.446090459823608, "step": 1157} +{"train_info/time_between_train_steps": 0.0050733089447021484, "step": 1157} +{"info/global_step": 1158, "train_info/time_within_train_step": 12.971028089523315, "step": 1158} +{"train_info/time_between_train_steps": 0.004794120788574219, "step": 1158} +{"info/global_step": 1159, "train_info/time_within_train_step": 22.51668953895569, "step": 1159} +{"train_info/time_between_train_steps": 0.005025148391723633, "step": 1159} +{"info/global_step": 1160, "train_info/time_within_train_step": 14.169309377670288, "step": 1160} +{"train_info/time_between_train_steps": 0.005814552307128906, "step": 1160} +{"info/global_step": 1161, "train_info/time_within_train_step": 12.958565950393677, "step": 1161} +{"train_info/time_between_train_steps": 0.004575014114379883, "step": 1161} +{"info/global_step": 1162, "train_info/time_within_train_step": 12.969806909561157, "step": 1162} +{"train_info/time_between_train_steps": 0.004696846008300781, "step": 1162} +{"info/global_step": 1163, "train_info/time_within_train_step": 12.976738929748535, "step": 1163} +{"train_info/time_between_train_steps": 0.00528264045715332, "step": 1163} +{"info/global_step": 1164, "train_info/time_within_train_step": 12.98338007926941, "step": 1164} +{"train_info/time_between_train_steps": 0.005255699157714844, "step": 1164} +{"info/global_step": 1165, "train_info/time_within_train_step": 12.973960161209106, "step": 1165} +{"train_info/time_between_train_steps": 0.004646778106689453, "step": 1165} +{"info/global_step": 1166, "train_info/time_within_train_step": 12.976413488388062, "step": 1166} +{"train_info/time_between_train_steps": 0.005627155303955078, "step": 1166} +{"info/global_step": 1167, "train_info/time_within_train_step": 12.99250602722168, "step": 1167} +{"train_info/time_between_train_steps": 0.005092620849609375, "step": 1167} +{"info/global_step": 1168, "train_info/time_within_train_step": 13.002092838287354, "step": 1168} +{"train_info/time_between_train_steps": 0.005021572113037109, "step": 1168} +{"info/global_step": 1169, "train_info/time_within_train_step": 12.998850107192993, "step": 1169} +{"train_info/time_between_train_steps": 0.0062999725341796875, "step": 1169} +{"info/global_step": 1170, "train_info/time_within_train_step": 13.015329360961914, "step": 1170} +{"train_info/time_between_train_steps": 0.005799770355224609, "step": 1170} +{"train_info/time_between_train_steps": 7.985663414001465, "step": 1170} +{"info/global_step": 1171, "train_info/time_within_train_step": 12.975825548171997, "step": 1171} +{"train_info/time_between_train_steps": 0.004889249801635742, "step": 1171} +{"info/global_step": 1172, "train_info/time_within_train_step": 13.18650221824646, "step": 1172} +{"train_info/time_between_train_steps": 0.004702329635620117, "step": 1172} +{"info/global_step": 1173, "train_info/time_within_train_step": 12.986256122589111, "step": 1173} +{"train_info/time_between_train_steps": 0.005562782287597656, "step": 1173} +{"info/global_step": 1174, "train_info/time_within_train_step": 13.072617053985596, "step": 1174} +{"train_info/time_between_train_steps": 0.004718780517578125, "step": 1174} +{"info/global_step": 1175, "train_info/time_within_train_step": 12.994957447052002, "step": 1175} +{"train_info/time_between_train_steps": 0.004485368728637695, "step": 1175} +{"info/global_step": 1176, "train_info/time_within_train_step": 13.100122451782227, "step": 1176} +{"train_info/time_between_train_steps": 0.005167722702026367, "step": 1176} +{"info/global_step": 1177, "train_info/time_within_train_step": 13.05077075958252, "step": 1177} +{"train_info/time_between_train_steps": 0.004752635955810547, "step": 1177} +{"info/global_step": 1178, "train_info/time_within_train_step": 13.113131761550903, "step": 1178} +{"train_info/time_between_train_steps": 0.0048711299896240234, "step": 1178} +{"info/global_step": 1179, "train_info/time_within_train_step": 13.008929014205933, "step": 1179} +{"train_info/time_between_train_steps": 0.005562305450439453, "step": 1179} +{"info/global_step": 1180, "train_info/time_within_train_step": 13.069468021392822, "step": 1180} +{"train_info/time_between_train_steps": 0.005306243896484375, "step": 1180} +{"info/global_step": 1181, "train_info/time_within_train_step": 13.007337093353271, "step": 1181} +{"train_info/time_between_train_steps": 0.004911661148071289, "step": 1181} +{"info/global_step": 1182, "train_info/time_within_train_step": 12.988200187683105, "step": 1182} +{"train_info/time_between_train_steps": 0.004703998565673828, "step": 1182} +{"info/global_step": 1183, "train_info/time_within_train_step": 14.038418054580688, "step": 1183} +{"train_info/time_between_train_steps": 0.004421710968017578, "step": 1183} +{"info/global_step": 1184, "train_info/time_within_train_step": 15.925621509552002, "step": 1184} +{"train_info/time_between_train_steps": 0.004979372024536133, "step": 1184} +{"info/global_step": 1185, "train_info/time_within_train_step": 14.558820962905884, "step": 1185} +{"train_info/time_between_train_steps": 0.004631996154785156, "step": 1185} +{"info/global_step": 1186, "train_info/time_within_train_step": 12.970694303512573, "step": 1186} +{"train_info/time_between_train_steps": 0.004683256149291992, "step": 1186} +{"info/global_step": 1187, "train_info/time_within_train_step": 13.045371294021606, "step": 1187} +{"train_info/time_between_train_steps": 0.004836320877075195, "step": 1187} +{"info/global_step": 1188, "train_info/time_within_train_step": 12.975379467010498, "step": 1188} +{"train_info/time_between_train_steps": 0.004988908767700195, "step": 1188} +{"info/global_step": 1189, "train_info/time_within_train_step": 12.98611330986023, "step": 1189} +{"train_info/time_between_train_steps": 0.005460262298583984, "step": 1189} +{"info/global_step": 1190, "train_info/time_within_train_step": 13.007472276687622, "step": 1190} +{"train_info/time_between_train_steps": 0.0054357051849365234, "step": 1190} +{"info/global_step": 1191, "train_info/time_within_train_step": 12.978156566619873, "step": 1191} +{"train_info/time_between_train_steps": 0.0046236515045166016, "step": 1191} +{"info/global_step": 1192, "train_info/time_within_train_step": 12.984755516052246, "step": 1192} +{"train_info/time_between_train_steps": 0.004639387130737305, "step": 1192} +{"info/global_step": 1193, "train_info/time_within_train_step": 12.996056318283081, "step": 1193} +{"train_info/time_between_train_steps": 0.004756927490234375, "step": 1193} +{"info/global_step": 1194, "train_info/time_within_train_step": 13.004520416259766, "step": 1194} +{"train_info/time_between_train_steps": 0.0051250457763671875, "step": 1194} +{"info/global_step": 1195, "train_info/time_within_train_step": 12.993705987930298, "step": 1195} +{"train_info/time_between_train_steps": 0.00553131103515625, "step": 1195} +{"info/global_step": 1196, "train_info/time_within_train_step": 13.15322995185852, "step": 1196} +{"train_info/time_between_train_steps": 0.004955768585205078, "step": 1196} +{"info/global_step": 1197, "train_info/time_within_train_step": 18.355945348739624, "step": 1197} +{"train_info/time_between_train_steps": 0.0053179264068603516, "step": 1197} +{"info/global_step": 1198, "train_info/time_within_train_step": 17.481909036636353, "step": 1198} +{"train_info/time_between_train_steps": 0.00518035888671875, "step": 1198} +{"info/global_step": 1199, "train_info/time_within_train_step": 15.735066413879395, "step": 1199} +{"train_info/time_between_train_steps": 0.005945682525634766, "step": 1199} +{"info/global_step": 1200, "train_info/time_within_train_step": 15.02625060081482, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 27338.0, "train_info/memory_max_reserved": 27338.0, "_timestamp": 1737385008, "_runtime": 19003}, "step": 1200} +{"logs": {"train/loss": 3.305, "train/learning_rate": 0.0, "train/epoch": 39.02, "_timestamp": 1737385008, "_runtime": 19003}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1920.990234375, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 27338.0, "train_info/memory_max_reserved": 27338.0, "_timestamp": 1737385205, "_runtime": 19200}, "step": 1200} +{"logs": {"train/train_runtime": 19208.516, "train/train_samples_per_second": 31.986, "train/train_steps_per_second": 0.062, "train/total_flos": 3.2545751629824e+17, "train/train_loss": 4.644554929733276, "train/epoch": 39.02, "_timestamp": 1737385205, "_runtime": 19200}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1920.9892578125, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 27338.0, "train_info/memory_max_reserved": 27338.0, "_timestamp": 1737385286, "_runtime": 19281}, "step": 1200} +{"logs": {"eval/loss": 4.280470371246338, "eval/runtime": 4.8547, "eval/samples_per_second": 23.071, "eval/steps_per_second": 1.442, "train/epoch": 39.02, "_timestamp": 1737385286, "_runtime": 19281}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1920.9892578125, "train_info/memory_max_allocated": 20713.404296875, "train_info/memory_reserved": 27338.0, "train_info/memory_max_reserved": 27338.0, "_timestamp": 1737385287, "_runtime": 19282}, "step": 1200} +{"logs": {"eval//scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_loss": 4.280470371246338, "eval//scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_ppl": 72.27442782594726, "eval//scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_runtime": 4.8547, "eval//scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 23.071, "train/epoch": 39.02, "_timestamp": 1737385287, "_runtime": 19282}, "step": 1200} diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..b73e49eab805485975a501262650b193710009d8 --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1b7bf158fcfaa731923bdf157800985d71dd4d6884003787ada360b6b876232 +size 510411881 diff --git a/shuffle_local5_fr_FR_randinit_seed53.log b/shuffle_local5_fr_FR_randinit_seed53.log new file mode 100755 index 0000000000000000000000000000000000000000..218b9692bf94a60b98c7e7baf7a3fc0fed7fec53 --- /dev/null +++ b/shuffle_local5_fr_FR_randinit_seed53.log @@ -0,0 +1,121 @@ +|=>> 01/20 [10:35:36] - mistral - INFO :: Starting Run: shuffle_local5_fr_FR_randinit_seed53... +|=>> 01/20 [10:35:36] - mistral - INFO :: Setting Random Seed to 53! +|=>> 01/20 [10:35:37] - mistral - INFO :: Building Tokenize and Initializing `gpt2-small` via AutoModel/AutoConfig... +|=>> 01/20 [10:35:37] - mistral - INFO :: Using Configs For Model From: /scratch/ykyao/projects/multilingual-LM/mistral/conf/models/gpt2-small-FR.json ... +|=>> 01/20 [10:35:37] - mistral.models.auto - INFO :: Building Hugging Face GPT2Config from provided configs: {'activation_function': 'gelu_new', 'architectures': ['GPT2LMHeadModel'], 'attn_pdrop': 0.1, 'bos_token_id': 1, 'embd_pdrop': 0.1, 'eos_token_id': 1, 'initializer_range': 0.02, 'layer_norm_epsilon': 1e-05, 'model_type': 'gpt2', 'n_ctx': 1024, 'n_embd': 768, 'n_head': 12, 'n_inner': None, 'n_layer': 12, 'n_positions': 1024, 'reorder_and_upcast_attn': True, 'resid_pdrop': 0.1, 'scale_attn_by_inverse_layer_idx': True, 'scale_attn_weights': True, 'summary_activation': None, 'summary_first_dropout': 0.2, 'summary_proj_to_labels': True, 'summary_type': 'cls_index', 'summary_use_proj': True, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 1024}}, 'torch_dtype': 'float32', 'transformers_version': '4.35.2', 'use_cache': False, 'vocab_size': 50262} ... +|=>> 01/20 [10:35:37] - mistral.models.auto - INFO :: Fetching Hugging Face [Fast] AutoTokenizer for Model: `gpt2`... +|=>> 01/20 [10:35:37] - mistral.models.auto - INFO :: Using a Pretokenized Dataset +|=>> 01/20 [10:35:37] - mistral.models.auto - INFO :: Initializing Custom GPT-2 Model from Configuration: `gpt2`... +|=>> 01/20 [10:35:40] - mistral - INFO :: Setting Training Arguments from Quinfig... +|=>> 01/20 [10:35:40] - mistral.args.training - INFO :: Setting Gradient Accumulation Steps = `64` [BSZ: 512 World Size: 1 Device BSZ: 8] +|=>> 01/20 [10:35:40] - mistral - INFO :: Downloading and Preprocessing Dataset `/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py`... +|=>> 01/20 [10:35:42] - datasets_modules.datasets.multilingual_dataset.622766c60cbb8f561ebaa3b973324d0a680a5fd9ae7223c01dd7f116023232f5.multilingual_dataset - INFO :: Generating examples from = /scratch/ykyao/projects//multilingual-LM/data/multilingual/multilingual_data_perturbed/shuffle_local5_fr/train +|=>> 01/20 [10:35:45] - datasets_modules.datasets.multilingual_dataset.622766c60cbb8f561ebaa3b973324d0a680a5fd9ae7223c01dd7f116023232f5.multilingual_dataset - INFO :: Total sentences: 1113746 +|=>> 01/20 [10:35:46] - datasets_modules.datasets.multilingual_dataset.622766c60cbb8f561ebaa3b973324d0a680a5fd9ae7223c01dd7f116023232f5.multilingual_dataset - INFO :: Loading pre-tokenized data +|=>> 01/20 [10:35:50] - datasets_modules.datasets.multilingual_dataset.622766c60cbb8f561ebaa3b973324d0a680a5fd9ae7223c01dd7f116023232f5.multilingual_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 01/20 [10:35:50] - datasets_modules.datasets.multilingual_dataset.622766c60cbb8f561ebaa3b973324d0a680a5fd9ae7223c01dd7f116023232f5.multilingual_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 01/20 [10:35:51] - datasets_modules.datasets.multilingual_dataset.622766c60cbb8f561ebaa3b973324d0a680a5fd9ae7223c01dd7f116023232f5.multilingual_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 01/20 [10:36:20] - datasets_modules.datasets.multilingual_dataset.622766c60cbb8f561ebaa3b973324d0a680a5fd9ae7223c01dd7f116023232f5.multilingual_dataset - INFO :: Generating examples from = /scratch/ykyao/projects//multilingual-LM/data/multilingual/multilingual_data_perturbed/shuffle_local5_fr/dev +|=>> 01/20 [10:36:20] - datasets_modules.datasets.multilingual_dataset.622766c60cbb8f561ebaa3b973324d0a680a5fd9ae7223c01dd7f116023232f5.multilingual_dataset - INFO :: Total sentences: 6342 +|=>> 01/20 [10:36:20] - datasets_modules.datasets.multilingual_dataset.622766c60cbb8f561ebaa3b973324d0a680a5fd9ae7223c01dd7f116023232f5.multilingual_dataset - INFO :: Loading pre-tokenized data +|=>> 01/20 [10:36:20] - datasets_modules.datasets.multilingual_dataset.622766c60cbb8f561ebaa3b973324d0a680a5fd9ae7223c01dd7f116023232f5.multilingual_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 01/20 [10:36:20] - datasets_modules.datasets.multilingual_dataset.622766c60cbb8f561ebaa3b973324d0a680a5fd9ae7223c01dd7f116023232f5.multilingual_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 01/20 [10:36:20] - datasets_modules.datasets.multilingual_dataset.622766c60cbb8f561ebaa3b973324d0a680a5fd9ae7223c01dd7f116023232f5.multilingual_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 01/20 [10:36:21] - mistral.corpora.auto - INFO :: Building Tokenized Indexed Dataset for {dataset_id}/{dataset_name}... +|=>> 01/20 [10:36:21] - mistral.corpora.auto - INFO :: Building Indexed Dataset for train +|=>> 01/20 [10:36:57] - mistral.corpora.auto - INFO :: Building Indexed Dataset for validation +|=>> 01/20 [10:36:59] - mistral - INFO :: Initializing Model Trainer... +|=>> 01/20 [10:36:59] - mistral - INFO :: Training Arguments: TrainingArguments( +_n_gpu=1, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +bf16=False, +bf16_full_eval=False, +data_seed=53, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=IntervalStrategy.STEPS, +fp16=True, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +gradient_accumulation_steps=64, +gradient_checkpointing=False, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_model_id=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0006, +length_column_name=length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=logs, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=50, +logging_strategy=IntervalStrategy.STEPS, +lr_scheduler_type=SchedulerType.LINEAR, +max_grad_norm=1.0, +max_steps=1200, +metric_for_best_model=None, +mp_parameters=, +no_cuda=False, +num_train_epochs=3.0, +optim=OptimizerNames.ADAMW_HF, +output_dir=//scratch/ykyao/projects/multilingual_models/shuffle_local5_fr_FR_randinit/babylm_shuffle_local5_fr_FR_randinit_seed53/runs/shuffle_local5_fr_FR_randinit_seed53, +overwrite_output_dir=False, +past_index=-1, +per_device_eval_batch_size=16, +per_device_train_batch_size=8, +prediction_loss_only=True, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +remove_unused_columns=True, +report_to=[], +resume_from_checkpoint=None, +run_name=shuffle_local5_fr_FR_randinit_seed53, +save_on_each_node=False, +save_steps=1000, +save_strategy=IntervalStrategy.STEPS, +save_total_limit=None, +seed=53, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=120, +weight_decay=0.1, +xpu_backend=None, +) +|=>> 01/20 [10:37:10] - mistral.core.callbacks - INFO :: Setting W&B Project: ykyao +|=>> 01/20 [10:39:53] - mistral - INFO :: Training... +|=>> 01/20 [10:39:57] - mistral.core.callbacks - INFO :: Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" +|=>> 01/20 [16:01:21] - mistral - INFO :: ...and that's all folks! +|=>> 01/20 [16:01:22] - mistral - INFO :: Running final evaluation... diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/training_args.bin b/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..03b2288ee5b328686d8934e3fc4a6b925a9c3ede --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfe69ffbf98648e171ef448900d45919fc613a0e3b6af7ac7ce7cdbdf004776 +size 3183