diff --git a/checkpoint-0/config.json b/checkpoint-0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..07a30fecf1535bbf080f023f13d2b580daee85ce --- /dev/null +++ b/checkpoint-0/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 2, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-0/pytorch_model.bin b/checkpoint-0/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e62f1c2a73e204dd71fd38dbd4de66ee58a3ae92 --- /dev/null +++ b/checkpoint-0/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4794ea0b9a495e87fc34e316efe1b585527d6e3d15e4d093362ab24ee74ab3f +size 510396521 diff --git a/checkpoint-0/special_tokens_map.json b/checkpoint-0/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-0/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-0/tokenizer_config.json b/checkpoint-0/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-0/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-0/training_args.bin b/checkpoint-0/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f68d66eaa09f7732c3d230d37ae591e8f32f4f8 --- /dev/null +++ b/checkpoint-0/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27836d7e56d49d56f7b1cb7eedcf9f3729d59e5c4fa7930d4fee7671376c78c4 +size 3183 diff --git a/checkpoint-100/config.json b/checkpoint-100/config.json new file mode 100644 index 0000000000000000000000000000000000000000..07a30fecf1535bbf080f023f13d2b580daee85ce --- /dev/null +++ b/checkpoint-100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 2, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a7a32ebbed39f3b5fdd9e58bbf1515e723be387c --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98de2bf978fa7ee67770d71d5bf20e1a52836eb7176810964b99cd4eec54334e +size 995603825 diff --git a/checkpoint-100/pytorch_model.bin b/checkpoint-100/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..23e56743a8da7697b48e4d0e61869804cf967f30 --- /dev/null +++ b/checkpoint-100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cb5ac8a3a2ba219e2137d7ef4f75497500bbd313b2871b1011989da75d93171 +size 510396521 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5892baeef5541ff99e83ffe529da71cb887c45b2 --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa6bc3fdb61ee6714d5ed867ecc39b8fbb7c24eb47766a706f40ee6e0315766a +size 14567 diff --git a/checkpoint-100/scaler.pt b/checkpoint-100/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..45cc4a33e17645cb0ed4a911b11c77cb2e7ce7f3 --- /dev/null +++ b/checkpoint-100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a3423b2fe42f204bc8fe2c666ff379f9fd753a0f13613064a5e71e86b519e8 +size 559 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..595eec9094e91b3eb24c2de88c461df2c22026ab --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e9b9d31d11c624d89b0c04ad496adf4b5addd3e703848d2583972c703e8da6 +size 623 diff --git a/checkpoint-100/special_tokens_map.json b/checkpoint-100/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-100/tokenizer_config.json b/checkpoint-100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a42b81691ccf0ea3864b1bb7fc734d8fd6aaa961 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,34 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.015833333333333, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8068, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2996, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8028, + "step": 100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.6770413846528e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f68d66eaa09f7732c3d230d37ae591e8f32f4f8 --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27836d7e56d49d56f7b1cb7eedcf9f3729d59e5c4fa7930d4fee7671376c78c4 +size 3183 diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..07a30fecf1535bbf080f023f13d2b580daee85ce --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 2, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3a953935edcd3c9af62c187fb26cb965a0ed32c --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0ac01ec1049bd436e39b8fb1d1124c71d3dc4a5b6c1d83251fa090a36ebaf3d +size 995604017 diff --git a/checkpoint-1000/pytorch_model.bin b/checkpoint-1000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..d8c2945451239987cdf9ac24e81752ddff1d7312 --- /dev/null +++ b/checkpoint-1000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d44d454a4ee8ded5272a281c6bda1a539ea4c74bf489cbced6cc40f3d7f1275c +size 510396521 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c6c769d02ffff08b2682e0a8fb5e82fbac28529b --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5d65d807138ed53bb24a14ad63d7610c0c9b04bdad2955b1132050af20370e5 +size 14567 diff --git a/checkpoint-1000/scaler.pt b/checkpoint-1000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b04695d3a30e4bab2b78883d9c849c25c37ef7d7 --- /dev/null +++ b/checkpoint-1000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f810fc7b695697c440d8985f6042b4ba23a9e1027604c265718b518ca29f1b2b +size 559 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a616899591a36b66ac0bd1ceeb324087c181a2a --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0691206f4bd9ca409d6e7104087a4e0eb05df8f8f555a400f6ecc532edba52d8 +size 623 diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..37740033a2a9b153a3e29664de37c47a8e602831 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,158 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 37.00083333333333, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8068, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2996, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8028, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.3976, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 5.8849, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 5.515, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 5.2445, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.0564, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 4.9239, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.7766, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 4.6253, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 4.434, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 4.2692, + "step": 600 + }, + { + "epoch": 24.0, + "learning_rate": 0.00030555555555555555, + "loss": 4.1342, + "step": 650 + }, + { + "epoch": 25.02, + "learning_rate": 0.0002777777777777778, + "loss": 4.0185, + "step": 700 + }, + { + "epoch": 27.02, + "learning_rate": 0.00025, + "loss": 3.9197, + "step": 750 + }, + { + "epoch": 29.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.829, + "step": 800 + }, + { + "epoch": 31.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.7522, + "step": 850 + }, + { + "epoch": 33.01, + "learning_rate": 0.00016666666666666666, + "loss": 3.6848, + "step": 900 + }, + { + "epoch": 35.0, + "learning_rate": 0.0001388888888888889, + "loss": 3.6253, + "step": 950 + }, + { + "epoch": 37.0, + "learning_rate": 0.00011111111111111109, + "loss": 3.5729, + "step": 1000 + }, + { + "epoch": 37.0, + "eval_loss": 4.759269714355469, + "eval_runtime": 1.889, + "eval_samples_per_second": 50.291, + "eval_steps_per_second": 3.176, + "step": 1000 + }, + { + "epoch": 37.0, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 4.759269714355469, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 116.66069914897214, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 1.889, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 50.291, + "step": 1000 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.67737061261312e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f68d66eaa09f7732c3d230d37ae591e8f32f4f8 --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27836d7e56d49d56f7b1cb7eedcf9f3729d59e5c4fa7930d4fee7671376c78c4 +size 3183 diff --git a/checkpoint-1100/config.json b/checkpoint-1100/config.json new file mode 100644 index 0000000000000000000000000000000000000000..07a30fecf1535bbf080f023f13d2b580daee85ce --- /dev/null +++ b/checkpoint-1100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 2, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-1100/optimizer.pt b/checkpoint-1100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..374d442c3fc30e7e321f6a2637b7d9eedeccbbea --- /dev/null +++ b/checkpoint-1100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba0af65c883c6aaa3e4b06804fb18ae92f2aaa651c9f672a68df20227772b87 +size 995604017 diff --git a/checkpoint-1100/pytorch_model.bin b/checkpoint-1100/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..26a286dbdbcd239bcc1c53d8f873ee632a10b820 --- /dev/null +++ b/checkpoint-1100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9acb1c3a8388c4f2c965d5cf15adce977e4284e41b9b1b818141a5226d19553 +size 510396521 diff --git a/checkpoint-1100/rng_state.pth b/checkpoint-1100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dca1cde6f24e91626d22e10b5398f13f24945fbe --- /dev/null +++ b/checkpoint-1100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:498857e92d90f74aad11a7f96d470dfddc5b2d682e686a5f62470ff63ee7fd2b +size 14567 diff --git a/checkpoint-1100/scaler.pt b/checkpoint-1100/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..37bf049fbd5fd721203bf0238edc8ff67dbd8f94 --- /dev/null +++ b/checkpoint-1100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb16c30b686aa43e110b0d33f9d46bf3127b7124542ca8dc34831233d4675a0 +size 559 diff --git a/checkpoint-1100/scheduler.pt b/checkpoint-1100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c158169ec0bb09620952d544fd4b4edea0cc9cf4 --- /dev/null +++ b/checkpoint-1100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1b7713e4bb40428f29080b7d08d4a52f779ac863737861e4724292b2cf6c59 +size 623 diff --git a/checkpoint-1100/special_tokens_map.json b/checkpoint-1100/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1100/tokenizer_config.json b/checkpoint-1100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1100/trainer_state.json b/checkpoint-1100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9dc19c56f5f118829c2377102b0768e1a4490cad --- /dev/null +++ b/checkpoint-1100/trainer_state.json @@ -0,0 +1,170 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 40.016666666666666, + "global_step": 1100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8068, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2996, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8028, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.3976, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 5.8849, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 5.515, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 5.2445, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.0564, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 4.9239, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.7766, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 4.6253, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 4.434, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 4.2692, + "step": 600 + }, + { + "epoch": 24.0, + "learning_rate": 0.00030555555555555555, + "loss": 4.1342, + "step": 650 + }, + { + "epoch": 25.02, + "learning_rate": 0.0002777777777777778, + "loss": 4.0185, + "step": 700 + }, + { + "epoch": 27.02, + "learning_rate": 0.00025, + "loss": 3.9197, + "step": 750 + }, + { + "epoch": 29.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.829, + "step": 800 + }, + { + "epoch": 31.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.7522, + "step": 850 + }, + { + "epoch": 33.01, + "learning_rate": 0.00016666666666666666, + "loss": 3.6848, + "step": 900 + }, + { + "epoch": 35.0, + "learning_rate": 0.0001388888888888889, + "loss": 3.6253, + "step": 950 + }, + { + "epoch": 37.0, + "learning_rate": 0.00011111111111111109, + "loss": 3.5729, + "step": 1000 + }, + { + "epoch": 37.0, + "eval_loss": 4.759269714355469, + "eval_runtime": 1.889, + "eval_samples_per_second": 50.291, + "eval_steps_per_second": 3.176, + "step": 1000 + }, + { + "epoch": 37.0, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 4.759269714355469, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 116.66069914897214, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 1.889, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 50.291, + "step": 1000 + }, + { + "epoch": 38.02, + "learning_rate": 8.333333333333333e-05, + "loss": 3.5259, + "step": 1050 + }, + { + "epoch": 40.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 3.4892, + "step": 1100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.9450747510784e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1100/training_args.bin b/checkpoint-1100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f68d66eaa09f7732c3d230d37ae591e8f32f4f8 --- /dev/null +++ b/checkpoint-1100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27836d7e56d49d56f7b1cb7eedcf9f3729d59e5c4fa7930d4fee7671376c78c4 +size 3183 diff --git a/checkpoint-1200/config.json b/checkpoint-1200/config.json new file mode 100644 index 0000000000000000000000000000000000000000..07a30fecf1535bbf080f023f13d2b580daee85ce --- /dev/null +++ b/checkpoint-1200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 2, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-1200/optimizer.pt b/checkpoint-1200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..11f33c70ac1eeb011bfdbb1a4409bc1ff9ed8bd4 --- /dev/null +++ b/checkpoint-1200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:761cb3f5637d38c3469e19fd4f0c6393862467c20c6d8971183be0ec1a80b0eb +size 995604017 diff --git a/checkpoint-1200/pytorch_model.bin b/checkpoint-1200/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..f25dd905bcc86435e1bc14ccadadeae3729379ee --- /dev/null +++ b/checkpoint-1200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb3ebd8b69678d14514b0ee8d7b72d1ed3060fd70c512258919e84411d5db15f +size 510396521 diff --git a/checkpoint-1200/rng_state.pth b/checkpoint-1200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fa3e1a211ff8fb8d9d1b382824bd42d2ebbedf3e --- /dev/null +++ b/checkpoint-1200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42a69ce020a921bfe5fd65372798b26955f805a21457fbddfc024ddf26a39c07 +size 14567 diff --git a/checkpoint-1200/scaler.pt b/checkpoint-1200/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8953dddccbefc4703c09dcda27d83c15add2bade --- /dev/null +++ b/checkpoint-1200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c7277eaca0850ae3e9b6790b3d002d820169cce0671185e672c28c8ae8e056 +size 559 diff --git a/checkpoint-1200/scheduler.pt b/checkpoint-1200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..310d39c17fc616a9c83286ed00f0f4cefba9f5df --- /dev/null +++ b/checkpoint-1200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:935a8fb09a6e9698d9894853b05e181b3f56098deaaecddde08e55f06bf000c4 +size 623 diff --git a/checkpoint-1200/special_tokens_map.json b/checkpoint-1200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1200/tokenizer_config.json b/checkpoint-1200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1200/trainer_state.json b/checkpoint-1200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4decd4b45621cef0a21bfcff894c0e26a57c6819 --- /dev/null +++ b/checkpoint-1200/trainer_state.json @@ -0,0 +1,182 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 44.01, + "global_step": 1200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8068, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2996, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8028, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.3976, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 5.8849, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 5.515, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 5.2445, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.0564, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 4.9239, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.7766, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 4.6253, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 4.434, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 4.2692, + "step": 600 + }, + { + "epoch": 24.0, + "learning_rate": 0.00030555555555555555, + "loss": 4.1342, + "step": 650 + }, + { + "epoch": 25.02, + "learning_rate": 0.0002777777777777778, + "loss": 4.0185, + "step": 700 + }, + { + "epoch": 27.02, + "learning_rate": 0.00025, + "loss": 3.9197, + "step": 750 + }, + { + "epoch": 29.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.829, + "step": 800 + }, + { + "epoch": 31.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.7522, + "step": 850 + }, + { + "epoch": 33.01, + "learning_rate": 0.00016666666666666666, + "loss": 3.6848, + "step": 900 + }, + { + "epoch": 35.0, + "learning_rate": 0.0001388888888888889, + "loss": 3.6253, + "step": 950 + }, + { + "epoch": 37.0, + "learning_rate": 0.00011111111111111109, + "loss": 3.5729, + "step": 1000 + }, + { + "epoch": 37.0, + "eval_loss": 4.759269714355469, + "eval_runtime": 1.889, + "eval_samples_per_second": 50.291, + "eval_steps_per_second": 3.176, + "step": 1000 + }, + { + "epoch": 37.0, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 4.759269714355469, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 116.66069914897214, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 1.889, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 50.291, + "step": 1000 + }, + { + "epoch": 38.02, + "learning_rate": 8.333333333333333e-05, + "loss": 3.5259, + "step": 1050 + }, + { + "epoch": 40.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 3.4892, + "step": 1100 + }, + { + "epoch": 42.01, + "learning_rate": 2.7777777777777772e-05, + "loss": 3.4568, + "step": 1150 + }, + { + "epoch": 44.01, + "learning_rate": 0.0, + "loss": 3.4341, + "step": 1200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 3.21282592210944e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1200/training_args.bin b/checkpoint-1200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f68d66eaa09f7732c3d230d37ae591e8f32f4f8 --- /dev/null +++ b/checkpoint-1200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27836d7e56d49d56f7b1cb7eedcf9f3729d59e5c4fa7930d4fee7671376c78c4 +size 3183 diff --git a/checkpoint-200/config.json b/checkpoint-200/config.json new file mode 100644 index 0000000000000000000000000000000000000000..07a30fecf1535bbf080f023f13d2b580daee85ce --- /dev/null +++ b/checkpoint-200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 2, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f86472c48df48778f744f44ee587b8a5e11a60dc --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d12707a61203036dcad4c95517470a347cb40afbc3f71bc5483ec8a6354b2e30 +size 995603825 diff --git a/checkpoint-200/pytorch_model.bin b/checkpoint-200/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..2a51fcfe35d8048e06d94d1e18a1263f27835af2 --- /dev/null +++ b/checkpoint-200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a54a3f55da1056fad5eb5e315e3e2835ba6419ddbe8bd60eb6dff3f4a27fcb35 +size 510396521 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4e7e1f16f1ce5c1b54ead6d110843b6193d72a68 --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad5ff39e84f457435fb4e15e121a005240de062e41f982ef377813dd91743665 +size 14567 diff --git a/checkpoint-200/scaler.pt b/checkpoint-200/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..365b52ebf376498237a843f6d7332e5a49b14902 --- /dev/null +++ b/checkpoint-200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb6982c29cd162f49aeb531674acf574eccd46a8f556bec596040d7c3b95200a +size 559 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..36cb8da739c80a63971a62f06f781c40ac0fceb2 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a992625ada6d884e508ff9392d16738b4a4163147f8fcbf9f46be82ecae9888 +size 623 diff --git a/checkpoint-200/special_tokens_map.json b/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-200/tokenizer_config.json b/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c285323bbd782ecc63cb62e0812793b74e9860a8 --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,46 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.009166666666666, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8068, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2996, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8028, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.3976, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 5.8849, + "step": 200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 5.3545530949632e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f68d66eaa09f7732c3d230d37ae591e8f32f4f8 --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27836d7e56d49d56f7b1cb7eedcf9f3729d59e5c4fa7930d4fee7671376c78c4 +size 3183 diff --git a/checkpoint-300/config.json b/checkpoint-300/config.json new file mode 100644 index 0000000000000000000000000000000000000000..07a30fecf1535bbf080f023f13d2b580daee85ce --- /dev/null +++ b/checkpoint-300/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 2, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9c9dfad94ca42920805310f221ef3086e63c35b --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c6f09081a6a870079c308d1a5094c8e330e4da32b1eaa39cfa042cdc5c59983 +size 995604017 diff --git a/checkpoint-300/pytorch_model.bin b/checkpoint-300/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..1cf76b8649a7b63321a9e4b8e2785304867f529f --- /dev/null +++ b/checkpoint-300/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63620e0f0e36dede30ee4fe2789069d28cc76fb80073656750dab902c277949c +size 510396521 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4c41118cb38d8aa256728ac94668b8e472464a24 --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d841feedb70dc2e76c60d1492b03d8e88af735b02920240bd545cb33137ec85 +size 14567 diff --git a/checkpoint-300/scaler.pt b/checkpoint-300/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..01066cf4761ea9d2f7962f5181762f7b08690b79 --- /dev/null +++ b/checkpoint-300/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0967b9f865f16344c55f5ccc3cf7d6e8e97ca61dda304e931ca6bad130f48dd1 +size 559 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..681987f21e79cd4685aac32a5e6b74341e25d936 --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1db899b266916f792a0898ceb27a87eaf76647f10c29cc0c13ce22f12a12efd +size 623 diff --git a/checkpoint-300/special_tokens_map.json b/checkpoint-300/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-300/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-300/tokenizer_config.json b/checkpoint-300/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-300/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..33c1302f9e66913148cddb9a10e881549efbe1e8 --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,58 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 11.0025, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8068, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2996, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8028, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.3976, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 5.8849, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 5.515, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 5.2445, + "step": 300 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 8.0320648052736e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f68d66eaa09f7732c3d230d37ae591e8f32f4f8 --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27836d7e56d49d56f7b1cb7eedcf9f3729d59e5c4fa7930d4fee7671376c78c4 +size 3183 diff --git a/checkpoint-400/config.json b/checkpoint-400/config.json new file mode 100644 index 0000000000000000000000000000000000000000..07a30fecf1535bbf080f023f13d2b580daee85ce --- /dev/null +++ b/checkpoint-400/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 2, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2ce68e200128d9ef8d2d0dd47e5fb3c74e45f46 --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0247162ec36427eb22c2a6b51b512560286f8397c747ecd425f0d0c561b89e0 +size 995604017 diff --git a/checkpoint-400/pytorch_model.bin b/checkpoint-400/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6d9852da371aca554d5164a5fc63c59a323bfc54 --- /dev/null +++ b/checkpoint-400/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff8a82a842c6133e40efc5f212e0983733bab7591c5b8ae70527296e6be143ff +size 510396521 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3238f06a9f9fe8e48f6607fe63d542ec19d2211e --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2ff93a8ad4159dee45ce4a9f87c019c2df2d038e41f3b18b4c5efd2f3b8c3e8 +size 14567 diff --git a/checkpoint-400/scaler.pt b/checkpoint-400/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c7aef4199e98d81152810b661dbaffc01963383 --- /dev/null +++ b/checkpoint-400/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476e510c8ea7edbd2b51d1e76a4e037820a5639381c0d8b5d32dafa492795a1e +size 559 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..967673cfc91836d239beb5a7ede06992232ba309 --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db087d678047b5c346bbb8511936612c1fdf223c6fd70321e97369bc31ed76a8 +size 623 diff --git a/checkpoint-400/special_tokens_map.json b/checkpoint-400/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-400/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-400/tokenizer_config.json b/checkpoint-400/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-400/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1b921c5c6a555e078d5fb48db960d76d0b0f12e3 --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,70 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 14.018333333333333, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8068, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2996, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8028, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.3976, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 5.8849, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 5.515, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 5.2445, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.0564, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 4.9239, + "step": 400 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.07091061899264e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f68d66eaa09f7732c3d230d37ae591e8f32f4f8 --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27836d7e56d49d56f7b1cb7eedcf9f3729d59e5c4fa7930d4fee7671376c78c4 +size 3183 diff --git a/checkpoint-500/config.json b/checkpoint-500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..07a30fecf1535bbf080f023f13d2b580daee85ce --- /dev/null +++ b/checkpoint-500/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 2, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7efb2d32e682967bb4654f02c9cae7210a1b0718 --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54af57dcbd92db34de37a9f62a2d7a6a513685a2f7323daaf18c46ca8bece89e +size 995604017 diff --git a/checkpoint-500/pytorch_model.bin b/checkpoint-500/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..d4fa65a17999a7b5d33c7d9c3f065a4027e35b7c --- /dev/null +++ b/checkpoint-500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f9a46df7b062cf4ffc3326fdd24ce779985d5e9723a0d1cd199e347b340a76e +size 510396521 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b77cffc0b3daa1612b33a61897bf4f9e69b0ab14 --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d45083cfdcb5cfbbf5b5231194ad4eb83219d3d55c622c2d3d066faef3765b88 +size 14567 diff --git a/checkpoint-500/scaler.pt b/checkpoint-500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8b96c9c2837f2c95b1d07d4fc3f245f9ad1ef62 --- /dev/null +++ b/checkpoint-500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa4c7be44c959599b8b43bb9bc3371e9e4e5bbc5758b3ab5afcccfda3e72e67 +size 559 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..77b958181a5b47b022b96b38a6207f274a1b6604 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026fae4a90d56c24de94b10dfa7a75b6ba4e43bd5c1a3fdb2d77356b81cd6f8a +size 623 diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7121d2ffc820ac6c2f59822f25537664d4dfdcb5 --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,82 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 18.011666666666667, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8068, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2996, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8028, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.3976, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 5.8849, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 5.515, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 5.2445, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.0564, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 4.9239, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.7766, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 4.6253, + "step": 500 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.33866179002368e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f68d66eaa09f7732c3d230d37ae591e8f32f4f8 --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27836d7e56d49d56f7b1cb7eedcf9f3729d59e5c4fa7930d4fee7671376c78c4 +size 3183 diff --git a/checkpoint-600/config.json b/checkpoint-600/config.json new file mode 100644 index 0000000000000000000000000000000000000000..07a30fecf1535bbf080f023f13d2b580daee85ce --- /dev/null +++ b/checkpoint-600/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 2, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb2e8ef58bd7f1a0abf70a6cdc86d86bd88f2b07 --- /dev/null +++ b/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3a9e833e2ce4feca30e87e554f4873c26da62e3212193f4b2fad1d237c125bc +size 995604017 diff --git a/checkpoint-600/pytorch_model.bin b/checkpoint-600/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..429478e9e0203d554c5490986de7072db647e255 --- /dev/null +++ b/checkpoint-600/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e4c1c243b11224e48fc41b11cf33ed00f5b7f8d9fcb7fabed02a1ca468728e6 +size 510396521 diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5a721cf048a128472359bd5ca86e48b61a8b8291 --- /dev/null +++ b/checkpoint-600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6babf037b3b97e65ccdbd1baa4423573fc566805d5f878ea6a96ae8892d57584 +size 14567 diff --git a/checkpoint-600/scaler.pt b/checkpoint-600/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f95b3e36da01561ec333a83ee8419ad225633e06 --- /dev/null +++ b/checkpoint-600/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8415b86bbce347c0df306b84a695add049c2a3b2d0b6f4dda3bf036d341150 +size 559 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..62e5359badd619e7dad2c51d98fa8043d9948f0b --- /dev/null +++ b/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700940432b1c2117248896e2ce5a58d93c051d92ea97707f74d76bf1ef24deee +size 623 diff --git a/checkpoint-600/special_tokens_map.json b/checkpoint-600/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-600/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-600/tokenizer_config.json b/checkpoint-600/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-600/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9f5a4bfcf181d3f24d8e14ae5eacf4a59ac04d58 --- /dev/null +++ b/checkpoint-600/trainer_state.json @@ -0,0 +1,94 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 22.005, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8068, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2996, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8028, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.3976, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 5.8849, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 5.515, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 5.2445, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.0564, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 4.9239, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.7766, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 4.6253, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 4.434, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 4.2692, + "step": 600 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.60641296105472e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f68d66eaa09f7732c3d230d37ae591e8f32f4f8 --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27836d7e56d49d56f7b1cb7eedcf9f3729d59e5c4fa7930d4fee7671376c78c4 +size 3183 diff --git a/checkpoint-700/config.json b/checkpoint-700/config.json new file mode 100644 index 0000000000000000000000000000000000000000..07a30fecf1535bbf080f023f13d2b580daee85ce --- /dev/null +++ b/checkpoint-700/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 2, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..11c23bc43a5daf76c52d4925c4f4bdc12e115db8 --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48c16c4480ca9620ddd913576a24e498f58b9ab4763e51113f5975eb15c1ce0b +size 995604017 diff --git a/checkpoint-700/pytorch_model.bin b/checkpoint-700/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..b0525c962d54627cd94156c3485b528b0d5e3303 --- /dev/null +++ b/checkpoint-700/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ab5200dd3a3aa1887111c12ef7f52cd6db6569fe4378b4ec7ceee0e53e2190d +size 510396521 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3415acf58e468f722beb9ed1db6901bdd8eb1fa0 --- /dev/null +++ b/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f6f193570e5d315bab42163a9978ef98c507fc375d16bd681f675a646f4f304 +size 14567 diff --git a/checkpoint-700/scaler.pt b/checkpoint-700/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..12f2b8ec834e54a2bd7cfdd0e07b0c6e125b6490 --- /dev/null +++ b/checkpoint-700/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb213daf5cce18a5f92167ca14da9df084d907f2b9796efc4666630f312b58c +size 559 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..42408b6253265af34ae78746144fbba9316e0d7e --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d4910fd408e002ebeff50d62bfb043dcae5ef658777d0c3ee4a3bbb515ec15 +size 623 diff --git a/checkpoint-700/special_tokens_map.json b/checkpoint-700/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-700/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-700/tokenizer_config.json b/checkpoint-700/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-700/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..955cca298d65e6e6ea63aa945a43e0e6d54c72c1 --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,106 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 25.020833333333332, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8068, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2996, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8028, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.3976, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 5.8849, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 5.515, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 5.2445, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.0564, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 4.9239, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.7766, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 4.6253, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 4.434, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 4.2692, + "step": 600 + }, + { + "epoch": 24.0, + "learning_rate": 0.00030555555555555555, + "loss": 4.1342, + "step": 650 + }, + { + "epoch": 25.02, + "learning_rate": 0.0002777777777777778, + "loss": 4.0185, + "step": 700 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.87411709952e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f68d66eaa09f7732c3d230d37ae591e8f32f4f8 --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27836d7e56d49d56f7b1cb7eedcf9f3729d59e5c4fa7930d4fee7671376c78c4 +size 3183 diff --git a/checkpoint-800/config.json b/checkpoint-800/config.json new file mode 100644 index 0000000000000000000000000000000000000000..07a30fecf1535bbf080f023f13d2b580daee85ce --- /dev/null +++ b/checkpoint-800/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 2, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..770053267534309e36f4b6c2346df92729230678 --- /dev/null +++ b/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b36cce55091e14f4b56c29463c3068789e5732777886410fa3a569ef06a66c1 +size 995604017 diff --git a/checkpoint-800/pytorch_model.bin b/checkpoint-800/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef62cec882b85e1882b261bdea00674d043ddc94 --- /dev/null +++ b/checkpoint-800/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:001494b57e20bb37ef0a4cb8c2eb44c5cafef9d026b6cc192a4e681ae7f3e99a +size 510396521 diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..79af6d42398111f18e559a29f9b4ed53fcdeb22b --- /dev/null +++ b/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ffdd503bfa20dba8c55556f9cfce46265e07e9364944ce32a84813f425448db +size 14567 diff --git a/checkpoint-800/scaler.pt b/checkpoint-800/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3c73372264156b02df8dada2192ee3c96dd5fc4 --- /dev/null +++ b/checkpoint-800/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2074cdcefbaa0a39f736d6b0f7bf018c350d49e85648bc8accc4f756ad816e +size 559 diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..75713dbce3771306ca00343ecc497c4f19a01d03 --- /dev/null +++ b/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27fb255c84833fb6ab5d93679cb236a569de9a1c4f805f72a2f60a2bc7c7499 +size 623 diff --git a/checkpoint-800/special_tokens_map.json b/checkpoint-800/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-800/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-800/tokenizer_config.json b/checkpoint-800/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-800/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a9bfcac809c76f8da53491c73e3ef35183fea345 --- /dev/null +++ b/checkpoint-800/trainer_state.json @@ -0,0 +1,118 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 29.014166666666668, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8068, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2996, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8028, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.3976, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 5.8849, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 5.515, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 5.2445, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.0564, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 4.9239, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.7766, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 4.6253, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 4.434, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 4.2692, + "step": 600 + }, + { + "epoch": 24.0, + "learning_rate": 0.00030555555555555555, + "loss": 4.1342, + "step": 650 + }, + { + "epoch": 25.02, + "learning_rate": 0.0002777777777777778, + "loss": 4.0185, + "step": 700 + }, + { + "epoch": 27.02, + "learning_rate": 0.00025, + "loss": 3.9197, + "step": 750 + }, + { + "epoch": 29.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.829, + "step": 800 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.14186827055104e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f68d66eaa09f7732c3d230d37ae591e8f32f4f8 --- /dev/null +++ b/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27836d7e56d49d56f7b1cb7eedcf9f3729d59e5c4fa7930d4fee7671376c78c4 +size 3183 diff --git a/checkpoint-900/config.json b/checkpoint-900/config.json new file mode 100644 index 0000000000000000000000000000000000000000..07a30fecf1535bbf080f023f13d2b580daee85ce --- /dev/null +++ b/checkpoint-900/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 2, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1da6ee88c78b4eb633391f11ef4ec79b4246e6fc --- /dev/null +++ b/checkpoint-900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f15385b52be0e3733507b01750aba83cbb04a1a7f053d05e0b521d861f451ea4 +size 995604017 diff --git a/checkpoint-900/pytorch_model.bin b/checkpoint-900/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..fc07b227cec86e2099c92a4f8f7c73d0847fa3df --- /dev/null +++ b/checkpoint-900/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82a7d4a69e69b51d9ef0dd508b2ad2a24b31ce650a9de071ea6f508bce1a3b53 +size 510396521 diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..eb8820c2fe81dc8c803b0d1d99ead82728c3d7d3 --- /dev/null +++ b/checkpoint-900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b1684f89bea913d480df2923bd8c69dcc3ebbeadfbbcc57277e93fd96336295 +size 14567 diff --git a/checkpoint-900/scaler.pt b/checkpoint-900/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..be54cb13c777bc6feccb478ff218e7e21fad482a --- /dev/null +++ b/checkpoint-900/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8695f57df923e22b943b0b0f2b9cc7007008e80b53ccee275b3a35963fe67e9 +size 559 diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..539d7c83ea252818dc9cbffac08cf340bb05a454 --- /dev/null +++ b/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cb09e5db72772a15094286e93cbb61d745d9b63863703cf53da0bcb9827821 +size 623 diff --git a/checkpoint-900/special_tokens_map.json b/checkpoint-900/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-900/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-900/tokenizer_config.json b/checkpoint-900/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-900/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0137937175865cc05fee118733dc0a58e634df0c --- /dev/null +++ b/checkpoint-900/trainer_state.json @@ -0,0 +1,130 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 33.0075, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8068, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.2996, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8028, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.3976, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 5.8849, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 5.515, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 5.2445, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 5.0564, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 4.9239, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 4.7766, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 4.6253, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 4.434, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 4.2692, + "step": 600 + }, + { + "epoch": 24.0, + "learning_rate": 0.00030555555555555555, + "loss": 4.1342, + "step": 650 + }, + { + "epoch": 25.02, + "learning_rate": 0.0002777777777777778, + "loss": 4.0185, + "step": 700 + }, + { + "epoch": 27.02, + "learning_rate": 0.00025, + "loss": 3.9197, + "step": 750 + }, + { + "epoch": 29.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.829, + "step": 800 + }, + { + "epoch": 31.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.7522, + "step": 850 + }, + { + "epoch": 33.01, + "learning_rate": 0.00016666666666666666, + "loss": 3.6848, + "step": 900 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.40961944158208e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f68d66eaa09f7732c3d230d37ae591e8f32f4f8 --- /dev/null +++ b/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27836d7e56d49d56f7b1cb7eedcf9f3729d59e5c4fa7930d4fee7671376c78c4 +size 3183 diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..07a30fecf1535bbf080f023f13d2b580daee85ce --- /dev/null +++ b/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 1, + "embd_pdrop": 0.1, + "eos_token_id": 2, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/metrics.json b/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..d6486e28c1fdb991d80362f010652eb4ea6f6ac6 --- /dev/null +++ b/metrics.json @@ -0,0 +1,2505 @@ +{"num_parameters": 124439808, "trainable_parameters": 124439808, "step": 0} +{"train_info/time_between_train_steps": 3.508838176727295, "step": 0} +{"info/global_step": 1, "train_info/time_within_train_step": 28.55928373336792, "step": 1} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 19761.71484375, "train_info/memory_reserved": 22624.0, "train_info/memory_max_reserved": 22624.0, "_timestamp": 1733954301, "_runtime": 36}, "step": 1} +{"logs": {"train/loss": 10.8068, "train/learning_rate": 4.9999999999999996e-06, "train/epoch": 0.0, "_timestamp": 1733954301, "_runtime": 36}, "step": 1} +{"train_info/time_between_train_steps": 0.009075403213500977, "step": 1} +{"info/global_step": 2, "train_info/time_within_train_step": 27.99460244178772, "step": 2} +{"train_info/time_between_train_steps": 0.006182432174682617, "step": 2} +{"info/global_step": 3, "train_info/time_within_train_step": 27.89779782295227, "step": 3} +{"train_info/time_between_train_steps": 0.011397361755371094, "step": 3} +{"info/global_step": 4, "train_info/time_within_train_step": 28.204126834869385, "step": 4} +{"train_info/time_between_train_steps": 0.006030559539794922, "step": 4} +{"info/global_step": 5, "train_info/time_within_train_step": 27.839965343475342, "step": 5} +{"train_info/time_between_train_steps": 0.006130218505859375, "step": 5} +{"info/global_step": 6, "train_info/time_within_train_step": 28.065385341644287, "step": 6} +{"train_info/time_between_train_steps": 0.006512641906738281, "step": 6} +{"info/global_step": 7, "train_info/time_within_train_step": 27.946561336517334, "step": 7} +{"train_info/time_between_train_steps": 0.006088972091674805, "step": 7} +{"info/global_step": 8, "train_info/time_within_train_step": 27.875859260559082, "step": 8} +{"train_info/time_between_train_steps": 0.006495475769042969, "step": 8} +{"info/global_step": 9, "train_info/time_within_train_step": 27.84752345085144, "step": 9} +{"train_info/time_between_train_steps": 0.0056035518646240234, "step": 9} +{"info/global_step": 10, "train_info/time_within_train_step": 27.836111068725586, "step": 10} +{"train_info/time_between_train_steps": 0.005600929260253906, "step": 10} +{"info/global_step": 11, "train_info/time_within_train_step": 28.005741596221924, "step": 11} +{"train_info/time_between_train_steps": 0.006129741668701172, "step": 11} +{"info/global_step": 12, "train_info/time_within_train_step": 27.895675897598267, "step": 12} +{"train_info/time_between_train_steps": 0.0063686370849609375, "step": 12} +{"info/global_step": 13, "train_info/time_within_train_step": 27.885700941085815, "step": 13} +{"train_info/time_between_train_steps": 0.005423545837402344, "step": 13} +{"info/global_step": 14, "train_info/time_within_train_step": 28.0073881149292, "step": 14} +{"train_info/time_between_train_steps": 0.005730152130126953, "step": 14} +{"info/global_step": 15, "train_info/time_within_train_step": 27.91494607925415, "step": 15} +{"train_info/time_between_train_steps": 0.013706445693969727, "step": 15} +{"info/global_step": 16, "train_info/time_within_train_step": 28.011351346969604, "step": 16} +{"train_info/time_between_train_steps": 0.005586147308349609, "step": 16} +{"info/global_step": 17, "train_info/time_within_train_step": 27.98137331008911, "step": 17} +{"train_info/time_between_train_steps": 0.010582923889160156, "step": 17} +{"info/global_step": 18, "train_info/time_within_train_step": 27.937659978866577, "step": 18} +{"train_info/time_between_train_steps": 0.0056171417236328125, "step": 18} +{"info/global_step": 19, "train_info/time_within_train_step": 27.9910786151886, "step": 19} +{"train_info/time_between_train_steps": 0.005629062652587891, "step": 19} +{"info/global_step": 20, "train_info/time_within_train_step": 27.971011877059937, "step": 20} +{"train_info/time_between_train_steps": 0.0054814815521240234, "step": 20} +{"info/global_step": 21, "train_info/time_within_train_step": 27.995178937911987, "step": 21} +{"train_info/time_between_train_steps": 0.011135578155517578, "step": 21} +{"info/global_step": 22, "train_info/time_within_train_step": 27.938483953475952, "step": 22} +{"train_info/time_between_train_steps": 0.0054454803466796875, "step": 22} +{"info/global_step": 23, "train_info/time_within_train_step": 27.89670491218567, "step": 23} +{"train_info/time_between_train_steps": 0.005716562271118164, "step": 23} +{"info/global_step": 24, "train_info/time_within_train_step": 27.98666763305664, "step": 24} +{"train_info/time_between_train_steps": 0.005606174468994141, "step": 24} +{"info/global_step": 25, "train_info/time_within_train_step": 27.86735224723816, "step": 25} +{"train_info/time_between_train_steps": 0.011655092239379883, "step": 25} +{"info/global_step": 26, "train_info/time_within_train_step": 27.86562991142273, "step": 26} +{"train_info/time_between_train_steps": 0.0058634281158447266, "step": 26} +{"info/global_step": 27, "train_info/time_within_train_step": 27.91300916671753, "step": 27} +{"train_info/time_between_train_steps": 0.012016057968139648, "step": 27} +{"train_info/time_between_train_steps": 3.8537540435791016, "step": 27} +{"info/global_step": 28, "train_info/time_within_train_step": 27.962403535842896, "step": 28} +{"train_info/time_between_train_steps": 0.005347490310668945, "step": 28} +{"info/global_step": 29, "train_info/time_within_train_step": 28.01380205154419, "step": 29} +{"train_info/time_between_train_steps": 0.005420207977294922, "step": 29} +{"info/global_step": 30, "train_info/time_within_train_step": 27.88935112953186, "step": 30} +{"train_info/time_between_train_steps": 0.012287139892578125, "step": 30} +{"info/global_step": 31, "train_info/time_within_train_step": 28.67003631591797, "step": 31} +{"train_info/time_between_train_steps": 0.006051778793334961, "step": 31} +{"info/global_step": 32, "train_info/time_within_train_step": 28.10484504699707, "step": 32} +{"train_info/time_between_train_steps": 0.0103759765625, "step": 32} +{"info/global_step": 33, "train_info/time_within_train_step": 27.957700490951538, "step": 33} +{"train_info/time_between_train_steps": 0.005827188491821289, "step": 33} +{"info/global_step": 34, "train_info/time_within_train_step": 27.98202610015869, "step": 34} +{"train_info/time_between_train_steps": 0.011621236801147461, "step": 34} +{"info/global_step": 35, "train_info/time_within_train_step": 27.9176242351532, "step": 35} +{"train_info/time_between_train_steps": 0.005342721939086914, "step": 35} +{"info/global_step": 36, "train_info/time_within_train_step": 27.897169589996338, "step": 36} +{"train_info/time_between_train_steps": 0.010747671127319336, "step": 36} +{"info/global_step": 37, "train_info/time_within_train_step": 27.84573459625244, "step": 37} +{"train_info/time_between_train_steps": 0.015262365341186523, "step": 37} +{"info/global_step": 38, "train_info/time_within_train_step": 27.86620306968689, "step": 38} +{"train_info/time_between_train_steps": 0.005425214767456055, "step": 38} +{"info/global_step": 39, "train_info/time_within_train_step": 27.95534348487854, "step": 39} +{"train_info/time_between_train_steps": 0.005811452865600586, "step": 39} +{"info/global_step": 40, "train_info/time_within_train_step": 27.94045877456665, "step": 40} +{"train_info/time_between_train_steps": 0.005899190902709961, "step": 40} +{"info/global_step": 41, "train_info/time_within_train_step": 27.88602089881897, "step": 41} +{"train_info/time_between_train_steps": 0.005665302276611328, "step": 41} +{"info/global_step": 42, "train_info/time_within_train_step": 28.032469272613525, "step": 42} +{"train_info/time_between_train_steps": 0.013565301895141602, "step": 42} +{"info/global_step": 43, "train_info/time_within_train_step": 28.03098487854004, "step": 43} +{"train_info/time_between_train_steps": 0.014729976654052734, "step": 43} +{"info/global_step": 44, "train_info/time_within_train_step": 27.839093446731567, "step": 44} +{"train_info/time_between_train_steps": 0.005857706069946289, "step": 44} +{"info/global_step": 45, "train_info/time_within_train_step": 28.034380674362183, "step": 45} +{"train_info/time_between_train_steps": 0.005865812301635742, "step": 45} +{"info/global_step": 46, "train_info/time_within_train_step": 27.9787335395813, "step": 46} +{"train_info/time_between_train_steps": 0.005568265914916992, "step": 46} +{"info/global_step": 47, "train_info/time_within_train_step": 27.923033952713013, "step": 47} +{"train_info/time_between_train_steps": 0.012319564819335938, "step": 47} +{"info/global_step": 48, "train_info/time_within_train_step": 27.874696969985962, "step": 48} +{"train_info/time_between_train_steps": 0.005723714828491211, "step": 48} +{"info/global_step": 49, "train_info/time_within_train_step": 27.947636127471924, "step": 49} +{"train_info/time_between_train_steps": 0.01150655746459961, "step": 49} +{"info/global_step": 50, "train_info/time_within_train_step": 27.899695873260498, "step": 50} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733955675, "_runtime": 1410}, "step": 50} +{"logs": {"train/loss": 8.2996, "train/learning_rate": 0.00025, "train/epoch": 1.02, "_timestamp": 1733955675, "_runtime": 1410}, "step": 50} +{"train_info/time_between_train_steps": 0.008099794387817383, "step": 50} +{"info/global_step": 51, "train_info/time_within_train_step": 28.045909881591797, "step": 51} +{"train_info/time_between_train_steps": 0.011338472366333008, "step": 51} +{"info/global_step": 52, "train_info/time_within_train_step": 28.04581308364868, "step": 52} +{"train_info/time_between_train_steps": 0.00713801383972168, "step": 52} +{"info/global_step": 53, "train_info/time_within_train_step": 27.937244653701782, "step": 53} +{"train_info/time_between_train_steps": 0.0065610408782958984, "step": 53} +{"info/global_step": 54, "train_info/time_within_train_step": 28.057661294937134, "step": 54} +{"train_info/time_between_train_steps": 0.011970043182373047, "step": 54} +{"train_info/time_between_train_steps": 3.794530153274536, "step": 54} +{"info/global_step": 55, "train_info/time_within_train_step": 27.87522530555725, "step": 55} +{"train_info/time_between_train_steps": 0.012275218963623047, "step": 55} +{"info/global_step": 56, "train_info/time_within_train_step": 28.109099864959717, "step": 56} +{"train_info/time_between_train_steps": 0.005912065505981445, "step": 56} +{"info/global_step": 57, "train_info/time_within_train_step": 27.89018678665161, "step": 57} +{"train_info/time_between_train_steps": 0.005889415740966797, "step": 57} +{"info/global_step": 58, "train_info/time_within_train_step": 28.079800367355347, "step": 58} +{"train_info/time_between_train_steps": 0.0058176517486572266, "step": 58} +{"info/global_step": 59, "train_info/time_within_train_step": 27.876500844955444, "step": 59} +{"train_info/time_between_train_steps": 0.0058825016021728516, "step": 59} +{"info/global_step": 60, "train_info/time_within_train_step": 28.07819128036499, "step": 60} +{"train_info/time_between_train_steps": 0.005822896957397461, "step": 60} +{"info/global_step": 61, "train_info/time_within_train_step": 27.8620765209198, "step": 61} +{"train_info/time_between_train_steps": 0.011970281600952148, "step": 61} +{"info/global_step": 62, "train_info/time_within_train_step": 27.892886638641357, "step": 62} +{"train_info/time_between_train_steps": 0.011446237564086914, "step": 62} +{"info/global_step": 63, "train_info/time_within_train_step": 28.063427925109863, "step": 63} +{"train_info/time_between_train_steps": 0.014759540557861328, "step": 63} +{"info/global_step": 64, "train_info/time_within_train_step": 27.85015344619751, "step": 64} +{"train_info/time_between_train_steps": 0.005630016326904297, "step": 64} +{"info/global_step": 65, "train_info/time_within_train_step": 27.845190048217773, "step": 65} +{"train_info/time_between_train_steps": 0.014366865158081055, "step": 65} +{"info/global_step": 66, "train_info/time_within_train_step": 27.875890016555786, "step": 66} +{"train_info/time_between_train_steps": 0.0056018829345703125, "step": 66} +{"info/global_step": 67, "train_info/time_within_train_step": 27.864030599594116, "step": 67} +{"train_info/time_between_train_steps": 0.015779733657836914, "step": 67} +{"info/global_step": 68, "train_info/time_within_train_step": 27.903523445129395, "step": 68} +{"train_info/time_between_train_steps": 0.005719900131225586, "step": 68} +{"info/global_step": 69, "train_info/time_within_train_step": 27.91453742980957, "step": 69} +{"train_info/time_between_train_steps": 0.010756731033325195, "step": 69} +{"info/global_step": 70, "train_info/time_within_train_step": 27.96153235435486, "step": 70} +{"train_info/time_between_train_steps": 0.011237859725952148, "step": 70} +{"info/global_step": 71, "train_info/time_within_train_step": 27.798466444015503, "step": 71} +{"train_info/time_between_train_steps": 0.005590677261352539, "step": 71} +{"info/global_step": 72, "train_info/time_within_train_step": 27.886526584625244, "step": 72} +{"train_info/time_between_train_steps": 0.005584239959716797, "step": 72} +{"info/global_step": 73, "train_info/time_within_train_step": 27.954452753067017, "step": 73} +{"train_info/time_between_train_steps": 0.010817766189575195, "step": 73} +{"info/global_step": 74, "train_info/time_within_train_step": 28.066631317138672, "step": 74} +{"train_info/time_between_train_steps": 0.010599136352539062, "step": 74} +{"info/global_step": 75, "train_info/time_within_train_step": 28.08057689666748, "step": 75} +{"train_info/time_between_train_steps": 0.012719392776489258, "step": 75} +{"info/global_step": 76, "train_info/time_within_train_step": 28.144665479660034, "step": 76} +{"train_info/time_between_train_steps": 0.006592512130737305, "step": 76} +{"info/global_step": 77, "train_info/time_within_train_step": 27.8506019115448, "step": 77} +{"train_info/time_between_train_steps": 0.005613803863525391, "step": 77} +{"info/global_step": 78, "train_info/time_within_train_step": 28.058496713638306, "step": 78} +{"train_info/time_between_train_steps": 0.005834102630615234, "step": 78} +{"info/global_step": 79, "train_info/time_within_train_step": 28.120331048965454, "step": 79} +{"train_info/time_between_train_steps": 0.0110015869140625, "step": 79} +{"info/global_step": 80, "train_info/time_within_train_step": 27.960925102233887, "step": 80} +{"train_info/time_between_train_steps": 0.011686325073242188, "step": 80} +{"info/global_step": 81, "train_info/time_within_train_step": 27.939178466796875, "step": 81} +{"train_info/time_between_train_steps": 0.006916046142578125, "step": 81} +{"train_info/time_between_train_steps": 4.115017652511597, "step": 81} +{"info/global_step": 82, "train_info/time_within_train_step": 27.957664012908936, "step": 82} +{"train_info/time_between_train_steps": 0.005785226821899414, "step": 82} +{"info/global_step": 83, "train_info/time_within_train_step": 28.123077869415283, "step": 83} +{"train_info/time_between_train_steps": 0.010660409927368164, "step": 83} +{"info/global_step": 84, "train_info/time_within_train_step": 28.005258798599243, "step": 84} +{"train_info/time_between_train_steps": 0.01239633560180664, "step": 84} +{"info/global_step": 85, "train_info/time_within_train_step": 28.148447275161743, "step": 85} +{"train_info/time_between_train_steps": 0.0056798458099365234, "step": 85} +{"info/global_step": 86, "train_info/time_within_train_step": 28.046574115753174, "step": 86} +{"train_info/time_between_train_steps": 0.006151914596557617, "step": 86} +{"info/global_step": 87, "train_info/time_within_train_step": 28.114786386489868, "step": 87} +{"train_info/time_between_train_steps": 0.005740642547607422, "step": 87} +{"info/global_step": 88, "train_info/time_within_train_step": 27.88627004623413, "step": 88} +{"train_info/time_between_train_steps": 0.021607160568237305, "step": 88} +{"info/global_step": 89, "train_info/time_within_train_step": 28.05929708480835, "step": 89} +{"train_info/time_between_train_steps": 0.005521535873413086, "step": 89} +{"info/global_step": 90, "train_info/time_within_train_step": 28.0609347820282, "step": 90} +{"train_info/time_between_train_steps": 0.0056231021881103516, "step": 90} +{"info/global_step": 91, "train_info/time_within_train_step": 27.930282831192017, "step": 91} +{"train_info/time_between_train_steps": 0.006058454513549805, "step": 91} +{"info/global_step": 92, "train_info/time_within_train_step": 28.018211841583252, "step": 92} +{"train_info/time_between_train_steps": 0.006604433059692383, "step": 92} +{"info/global_step": 93, "train_info/time_within_train_step": 27.94203782081604, "step": 93} +{"train_info/time_between_train_steps": 0.01226353645324707, "step": 93} +{"info/global_step": 94, "train_info/time_within_train_step": 28.11109232902527, "step": 94} +{"train_info/time_between_train_steps": 0.005490779876708984, "step": 94} +{"info/global_step": 95, "train_info/time_within_train_step": 27.960000038146973, "step": 95} +{"train_info/time_between_train_steps": 0.010790109634399414, "step": 95} +{"info/global_step": 96, "train_info/time_within_train_step": 28.01544189453125, "step": 96} +{"train_info/time_between_train_steps": 0.007019996643066406, "step": 96} +{"info/global_step": 97, "train_info/time_within_train_step": 28.004281044006348, "step": 97} +{"train_info/time_between_train_steps": 0.015547513961791992, "step": 97} +{"info/global_step": 98, "train_info/time_within_train_step": 28.030120372772217, "step": 98} +{"train_info/time_between_train_steps": 0.006019115447998047, "step": 98} +{"info/global_step": 99, "train_info/time_within_train_step": 27.905784368515015, "step": 99} +{"train_info/time_between_train_steps": 0.005948066711425781, "step": 99} +{"info/global_step": 100, "train_info/time_within_train_step": 28.0479896068573, "step": 100} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733957083, "_runtime": 2818}, "step": 100} +{"logs": {"train/loss": 6.8028, "train/learning_rate": 0.0005, "train/epoch": 3.02, "_timestamp": 1733957083, "_runtime": 2818}, "step": 100} +{"train_info/time_between_train_steps": 2.4736313819885254, "step": 100} +{"info/global_step": 101, "train_info/time_within_train_step": 28.035807609558105, "step": 101} +{"train_info/time_between_train_steps": 0.0057849884033203125, "step": 101} +{"info/global_step": 102, "train_info/time_within_train_step": 27.84500026702881, "step": 102} +{"train_info/time_between_train_steps": 0.006061553955078125, "step": 102} +{"info/global_step": 103, "train_info/time_within_train_step": 27.91487455368042, "step": 103} +{"train_info/time_between_train_steps": 0.005803108215332031, "step": 103} +{"info/global_step": 104, "train_info/time_within_train_step": 27.893033504486084, "step": 104} +{"train_info/time_between_train_steps": 0.00581812858581543, "step": 104} +{"info/global_step": 105, "train_info/time_within_train_step": 27.95469570159912, "step": 105} +{"train_info/time_between_train_steps": 0.006096839904785156, "step": 105} +{"info/global_step": 106, "train_info/time_within_train_step": 27.91819405555725, "step": 106} +{"train_info/time_between_train_steps": 0.011598825454711914, "step": 106} +{"info/global_step": 107, "train_info/time_within_train_step": 27.83656644821167, "step": 107} +{"train_info/time_between_train_steps": 0.01107645034790039, "step": 107} +{"info/global_step": 108, "train_info/time_within_train_step": 28.102983713150024, "step": 108} +{"train_info/time_between_train_steps": 0.015357494354248047, "step": 108} +{"train_info/time_between_train_steps": 4.174447298049927, "step": 108} +{"info/global_step": 109, "train_info/time_within_train_step": 28.059093236923218, "step": 109} +{"train_info/time_between_train_steps": 0.005585432052612305, "step": 109} +{"info/global_step": 110, "train_info/time_within_train_step": 28.226717948913574, "step": 110} +{"train_info/time_between_train_steps": 0.00585484504699707, "step": 110} +{"info/global_step": 111, "train_info/time_within_train_step": 28.014111042022705, "step": 111} +{"train_info/time_between_train_steps": 0.006159067153930664, "step": 111} +{"info/global_step": 112, "train_info/time_within_train_step": 28.18371272087097, "step": 112} +{"train_info/time_between_train_steps": 0.012749195098876953, "step": 112} +{"info/global_step": 113, "train_info/time_within_train_step": 28.07880187034607, "step": 113} +{"train_info/time_between_train_steps": 0.006265878677368164, "step": 113} +{"info/global_step": 114, "train_info/time_within_train_step": 28.251726627349854, "step": 114} +{"train_info/time_between_train_steps": 0.005575895309448242, "step": 114} +{"info/global_step": 115, "train_info/time_within_train_step": 27.963443279266357, "step": 115} +{"train_info/time_between_train_steps": 0.005715131759643555, "step": 115} +{"info/global_step": 116, "train_info/time_within_train_step": 27.995994329452515, "step": 116} +{"train_info/time_between_train_steps": 0.014447212219238281, "step": 116} +{"info/global_step": 117, "train_info/time_within_train_step": 27.89358425140381, "step": 117} +{"train_info/time_between_train_steps": 0.012873649597167969, "step": 117} +{"info/global_step": 118, "train_info/time_within_train_step": 27.790684700012207, "step": 118} +{"train_info/time_between_train_steps": 0.0055468082427978516, "step": 118} +{"info/global_step": 119, "train_info/time_within_train_step": 27.98432493209839, "step": 119} +{"train_info/time_between_train_steps": 0.005605220794677734, "step": 119} +{"info/global_step": 120, "train_info/time_within_train_step": 27.94951605796814, "step": 120} +{"train_info/time_between_train_steps": 0.013458013534545898, "step": 120} +{"info/global_step": 121, "train_info/time_within_train_step": 27.950600147247314, "step": 121} +{"train_info/time_between_train_steps": 0.010467052459716797, "step": 121} +{"info/global_step": 122, "train_info/time_within_train_step": 27.933224201202393, "step": 122} +{"train_info/time_between_train_steps": 0.00554656982421875, "step": 122} +{"info/global_step": 123, "train_info/time_within_train_step": 27.993422031402588, "step": 123} +{"train_info/time_between_train_steps": 0.005569934844970703, "step": 123} +{"info/global_step": 124, "train_info/time_within_train_step": 27.857927560806274, "step": 124} +{"train_info/time_between_train_steps": 0.010528564453125, "step": 124} +{"info/global_step": 125, "train_info/time_within_train_step": 28.049411058425903, "step": 125} +{"train_info/time_between_train_steps": 0.005593299865722656, "step": 125} +{"info/global_step": 126, "train_info/time_within_train_step": 28.029024362564087, "step": 126} +{"train_info/time_between_train_steps": 0.005810976028442383, "step": 126} +{"info/global_step": 127, "train_info/time_within_train_step": 27.99120044708252, "step": 127} +{"train_info/time_between_train_steps": 0.013593196868896484, "step": 127} +{"info/global_step": 128, "train_info/time_within_train_step": 27.985567092895508, "step": 128} +{"train_info/time_between_train_steps": 0.005504608154296875, "step": 128} +{"info/global_step": 129, "train_info/time_within_train_step": 27.97547173500061, "step": 129} +{"train_info/time_between_train_steps": 0.005646944046020508, "step": 129} +{"info/global_step": 130, "train_info/time_within_train_step": 27.995630741119385, "step": 130} +{"train_info/time_between_train_steps": 0.005662679672241211, "step": 130} +{"info/global_step": 131, "train_info/time_within_train_step": 27.9442241191864, "step": 131} +{"train_info/time_between_train_steps": 0.005745649337768555, "step": 131} +{"info/global_step": 132, "train_info/time_within_train_step": 27.918048858642578, "step": 132} +{"train_info/time_between_train_steps": 0.0057332515716552734, "step": 132} +{"info/global_step": 133, "train_info/time_within_train_step": 27.93404746055603, "step": 133} +{"train_info/time_between_train_steps": 0.0058956146240234375, "step": 133} +{"info/global_step": 134, "train_info/time_within_train_step": 28.061803579330444, "step": 134} +{"train_info/time_between_train_steps": 0.015772581100463867, "step": 134} +{"info/global_step": 135, "train_info/time_within_train_step": 27.982199907302856, "step": 135} +{"train_info/time_between_train_steps": 0.006630897521972656, "step": 135} +{"train_info/time_between_train_steps": 3.7657830715179443, "step": 135} +{"info/global_step": 136, "train_info/time_within_train_step": 27.99149203300476, "step": 136} +{"train_info/time_between_train_steps": 0.005848407745361328, "step": 136} +{"info/global_step": 137, "train_info/time_within_train_step": 28.047096967697144, "step": 137} +{"train_info/time_between_train_steps": 0.0060236454010009766, "step": 137} +{"info/global_step": 138, "train_info/time_within_train_step": 27.880533933639526, "step": 138} +{"train_info/time_between_train_steps": 0.005494832992553711, "step": 138} +{"info/global_step": 139, "train_info/time_within_train_step": 28.145422220230103, "step": 139} +{"train_info/time_between_train_steps": 0.010824918746948242, "step": 139} +{"info/global_step": 140, "train_info/time_within_train_step": 27.981679439544678, "step": 140} +{"train_info/time_between_train_steps": 0.01574254035949707, "step": 140} +{"info/global_step": 141, "train_info/time_within_train_step": 28.201175928115845, "step": 141} +{"train_info/time_between_train_steps": 0.005709409713745117, "step": 141} +{"info/global_step": 142, "train_info/time_within_train_step": 27.868627071380615, "step": 142} +{"train_info/time_between_train_steps": 0.0060100555419921875, "step": 142} +{"info/global_step": 143, "train_info/time_within_train_step": 28.007177591323853, "step": 143} +{"train_info/time_between_train_steps": 0.005323171615600586, "step": 143} +{"info/global_step": 144, "train_info/time_within_train_step": 27.967544078826904, "step": 144} +{"train_info/time_between_train_steps": 0.013657093048095703, "step": 144} +{"info/global_step": 145, "train_info/time_within_train_step": 27.97971749305725, "step": 145} +{"train_info/time_between_train_steps": 0.005673408508300781, "step": 145} +{"info/global_step": 146, "train_info/time_within_train_step": 27.836219310760498, "step": 146} +{"train_info/time_between_train_steps": 0.005682229995727539, "step": 146} +{"info/global_step": 147, "train_info/time_within_train_step": 27.88535761833191, "step": 147} +{"train_info/time_between_train_steps": 0.005524396896362305, "step": 147} +{"info/global_step": 148, "train_info/time_within_train_step": 27.874494791030884, "step": 148} +{"train_info/time_between_train_steps": 0.005750417709350586, "step": 148} +{"info/global_step": 149, "train_info/time_within_train_step": 27.877017736434937, "step": 149} +{"train_info/time_between_train_steps": 0.0055463314056396484, "step": 149} +{"info/global_step": 150, "train_info/time_within_train_step": 28.046834707260132, "step": 150} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733958493, "_runtime": 4228}, "step": 150} +{"logs": {"train/loss": 6.3976, "train/learning_rate": 0.0005833333333333333, "train/epoch": 5.01, "_timestamp": 1733958493, "_runtime": 4228}, "step": 150} +{"train_info/time_between_train_steps": 0.012418031692504883, "step": 150} +{"info/global_step": 151, "train_info/time_within_train_step": 27.987187385559082, "step": 151} +{"train_info/time_between_train_steps": 0.009532451629638672, "step": 151} +{"info/global_step": 152, "train_info/time_within_train_step": 27.985968828201294, "step": 152} +{"train_info/time_between_train_steps": 0.005633354187011719, "step": 152} +{"info/global_step": 153, "train_info/time_within_train_step": 27.93672275543213, "step": 153} +{"train_info/time_between_train_steps": 0.00564265251159668, "step": 153} +{"info/global_step": 154, "train_info/time_within_train_step": 27.937215328216553, "step": 154} +{"train_info/time_between_train_steps": 0.010893583297729492, "step": 154} +{"info/global_step": 155, "train_info/time_within_train_step": 27.990442991256714, "step": 155} +{"train_info/time_between_train_steps": 0.011291742324829102, "step": 155} +{"info/global_step": 156, "train_info/time_within_train_step": 27.97346019744873, "step": 156} +{"train_info/time_between_train_steps": 0.010666131973266602, "step": 156} +{"info/global_step": 157, "train_info/time_within_train_step": 28.036702632904053, "step": 157} +{"train_info/time_between_train_steps": 0.006242036819458008, "step": 157} +{"info/global_step": 158, "train_info/time_within_train_step": 28.054270029067993, "step": 158} +{"train_info/time_between_train_steps": 0.011092424392700195, "step": 158} +{"info/global_step": 159, "train_info/time_within_train_step": 27.9814350605011, "step": 159} +{"train_info/time_between_train_steps": 0.005852222442626953, "step": 159} +{"info/global_step": 160, "train_info/time_within_train_step": 28.05153179168701, "step": 160} +{"train_info/time_between_train_steps": 0.013576745986938477, "step": 160} +{"info/global_step": 161, "train_info/time_within_train_step": 27.934399843215942, "step": 161} +{"train_info/time_between_train_steps": 0.011132955551147461, "step": 161} +{"info/global_step": 162, "train_info/time_within_train_step": 27.895095348358154, "step": 162} +{"train_info/time_between_train_steps": 0.012245655059814453, "step": 162} +{"train_info/time_between_train_steps": 4.0855631828308105, "step": 162} +{"info/global_step": 163, "train_info/time_within_train_step": 27.94828176498413, "step": 163} +{"train_info/time_between_train_steps": 0.00627589225769043, "step": 163} +{"info/global_step": 164, "train_info/time_within_train_step": 27.971012830734253, "step": 164} +{"train_info/time_between_train_steps": 0.006167173385620117, "step": 164} +{"info/global_step": 165, "train_info/time_within_train_step": 27.9827401638031, "step": 165} +{"train_info/time_between_train_steps": 0.005979299545288086, "step": 165} +{"info/global_step": 166, "train_info/time_within_train_step": 28.11395287513733, "step": 166} +{"train_info/time_between_train_steps": 0.016068458557128906, "step": 166} +{"info/global_step": 167, "train_info/time_within_train_step": 27.9152934551239, "step": 167} +{"train_info/time_between_train_steps": 0.01369929313659668, "step": 167} +{"info/global_step": 168, "train_info/time_within_train_step": 28.10959553718567, "step": 168} +{"train_info/time_between_train_steps": 0.005888462066650391, "step": 168} +{"info/global_step": 169, "train_info/time_within_train_step": 27.91600799560547, "step": 169} +{"train_info/time_between_train_steps": 0.006241559982299805, "step": 169} +{"info/global_step": 170, "train_info/time_within_train_step": 27.8899245262146, "step": 170} +{"train_info/time_between_train_steps": 0.005320310592651367, "step": 170} +{"info/global_step": 171, "train_info/time_within_train_step": 27.940979719161987, "step": 171} +{"train_info/time_between_train_steps": 0.005595207214355469, "step": 171} +{"info/global_step": 172, "train_info/time_within_train_step": 28.017045974731445, "step": 172} +{"train_info/time_between_train_steps": 0.005890846252441406, "step": 172} +{"info/global_step": 173, "train_info/time_within_train_step": 27.951467752456665, "step": 173} +{"train_info/time_between_train_steps": 0.0058057308197021484, "step": 173} +{"info/global_step": 174, "train_info/time_within_train_step": 27.971776962280273, "step": 174} +{"train_info/time_between_train_steps": 0.005644083023071289, "step": 174} +{"info/global_step": 175, "train_info/time_within_train_step": 28.008496522903442, "step": 175} +{"train_info/time_between_train_steps": 0.005746126174926758, "step": 175} +{"info/global_step": 176, "train_info/time_within_train_step": 28.06163716316223, "step": 176} +{"train_info/time_between_train_steps": 0.006486415863037109, "step": 176} +{"info/global_step": 177, "train_info/time_within_train_step": 27.84484028816223, "step": 177} +{"train_info/time_between_train_steps": 0.0055522918701171875, "step": 177} +{"info/global_step": 178, "train_info/time_within_train_step": 27.838051319122314, "step": 178} +{"train_info/time_between_train_steps": 0.010119199752807617, "step": 178} +{"info/global_step": 179, "train_info/time_within_train_step": 28.009163856506348, "step": 179} +{"train_info/time_between_train_steps": 0.0057871341705322266, "step": 179} +{"info/global_step": 180, "train_info/time_within_train_step": 27.894786834716797, "step": 180} +{"train_info/time_between_train_steps": 0.0126800537109375, "step": 180} +{"info/global_step": 181, "train_info/time_within_train_step": 27.796913862228394, "step": 181} +{"train_info/time_between_train_steps": 0.005594015121459961, "step": 181} +{"info/global_step": 182, "train_info/time_within_train_step": 27.915470123291016, "step": 182} +{"train_info/time_between_train_steps": 0.005572319030761719, "step": 182} +{"info/global_step": 183, "train_info/time_within_train_step": 28.009151697158813, "step": 183} +{"train_info/time_between_train_steps": 0.005787372589111328, "step": 183} +{"info/global_step": 184, "train_info/time_within_train_step": 27.88573670387268, "step": 184} +{"train_info/time_between_train_steps": 0.008633136749267578, "step": 184} +{"info/global_step": 185, "train_info/time_within_train_step": 28.008453607559204, "step": 185} +{"train_info/time_between_train_steps": 0.010140419006347656, "step": 185} +{"info/global_step": 186, "train_info/time_within_train_step": 27.95203685760498, "step": 186} +{"train_info/time_between_train_steps": 0.005887746810913086, "step": 186} +{"info/global_step": 187, "train_info/time_within_train_step": 28.05147957801819, "step": 187} +{"train_info/time_between_train_steps": 0.012953996658325195, "step": 187} +{"info/global_step": 188, "train_info/time_within_train_step": 28.115337371826172, "step": 188} +{"train_info/time_between_train_steps": 0.0061855316162109375, "step": 188} +{"info/global_step": 189, "train_info/time_within_train_step": 27.851542234420776, "step": 189} +{"train_info/time_between_train_steps": 0.006748199462890625, "step": 189} +{"train_info/time_between_train_steps": 4.224915504455566, "step": 189} +{"info/global_step": 190, "train_info/time_within_train_step": 28.009299516677856, "step": 190} +{"train_info/time_between_train_steps": 0.010385751724243164, "step": 190} +{"info/global_step": 191, "train_info/time_within_train_step": 28.099888563156128, "step": 191} +{"train_info/time_between_train_steps": 0.013498544692993164, "step": 191} +{"info/global_step": 192, "train_info/time_within_train_step": 28.079286098480225, "step": 192} +{"train_info/time_between_train_steps": 0.0058553218841552734, "step": 192} +{"info/global_step": 193, "train_info/time_within_train_step": 28.05229616165161, "step": 193} +{"train_info/time_between_train_steps": 0.005858898162841797, "step": 193} +{"info/global_step": 194, "train_info/time_within_train_step": 27.897448778152466, "step": 194} +{"train_info/time_between_train_steps": 0.005726814270019531, "step": 194} +{"info/global_step": 195, "train_info/time_within_train_step": 28.087955474853516, "step": 195} +{"train_info/time_between_train_steps": 0.005939006805419922, "step": 195} +{"info/global_step": 196, "train_info/time_within_train_step": 28.011335611343384, "step": 196} +{"train_info/time_between_train_steps": 0.005887746810913086, "step": 196} +{"info/global_step": 197, "train_info/time_within_train_step": 27.985806226730347, "step": 197} +{"train_info/time_between_train_steps": 0.005364894866943359, "step": 197} +{"info/global_step": 198, "train_info/time_within_train_step": 27.98138403892517, "step": 198} +{"train_info/time_between_train_steps": 0.00570225715637207, "step": 198} +{"info/global_step": 199, "train_info/time_within_train_step": 27.952751874923706, "step": 199} +{"train_info/time_between_train_steps": 0.0055065155029296875, "step": 199} +{"info/global_step": 200, "train_info/time_within_train_step": 27.901692152023315, "step": 200} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733959900, "_runtime": 5635}, "step": 200} +{"logs": {"train/loss": 5.8849, "train/learning_rate": 0.0005555555555555556, "train/epoch": 7.01, "_timestamp": 1733959900, "_runtime": 5635}, "step": 200} +{"train_info/time_between_train_steps": 2.6848788261413574, "step": 200} +{"info/global_step": 201, "train_info/time_within_train_step": 27.913625240325928, "step": 201} +{"train_info/time_between_train_steps": 0.005564212799072266, "step": 201} +{"info/global_step": 202, "train_info/time_within_train_step": 27.813402891159058, "step": 202} +{"train_info/time_between_train_steps": 0.005417346954345703, "step": 202} +{"info/global_step": 203, "train_info/time_within_train_step": 28.103994846343994, "step": 203} +{"train_info/time_between_train_steps": 0.010257720947265625, "step": 203} +{"info/global_step": 204, "train_info/time_within_train_step": 27.93332290649414, "step": 204} +{"train_info/time_between_train_steps": 0.0075855255126953125, "step": 204} +{"info/global_step": 205, "train_info/time_within_train_step": 28.221306085586548, "step": 205} +{"train_info/time_between_train_steps": 0.014330387115478516, "step": 205} +{"info/global_step": 206, "train_info/time_within_train_step": 28.006444692611694, "step": 206} +{"train_info/time_between_train_steps": 0.005674839019775391, "step": 206} +{"info/global_step": 207, "train_info/time_within_train_step": 27.90176296234131, "step": 207} +{"train_info/time_between_train_steps": 0.005697011947631836, "step": 207} +{"info/global_step": 208, "train_info/time_within_train_step": 27.97250008583069, "step": 208} +{"train_info/time_between_train_steps": 0.011312246322631836, "step": 208} +{"info/global_step": 209, "train_info/time_within_train_step": 27.842508792877197, "step": 209} +{"train_info/time_between_train_steps": 0.005796909332275391, "step": 209} +{"info/global_step": 210, "train_info/time_within_train_step": 27.912397623062134, "step": 210} +{"train_info/time_between_train_steps": 0.005774974822998047, "step": 210} +{"info/global_step": 211, "train_info/time_within_train_step": 28.039456129074097, "step": 211} +{"train_info/time_between_train_steps": 0.0057048797607421875, "step": 211} +{"info/global_step": 212, "train_info/time_within_train_step": 27.82820773124695, "step": 212} +{"train_info/time_between_train_steps": 0.011736631393432617, "step": 212} +{"info/global_step": 213, "train_info/time_within_train_step": 27.775851011276245, "step": 213} +{"train_info/time_between_train_steps": 0.005776166915893555, "step": 213} +{"info/global_step": 214, "train_info/time_within_train_step": 27.931634426116943, "step": 214} +{"train_info/time_between_train_steps": 0.013679981231689453, "step": 214} +{"info/global_step": 215, "train_info/time_within_train_step": 28.0160653591156, "step": 215} +{"train_info/time_between_train_steps": 0.005917549133300781, "step": 215} +{"info/global_step": 216, "train_info/time_within_train_step": 28.05815076828003, "step": 216} +{"train_info/time_between_train_steps": 0.006850481033325195, "step": 216} +{"train_info/time_between_train_steps": 3.8061957359313965, "step": 216} +{"info/global_step": 217, "train_info/time_within_train_step": 27.926769018173218, "step": 217} +{"train_info/time_between_train_steps": 0.011087179183959961, "step": 217} +{"info/global_step": 218, "train_info/time_within_train_step": 28.107194423675537, "step": 218} +{"train_info/time_between_train_steps": 0.011478424072265625, "step": 218} +{"info/global_step": 219, "train_info/time_within_train_step": 27.972595930099487, "step": 219} +{"train_info/time_between_train_steps": 0.01064753532409668, "step": 219} +{"info/global_step": 220, "train_info/time_within_train_step": 28.061328411102295, "step": 220} +{"train_info/time_between_train_steps": 0.005810260772705078, "step": 220} +{"info/global_step": 221, "train_info/time_within_train_step": 27.879906177520752, "step": 221} +{"train_info/time_between_train_steps": 0.005972146987915039, "step": 221} +{"info/global_step": 222, "train_info/time_within_train_step": 28.1159610748291, "step": 222} +{"train_info/time_between_train_steps": 0.005844831466674805, "step": 222} +{"info/global_step": 223, "train_info/time_within_train_step": 28.031708002090454, "step": 223} +{"train_info/time_between_train_steps": 0.005694389343261719, "step": 223} +{"info/global_step": 224, "train_info/time_within_train_step": 27.897048711776733, "step": 224} +{"train_info/time_between_train_steps": 0.005470752716064453, "step": 224} +{"info/global_step": 225, "train_info/time_within_train_step": 27.91433835029602, "step": 225} +{"train_info/time_between_train_steps": 0.005838632583618164, "step": 225} +{"info/global_step": 226, "train_info/time_within_train_step": 27.980183362960815, "step": 226} +{"train_info/time_between_train_steps": 0.008758306503295898, "step": 226} +{"info/global_step": 227, "train_info/time_within_train_step": 28.17328381538391, "step": 227} +{"train_info/time_between_train_steps": 0.006663799285888672, "step": 227} +{"info/global_step": 228, "train_info/time_within_train_step": 27.9932279586792, "step": 228} +{"train_info/time_between_train_steps": 0.006284952163696289, "step": 228} +{"info/global_step": 229, "train_info/time_within_train_step": 28.034364938735962, "step": 229} +{"train_info/time_between_train_steps": 0.01712656021118164, "step": 229} +{"info/global_step": 230, "train_info/time_within_train_step": 28.02698040008545, "step": 230} +{"train_info/time_between_train_steps": 0.006103038787841797, "step": 230} +{"info/global_step": 231, "train_info/time_within_train_step": 28.051816701889038, "step": 231} +{"train_info/time_between_train_steps": 0.0060236454010009766, "step": 231} +{"info/global_step": 232, "train_info/time_within_train_step": 28.07680892944336, "step": 232} +{"train_info/time_between_train_steps": 0.018239259719848633, "step": 232} +{"info/global_step": 233, "train_info/time_within_train_step": 28.140239477157593, "step": 233} +{"train_info/time_between_train_steps": 0.007582664489746094, "step": 233} +{"info/global_step": 234, "train_info/time_within_train_step": 28.049864530563354, "step": 234} +{"train_info/time_between_train_steps": 0.01164865493774414, "step": 234} +{"info/global_step": 235, "train_info/time_within_train_step": 28.14022660255432, "step": 235} +{"train_info/time_between_train_steps": 0.01159524917602539, "step": 235} +{"info/global_step": 236, "train_info/time_within_train_step": 27.89976215362549, "step": 236} +{"train_info/time_between_train_steps": 0.011699914932250977, "step": 236} +{"info/global_step": 237, "train_info/time_within_train_step": 28.08329463005066, "step": 237} +{"train_info/time_between_train_steps": 0.016836166381835938, "step": 237} +{"info/global_step": 238, "train_info/time_within_train_step": 27.943455457687378, "step": 238} +{"train_info/time_between_train_steps": 0.006291627883911133, "step": 238} +{"info/global_step": 239, "train_info/time_within_train_step": 28.17125129699707, "step": 239} +{"train_info/time_between_train_steps": 0.010603189468383789, "step": 239} +{"info/global_step": 240, "train_info/time_within_train_step": 27.957364797592163, "step": 240} +{"train_info/time_between_train_steps": 0.01215672492980957, "step": 240} +{"info/global_step": 241, "train_info/time_within_train_step": 28.35855007171631, "step": 241} +{"train_info/time_between_train_steps": 0.011315345764160156, "step": 241} +{"info/global_step": 242, "train_info/time_within_train_step": 27.99322772026062, "step": 242} +{"train_info/time_between_train_steps": 0.006306648254394531, "step": 242} +{"info/global_step": 243, "train_info/time_within_train_step": 27.99009418487549, "step": 243} +{"train_info/time_between_train_steps": 0.006886482238769531, "step": 243} +{"train_info/time_between_train_steps": 4.276829242706299, "step": 243} +{"info/global_step": 244, "train_info/time_within_train_step": 27.956405639648438, "step": 244} +{"train_info/time_between_train_steps": 0.006060123443603516, "step": 244} +{"info/global_step": 245, "train_info/time_within_train_step": 27.90166664123535, "step": 245} +{"train_info/time_between_train_steps": 0.006394624710083008, "step": 245} +{"info/global_step": 246, "train_info/time_within_train_step": 27.86245346069336, "step": 246} +{"train_info/time_between_train_steps": 0.006085634231567383, "step": 246} +{"info/global_step": 247, "train_info/time_within_train_step": 28.04163432121277, "step": 247} +{"train_info/time_between_train_steps": 0.005803585052490234, "step": 247} +{"info/global_step": 248, "train_info/time_within_train_step": 28.264427661895752, "step": 248} +{"train_info/time_between_train_steps": 0.011850118637084961, "step": 248} +{"info/global_step": 249, "train_info/time_within_train_step": 28.022042274475098, "step": 249} +{"train_info/time_between_train_steps": 0.005869388580322266, "step": 249} +{"info/global_step": 250, "train_info/time_within_train_step": 27.9157235622406, "step": 250} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733961312, "_runtime": 7047}, "step": 250} +{"logs": {"train/loss": 5.515, "train/learning_rate": 0.0005277777777777777, "train/epoch": 9.01, "_timestamp": 1733961312, "_runtime": 7047}, "step": 250} +{"train_info/time_between_train_steps": 0.008019208908081055, "step": 250} +{"info/global_step": 251, "train_info/time_within_train_step": 28.18099093437195, "step": 251} +{"train_info/time_between_train_steps": 0.010563850402832031, "step": 251} +{"info/global_step": 252, "train_info/time_within_train_step": 28.055476665496826, "step": 252} +{"train_info/time_between_train_steps": 0.005918264389038086, "step": 252} +{"info/global_step": 253, "train_info/time_within_train_step": 28.155056715011597, "step": 253} +{"train_info/time_between_train_steps": 0.005837440490722656, "step": 253} +{"info/global_step": 254, "train_info/time_within_train_step": 28.046160459518433, "step": 254} +{"train_info/time_between_train_steps": 0.009121417999267578, "step": 254} +{"info/global_step": 255, "train_info/time_within_train_step": 27.89708685874939, "step": 255} +{"train_info/time_between_train_steps": 0.005979299545288086, "step": 255} +{"info/global_step": 256, "train_info/time_within_train_step": 27.858330488204956, "step": 256} +{"train_info/time_between_train_steps": 0.015530824661254883, "step": 256} +{"info/global_step": 257, "train_info/time_within_train_step": 27.833674669265747, "step": 257} +{"train_info/time_between_train_steps": 0.005927324295043945, "step": 257} +{"info/global_step": 258, "train_info/time_within_train_step": 27.87012529373169, "step": 258} +{"train_info/time_between_train_steps": 0.010994672775268555, "step": 258} +{"info/global_step": 259, "train_info/time_within_train_step": 28.140850067138672, "step": 259} +{"train_info/time_between_train_steps": 0.010948896408081055, "step": 259} +{"info/global_step": 260, "train_info/time_within_train_step": 27.98967933654785, "step": 260} +{"train_info/time_between_train_steps": 0.005905866622924805, "step": 260} +{"info/global_step": 261, "train_info/time_within_train_step": 27.955751180648804, "step": 261} +{"train_info/time_between_train_steps": 0.011072397232055664, "step": 261} +{"info/global_step": 262, "train_info/time_within_train_step": 27.939197778701782, "step": 262} +{"train_info/time_between_train_steps": 0.016082286834716797, "step": 262} +{"info/global_step": 263, "train_info/time_within_train_step": 27.961461067199707, "step": 263} +{"train_info/time_between_train_steps": 0.005823373794555664, "step": 263} +{"info/global_step": 264, "train_info/time_within_train_step": 27.834572315216064, "step": 264} +{"train_info/time_between_train_steps": 0.011494636535644531, "step": 264} +{"info/global_step": 265, "train_info/time_within_train_step": 27.974704027175903, "step": 265} +{"train_info/time_between_train_steps": 0.01149749755859375, "step": 265} +{"info/global_step": 266, "train_info/time_within_train_step": 28.21016836166382, "step": 266} +{"train_info/time_between_train_steps": 0.006224632263183594, "step": 266} +{"info/global_step": 267, "train_info/time_within_train_step": 28.023355722427368, "step": 267} +{"train_info/time_between_train_steps": 0.006183624267578125, "step": 267} +{"info/global_step": 268, "train_info/time_within_train_step": 27.81344437599182, "step": 268} +{"train_info/time_between_train_steps": 0.010935068130493164, "step": 268} +{"info/global_step": 269, "train_info/time_within_train_step": 27.85855770111084, "step": 269} +{"train_info/time_between_train_steps": 0.006249427795410156, "step": 269} +{"info/global_step": 270, "train_info/time_within_train_step": 27.848112106323242, "step": 270} +{"train_info/time_between_train_steps": 0.01208186149597168, "step": 270} +{"train_info/time_between_train_steps": 4.225100517272949, "step": 270} +{"info/global_step": 271, "train_info/time_within_train_step": 27.986003875732422, "step": 271} +{"train_info/time_between_train_steps": 0.011049985885620117, "step": 271} +{"info/global_step": 272, "train_info/time_within_train_step": 28.07944345474243, "step": 272} +{"train_info/time_between_train_steps": 0.01542973518371582, "step": 272} +{"info/global_step": 273, "train_info/time_within_train_step": 28.002726793289185, "step": 273} +{"train_info/time_between_train_steps": 0.010656595230102539, "step": 273} +{"info/global_step": 274, "train_info/time_within_train_step": 28.248880624771118, "step": 274} +{"train_info/time_between_train_steps": 0.00683903694152832, "step": 274} +{"info/global_step": 275, "train_info/time_within_train_step": 27.992101669311523, "step": 275} +{"train_info/time_between_train_steps": 0.0065155029296875, "step": 275} +{"info/global_step": 276, "train_info/time_within_train_step": 28.202617406845093, "step": 276} +{"train_info/time_between_train_steps": 0.006488800048828125, "step": 276} +{"info/global_step": 277, "train_info/time_within_train_step": 27.89903950691223, "step": 277} +{"train_info/time_between_train_steps": 0.011477470397949219, "step": 277} +{"info/global_step": 278, "train_info/time_within_train_step": 28.094111919403076, "step": 278} +{"train_info/time_between_train_steps": 0.005331516265869141, "step": 278} +{"info/global_step": 279, "train_info/time_within_train_step": 28.0059974193573, "step": 279} +{"train_info/time_between_train_steps": 0.005730867385864258, "step": 279} +{"info/global_step": 280, "train_info/time_within_train_step": 27.955193758010864, "step": 280} +{"train_info/time_between_train_steps": 0.005695343017578125, "step": 280} +{"info/global_step": 281, "train_info/time_within_train_step": 28.02786421775818, "step": 281} +{"train_info/time_between_train_steps": 0.010662555694580078, "step": 281} +{"info/global_step": 282, "train_info/time_within_train_step": 28.02728271484375, "step": 282} +{"train_info/time_between_train_steps": 0.010742425918579102, "step": 282} +{"info/global_step": 283, "train_info/time_within_train_step": 28.07392954826355, "step": 283} +{"train_info/time_between_train_steps": 0.010696172714233398, "step": 283} +{"info/global_step": 284, "train_info/time_within_train_step": 27.891343116760254, "step": 284} +{"train_info/time_between_train_steps": 0.0054779052734375, "step": 284} +{"info/global_step": 285, "train_info/time_within_train_step": 27.96849489212036, "step": 285} +{"train_info/time_between_train_steps": 0.005784273147583008, "step": 285} +{"info/global_step": 286, "train_info/time_within_train_step": 27.76470160484314, "step": 286} +{"train_info/time_between_train_steps": 0.005441188812255859, "step": 286} +{"info/global_step": 287, "train_info/time_within_train_step": 27.748928785324097, "step": 287} +{"train_info/time_between_train_steps": 0.011107921600341797, "step": 287} +{"info/global_step": 288, "train_info/time_within_train_step": 27.871980905532837, "step": 288} +{"train_info/time_between_train_steps": 0.007745981216430664, "step": 288} +{"info/global_step": 289, "train_info/time_within_train_step": 27.874898195266724, "step": 289} +{"train_info/time_between_train_steps": 0.011021614074707031, "step": 289} +{"info/global_step": 290, "train_info/time_within_train_step": 28.047282457351685, "step": 290} +{"train_info/time_between_train_steps": 0.01298379898071289, "step": 290} +{"info/global_step": 291, "train_info/time_within_train_step": 27.958993911743164, "step": 291} +{"train_info/time_between_train_steps": 0.010835409164428711, "step": 291} +{"info/global_step": 292, "train_info/time_within_train_step": 28.12607479095459, "step": 292} +{"train_info/time_between_train_steps": 0.017126083374023438, "step": 292} +{"info/global_step": 293, "train_info/time_within_train_step": 28.04252338409424, "step": 293} +{"train_info/time_between_train_steps": 0.006322622299194336, "step": 293} +{"info/global_step": 294, "train_info/time_within_train_step": 27.826257705688477, "step": 294} +{"train_info/time_between_train_steps": 0.0060198307037353516, "step": 294} +{"info/global_step": 295, "train_info/time_within_train_step": 27.968637228012085, "step": 295} +{"train_info/time_between_train_steps": 0.016523361206054688, "step": 295} +{"info/global_step": 296, "train_info/time_within_train_step": 27.871644735336304, "step": 296} +{"train_info/time_between_train_steps": 0.006138801574707031, "step": 296} +{"info/global_step": 297, "train_info/time_within_train_step": 28.042232275009155, "step": 297} +{"train_info/time_between_train_steps": 0.006882667541503906, "step": 297} +{"train_info/time_between_train_steps": 3.941758632659912, "step": 297} +{"info/global_step": 298, "train_info/time_within_train_step": 27.925584316253662, "step": 298} +{"train_info/time_between_train_steps": 0.005692481994628906, "step": 298} +{"info/global_step": 299, "train_info/time_within_train_step": 28.13319730758667, "step": 299} +{"train_info/time_between_train_steps": 0.005673885345458984, "step": 299} +{"info/global_step": 300, "train_info/time_within_train_step": 27.959633111953735, "step": 300} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733962719, "_runtime": 8454}, "step": 300} +{"logs": {"train/loss": 5.2445, "train/learning_rate": 0.0005, "train/epoch": 11.0, "_timestamp": 1733962719, "_runtime": 8454}, "step": 300} +{"train_info/time_between_train_steps": 2.6645686626434326, "step": 300} +{"info/global_step": 301, "train_info/time_within_train_step": 28.052294731140137, "step": 301} +{"train_info/time_between_train_steps": 0.005677461624145508, "step": 301} +{"info/global_step": 302, "train_info/time_within_train_step": 27.95270299911499, "step": 302} +{"train_info/time_between_train_steps": 0.006067752838134766, "step": 302} +{"info/global_step": 303, "train_info/time_within_train_step": 27.965629816055298, "step": 303} +{"train_info/time_between_train_steps": 0.005919933319091797, "step": 303} +{"info/global_step": 304, "train_info/time_within_train_step": 27.900323629379272, "step": 304} +{"train_info/time_between_train_steps": 0.005845069885253906, "step": 304} +{"info/global_step": 305, "train_info/time_within_train_step": 28.000373601913452, "step": 305} +{"train_info/time_between_train_steps": 0.005603313446044922, "step": 305} +{"info/global_step": 306, "train_info/time_within_train_step": 27.834262371063232, "step": 306} +{"train_info/time_between_train_steps": 0.0056879520416259766, "step": 306} +{"info/global_step": 307, "train_info/time_within_train_step": 28.022565126419067, "step": 307} +{"train_info/time_between_train_steps": 0.011255979537963867, "step": 307} +{"info/global_step": 308, "train_info/time_within_train_step": 27.789578199386597, "step": 308} +{"train_info/time_between_train_steps": 0.005594015121459961, "step": 308} +{"info/global_step": 309, "train_info/time_within_train_step": 27.816208124160767, "step": 309} +{"train_info/time_between_train_steps": 0.005768299102783203, "step": 309} +{"info/global_step": 310, "train_info/time_within_train_step": 27.850398302078247, "step": 310} +{"train_info/time_between_train_steps": 0.010767698287963867, "step": 310} +{"info/global_step": 311, "train_info/time_within_train_step": 28.039308547973633, "step": 311} +{"train_info/time_between_train_steps": 0.00905919075012207, "step": 311} +{"info/global_step": 312, "train_info/time_within_train_step": 27.867265224456787, "step": 312} +{"train_info/time_between_train_steps": 0.006105661392211914, "step": 312} +{"info/global_step": 313, "train_info/time_within_train_step": 27.91219425201416, "step": 313} +{"train_info/time_between_train_steps": 0.005846738815307617, "step": 313} +{"info/global_step": 314, "train_info/time_within_train_step": 27.949307680130005, "step": 314} +{"train_info/time_between_train_steps": 0.016134023666381836, "step": 314} +{"info/global_step": 315, "train_info/time_within_train_step": 27.91834855079651, "step": 315} +{"train_info/time_between_train_steps": 0.005798816680908203, "step": 315} +{"info/global_step": 316, "train_info/time_within_train_step": 27.925431728363037, "step": 316} +{"train_info/time_between_train_steps": 0.005682468414306641, "step": 316} +{"info/global_step": 317, "train_info/time_within_train_step": 27.843051433563232, "step": 317} +{"train_info/time_between_train_steps": 0.016491413116455078, "step": 317} +{"info/global_step": 318, "train_info/time_within_train_step": 28.096688985824585, "step": 318} +{"train_info/time_between_train_steps": 0.005846261978149414, "step": 318} +{"info/global_step": 319, "train_info/time_within_train_step": 27.970080137252808, "step": 319} +{"train_info/time_between_train_steps": 0.007720470428466797, "step": 319} +{"info/global_step": 320, "train_info/time_within_train_step": 27.82578444480896, "step": 320} +{"train_info/time_between_train_steps": 0.005877971649169922, "step": 320} +{"info/global_step": 321, "train_info/time_within_train_step": 28.029284954071045, "step": 321} +{"train_info/time_between_train_steps": 0.01199030876159668, "step": 321} +{"info/global_step": 322, "train_info/time_within_train_step": 27.846737384796143, "step": 322} +{"train_info/time_between_train_steps": 0.011413097381591797, "step": 322} +{"info/global_step": 323, "train_info/time_within_train_step": 28.028029918670654, "step": 323} +{"train_info/time_between_train_steps": 0.006570577621459961, "step": 323} +{"info/global_step": 324, "train_info/time_within_train_step": 27.99525809288025, "step": 324} +{"train_info/time_between_train_steps": 0.0068509578704833984, "step": 324} +{"train_info/time_between_train_steps": 3.9445908069610596, "step": 324} +{"info/global_step": 325, "train_info/time_within_train_step": 27.862777948379517, "step": 325} +{"train_info/time_between_train_steps": 0.005746364593505859, "step": 325} +{"info/global_step": 326, "train_info/time_within_train_step": 28.16022276878357, "step": 326} +{"train_info/time_between_train_steps": 0.006092071533203125, "step": 326} +{"info/global_step": 327, "train_info/time_within_train_step": 28.17079257965088, "step": 327} +{"train_info/time_between_train_steps": 0.008886337280273438, "step": 327} +{"info/global_step": 328, "train_info/time_within_train_step": 28.343146562576294, "step": 328} +{"train_info/time_between_train_steps": 0.011400938034057617, "step": 328} +{"info/global_step": 329, "train_info/time_within_train_step": 28.152560234069824, "step": 329} +{"train_info/time_between_train_steps": 0.0066645145416259766, "step": 329} +{"info/global_step": 330, "train_info/time_within_train_step": 28.262156009674072, "step": 330} +{"train_info/time_between_train_steps": 0.011741399765014648, "step": 330} +{"info/global_step": 331, "train_info/time_within_train_step": 28.224217891693115, "step": 331} +{"train_info/time_between_train_steps": 0.007055759429931641, "step": 331} +{"info/global_step": 332, "train_info/time_within_train_step": 28.11061930656433, "step": 332} +{"train_info/time_between_train_steps": 0.012597084045410156, "step": 332} +{"info/global_step": 333, "train_info/time_within_train_step": 28.061841011047363, "step": 333} +{"train_info/time_between_train_steps": 0.011329889297485352, "step": 333} +{"info/global_step": 334, "train_info/time_within_train_step": 27.919846773147583, "step": 334} +{"train_info/time_between_train_steps": 0.0059278011322021484, "step": 334} +{"info/global_step": 335, "train_info/time_within_train_step": 27.930680990219116, "step": 335} +{"train_info/time_between_train_steps": 0.014841794967651367, "step": 335} +{"info/global_step": 336, "train_info/time_within_train_step": 27.98691487312317, "step": 336} +{"train_info/time_between_train_steps": 0.00574183464050293, "step": 336} +{"info/global_step": 337, "train_info/time_within_train_step": 27.982694387435913, "step": 337} +{"train_info/time_between_train_steps": 0.005766153335571289, "step": 337} +{"info/global_step": 338, "train_info/time_within_train_step": 28.093804836273193, "step": 338} +{"train_info/time_between_train_steps": 0.010897636413574219, "step": 338} +{"info/global_step": 339, "train_info/time_within_train_step": 27.86043357849121, "step": 339} +{"train_info/time_between_train_steps": 0.006960630416870117, "step": 339} +{"info/global_step": 340, "train_info/time_within_train_step": 27.79655432701111, "step": 340} +{"train_info/time_between_train_steps": 0.005611419677734375, "step": 340} +{"info/global_step": 341, "train_info/time_within_train_step": 27.81397557258606, "step": 341} +{"train_info/time_between_train_steps": 0.011167049407958984, "step": 341} +{"info/global_step": 342, "train_info/time_within_train_step": 27.81702423095703, "step": 342} +{"train_info/time_between_train_steps": 0.005856037139892578, "step": 342} +{"info/global_step": 343, "train_info/time_within_train_step": 27.831448316574097, "step": 343} +{"train_info/time_between_train_steps": 0.005604982376098633, "step": 343} +{"info/global_step": 344, "train_info/time_within_train_step": 27.92749857902527, "step": 344} +{"train_info/time_between_train_steps": 0.005647897720336914, "step": 344} +{"info/global_step": 345, "train_info/time_within_train_step": 27.78131103515625, "step": 345} +{"train_info/time_between_train_steps": 0.005518436431884766, "step": 345} +{"info/global_step": 346, "train_info/time_within_train_step": 27.84299612045288, "step": 346} +{"train_info/time_between_train_steps": 0.00569462776184082, "step": 346} +{"info/global_step": 347, "train_info/time_within_train_step": 27.874107360839844, "step": 347} +{"train_info/time_between_train_steps": 0.006072282791137695, "step": 347} +{"info/global_step": 348, "train_info/time_within_train_step": 27.942131757736206, "step": 348} +{"train_info/time_between_train_steps": 0.005705833435058594, "step": 348} +{"info/global_step": 349, "train_info/time_within_train_step": 27.777028799057007, "step": 349} +{"train_info/time_between_train_steps": 0.00877523422241211, "step": 349} +{"info/global_step": 350, "train_info/time_within_train_step": 28.013604402542114, "step": 350} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733964124, "_runtime": 9859}, "step": 350} +{"logs": {"train/loss": 5.0564, "train/learning_rate": 0.00047222222222222224, "train/epoch": 12.02, "_timestamp": 1733964124, "_runtime": 9859}, "step": 350} +{"train_info/time_between_train_steps": 0.008500337600708008, "step": 350} +{"info/global_step": 351, "train_info/time_within_train_step": 27.8171546459198, "step": 351} +{"train_info/time_between_train_steps": 0.006696462631225586, "step": 351} +{"train_info/time_between_train_steps": 4.281299114227295, "step": 351} +{"info/global_step": 352, "train_info/time_within_train_step": 27.87739133834839, "step": 352} +{"train_info/time_between_train_steps": 0.00604701042175293, "step": 352} +{"info/global_step": 353, "train_info/time_within_train_step": 28.10347056388855, "step": 353} +{"train_info/time_between_train_steps": 0.011286258697509766, "step": 353} +{"info/global_step": 354, "train_info/time_within_train_step": 27.87213110923767, "step": 354} +{"train_info/time_between_train_steps": 0.011182308197021484, "step": 354} +{"info/global_step": 355, "train_info/time_within_train_step": 28.119579076766968, "step": 355} +{"train_info/time_between_train_steps": 0.007179737091064453, "step": 355} +{"info/global_step": 356, "train_info/time_within_train_step": 27.98795199394226, "step": 356} +{"train_info/time_between_train_steps": 0.005917549133300781, "step": 356} +{"info/global_step": 357, "train_info/time_within_train_step": 28.041330575942993, "step": 357} +{"train_info/time_between_train_steps": 0.0057392120361328125, "step": 357} +{"info/global_step": 358, "train_info/time_within_train_step": 27.821614265441895, "step": 358} +{"train_info/time_between_train_steps": 0.0062906742095947266, "step": 358} +{"info/global_step": 359, "train_info/time_within_train_step": 27.995753288269043, "step": 359} +{"train_info/time_between_train_steps": 0.005864143371582031, "step": 359} +{"info/global_step": 360, "train_info/time_within_train_step": 27.967432498931885, "step": 360} +{"train_info/time_between_train_steps": 0.005790233612060547, "step": 360} +{"info/global_step": 361, "train_info/time_within_train_step": 27.82191514968872, "step": 361} +{"train_info/time_between_train_steps": 0.0059986114501953125, "step": 361} +{"info/global_step": 362, "train_info/time_within_train_step": 27.919554948806763, "step": 362} +{"train_info/time_between_train_steps": 0.005949258804321289, "step": 362} +{"info/global_step": 363, "train_info/time_within_train_step": 27.81942582130432, "step": 363} +{"train_info/time_between_train_steps": 0.005473613739013672, "step": 363} +{"info/global_step": 364, "train_info/time_within_train_step": 27.95828676223755, "step": 364} +{"train_info/time_between_train_steps": 0.0072133541107177734, "step": 364} +{"info/global_step": 365, "train_info/time_within_train_step": 28.021055221557617, "step": 365} +{"train_info/time_between_train_steps": 0.0056111812591552734, "step": 365} +{"info/global_step": 366, "train_info/time_within_train_step": 27.83911418914795, "step": 366} +{"train_info/time_between_train_steps": 0.005700826644897461, "step": 366} +{"info/global_step": 367, "train_info/time_within_train_step": 27.92963933944702, "step": 367} +{"train_info/time_between_train_steps": 0.006270647048950195, "step": 367} +{"info/global_step": 368, "train_info/time_within_train_step": 27.915942430496216, "step": 368} +{"train_info/time_between_train_steps": 0.005624294281005859, "step": 368} +{"info/global_step": 369, "train_info/time_within_train_step": 27.946491956710815, "step": 369} +{"train_info/time_between_train_steps": 0.005598545074462891, "step": 369} +{"info/global_step": 370, "train_info/time_within_train_step": 27.9310564994812, "step": 370} +{"train_info/time_between_train_steps": 0.00577092170715332, "step": 370} +{"info/global_step": 371, "train_info/time_within_train_step": 27.8192458152771, "step": 371} +{"train_info/time_between_train_steps": 0.011073112487792969, "step": 371} +{"info/global_step": 372, "train_info/time_within_train_step": 27.918750047683716, "step": 372} +{"train_info/time_between_train_steps": 0.015553712844848633, "step": 372} +{"info/global_step": 373, "train_info/time_within_train_step": 27.9162118434906, "step": 373} +{"train_info/time_between_train_steps": 0.015925884246826172, "step": 373} +{"info/global_step": 374, "train_info/time_within_train_step": 28.027222633361816, "step": 374} +{"train_info/time_between_train_steps": 0.016266345977783203, "step": 374} +{"info/global_step": 375, "train_info/time_within_train_step": 28.023782968521118, "step": 375} +{"train_info/time_between_train_steps": 0.005944728851318359, "step": 375} +{"info/global_step": 376, "train_info/time_within_train_step": 27.864869356155396, "step": 376} +{"train_info/time_between_train_steps": 0.005873680114746094, "step": 376} +{"info/global_step": 377, "train_info/time_within_train_step": 27.858405351638794, "step": 377} +{"train_info/time_between_train_steps": 0.011548519134521484, "step": 377} +{"info/global_step": 378, "train_info/time_within_train_step": 27.95538830757141, "step": 378} +{"train_info/time_between_train_steps": 0.0066013336181640625, "step": 378} +{"train_info/time_between_train_steps": 3.833292245864868, "step": 378} +{"info/global_step": 379, "train_info/time_within_train_step": 27.7784321308136, "step": 379} +{"train_info/time_between_train_steps": 0.010890722274780273, "step": 379} +{"info/global_step": 380, "train_info/time_within_train_step": 27.92649245262146, "step": 380} +{"train_info/time_between_train_steps": 0.005651950836181641, "step": 380} +{"info/global_step": 381, "train_info/time_within_train_step": 27.821677446365356, "step": 381} +{"train_info/time_between_train_steps": 0.006540775299072266, "step": 381} +{"info/global_step": 382, "train_info/time_within_train_step": 28.03030228614807, "step": 382} +{"train_info/time_between_train_steps": 0.006526470184326172, "step": 382} +{"info/global_step": 383, "train_info/time_within_train_step": 27.879165649414062, "step": 383} +{"train_info/time_between_train_steps": 0.005881071090698242, "step": 383} +{"info/global_step": 384, "train_info/time_within_train_step": 28.087834358215332, "step": 384} +{"train_info/time_between_train_steps": 0.007203102111816406, "step": 384} +{"info/global_step": 385, "train_info/time_within_train_step": 27.962371826171875, "step": 385} +{"train_info/time_between_train_steps": 0.011308908462524414, "step": 385} +{"info/global_step": 386, "train_info/time_within_train_step": 28.142534732818604, "step": 386} +{"train_info/time_between_train_steps": 0.006047248840332031, "step": 386} +{"info/global_step": 387, "train_info/time_within_train_step": 27.798009634017944, "step": 387} +{"train_info/time_between_train_steps": 0.005755901336669922, "step": 387} +{"info/global_step": 388, "train_info/time_within_train_step": 27.775139570236206, "step": 388} +{"train_info/time_between_train_steps": 0.005870819091796875, "step": 388} +{"info/global_step": 389, "train_info/time_within_train_step": 27.807263374328613, "step": 389} +{"train_info/time_between_train_steps": 0.0055277347564697266, "step": 389} +{"info/global_step": 390, "train_info/time_within_train_step": 27.801398992538452, "step": 390} +{"train_info/time_between_train_steps": 0.00565648078918457, "step": 390} +{"info/global_step": 391, "train_info/time_within_train_step": 28.01270890235901, "step": 391} +{"train_info/time_between_train_steps": 0.005589485168457031, "step": 391} +{"info/global_step": 392, "train_info/time_within_train_step": 27.921047687530518, "step": 392} +{"train_info/time_between_train_steps": 0.011108875274658203, "step": 392} +{"info/global_step": 393, "train_info/time_within_train_step": 27.910431623458862, "step": 393} +{"train_info/time_between_train_steps": 0.008926630020141602, "step": 393} +{"info/global_step": 394, "train_info/time_within_train_step": 27.88066077232361, "step": 394} +{"train_info/time_between_train_steps": 0.010563135147094727, "step": 394} +{"info/global_step": 395, "train_info/time_within_train_step": 27.871269464492798, "step": 395} +{"train_info/time_between_train_steps": 0.006834268569946289, "step": 395} +{"info/global_step": 396, "train_info/time_within_train_step": 27.82368803024292, "step": 396} +{"train_info/time_between_train_steps": 0.005851268768310547, "step": 396} +{"info/global_step": 397, "train_info/time_within_train_step": 27.95845365524292, "step": 397} +{"train_info/time_between_train_steps": 0.006204366683959961, "step": 397} +{"info/global_step": 398, "train_info/time_within_train_step": 27.911324501037598, "step": 398} +{"train_info/time_between_train_steps": 0.0059413909912109375, "step": 398} +{"info/global_step": 399, "train_info/time_within_train_step": 27.854825019836426, "step": 399} +{"train_info/time_between_train_steps": 0.005816459655761719, "step": 399} +{"info/global_step": 400, "train_info/time_within_train_step": 27.910351514816284, "step": 400} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733965529, "_runtime": 11264}, "step": 400} +{"logs": {"train/loss": 4.9239, "train/learning_rate": 0.00044444444444444436, "train/epoch": 14.02, "_timestamp": 1733965529, "_runtime": 11264}, "step": 400} +{"train_info/time_between_train_steps": 2.390481472015381, "step": 400} +{"info/global_step": 401, "train_info/time_within_train_step": 27.82376003265381, "step": 401} +{"train_info/time_between_train_steps": 0.005635976791381836, "step": 401} +{"info/global_step": 402, "train_info/time_within_train_step": 27.903468370437622, "step": 402} +{"train_info/time_between_train_steps": 0.006139993667602539, "step": 402} +{"info/global_step": 403, "train_info/time_within_train_step": 27.96546721458435, "step": 403} +{"train_info/time_between_train_steps": 0.0057942867279052734, "step": 403} +{"info/global_step": 404, "train_info/time_within_train_step": 27.86075520515442, "step": 404} +{"train_info/time_between_train_steps": 0.011336326599121094, "step": 404} +{"info/global_step": 405, "train_info/time_within_train_step": 27.964154481887817, "step": 405} +{"train_info/time_between_train_steps": 0.006719112396240234, "step": 405} +{"train_info/time_between_train_steps": 3.989821195602417, "step": 405} +{"info/global_step": 406, "train_info/time_within_train_step": 27.92545437812805, "step": 406} +{"train_info/time_between_train_steps": 0.005402565002441406, "step": 406} +{"info/global_step": 407, "train_info/time_within_train_step": 27.956459522247314, "step": 407} +{"train_info/time_between_train_steps": 0.0055904388427734375, "step": 407} +{"info/global_step": 408, "train_info/time_within_train_step": 28.092073440551758, "step": 408} +{"train_info/time_between_train_steps": 0.010336875915527344, "step": 408} +{"info/global_step": 409, "train_info/time_within_train_step": 28.122207403182983, "step": 409} +{"train_info/time_between_train_steps": 0.005721330642700195, "step": 409} +{"info/global_step": 410, "train_info/time_within_train_step": 27.89704179763794, "step": 410} +{"train_info/time_between_train_steps": 0.00567936897277832, "step": 410} +{"info/global_step": 411, "train_info/time_within_train_step": 27.98127770423889, "step": 411} +{"train_info/time_between_train_steps": 0.005824089050292969, "step": 411} +{"info/global_step": 412, "train_info/time_within_train_step": 27.798073291778564, "step": 412} +{"train_info/time_between_train_steps": 0.011176586151123047, "step": 412} +{"info/global_step": 413, "train_info/time_within_train_step": 27.832775115966797, "step": 413} +{"train_info/time_between_train_steps": 0.005219459533691406, "step": 413} +{"info/global_step": 414, "train_info/time_within_train_step": 27.836385250091553, "step": 414} +{"train_info/time_between_train_steps": 0.005759477615356445, "step": 414} +{"info/global_step": 415, "train_info/time_within_train_step": 27.82823133468628, "step": 415} +{"train_info/time_between_train_steps": 0.005718708038330078, "step": 415} +{"info/global_step": 416, "train_info/time_within_train_step": 27.844091176986694, "step": 416} +{"train_info/time_between_train_steps": 0.01052093505859375, "step": 416} +{"info/global_step": 417, "train_info/time_within_train_step": 27.811589002609253, "step": 417} +{"train_info/time_between_train_steps": 0.005408763885498047, "step": 417} +{"info/global_step": 418, "train_info/time_within_train_step": 27.778311252593994, "step": 418} +{"train_info/time_between_train_steps": 0.010835647583007812, "step": 418} +{"info/global_step": 419, "train_info/time_within_train_step": 28.02453351020813, "step": 419} +{"train_info/time_between_train_steps": 0.010812044143676758, "step": 419} +{"info/global_step": 420, "train_info/time_within_train_step": 27.954182863235474, "step": 420} +{"train_info/time_between_train_steps": 0.0056951045989990234, "step": 420} +{"info/global_step": 421, "train_info/time_within_train_step": 27.99918794631958, "step": 421} +{"train_info/time_between_train_steps": 0.015784502029418945, "step": 421} +{"info/global_step": 422, "train_info/time_within_train_step": 28.125169277191162, "step": 422} +{"train_info/time_between_train_steps": 0.010532140731811523, "step": 422} +{"info/global_step": 423, "train_info/time_within_train_step": 27.97251319885254, "step": 423} +{"train_info/time_between_train_steps": 0.015547752380371094, "step": 423} +{"info/global_step": 424, "train_info/time_within_train_step": 27.90796971321106, "step": 424} +{"train_info/time_between_train_steps": 0.005474090576171875, "step": 424} +{"info/global_step": 425, "train_info/time_within_train_step": 27.993440628051758, "step": 425} +{"train_info/time_between_train_steps": 0.00554203987121582, "step": 425} +{"info/global_step": 426, "train_info/time_within_train_step": 27.88852620124817, "step": 426} +{"train_info/time_between_train_steps": 0.0110321044921875, "step": 426} +{"info/global_step": 427, "train_info/time_within_train_step": 27.889010667800903, "step": 427} +{"train_info/time_between_train_steps": 0.00596928596496582, "step": 427} +{"info/global_step": 428, "train_info/time_within_train_step": 27.843730926513672, "step": 428} +{"train_info/time_between_train_steps": 0.011341094970703125, "step": 428} +{"info/global_step": 429, "train_info/time_within_train_step": 27.837457418441772, "step": 429} +{"train_info/time_between_train_steps": 0.010885477066040039, "step": 429} +{"info/global_step": 430, "train_info/time_within_train_step": 27.976147413253784, "step": 430} +{"train_info/time_between_train_steps": 0.0062601566314697266, "step": 430} +{"info/global_step": 431, "train_info/time_within_train_step": 27.963362455368042, "step": 431} +{"train_info/time_between_train_steps": 0.011993408203125, "step": 431} +{"info/global_step": 432, "train_info/time_within_train_step": 27.86529231071472, "step": 432} +{"train_info/time_between_train_steps": 0.007036924362182617, "step": 432} +{"train_info/time_between_train_steps": 4.097571611404419, "step": 432} +{"info/global_step": 433, "train_info/time_within_train_step": 27.899046182632446, "step": 433} +{"train_info/time_between_train_steps": 0.00640869140625, "step": 433} +{"info/global_step": 434, "train_info/time_within_train_step": 28.04303812980652, "step": 434} +{"train_info/time_between_train_steps": 0.005834341049194336, "step": 434} +{"info/global_step": 435, "train_info/time_within_train_step": 27.987956285476685, "step": 435} +{"train_info/time_between_train_steps": 0.011281967163085938, "step": 435} +{"info/global_step": 436, "train_info/time_within_train_step": 27.98580551147461, "step": 436} +{"train_info/time_between_train_steps": 0.0057294368743896484, "step": 436} +{"info/global_step": 437, "train_info/time_within_train_step": 28.00100874900818, "step": 437} +{"train_info/time_between_train_steps": 0.006120920181274414, "step": 437} +{"info/global_step": 438, "train_info/time_within_train_step": 28.02282476425171, "step": 438} +{"train_info/time_between_train_steps": 0.0061380863189697266, "step": 438} +{"info/global_step": 439, "train_info/time_within_train_step": 27.782308101654053, "step": 439} +{"train_info/time_between_train_steps": 0.0062406063079833984, "step": 439} +{"info/global_step": 440, "train_info/time_within_train_step": 27.88168954849243, "step": 440} +{"train_info/time_between_train_steps": 0.005657196044921875, "step": 440} +{"info/global_step": 441, "train_info/time_within_train_step": 27.763890027999878, "step": 441} +{"train_info/time_between_train_steps": 0.0055522918701171875, "step": 441} +{"info/global_step": 442, "train_info/time_within_train_step": 27.909388780593872, "step": 442} +{"train_info/time_between_train_steps": 0.005713224411010742, "step": 442} +{"info/global_step": 443, "train_info/time_within_train_step": 27.832195043563843, "step": 443} +{"train_info/time_between_train_steps": 0.010182619094848633, "step": 443} +{"info/global_step": 444, "train_info/time_within_train_step": 27.894620418548584, "step": 444} +{"train_info/time_between_train_steps": 0.005457878112792969, "step": 444} +{"info/global_step": 445, "train_info/time_within_train_step": 27.940269947052002, "step": 445} +{"train_info/time_between_train_steps": 0.005577564239501953, "step": 445} +{"info/global_step": 446, "train_info/time_within_train_step": 27.79808282852173, "step": 446} +{"train_info/time_between_train_steps": 0.011091470718383789, "step": 446} +{"info/global_step": 447, "train_info/time_within_train_step": 27.89437508583069, "step": 447} +{"train_info/time_between_train_steps": 0.0057756900787353516, "step": 447} +{"info/global_step": 448, "train_info/time_within_train_step": 27.975953102111816, "step": 448} +{"train_info/time_between_train_steps": 0.006109476089477539, "step": 448} +{"info/global_step": 449, "train_info/time_within_train_step": 27.979410648345947, "step": 449} +{"train_info/time_between_train_steps": 0.006656646728515625, "step": 449} +{"info/global_step": 450, "train_info/time_within_train_step": 27.840675592422485, "step": 450} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733966936, "_runtime": 12671}, "step": 450} +{"logs": {"train/loss": 4.7766, "train/learning_rate": 0.00041666666666666664, "train/epoch": 16.02, "_timestamp": 1733966936, "_runtime": 12671}, "step": 450} +{"train_info/time_between_train_steps": 0.007503986358642578, "step": 450} +{"info/global_step": 451, "train_info/time_within_train_step": 28.024588346481323, "step": 451} +{"train_info/time_between_train_steps": 0.013077497482299805, "step": 451} +{"info/global_step": 452, "train_info/time_within_train_step": 27.934088706970215, "step": 452} +{"train_info/time_between_train_steps": 0.010646820068359375, "step": 452} +{"info/global_step": 453, "train_info/time_within_train_step": 27.958449840545654, "step": 453} +{"train_info/time_between_train_steps": 0.005457639694213867, "step": 453} +{"info/global_step": 454, "train_info/time_within_train_step": 27.84179401397705, "step": 454} +{"train_info/time_between_train_steps": 0.016277551651000977, "step": 454} +{"info/global_step": 455, "train_info/time_within_train_step": 27.866722345352173, "step": 455} +{"train_info/time_between_train_steps": 0.006010770797729492, "step": 455} +{"info/global_step": 456, "train_info/time_within_train_step": 27.923765897750854, "step": 456} +{"train_info/time_between_train_steps": 0.005949497222900391, "step": 456} +{"info/global_step": 457, "train_info/time_within_train_step": 27.93302822113037, "step": 457} +{"train_info/time_between_train_steps": 0.0062520503997802734, "step": 457} +{"info/global_step": 458, "train_info/time_within_train_step": 27.915688514709473, "step": 458} +{"train_info/time_between_train_steps": 0.010891437530517578, "step": 458} +{"info/global_step": 459, "train_info/time_within_train_step": 27.968697547912598, "step": 459} +{"train_info/time_between_train_steps": 0.006744861602783203, "step": 459} +{"train_info/time_between_train_steps": 3.818096160888672, "step": 459} +{"info/global_step": 460, "train_info/time_within_train_step": 27.91257953643799, "step": 460} +{"train_info/time_between_train_steps": 0.005381107330322266, "step": 460} +{"info/global_step": 461, "train_info/time_within_train_step": 28.054479837417603, "step": 461} +{"train_info/time_between_train_steps": 0.0056154727935791016, "step": 461} +{"info/global_step": 462, "train_info/time_within_train_step": 27.94524049758911, "step": 462} +{"train_info/time_between_train_steps": 0.010783195495605469, "step": 462} +{"info/global_step": 463, "train_info/time_within_train_step": 28.008689641952515, "step": 463} +{"train_info/time_between_train_steps": 0.005992889404296875, "step": 463} +{"info/global_step": 464, "train_info/time_within_train_step": 27.94476866722107, "step": 464} +{"train_info/time_between_train_steps": 0.0063800811767578125, "step": 464} +{"info/global_step": 465, "train_info/time_within_train_step": 27.960994482040405, "step": 465} +{"train_info/time_between_train_steps": 0.0067446231842041016, "step": 465} +{"info/global_step": 466, "train_info/time_within_train_step": 27.888354063034058, "step": 466} +{"train_info/time_between_train_steps": 0.006256103515625, "step": 466} +{"info/global_step": 467, "train_info/time_within_train_step": 27.82548952102661, "step": 467} +{"train_info/time_between_train_steps": 0.005709409713745117, "step": 467} +{"info/global_step": 468, "train_info/time_within_train_step": 27.793076992034912, "step": 468} +{"train_info/time_between_train_steps": 0.005589485168457031, "step": 468} +{"info/global_step": 469, "train_info/time_within_train_step": 28.01112961769104, "step": 469} +{"train_info/time_between_train_steps": 0.01080322265625, "step": 469} +{"info/global_step": 470, "train_info/time_within_train_step": 27.841110229492188, "step": 470} +{"train_info/time_between_train_steps": 0.005803108215332031, "step": 470} +{"info/global_step": 471, "train_info/time_within_train_step": 27.82098937034607, "step": 471} +{"train_info/time_between_train_steps": 0.005753517150878906, "step": 471} +{"info/global_step": 472, "train_info/time_within_train_step": 27.86390471458435, "step": 472} +{"train_info/time_between_train_steps": 0.011265039443969727, "step": 472} +{"info/global_step": 473, "train_info/time_within_train_step": 27.79002594947815, "step": 473} +{"train_info/time_between_train_steps": 0.005948066711425781, "step": 473} +{"info/global_step": 474, "train_info/time_within_train_step": 27.863248348236084, "step": 474} +{"train_info/time_between_train_steps": 0.005667448043823242, "step": 474} +{"info/global_step": 475, "train_info/time_within_train_step": 28.10631036758423, "step": 475} +{"train_info/time_between_train_steps": 0.005855560302734375, "step": 475} +{"info/global_step": 476, "train_info/time_within_train_step": 28.223978519439697, "step": 476} +{"train_info/time_between_train_steps": 0.010492563247680664, "step": 476} +{"info/global_step": 477, "train_info/time_within_train_step": 27.937723875045776, "step": 477} +{"train_info/time_between_train_steps": 0.016745805740356445, "step": 477} +{"info/global_step": 478, "train_info/time_within_train_step": 27.985267162322998, "step": 478} +{"train_info/time_between_train_steps": 0.005663394927978516, "step": 478} +{"info/global_step": 479, "train_info/time_within_train_step": 27.89673113822937, "step": 479} +{"train_info/time_between_train_steps": 0.017071008682250977, "step": 479} +{"info/global_step": 480, "train_info/time_within_train_step": 27.869428634643555, "step": 480} +{"train_info/time_between_train_steps": 0.005771160125732422, "step": 480} +{"info/global_step": 481, "train_info/time_within_train_step": 28.041688203811646, "step": 481} +{"train_info/time_between_train_steps": 0.015516519546508789, "step": 481} +{"info/global_step": 482, "train_info/time_within_train_step": 27.810519456863403, "step": 482} +{"train_info/time_between_train_steps": 0.005975008010864258, "step": 482} +{"info/global_step": 483, "train_info/time_within_train_step": 27.838212728500366, "step": 483} +{"train_info/time_between_train_steps": 0.005839347839355469, "step": 483} +{"info/global_step": 484, "train_info/time_within_train_step": 27.97121572494507, "step": 484} +{"train_info/time_between_train_steps": 0.01758122444152832, "step": 484} +{"info/global_step": 485, "train_info/time_within_train_step": 27.974843502044678, "step": 485} +{"train_info/time_between_train_steps": 0.00619959831237793, "step": 485} +{"info/global_step": 486, "train_info/time_within_train_step": 27.826489210128784, "step": 486} +{"train_info/time_between_train_steps": 0.012028694152832031, "step": 486} +{"train_info/time_between_train_steps": 3.909219741821289, "step": 486} +{"info/global_step": 487, "train_info/time_within_train_step": 28.03011918067932, "step": 487} +{"train_info/time_between_train_steps": 0.005447864532470703, "step": 487} +{"info/global_step": 488, "train_info/time_within_train_step": 28.02645444869995, "step": 488} +{"train_info/time_between_train_steps": 0.005455732345581055, "step": 488} +{"info/global_step": 489, "train_info/time_within_train_step": 28.003999948501587, "step": 489} +{"train_info/time_between_train_steps": 0.0055751800537109375, "step": 489} +{"info/global_step": 490, "train_info/time_within_train_step": 27.981616258621216, "step": 490} +{"train_info/time_between_train_steps": 0.01179957389831543, "step": 490} +{"info/global_step": 491, "train_info/time_within_train_step": 27.761273622512817, "step": 491} +{"train_info/time_between_train_steps": 0.005509138107299805, "step": 491} +{"info/global_step": 492, "train_info/time_within_train_step": 28.014715433120728, "step": 492} +{"train_info/time_between_train_steps": 0.005782365798950195, "step": 492} +{"info/global_step": 493, "train_info/time_within_train_step": 27.955476999282837, "step": 493} +{"train_info/time_between_train_steps": 0.006031036376953125, "step": 493} +{"info/global_step": 494, "train_info/time_within_train_step": 27.96413254737854, "step": 494} +{"train_info/time_between_train_steps": 0.011128902435302734, "step": 494} +{"info/global_step": 495, "train_info/time_within_train_step": 27.800493955612183, "step": 495} +{"train_info/time_between_train_steps": 0.005598306655883789, "step": 495} +{"info/global_step": 496, "train_info/time_within_train_step": 27.8407723903656, "step": 496} +{"train_info/time_between_train_steps": 0.005766391754150391, "step": 496} +{"info/global_step": 497, "train_info/time_within_train_step": 27.809658527374268, "step": 497} +{"train_info/time_between_train_steps": 0.008385181427001953, "step": 497} +{"info/global_step": 498, "train_info/time_within_train_step": 27.763856649398804, "step": 498} +{"train_info/time_between_train_steps": 0.0054264068603515625, "step": 498} +{"info/global_step": 499, "train_info/time_within_train_step": 27.834722757339478, "step": 499} +{"train_info/time_between_train_steps": 0.005445241928100586, "step": 499} +{"info/global_step": 500, "train_info/time_within_train_step": 27.866363048553467, "step": 500} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733968340, "_runtime": 14075}, "step": 500} +{"logs": {"train/loss": 4.6253, "train/learning_rate": 0.00038888888888888887, "train/epoch": 18.01, "_timestamp": 1733968340, "_runtime": 14075}, "step": 500} +{"train_info/time_between_train_steps": 2.3379359245300293, "step": 500} +{"info/global_step": 501, "train_info/time_within_train_step": 27.898356199264526, "step": 501} +{"train_info/time_between_train_steps": 0.006066560745239258, "step": 501} +{"info/global_step": 502, "train_info/time_within_train_step": 27.897268772125244, "step": 502} +{"train_info/time_between_train_steps": 0.010944604873657227, "step": 502} +{"info/global_step": 503, "train_info/time_within_train_step": 27.925435304641724, "step": 503} +{"train_info/time_between_train_steps": 0.006060361862182617, "step": 503} +{"info/global_step": 504, "train_info/time_within_train_step": 27.84625244140625, "step": 504} +{"train_info/time_between_train_steps": 0.0058231353759765625, "step": 504} +{"info/global_step": 505, "train_info/time_within_train_step": 28.048436164855957, "step": 505} +{"train_info/time_between_train_steps": 0.010964632034301758, "step": 505} +{"info/global_step": 506, "train_info/time_within_train_step": 27.8687801361084, "step": 506} +{"train_info/time_between_train_steps": 0.011220932006835938, "step": 506} +{"info/global_step": 507, "train_info/time_within_train_step": 27.943260431289673, "step": 507} +{"train_info/time_between_train_steps": 0.0075533390045166016, "step": 507} +{"info/global_step": 508, "train_info/time_within_train_step": 27.835200786590576, "step": 508} +{"train_info/time_between_train_steps": 0.00579524040222168, "step": 508} +{"info/global_step": 509, "train_info/time_within_train_step": 27.901549577713013, "step": 509} +{"train_info/time_between_train_steps": 0.015167951583862305, "step": 509} +{"info/global_step": 510, "train_info/time_within_train_step": 27.942033529281616, "step": 510} +{"train_info/time_between_train_steps": 0.011053323745727539, "step": 510} +{"info/global_step": 511, "train_info/time_within_train_step": 27.87360644340515, "step": 511} +{"train_info/time_between_train_steps": 0.005839109420776367, "step": 511} +{"info/global_step": 512, "train_info/time_within_train_step": 27.91954755783081, "step": 512} +{"train_info/time_between_train_steps": 0.017276287078857422, "step": 512} +{"info/global_step": 513, "train_info/time_within_train_step": 27.903677463531494, "step": 513} +{"train_info/time_between_train_steps": 0.006490230560302734, "step": 513} +{"train_info/time_between_train_steps": 4.086702346801758, "step": 513} +{"info/global_step": 514, "train_info/time_within_train_step": 28.017974376678467, "step": 514} +{"train_info/time_between_train_steps": 0.041614532470703125, "step": 514} +{"info/global_step": 515, "train_info/time_within_train_step": 28.113981246948242, "step": 515} +{"train_info/time_between_train_steps": 0.005818367004394531, "step": 515} +{"info/global_step": 516, "train_info/time_within_train_step": 27.93158769607544, "step": 516} +{"train_info/time_between_train_steps": 0.0059049129486083984, "step": 516} +{"info/global_step": 517, "train_info/time_within_train_step": 28.272931575775146, "step": 517} +{"train_info/time_between_train_steps": 0.005804538726806641, "step": 517} +{"info/global_step": 518, "train_info/time_within_train_step": 27.83115816116333, "step": 518} +{"train_info/time_between_train_steps": 0.005632638931274414, "step": 518} +{"info/global_step": 519, "train_info/time_within_train_step": 27.902294397354126, "step": 519} +{"train_info/time_between_train_steps": 0.016197919845581055, "step": 519} +{"info/global_step": 520, "train_info/time_within_train_step": 27.813724994659424, "step": 520} +{"train_info/time_between_train_steps": 0.005995273590087891, "step": 520} +{"info/global_step": 521, "train_info/time_within_train_step": 27.868725776672363, "step": 521} +{"train_info/time_between_train_steps": 0.005880594253540039, "step": 521} +{"info/global_step": 522, "train_info/time_within_train_step": 27.78829073905945, "step": 522} +{"train_info/time_between_train_steps": 0.005658864974975586, "step": 522} +{"info/global_step": 523, "train_info/time_within_train_step": 27.854447603225708, "step": 523} +{"train_info/time_between_train_steps": 0.005652427673339844, "step": 523} +{"info/global_step": 524, "train_info/time_within_train_step": 27.958773612976074, "step": 524} +{"train_info/time_between_train_steps": 0.005671977996826172, "step": 524} +{"info/global_step": 525, "train_info/time_within_train_step": 27.896942615509033, "step": 525} +{"train_info/time_between_train_steps": 0.011109113693237305, "step": 525} +{"info/global_step": 526, "train_info/time_within_train_step": 27.82414436340332, "step": 526} +{"train_info/time_between_train_steps": 0.005618572235107422, "step": 526} +{"info/global_step": 527, "train_info/time_within_train_step": 27.758033275604248, "step": 527} +{"train_info/time_between_train_steps": 0.0056073665618896484, "step": 527} +{"info/global_step": 528, "train_info/time_within_train_step": 27.828810453414917, "step": 528} +{"train_info/time_between_train_steps": 0.005554676055908203, "step": 528} +{"info/global_step": 529, "train_info/time_within_train_step": 27.80906915664673, "step": 529} +{"train_info/time_between_train_steps": 0.0057201385498046875, "step": 529} +{"info/global_step": 530, "train_info/time_within_train_step": 27.827876806259155, "step": 530} +{"train_info/time_between_train_steps": 0.005715131759643555, "step": 530} +{"info/global_step": 531, "train_info/time_within_train_step": 28.084166288375854, "step": 531} +{"train_info/time_between_train_steps": 0.016163110733032227, "step": 531} +{"info/global_step": 532, "train_info/time_within_train_step": 27.826380252838135, "step": 532} +{"train_info/time_between_train_steps": 0.00575709342956543, "step": 532} +{"info/global_step": 533, "train_info/time_within_train_step": 28.00854182243347, "step": 533} +{"train_info/time_between_train_steps": 0.00551152229309082, "step": 533} +{"info/global_step": 534, "train_info/time_within_train_step": 27.817338228225708, "step": 534} +{"train_info/time_between_train_steps": 0.0057599544525146484, "step": 534} +{"info/global_step": 535, "train_info/time_within_train_step": 27.849898099899292, "step": 535} +{"train_info/time_between_train_steps": 0.010464668273925781, "step": 535} +{"info/global_step": 536, "train_info/time_within_train_step": 27.784070014953613, "step": 536} +{"train_info/time_between_train_steps": 0.009347200393676758, "step": 536} +{"info/global_step": 537, "train_info/time_within_train_step": 27.861976623535156, "step": 537} +{"train_info/time_between_train_steps": 0.006014585494995117, "step": 537} +{"info/global_step": 538, "train_info/time_within_train_step": 27.97955822944641, "step": 538} +{"train_info/time_between_train_steps": 0.014792203903198242, "step": 538} +{"info/global_step": 539, "train_info/time_within_train_step": 28.00340175628662, "step": 539} +{"train_info/time_between_train_steps": 0.006221294403076172, "step": 539} +{"info/global_step": 540, "train_info/time_within_train_step": 27.892431020736694, "step": 540} +{"train_info/time_between_train_steps": 0.006946563720703125, "step": 540} +{"train_info/time_between_train_steps": 4.209256887435913, "step": 540} +{"info/global_step": 541, "train_info/time_within_train_step": 27.88236355781555, "step": 541} +{"train_info/time_between_train_steps": 0.005343198776245117, "step": 541} +{"info/global_step": 542, "train_info/time_within_train_step": 27.99750256538391, "step": 542} +{"train_info/time_between_train_steps": 0.00959014892578125, "step": 542} +{"info/global_step": 543, "train_info/time_within_train_step": 27.809817790985107, "step": 543} +{"train_info/time_between_train_steps": 0.016113996505737305, "step": 543} +{"info/global_step": 544, "train_info/time_within_train_step": 27.993309259414673, "step": 544} +{"train_info/time_between_train_steps": 0.016232728958129883, "step": 544} +{"info/global_step": 545, "train_info/time_within_train_step": 27.90681004524231, "step": 545} +{"train_info/time_between_train_steps": 0.010787248611450195, "step": 545} +{"info/global_step": 546, "train_info/time_within_train_step": 28.108042240142822, "step": 546} +{"train_info/time_between_train_steps": 0.011537551879882812, "step": 546} +{"info/global_step": 547, "train_info/time_within_train_step": 27.890199184417725, "step": 547} +{"train_info/time_between_train_steps": 0.00571441650390625, "step": 547} +{"info/global_step": 548, "train_info/time_within_train_step": 27.8919780254364, "step": 548} +{"train_info/time_between_train_steps": 0.005415439605712891, "step": 548} +{"info/global_step": 549, "train_info/time_within_train_step": 27.90893840789795, "step": 549} +{"train_info/time_between_train_steps": 0.0053670406341552734, "step": 549} +{"info/global_step": 550, "train_info/time_within_train_step": 27.812329530715942, "step": 550} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733969746, "_runtime": 15481}, "step": 550} +{"logs": {"train/loss": 4.434, "train/learning_rate": 0.0003611111111111111, "train/epoch": 20.01, "_timestamp": 1733969746, "_runtime": 15481}, "step": 550} +{"train_info/time_between_train_steps": 0.01372385025024414, "step": 550} +{"info/global_step": 551, "train_info/time_within_train_step": 27.820843935012817, "step": 551} +{"train_info/time_between_train_steps": 0.00574803352355957, "step": 551} +{"info/global_step": 552, "train_info/time_within_train_step": 27.78736138343811, "step": 552} +{"train_info/time_between_train_steps": 0.006474971771240234, "step": 552} +{"info/global_step": 553, "train_info/time_within_train_step": 27.97497057914734, "step": 553} +{"train_info/time_between_train_steps": 0.010966300964355469, "step": 553} +{"info/global_step": 554, "train_info/time_within_train_step": 27.90937352180481, "step": 554} +{"train_info/time_between_train_steps": 0.005640745162963867, "step": 554} +{"info/global_step": 555, "train_info/time_within_train_step": 27.80729603767395, "step": 555} +{"train_info/time_between_train_steps": 0.0053997039794921875, "step": 555} +{"info/global_step": 556, "train_info/time_within_train_step": 27.87872314453125, "step": 556} +{"train_info/time_between_train_steps": 0.005645275115966797, "step": 556} +{"info/global_step": 557, "train_info/time_within_train_step": 27.851693868637085, "step": 557} +{"train_info/time_between_train_steps": 0.007900238037109375, "step": 557} +{"info/global_step": 558, "train_info/time_within_train_step": 27.842098712921143, "step": 558} +{"train_info/time_between_train_steps": 0.0054607391357421875, "step": 558} +{"info/global_step": 559, "train_info/time_within_train_step": 27.920915842056274, "step": 559} +{"train_info/time_between_train_steps": 0.005903959274291992, "step": 559} +{"info/global_step": 560, "train_info/time_within_train_step": 27.937583684921265, "step": 560} +{"train_info/time_between_train_steps": 0.0056040287017822266, "step": 560} +{"info/global_step": 561, "train_info/time_within_train_step": 27.890241384506226, "step": 561} +{"train_info/time_between_train_steps": 0.005618572235107422, "step": 561} +{"info/global_step": 562, "train_info/time_within_train_step": 27.9260356426239, "step": 562} +{"train_info/time_between_train_steps": 0.006131887435913086, "step": 562} +{"info/global_step": 563, "train_info/time_within_train_step": 27.894007921218872, "step": 563} +{"train_info/time_between_train_steps": 0.005539894104003906, "step": 563} +{"info/global_step": 564, "train_info/time_within_train_step": 27.964881896972656, "step": 564} +{"train_info/time_between_train_steps": 0.005877256393432617, "step": 564} +{"info/global_step": 565, "train_info/time_within_train_step": 27.96068263053894, "step": 565} +{"train_info/time_between_train_steps": 0.005849123001098633, "step": 565} +{"info/global_step": 566, "train_info/time_within_train_step": 27.965462923049927, "step": 566} +{"train_info/time_between_train_steps": 0.006489992141723633, "step": 566} +{"info/global_step": 567, "train_info/time_within_train_step": 27.995410442352295, "step": 567} +{"train_info/time_between_train_steps": 0.007025241851806641, "step": 567} +{"train_info/time_between_train_steps": 3.805143356323242, "step": 567} +{"info/global_step": 568, "train_info/time_within_train_step": 27.874669313430786, "step": 568} +{"train_info/time_between_train_steps": 0.006142139434814453, "step": 568} +{"info/global_step": 569, "train_info/time_within_train_step": 27.959585428237915, "step": 569} +{"train_info/time_between_train_steps": 0.005998373031616211, "step": 569} +{"info/global_step": 570, "train_info/time_within_train_step": 27.95555543899536, "step": 570} +{"train_info/time_between_train_steps": 0.005943775177001953, "step": 570} +{"info/global_step": 571, "train_info/time_within_train_step": 27.989529371261597, "step": 571} +{"train_info/time_between_train_steps": 0.015942096710205078, "step": 571} +{"info/global_step": 572, "train_info/time_within_train_step": 27.962081909179688, "step": 572} +{"train_info/time_between_train_steps": 0.00623011589050293, "step": 572} +{"info/global_step": 573, "train_info/time_within_train_step": 27.9905047416687, "step": 573} +{"train_info/time_between_train_steps": 0.0058214664459228516, "step": 573} +{"info/global_step": 574, "train_info/time_within_train_step": 27.76459503173828, "step": 574} +{"train_info/time_between_train_steps": 0.005639791488647461, "step": 574} +{"info/global_step": 575, "train_info/time_within_train_step": 27.82093906402588, "step": 575} +{"train_info/time_between_train_steps": 0.0053751468658447266, "step": 575} +{"info/global_step": 576, "train_info/time_within_train_step": 27.89792013168335, "step": 576} +{"train_info/time_between_train_steps": 0.0062427520751953125, "step": 576} +{"info/global_step": 577, "train_info/time_within_train_step": 27.93362593650818, "step": 577} +{"train_info/time_between_train_steps": 0.005517005920410156, "step": 577} +{"info/global_step": 578, "train_info/time_within_train_step": 27.964374542236328, "step": 578} +{"train_info/time_between_train_steps": 0.005555152893066406, "step": 578} +{"info/global_step": 579, "train_info/time_within_train_step": 27.75411105155945, "step": 579} +{"train_info/time_between_train_steps": 0.005865812301635742, "step": 579} +{"info/global_step": 580, "train_info/time_within_train_step": 27.85348081588745, "step": 580} +{"train_info/time_between_train_steps": 0.014738798141479492, "step": 580} +{"info/global_step": 581, "train_info/time_within_train_step": 27.889742851257324, "step": 581} +{"train_info/time_between_train_steps": 0.01400446891784668, "step": 581} +{"info/global_step": 582, "train_info/time_within_train_step": 27.986821174621582, "step": 582} +{"train_info/time_between_train_steps": 0.010656595230102539, "step": 582} +{"info/global_step": 583, "train_info/time_within_train_step": 27.909281492233276, "step": 583} +{"train_info/time_between_train_steps": 0.005738973617553711, "step": 583} +{"info/global_step": 584, "train_info/time_within_train_step": 27.898415088653564, "step": 584} +{"train_info/time_between_train_steps": 0.005730628967285156, "step": 584} +{"info/global_step": 585, "train_info/time_within_train_step": 27.896716833114624, "step": 585} +{"train_info/time_between_train_steps": 0.011055469512939453, "step": 585} +{"info/global_step": 586, "train_info/time_within_train_step": 27.957385063171387, "step": 586} +{"train_info/time_between_train_steps": 0.01616525650024414, "step": 586} +{"info/global_step": 587, "train_info/time_within_train_step": 27.838863134384155, "step": 587} +{"train_info/time_between_train_steps": 0.005880117416381836, "step": 587} +{"info/global_step": 588, "train_info/time_within_train_step": 27.797232389450073, "step": 588} +{"train_info/time_between_train_steps": 0.011579275131225586, "step": 588} +{"info/global_step": 589, "train_info/time_within_train_step": 27.854387521743774, "step": 589} +{"train_info/time_between_train_steps": 0.005666017532348633, "step": 589} +{"info/global_step": 590, "train_info/time_within_train_step": 27.849857807159424, "step": 590} +{"train_info/time_between_train_steps": 0.005752086639404297, "step": 590} +{"info/global_step": 591, "train_info/time_within_train_step": 28.01837396621704, "step": 591} +{"train_info/time_between_train_steps": 0.011369705200195312, "step": 591} +{"info/global_step": 592, "train_info/time_within_train_step": 27.95079755783081, "step": 592} +{"train_info/time_between_train_steps": 0.005972385406494141, "step": 592} +{"info/global_step": 593, "train_info/time_within_train_step": 27.855345964431763, "step": 593} +{"train_info/time_between_train_steps": 0.007697105407714844, "step": 593} +{"info/global_step": 594, "train_info/time_within_train_step": 28.00795006752014, "step": 594} +{"train_info/time_between_train_steps": 0.006716489791870117, "step": 594} +{"train_info/time_between_train_steps": 4.072153568267822, "step": 594} +{"info/global_step": 595, "train_info/time_within_train_step": 27.87225317955017, "step": 595} +{"train_info/time_between_train_steps": 0.0053653717041015625, "step": 595} +{"info/global_step": 596, "train_info/time_within_train_step": 28.13224482536316, "step": 596} +{"train_info/time_between_train_steps": 0.005811929702758789, "step": 596} +{"info/global_step": 597, "train_info/time_within_train_step": 27.830488920211792, "step": 597} +{"train_info/time_between_train_steps": 0.011422157287597656, "step": 597} +{"info/global_step": 598, "train_info/time_within_train_step": 28.074970483779907, "step": 598} +{"train_info/time_between_train_steps": 0.010759592056274414, "step": 598} +{"info/global_step": 599, "train_info/time_within_train_step": 27.772303342819214, "step": 599} +{"train_info/time_between_train_steps": 0.00592350959777832, "step": 599} +{"info/global_step": 600, "train_info/time_within_train_step": 27.916251182556152, "step": 600} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733971150, "_runtime": 16885}, "step": 600} +{"logs": {"train/loss": 4.2692, "train/learning_rate": 0.0003333333333333333, "train/epoch": 22.0, "_timestamp": 1733971150, "_runtime": 16885}, "step": 600} +{"train_info/time_between_train_steps": 2.389683246612549, "step": 600} +{"info/global_step": 601, "train_info/time_within_train_step": 27.84782338142395, "step": 601} +{"train_info/time_between_train_steps": 0.005631446838378906, "step": 601} +{"info/global_step": 602, "train_info/time_within_train_step": 27.774192571640015, "step": 602} +{"train_info/time_between_train_steps": 0.0054018497467041016, "step": 602} +{"info/global_step": 603, "train_info/time_within_train_step": 27.756919145584106, "step": 603} +{"train_info/time_between_train_steps": 0.00556492805480957, "step": 603} +{"info/global_step": 604, "train_info/time_within_train_step": 27.9267520904541, "step": 604} +{"train_info/time_between_train_steps": 0.005709171295166016, "step": 604} +{"info/global_step": 605, "train_info/time_within_train_step": 27.781913995742798, "step": 605} +{"train_info/time_between_train_steps": 0.005692243576049805, "step": 605} +{"info/global_step": 606, "train_info/time_within_train_step": 27.782544374465942, "step": 606} +{"train_info/time_between_train_steps": 0.0058193206787109375, "step": 606} +{"info/global_step": 607, "train_info/time_within_train_step": 27.824861526489258, "step": 607} +{"train_info/time_between_train_steps": 0.010935068130493164, "step": 607} +{"info/global_step": 608, "train_info/time_within_train_step": 27.961413860321045, "step": 608} +{"train_info/time_between_train_steps": 0.005721092224121094, "step": 608} +{"info/global_step": 609, "train_info/time_within_train_step": 27.904898405075073, "step": 609} +{"train_info/time_between_train_steps": 0.01311492919921875, "step": 609} +{"info/global_step": 610, "train_info/time_within_train_step": 27.96992802619934, "step": 610} +{"train_info/time_between_train_steps": 0.0054743289947509766, "step": 610} +{"info/global_step": 611, "train_info/time_within_train_step": 27.99239420890808, "step": 611} +{"train_info/time_between_train_steps": 0.00588536262512207, "step": 611} +{"info/global_step": 612, "train_info/time_within_train_step": 27.87205410003662, "step": 612} +{"train_info/time_between_train_steps": 0.010022878646850586, "step": 612} +{"info/global_step": 613, "train_info/time_within_train_step": 27.902284145355225, "step": 613} +{"train_info/time_between_train_steps": 0.006434202194213867, "step": 613} +{"info/global_step": 614, "train_info/time_within_train_step": 27.868959665298462, "step": 614} +{"train_info/time_between_train_steps": 0.011216402053833008, "step": 614} +{"info/global_step": 615, "train_info/time_within_train_step": 27.87202787399292, "step": 615} +{"train_info/time_between_train_steps": 0.005473613739013672, "step": 615} +{"info/global_step": 616, "train_info/time_within_train_step": 27.903727531433105, "step": 616} +{"train_info/time_between_train_steps": 0.0055921077728271484, "step": 616} +{"info/global_step": 617, "train_info/time_within_train_step": 27.905792474746704, "step": 617} +{"train_info/time_between_train_steps": 0.011097908020019531, "step": 617} +{"info/global_step": 618, "train_info/time_within_train_step": 27.85850977897644, "step": 618} +{"train_info/time_between_train_steps": 0.0055980682373046875, "step": 618} +{"info/global_step": 619, "train_info/time_within_train_step": 27.834740161895752, "step": 619} +{"train_info/time_between_train_steps": 0.005901813507080078, "step": 619} +{"info/global_step": 620, "train_info/time_within_train_step": 27.94474482536316, "step": 620} +{"train_info/time_between_train_steps": 0.011226415634155273, "step": 620} +{"info/global_step": 621, "train_info/time_within_train_step": 27.9850070476532, "step": 621} +{"train_info/time_between_train_steps": 0.01699042320251465, "step": 621} +{"train_info/time_between_train_steps": 4.526198625564575, "step": 621} +{"info/global_step": 622, "train_info/time_within_train_step": 27.905194997787476, "step": 622} +{"train_info/time_between_train_steps": 0.005875349044799805, "step": 622} +{"info/global_step": 623, "train_info/time_within_train_step": 27.94278860092163, "step": 623} +{"train_info/time_between_train_steps": 0.005580425262451172, "step": 623} +{"info/global_step": 624, "train_info/time_within_train_step": 27.91780996322632, "step": 624} +{"train_info/time_between_train_steps": 0.005743741989135742, "step": 624} +{"info/global_step": 625, "train_info/time_within_train_step": 28.092061519622803, "step": 625} +{"train_info/time_between_train_steps": 0.00561833381652832, "step": 625} +{"info/global_step": 626, "train_info/time_within_train_step": 27.96142554283142, "step": 626} +{"train_info/time_between_train_steps": 0.01628732681274414, "step": 626} +{"info/global_step": 627, "train_info/time_within_train_step": 28.021416425704956, "step": 627} +{"train_info/time_between_train_steps": 0.005808591842651367, "step": 627} +{"info/global_step": 628, "train_info/time_within_train_step": 27.89594793319702, "step": 628} +{"train_info/time_between_train_steps": 0.011033773422241211, "step": 628} +{"info/global_step": 629, "train_info/time_within_train_step": 27.863253355026245, "step": 629} +{"train_info/time_between_train_steps": 0.005490303039550781, "step": 629} +{"info/global_step": 630, "train_info/time_within_train_step": 27.81976008415222, "step": 630} +{"train_info/time_between_train_steps": 0.0057179927825927734, "step": 630} +{"info/global_step": 631, "train_info/time_within_train_step": 27.974774599075317, "step": 631} +{"train_info/time_between_train_steps": 0.005627632141113281, "step": 631} +{"info/global_step": 632, "train_info/time_within_train_step": 27.889068603515625, "step": 632} +{"train_info/time_between_train_steps": 0.010669708251953125, "step": 632} +{"info/global_step": 633, "train_info/time_within_train_step": 27.822086095809937, "step": 633} +{"train_info/time_between_train_steps": 0.00581049919128418, "step": 633} +{"info/global_step": 634, "train_info/time_within_train_step": 27.839391231536865, "step": 634} +{"train_info/time_between_train_steps": 0.005436420440673828, "step": 634} +{"info/global_step": 635, "train_info/time_within_train_step": 27.80168080329895, "step": 635} +{"train_info/time_between_train_steps": 0.005509376525878906, "step": 635} +{"info/global_step": 636, "train_info/time_within_train_step": 27.822558879852295, "step": 636} +{"train_info/time_between_train_steps": 0.005876779556274414, "step": 636} +{"info/global_step": 637, "train_info/time_within_train_step": 27.824554681777954, "step": 637} +{"train_info/time_between_train_steps": 0.014628887176513672, "step": 637} +{"info/global_step": 638, "train_info/time_within_train_step": 27.861207008361816, "step": 638} +{"train_info/time_between_train_steps": 0.005934476852416992, "step": 638} +{"info/global_step": 639, "train_info/time_within_train_step": 27.891244173049927, "step": 639} +{"train_info/time_between_train_steps": 0.005656242370605469, "step": 639} +{"info/global_step": 640, "train_info/time_within_train_step": 28.116899967193604, "step": 640} +{"train_info/time_between_train_steps": 0.005594968795776367, "step": 640} +{"info/global_step": 641, "train_info/time_within_train_step": 27.910458087921143, "step": 641} +{"train_info/time_between_train_steps": 0.005757808685302734, "step": 641} +{"info/global_step": 642, "train_info/time_within_train_step": 28.01794147491455, "step": 642} +{"train_info/time_between_train_steps": 0.00563812255859375, "step": 642} +{"info/global_step": 643, "train_info/time_within_train_step": 27.9520263671875, "step": 643} +{"train_info/time_between_train_steps": 0.01085352897644043, "step": 643} +{"info/global_step": 644, "train_info/time_within_train_step": 27.858365774154663, "step": 644} +{"train_info/time_between_train_steps": 0.005537509918212891, "step": 644} +{"info/global_step": 645, "train_info/time_within_train_step": 27.859195470809937, "step": 645} +{"train_info/time_between_train_steps": 0.005748748779296875, "step": 645} +{"info/global_step": 646, "train_info/time_within_train_step": 27.82340955734253, "step": 646} +{"train_info/time_between_train_steps": 0.006014585494995117, "step": 646} +{"info/global_step": 647, "train_info/time_within_train_step": 27.848172426223755, "step": 647} +{"train_info/time_between_train_steps": 0.005859851837158203, "step": 647} +{"info/global_step": 648, "train_info/time_within_train_step": 27.938292741775513, "step": 648} +{"train_info/time_between_train_steps": 0.01298975944519043, "step": 648} +{"train_info/time_between_train_steps": 3.8593499660491943, "step": 648} +{"info/global_step": 649, "train_info/time_within_train_step": 27.952720880508423, "step": 649} +{"train_info/time_between_train_steps": 0.010369300842285156, "step": 649} +{"info/global_step": 650, "train_info/time_within_train_step": 28.007278442382812, "step": 650} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733972556, "_runtime": 18291}, "step": 650} +{"logs": {"train/loss": 4.1342, "train/learning_rate": 0.00030555555555555555, "train/epoch": 24.0, "_timestamp": 1733972556, "_runtime": 18291}, "step": 650} +{"train_info/time_between_train_steps": 0.007592678070068359, "step": 650} +{"info/global_step": 651, "train_info/time_within_train_step": 27.98900270462036, "step": 651} +{"train_info/time_between_train_steps": 0.00869297981262207, "step": 651} +{"info/global_step": 652, "train_info/time_within_train_step": 28.208701610565186, "step": 652} +{"train_info/time_between_train_steps": 0.0058825016021728516, "step": 652} +{"info/global_step": 653, "train_info/time_within_train_step": 27.923106908798218, "step": 653} +{"train_info/time_between_train_steps": 0.0061168670654296875, "step": 653} +{"info/global_step": 654, "train_info/time_within_train_step": 28.05739665031433, "step": 654} +{"train_info/time_between_train_steps": 0.005635738372802734, "step": 654} +{"info/global_step": 655, "train_info/time_within_train_step": 27.780192136764526, "step": 655} +{"train_info/time_between_train_steps": 0.005982398986816406, "step": 655} +{"info/global_step": 656, "train_info/time_within_train_step": 28.090128421783447, "step": 656} +{"train_info/time_between_train_steps": 0.0055389404296875, "step": 656} +{"info/global_step": 657, "train_info/time_within_train_step": 27.9757137298584, "step": 657} +{"train_info/time_between_train_steps": 0.00563359260559082, "step": 657} +{"info/global_step": 658, "train_info/time_within_train_step": 27.79888343811035, "step": 658} +{"train_info/time_between_train_steps": 0.013127326965332031, "step": 658} +{"info/global_step": 659, "train_info/time_within_train_step": 27.823981761932373, "step": 659} +{"train_info/time_between_train_steps": 0.005942344665527344, "step": 659} +{"info/global_step": 660, "train_info/time_within_train_step": 27.83306574821472, "step": 660} +{"train_info/time_between_train_steps": 0.006351947784423828, "step": 660} +{"info/global_step": 661, "train_info/time_within_train_step": 27.911402940750122, "step": 661} +{"train_info/time_between_train_steps": 0.005537748336791992, "step": 661} +{"info/global_step": 662, "train_info/time_within_train_step": 27.861915111541748, "step": 662} +{"train_info/time_between_train_steps": 0.006646633148193359, "step": 662} +{"info/global_step": 663, "train_info/time_within_train_step": 27.88244128227234, "step": 663} +{"train_info/time_between_train_steps": 0.005820274353027344, "step": 663} +{"info/global_step": 664, "train_info/time_within_train_step": 27.84826421737671, "step": 664} +{"train_info/time_between_train_steps": 0.005779743194580078, "step": 664} +{"info/global_step": 665, "train_info/time_within_train_step": 27.839904069900513, "step": 665} +{"train_info/time_between_train_steps": 0.0057332515716552734, "step": 665} +{"info/global_step": 666, "train_info/time_within_train_step": 27.913789749145508, "step": 666} +{"train_info/time_between_train_steps": 0.006356716156005859, "step": 666} +{"info/global_step": 667, "train_info/time_within_train_step": 27.98062801361084, "step": 667} +{"train_info/time_between_train_steps": 0.0056285858154296875, "step": 667} +{"info/global_step": 668, "train_info/time_within_train_step": 27.938771724700928, "step": 668} +{"train_info/time_between_train_steps": 0.0057430267333984375, "step": 668} +{"info/global_step": 669, "train_info/time_within_train_step": 27.975390911102295, "step": 669} +{"train_info/time_between_train_steps": 0.005719900131225586, "step": 669} +{"info/global_step": 670, "train_info/time_within_train_step": 27.839588165283203, "step": 670} +{"train_info/time_between_train_steps": 0.011512994766235352, "step": 670} +{"info/global_step": 671, "train_info/time_within_train_step": 27.887042760849, "step": 671} +{"train_info/time_between_train_steps": 0.006125211715698242, "step": 671} +{"info/global_step": 672, "train_info/time_within_train_step": 27.981902599334717, "step": 672} +{"train_info/time_between_train_steps": 0.005950212478637695, "step": 672} +{"info/global_step": 673, "train_info/time_within_train_step": 27.912916660308838, "step": 673} +{"train_info/time_between_train_steps": 0.006022453308105469, "step": 673} +{"info/global_step": 674, "train_info/time_within_train_step": 27.782013177871704, "step": 674} +{"train_info/time_between_train_steps": 0.006098508834838867, "step": 674} +{"info/global_step": 675, "train_info/time_within_train_step": 27.863943099975586, "step": 675} +{"train_info/time_between_train_steps": 0.006451845169067383, "step": 675} +{"train_info/time_between_train_steps": 4.0057549476623535, "step": 675} +{"info/global_step": 676, "train_info/time_within_train_step": 27.8573579788208, "step": 676} +{"train_info/time_between_train_steps": 0.005347728729248047, "step": 676} +{"info/global_step": 677, "train_info/time_within_train_step": 27.893235683441162, "step": 677} +{"train_info/time_between_train_steps": 0.01603984832763672, "step": 677} +{"info/global_step": 678, "train_info/time_within_train_step": 27.87112307548523, "step": 678} +{"train_info/time_between_train_steps": 0.01092386245727539, "step": 678} +{"info/global_step": 679, "train_info/time_within_train_step": 27.95896291732788, "step": 679} +{"train_info/time_between_train_steps": 0.005829811096191406, "step": 679} +{"info/global_step": 680, "train_info/time_within_train_step": 27.90727734565735, "step": 680} +{"train_info/time_between_train_steps": 0.005739688873291016, "step": 680} +{"info/global_step": 681, "train_info/time_within_train_step": 28.009834051132202, "step": 681} +{"train_info/time_between_train_steps": 0.017880678176879883, "step": 681} +{"info/global_step": 682, "train_info/time_within_train_step": 27.88507628440857, "step": 682} +{"train_info/time_between_train_steps": 0.005811929702758789, "step": 682} +{"info/global_step": 683, "train_info/time_within_train_step": 27.814520120620728, "step": 683} +{"train_info/time_between_train_steps": 0.005425453186035156, "step": 683} +{"info/global_step": 684, "train_info/time_within_train_step": 27.89132022857666, "step": 684} +{"train_info/time_between_train_steps": 0.005391836166381836, "step": 684} +{"info/global_step": 685, "train_info/time_within_train_step": 27.844977140426636, "step": 685} +{"train_info/time_between_train_steps": 0.00565028190612793, "step": 685} +{"info/global_step": 686, "train_info/time_within_train_step": 27.866297006607056, "step": 686} +{"train_info/time_between_train_steps": 0.005461931228637695, "step": 686} +{"info/global_step": 687, "train_info/time_within_train_step": 27.983293771743774, "step": 687} +{"train_info/time_between_train_steps": 0.005913257598876953, "step": 687} +{"info/global_step": 688, "train_info/time_within_train_step": 27.872180700302124, "step": 688} +{"train_info/time_between_train_steps": 0.011353731155395508, "step": 688} +{"info/global_step": 689, "train_info/time_within_train_step": 27.879083395004272, "step": 689} +{"train_info/time_between_train_steps": 0.011382102966308594, "step": 689} +{"info/global_step": 690, "train_info/time_within_train_step": 27.84471607208252, "step": 690} +{"train_info/time_between_train_steps": 0.007169485092163086, "step": 690} +{"info/global_step": 691, "train_info/time_within_train_step": 27.91120433807373, "step": 691} +{"train_info/time_between_train_steps": 0.005770444869995117, "step": 691} +{"info/global_step": 692, "train_info/time_within_train_step": 27.91721820831299, "step": 692} +{"train_info/time_between_train_steps": 0.0056040287017822266, "step": 692} +{"info/global_step": 693, "train_info/time_within_train_step": 27.765067100524902, "step": 693} +{"train_info/time_between_train_steps": 0.005617380142211914, "step": 693} +{"info/global_step": 694, "train_info/time_within_train_step": 27.804877996444702, "step": 694} +{"train_info/time_between_train_steps": 0.011326789855957031, "step": 694} +{"info/global_step": 695, "train_info/time_within_train_step": 27.902384757995605, "step": 695} +{"train_info/time_between_train_steps": 0.005549192428588867, "step": 695} +{"info/global_step": 696, "train_info/time_within_train_step": 27.87431526184082, "step": 696} +{"train_info/time_between_train_steps": 0.005616664886474609, "step": 696} +{"info/global_step": 697, "train_info/time_within_train_step": 27.863070249557495, "step": 697} +{"train_info/time_between_train_steps": 0.005871772766113281, "step": 697} +{"info/global_step": 698, "train_info/time_within_train_step": 27.926709413528442, "step": 698} +{"train_info/time_between_train_steps": 0.005579471588134766, "step": 698} +{"info/global_step": 699, "train_info/time_within_train_step": 27.841872930526733, "step": 699} +{"train_info/time_between_train_steps": 0.008066415786743164, "step": 699} +{"info/global_step": 700, "train_info/time_within_train_step": 27.931135892868042, "step": 700} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733973956, "_runtime": 19691}, "step": 700} +{"logs": {"train/loss": 4.0185, "train/learning_rate": 0.0002777777777777778, "train/epoch": 25.02, "_timestamp": 1733973956, "_runtime": 19691}, "step": 700} +{"train_info/time_between_train_steps": 2.3734004497528076, "step": 700} +{"info/global_step": 701, "train_info/time_within_train_step": 27.982750177383423, "step": 701} +{"train_info/time_between_train_steps": 0.011062383651733398, "step": 701} +{"info/global_step": 702, "train_info/time_within_train_step": 27.999646425247192, "step": 702} +{"train_info/time_between_train_steps": 0.007250785827636719, "step": 702} +{"train_info/time_between_train_steps": 4.113922595977783, "step": 702} +{"info/global_step": 703, "train_info/time_within_train_step": 27.904751300811768, "step": 703} +{"train_info/time_between_train_steps": 0.010054826736450195, "step": 703} +{"info/global_step": 704, "train_info/time_within_train_step": 28.001117706298828, "step": 704} +{"train_info/time_between_train_steps": 0.010627508163452148, "step": 704} +{"info/global_step": 705, "train_info/time_within_train_step": 27.81573247909546, "step": 705} +{"train_info/time_between_train_steps": 0.005801200866699219, "step": 705} +{"info/global_step": 706, "train_info/time_within_train_step": 28.05112314224243, "step": 706} +{"train_info/time_between_train_steps": 0.005837917327880859, "step": 706} +{"info/global_step": 707, "train_info/time_within_train_step": 27.836803913116455, "step": 707} +{"train_info/time_between_train_steps": 0.005570650100708008, "step": 707} +{"info/global_step": 708, "train_info/time_within_train_step": 27.99842858314514, "step": 708} +{"train_info/time_between_train_steps": 0.005805253982543945, "step": 708} +{"info/global_step": 709, "train_info/time_within_train_step": 27.865732431411743, "step": 709} +{"train_info/time_between_train_steps": 0.005939960479736328, "step": 709} +{"info/global_step": 710, "train_info/time_within_train_step": 27.851144075393677, "step": 710} +{"train_info/time_between_train_steps": 0.00537419319152832, "step": 710} +{"info/global_step": 711, "train_info/time_within_train_step": 27.885510444641113, "step": 711} +{"train_info/time_between_train_steps": 0.0055387020111083984, "step": 711} +{"info/global_step": 712, "train_info/time_within_train_step": 27.85483145713806, "step": 712} +{"train_info/time_between_train_steps": 0.005713701248168945, "step": 712} +{"info/global_step": 713, "train_info/time_within_train_step": 27.823551177978516, "step": 713} +{"train_info/time_between_train_steps": 0.0053844451904296875, "step": 713} +{"info/global_step": 714, "train_info/time_within_train_step": 27.900087356567383, "step": 714} +{"train_info/time_between_train_steps": 0.00561213493347168, "step": 714} +{"info/global_step": 715, "train_info/time_within_train_step": 27.8282949924469, "step": 715} +{"train_info/time_between_train_steps": 0.005982160568237305, "step": 715} +{"info/global_step": 716, "train_info/time_within_train_step": 27.875192403793335, "step": 716} +{"train_info/time_between_train_steps": 0.00572514533996582, "step": 716} +{"info/global_step": 717, "train_info/time_within_train_step": 27.756329774856567, "step": 717} +{"train_info/time_between_train_steps": 0.006098031997680664, "step": 717} +{"info/global_step": 718, "train_info/time_within_train_step": 27.94970703125, "step": 718} +{"train_info/time_between_train_steps": 0.010526895523071289, "step": 718} +{"info/global_step": 719, "train_info/time_within_train_step": 27.767451524734497, "step": 719} +{"train_info/time_between_train_steps": 0.0055925846099853516, "step": 719} +{"info/global_step": 720, "train_info/time_within_train_step": 27.776097774505615, "step": 720} +{"train_info/time_between_train_steps": 0.005793333053588867, "step": 720} +{"info/global_step": 721, "train_info/time_within_train_step": 27.96760392189026, "step": 721} +{"train_info/time_between_train_steps": 0.005525827407836914, "step": 721} +{"info/global_step": 722, "train_info/time_within_train_step": 27.771504640579224, "step": 722} +{"train_info/time_between_train_steps": 0.0071277618408203125, "step": 722} +{"info/global_step": 723, "train_info/time_within_train_step": 27.94497060775757, "step": 723} +{"train_info/time_between_train_steps": 0.005530595779418945, "step": 723} +{"info/global_step": 724, "train_info/time_within_train_step": 27.797244787216187, "step": 724} +{"train_info/time_between_train_steps": 0.005777835845947266, "step": 724} +{"info/global_step": 725, "train_info/time_within_train_step": 27.85804295539856, "step": 725} +{"train_info/time_between_train_steps": 0.006406545639038086, "step": 725} +{"info/global_step": 726, "train_info/time_within_train_step": 27.84669589996338, "step": 726} +{"train_info/time_between_train_steps": 0.0059244632720947266, "step": 726} +{"info/global_step": 727, "train_info/time_within_train_step": 27.848985195159912, "step": 727} +{"train_info/time_between_train_steps": 0.014980077743530273, "step": 727} +{"info/global_step": 728, "train_info/time_within_train_step": 27.839672565460205, "step": 728} +{"train_info/time_between_train_steps": 0.007767677307128906, "step": 728} +{"info/global_step": 729, "train_info/time_within_train_step": 27.839553356170654, "step": 729} +{"train_info/time_between_train_steps": 0.00655364990234375, "step": 729} +{"train_info/time_between_train_steps": 3.8480732440948486, "step": 729} +{"info/global_step": 730, "train_info/time_within_train_step": 27.905354976654053, "step": 730} +{"train_info/time_between_train_steps": 0.013199090957641602, "step": 730} +{"info/global_step": 731, "train_info/time_within_train_step": 28.094012022018433, "step": 731} +{"train_info/time_between_train_steps": 0.006071567535400391, "step": 731} +{"info/global_step": 732, "train_info/time_within_train_step": 27.87245774269104, "step": 732} +{"train_info/time_between_train_steps": 0.006249427795410156, "step": 732} +{"info/global_step": 733, "train_info/time_within_train_step": 28.06659770011902, "step": 733} +{"train_info/time_between_train_steps": 0.015965700149536133, "step": 733} +{"info/global_step": 734, "train_info/time_within_train_step": 28.00554919242859, "step": 734} +{"train_info/time_between_train_steps": 0.006194353103637695, "step": 734} +{"info/global_step": 735, "train_info/time_within_train_step": 27.965465307235718, "step": 735} +{"train_info/time_between_train_steps": 0.010884523391723633, "step": 735} +{"info/global_step": 736, "train_info/time_within_train_step": 27.91888117790222, "step": 736} +{"train_info/time_between_train_steps": 0.0061779022216796875, "step": 736} +{"info/global_step": 737, "train_info/time_within_train_step": 27.882295846939087, "step": 737} +{"train_info/time_between_train_steps": 0.005615234375, "step": 737} +{"info/global_step": 738, "train_info/time_within_train_step": 27.922412633895874, "step": 738} +{"train_info/time_between_train_steps": 0.006028890609741211, "step": 738} +{"info/global_step": 739, "train_info/time_within_train_step": 27.796386241912842, "step": 739} +{"train_info/time_between_train_steps": 0.010404586791992188, "step": 739} +{"info/global_step": 740, "train_info/time_within_train_step": 27.87165904045105, "step": 740} +{"train_info/time_between_train_steps": 0.015622138977050781, "step": 740} +{"info/global_step": 741, "train_info/time_within_train_step": 27.839897871017456, "step": 741} +{"train_info/time_between_train_steps": 0.01120758056640625, "step": 741} +{"info/global_step": 742, "train_info/time_within_train_step": 27.80543613433838, "step": 742} +{"train_info/time_between_train_steps": 0.005465984344482422, "step": 742} +{"info/global_step": 743, "train_info/time_within_train_step": 27.99437713623047, "step": 743} +{"train_info/time_between_train_steps": 0.005515098571777344, "step": 743} +{"info/global_step": 744, "train_info/time_within_train_step": 27.858092546463013, "step": 744} +{"train_info/time_between_train_steps": 0.005735158920288086, "step": 744} +{"info/global_step": 745, "train_info/time_within_train_step": 27.813693046569824, "step": 745} +{"train_info/time_between_train_steps": 0.010413885116577148, "step": 745} +{"info/global_step": 746, "train_info/time_within_train_step": 27.77233386039734, "step": 746} +{"train_info/time_between_train_steps": 0.0056498050689697266, "step": 746} +{"info/global_step": 747, "train_info/time_within_train_step": 27.740296363830566, "step": 747} +{"train_info/time_between_train_steps": 0.005722522735595703, "step": 747} +{"info/global_step": 748, "train_info/time_within_train_step": 27.86040186882019, "step": 748} +{"train_info/time_between_train_steps": 0.005537271499633789, "step": 748} +{"info/global_step": 749, "train_info/time_within_train_step": 27.89871907234192, "step": 749} +{"train_info/time_between_train_steps": 0.005759000778198242, "step": 749} +{"info/global_step": 750, "train_info/time_within_train_step": 27.96234393119812, "step": 750} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733975361, "_runtime": 21096}, "step": 750} +{"logs": {"train/loss": 3.9197, "train/learning_rate": 0.00025, "train/epoch": 27.02, "_timestamp": 1733975361, "_runtime": 21096}, "step": 750} +{"train_info/time_between_train_steps": 0.007611274719238281, "step": 750} +{"info/global_step": 751, "train_info/time_within_train_step": 27.760108709335327, "step": 751} +{"train_info/time_between_train_steps": 0.005471706390380859, "step": 751} +{"info/global_step": 752, "train_info/time_within_train_step": 27.791521310806274, "step": 752} +{"train_info/time_between_train_steps": 0.005656242370605469, "step": 752} +{"info/global_step": 753, "train_info/time_within_train_step": 27.757004499435425, "step": 753} +{"train_info/time_between_train_steps": 0.005710601806640625, "step": 753} +{"info/global_step": 754, "train_info/time_within_train_step": 27.78600001335144, "step": 754} +{"train_info/time_between_train_steps": 0.011182546615600586, "step": 754} +{"info/global_step": 755, "train_info/time_within_train_step": 27.8531334400177, "step": 755} +{"train_info/time_between_train_steps": 0.007088422775268555, "step": 755} +{"info/global_step": 756, "train_info/time_within_train_step": 27.946957111358643, "step": 756} +{"train_info/time_between_train_steps": 0.006816864013671875, "step": 756} +{"train_info/time_between_train_steps": 3.9997637271881104, "step": 756} +{"info/global_step": 757, "train_info/time_within_train_step": 27.88540482521057, "step": 757} +{"train_info/time_between_train_steps": 0.0067179203033447266, "step": 757} +{"info/global_step": 758, "train_info/time_within_train_step": 27.94946050643921, "step": 758} +{"train_info/time_between_train_steps": 0.0057642459869384766, "step": 758} +{"info/global_step": 759, "train_info/time_within_train_step": 27.856849670410156, "step": 759} +{"train_info/time_between_train_steps": 0.011205673217773438, "step": 759} +{"info/global_step": 760, "train_info/time_within_train_step": 28.008697032928467, "step": 760} +{"train_info/time_between_train_steps": 0.005806446075439453, "step": 760} +{"info/global_step": 761, "train_info/time_within_train_step": 27.770631313323975, "step": 761} +{"train_info/time_between_train_steps": 0.005745649337768555, "step": 761} +{"info/global_step": 762, "train_info/time_within_train_step": 27.877461433410645, "step": 762} +{"train_info/time_between_train_steps": 0.0057353973388671875, "step": 762} +{"info/global_step": 763, "train_info/time_within_train_step": 27.807528018951416, "step": 763} +{"train_info/time_between_train_steps": 0.0061109066009521484, "step": 763} +{"info/global_step": 764, "train_info/time_within_train_step": 27.865072011947632, "step": 764} +{"train_info/time_between_train_steps": 0.005416393280029297, "step": 764} +{"info/global_step": 765, "train_info/time_within_train_step": 27.977526903152466, "step": 765} +{"train_info/time_between_train_steps": 0.010408878326416016, "step": 765} +{"info/global_step": 766, "train_info/time_within_train_step": 27.870373487472534, "step": 766} +{"train_info/time_between_train_steps": 0.005431175231933594, "step": 766} +{"info/global_step": 767, "train_info/time_within_train_step": 27.749231100082397, "step": 767} +{"train_info/time_between_train_steps": 0.005426168441772461, "step": 767} +{"info/global_step": 768, "train_info/time_within_train_step": 27.748451948165894, "step": 768} +{"train_info/time_between_train_steps": 0.0056269168853759766, "step": 768} +{"info/global_step": 769, "train_info/time_within_train_step": 27.806061506271362, "step": 769} +{"train_info/time_between_train_steps": 0.0053637027740478516, "step": 769} +{"info/global_step": 770, "train_info/time_within_train_step": 27.936800241470337, "step": 770} +{"train_info/time_between_train_steps": 0.00537419319152832, "step": 770} +{"info/global_step": 771, "train_info/time_within_train_step": 27.857605934143066, "step": 771} +{"train_info/time_between_train_steps": 0.011191844940185547, "step": 771} +{"info/global_step": 772, "train_info/time_within_train_step": 27.8991436958313, "step": 772} +{"train_info/time_between_train_steps": 0.0056514739990234375, "step": 772} +{"info/global_step": 773, "train_info/time_within_train_step": 27.956403017044067, "step": 773} +{"train_info/time_between_train_steps": 0.005552768707275391, "step": 773} +{"info/global_step": 774, "train_info/time_within_train_step": 27.807860851287842, "step": 774} +{"train_info/time_between_train_steps": 0.005537509918212891, "step": 774} +{"info/global_step": 775, "train_info/time_within_train_step": 27.959360122680664, "step": 775} +{"train_info/time_between_train_steps": 0.005732059478759766, "step": 775} +{"info/global_step": 776, "train_info/time_within_train_step": 27.841144323349, "step": 776} +{"train_info/time_between_train_steps": 0.011535406112670898, "step": 776} +{"info/global_step": 777, "train_info/time_within_train_step": 27.79446506500244, "step": 777} +{"train_info/time_between_train_steps": 0.005745887756347656, "step": 777} +{"info/global_step": 778, "train_info/time_within_train_step": 27.78454566001892, "step": 778} +{"train_info/time_between_train_steps": 0.01125645637512207, "step": 778} +{"info/global_step": 779, "train_info/time_within_train_step": 27.88392972946167, "step": 779} +{"train_info/time_between_train_steps": 0.010943412780761719, "step": 779} +{"info/global_step": 780, "train_info/time_within_train_step": 27.828157424926758, "step": 780} +{"train_info/time_between_train_steps": 0.0058689117431640625, "step": 780} +{"info/global_step": 781, "train_info/time_within_train_step": 27.97582983970642, "step": 781} +{"train_info/time_between_train_steps": 0.005816459655761719, "step": 781} +{"info/global_step": 782, "train_info/time_within_train_step": 27.828356981277466, "step": 782} +{"train_info/time_between_train_steps": 0.006028652191162109, "step": 782} +{"info/global_step": 783, "train_info/time_within_train_step": 27.838525533676147, "step": 783} +{"train_info/time_between_train_steps": 0.006740093231201172, "step": 783} +{"train_info/time_between_train_steps": 4.195208549499512, "step": 783} +{"info/global_step": 784, "train_info/time_within_train_step": 27.765884160995483, "step": 784} +{"train_info/time_between_train_steps": 0.0060694217681884766, "step": 784} +{"info/global_step": 785, "train_info/time_within_train_step": 27.966875791549683, "step": 785} +{"train_info/time_between_train_steps": 0.01101541519165039, "step": 785} +{"info/global_step": 786, "train_info/time_within_train_step": 27.829920291900635, "step": 786} +{"train_info/time_between_train_steps": 0.00633549690246582, "step": 786} +{"info/global_step": 787, "train_info/time_within_train_step": 28.043700218200684, "step": 787} +{"train_info/time_between_train_steps": 0.005988597869873047, "step": 787} +{"info/global_step": 788, "train_info/time_within_train_step": 27.794564962387085, "step": 788} +{"train_info/time_between_train_steps": 0.00560450553894043, "step": 788} +{"info/global_step": 789, "train_info/time_within_train_step": 27.928846836090088, "step": 789} +{"train_info/time_between_train_steps": 0.005864858627319336, "step": 789} +{"info/global_step": 790, "train_info/time_within_train_step": 27.78606605529785, "step": 790} +{"train_info/time_between_train_steps": 0.010930776596069336, "step": 790} +{"info/global_step": 791, "train_info/time_within_train_step": 28.102226972579956, "step": 791} +{"train_info/time_between_train_steps": 0.005425214767456055, "step": 791} +{"info/global_step": 792, "train_info/time_within_train_step": 27.793790817260742, "step": 792} +{"train_info/time_between_train_steps": 0.0054051876068115234, "step": 792} +{"info/global_step": 793, "train_info/time_within_train_step": 27.842780590057373, "step": 793} +{"train_info/time_between_train_steps": 0.007894277572631836, "step": 793} +{"info/global_step": 794, "train_info/time_within_train_step": 27.878676176071167, "step": 794} +{"train_info/time_between_train_steps": 0.005708456039428711, "step": 794} +{"info/global_step": 795, "train_info/time_within_train_step": 27.823850393295288, "step": 795} +{"train_info/time_between_train_steps": 0.0063664913177490234, "step": 795} +{"info/global_step": 796, "train_info/time_within_train_step": 27.933111429214478, "step": 796} +{"train_info/time_between_train_steps": 0.005552530288696289, "step": 796} +{"info/global_step": 797, "train_info/time_within_train_step": 27.857226371765137, "step": 797} +{"train_info/time_between_train_steps": 0.005469560623168945, "step": 797} +{"info/global_step": 798, "train_info/time_within_train_step": 28.06124186515808, "step": 798} +{"train_info/time_between_train_steps": 0.005896091461181641, "step": 798} +{"info/global_step": 799, "train_info/time_within_train_step": 27.894954919815063, "step": 799} +{"train_info/time_between_train_steps": 0.011029481887817383, "step": 799} +{"info/global_step": 800, "train_info/time_within_train_step": 27.84968113899231, "step": 800} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733976763, "_runtime": 22498}, "step": 800} +{"logs": {"train/loss": 3.829, "train/learning_rate": 0.00022222222222222218, "train/epoch": 29.01, "_timestamp": 1733976763, "_runtime": 22498}, "step": 800} +{"train_info/time_between_train_steps": 2.3136677742004395, "step": 800} +{"info/global_step": 801, "train_info/time_within_train_step": 28.04197859764099, "step": 801} +{"train_info/time_between_train_steps": 0.0056607723236083984, "step": 801} +{"info/global_step": 802, "train_info/time_within_train_step": 27.825809955596924, "step": 802} +{"train_info/time_between_train_steps": 0.005699634552001953, "step": 802} +{"info/global_step": 803, "train_info/time_within_train_step": 27.791109800338745, "step": 803} +{"train_info/time_between_train_steps": 0.005663156509399414, "step": 803} +{"info/global_step": 804, "train_info/time_within_train_step": 27.758413553237915, "step": 804} +{"train_info/time_between_train_steps": 0.0056841373443603516, "step": 804} +{"info/global_step": 805, "train_info/time_within_train_step": 27.756327390670776, "step": 805} +{"train_info/time_between_train_steps": 0.005886554718017578, "step": 805} +{"info/global_step": 806, "train_info/time_within_train_step": 27.93438482284546, "step": 806} +{"train_info/time_between_train_steps": 0.011461734771728516, "step": 806} +{"info/global_step": 807, "train_info/time_within_train_step": 27.802349090576172, "step": 807} +{"train_info/time_between_train_steps": 0.011405706405639648, "step": 807} +{"info/global_step": 808, "train_info/time_within_train_step": 27.88156819343567, "step": 808} +{"train_info/time_between_train_steps": 0.011498451232910156, "step": 808} +{"info/global_step": 809, "train_info/time_within_train_step": 27.893160581588745, "step": 809} +{"train_info/time_between_train_steps": 0.01095724105834961, "step": 809} +{"info/global_step": 810, "train_info/time_within_train_step": 27.865145683288574, "step": 810} +{"train_info/time_between_train_steps": 0.006602287292480469, "step": 810} +{"train_info/time_between_train_steps": 3.8348958492279053, "step": 810} +{"info/global_step": 811, "train_info/time_within_train_step": 27.824594497680664, "step": 811} +{"train_info/time_between_train_steps": 0.009404182434082031, "step": 811} +{"info/global_step": 812, "train_info/time_within_train_step": 28.20454692840576, "step": 812} +{"train_info/time_between_train_steps": 0.005858421325683594, "step": 812} +{"info/global_step": 813, "train_info/time_within_train_step": 27.975038528442383, "step": 813} +{"train_info/time_between_train_steps": 0.006230831146240234, "step": 813} +{"info/global_step": 814, "train_info/time_within_train_step": 28.08871865272522, "step": 814} +{"train_info/time_between_train_steps": 0.005865812301635742, "step": 814} +{"info/global_step": 815, "train_info/time_within_train_step": 27.84553575515747, "step": 815} +{"train_info/time_between_train_steps": 0.011460065841674805, "step": 815} +{"info/global_step": 816, "train_info/time_within_train_step": 28.004160404205322, "step": 816} +{"train_info/time_between_train_steps": 0.0124359130859375, "step": 816} +{"info/global_step": 817, "train_info/time_within_train_step": 27.850114107131958, "step": 817} +{"train_info/time_between_train_steps": 0.0060498714447021484, "step": 817} +{"info/global_step": 818, "train_info/time_within_train_step": 27.856766939163208, "step": 818} +{"train_info/time_between_train_steps": 0.007051706314086914, "step": 818} +{"info/global_step": 819, "train_info/time_within_train_step": 27.81092071533203, "step": 819} +{"train_info/time_between_train_steps": 0.022118091583251953, "step": 819} +{"info/global_step": 820, "train_info/time_within_train_step": 27.904107570648193, "step": 820} +{"train_info/time_between_train_steps": 0.005803346633911133, "step": 820} +{"info/global_step": 821, "train_info/time_within_train_step": 27.778321743011475, "step": 821} +{"train_info/time_between_train_steps": 0.005510091781616211, "step": 821} +{"info/global_step": 822, "train_info/time_within_train_step": 27.786157608032227, "step": 822} +{"train_info/time_between_train_steps": 0.00540924072265625, "step": 822} +{"info/global_step": 823, "train_info/time_within_train_step": 27.79898166656494, "step": 823} +{"train_info/time_between_train_steps": 0.011164426803588867, "step": 823} +{"info/global_step": 824, "train_info/time_within_train_step": 27.906317949295044, "step": 824} +{"train_info/time_between_train_steps": 0.005626201629638672, "step": 824} +{"info/global_step": 825, "train_info/time_within_train_step": 27.85829210281372, "step": 825} +{"train_info/time_between_train_steps": 0.005568981170654297, "step": 825} +{"info/global_step": 826, "train_info/time_within_train_step": 27.82732057571411, "step": 826} +{"train_info/time_between_train_steps": 0.005749702453613281, "step": 826} +{"info/global_step": 827, "train_info/time_within_train_step": 27.773392915725708, "step": 827} +{"train_info/time_between_train_steps": 0.005699634552001953, "step": 827} +{"info/global_step": 828, "train_info/time_within_train_step": 28.03038239479065, "step": 828} +{"train_info/time_between_train_steps": 0.005806684494018555, "step": 828} +{"info/global_step": 829, "train_info/time_within_train_step": 27.883963346481323, "step": 829} +{"train_info/time_between_train_steps": 0.005627632141113281, "step": 829} +{"info/global_step": 830, "train_info/time_within_train_step": 27.887221574783325, "step": 830} +{"train_info/time_between_train_steps": 0.0056781768798828125, "step": 830} +{"info/global_step": 831, "train_info/time_within_train_step": 27.966556072235107, "step": 831} +{"train_info/time_between_train_steps": 0.011414527893066406, "step": 831} +{"info/global_step": 832, "train_info/time_within_train_step": 27.883957386016846, "step": 832} +{"train_info/time_between_train_steps": 0.005808115005493164, "step": 832} +{"info/global_step": 833, "train_info/time_within_train_step": 27.76400923728943, "step": 833} +{"train_info/time_between_train_steps": 0.011725425720214844, "step": 833} +{"info/global_step": 834, "train_info/time_within_train_step": 28.033394813537598, "step": 834} +{"train_info/time_between_train_steps": 0.015414714813232422, "step": 834} +{"info/global_step": 835, "train_info/time_within_train_step": 27.820366859436035, "step": 835} +{"train_info/time_between_train_steps": 0.011938333511352539, "step": 835} +{"info/global_step": 836, "train_info/time_within_train_step": 27.960700035095215, "step": 836} +{"train_info/time_between_train_steps": 0.00624537467956543, "step": 836} +{"info/global_step": 837, "train_info/time_within_train_step": 27.92483425140381, "step": 837} +{"train_info/time_between_train_steps": 0.0065708160400390625, "step": 837} +{"train_info/time_between_train_steps": 4.012511730194092, "step": 837} +{"info/global_step": 838, "train_info/time_within_train_step": 27.773279428482056, "step": 838} +{"train_info/time_between_train_steps": 0.006117820739746094, "step": 838} +{"info/global_step": 839, "train_info/time_within_train_step": 27.985954523086548, "step": 839} +{"train_info/time_between_train_steps": 0.0060274600982666016, "step": 839} +{"info/global_step": 840, "train_info/time_within_train_step": 27.87256932258606, "step": 840} +{"train_info/time_between_train_steps": 0.0060214996337890625, "step": 840} +{"info/global_step": 841, "train_info/time_within_train_step": 28.043906211853027, "step": 841} +{"train_info/time_between_train_steps": 0.0057964324951171875, "step": 841} +{"info/global_step": 842, "train_info/time_within_train_step": 27.86800742149353, "step": 842} +{"train_info/time_between_train_steps": 0.0060575008392333984, "step": 842} +{"info/global_step": 843, "train_info/time_within_train_step": 28.02875804901123, "step": 843} +{"train_info/time_between_train_steps": 0.011636734008789062, "step": 843} +{"info/global_step": 844, "train_info/time_within_train_step": 27.83219599723816, "step": 844} +{"train_info/time_between_train_steps": 0.011069774627685547, "step": 844} +{"info/global_step": 845, "train_info/time_within_train_step": 27.910643339157104, "step": 845} +{"train_info/time_between_train_steps": 0.011209964752197266, "step": 845} +{"info/global_step": 846, "train_info/time_within_train_step": 27.829198122024536, "step": 846} +{"train_info/time_between_train_steps": 0.011214733123779297, "step": 846} +{"info/global_step": 847, "train_info/time_within_train_step": 27.776357412338257, "step": 847} +{"train_info/time_between_train_steps": 0.010390043258666992, "step": 847} +{"info/global_step": 848, "train_info/time_within_train_step": 27.836926221847534, "step": 848} +{"train_info/time_between_train_steps": 0.005476713180541992, "step": 848} +{"info/global_step": 849, "train_info/time_within_train_step": 27.797642707824707, "step": 849} +{"train_info/time_between_train_steps": 0.005486488342285156, "step": 849} +{"info/global_step": 850, "train_info/time_within_train_step": 27.829964876174927, "step": 850} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733978167, "_runtime": 23902}, "step": 850} +{"logs": {"train/loss": 3.7522, "train/learning_rate": 0.00019444444444444443, "train/epoch": 31.01, "_timestamp": 1733978167, "_runtime": 23902}, "step": 850} +{"train_info/time_between_train_steps": 0.0077362060546875, "step": 850} +{"info/global_step": 851, "train_info/time_within_train_step": 27.79939889907837, "step": 851} +{"train_info/time_between_train_steps": 0.0053560733795166016, "step": 851} +{"info/global_step": 852, "train_info/time_within_train_step": 27.77695322036743, "step": 852} +{"train_info/time_between_train_steps": 0.011106491088867188, "step": 852} +{"info/global_step": 853, "train_info/time_within_train_step": 27.78903555870056, "step": 853} +{"train_info/time_between_train_steps": 0.01141810417175293, "step": 853} +{"info/global_step": 854, "train_info/time_within_train_step": 27.763843536376953, "step": 854} +{"train_info/time_between_train_steps": 0.00547337532043457, "step": 854} +{"info/global_step": 855, "train_info/time_within_train_step": 27.78886342048645, "step": 855} +{"train_info/time_between_train_steps": 0.005698442459106445, "step": 855} +{"info/global_step": 856, "train_info/time_within_train_step": 27.7824125289917, "step": 856} +{"train_info/time_between_train_steps": 0.005583286285400391, "step": 856} +{"info/global_step": 857, "train_info/time_within_train_step": 27.822047233581543, "step": 857} +{"train_info/time_between_train_steps": 0.005589485168457031, "step": 857} +{"info/global_step": 858, "train_info/time_within_train_step": 27.822603940963745, "step": 858} +{"train_info/time_between_train_steps": 0.011235237121582031, "step": 858} +{"info/global_step": 859, "train_info/time_within_train_step": 27.91883373260498, "step": 859} +{"train_info/time_between_train_steps": 0.0057637691497802734, "step": 859} +{"info/global_step": 860, "train_info/time_within_train_step": 27.87667989730835, "step": 860} +{"train_info/time_between_train_steps": 0.01124429702758789, "step": 860} +{"info/global_step": 861, "train_info/time_within_train_step": 27.854571104049683, "step": 861} +{"train_info/time_between_train_steps": 0.005640983581542969, "step": 861} +{"info/global_step": 862, "train_info/time_within_train_step": 27.920230627059937, "step": 862} +{"train_info/time_between_train_steps": 0.005835294723510742, "step": 862} +{"info/global_step": 863, "train_info/time_within_train_step": 27.81021499633789, "step": 863} +{"train_info/time_between_train_steps": 0.006163358688354492, "step": 863} +{"info/global_step": 864, "train_info/time_within_train_step": 27.86443781852722, "step": 864} +{"train_info/time_between_train_steps": 0.00658416748046875, "step": 864} +{"train_info/time_between_train_steps": 4.100052356719971, "step": 864} +{"info/global_step": 865, "train_info/time_within_train_step": 27.869321823120117, "step": 865} +{"train_info/time_between_train_steps": 0.00607752799987793, "step": 865} +{"info/global_step": 866, "train_info/time_within_train_step": 27.926007509231567, "step": 866} +{"train_info/time_between_train_steps": 0.005563020706176758, "step": 866} +{"info/global_step": 867, "train_info/time_within_train_step": 27.838717222213745, "step": 867} +{"train_info/time_between_train_steps": 0.005789995193481445, "step": 867} +{"info/global_step": 868, "train_info/time_within_train_step": 28.008721590042114, "step": 868} +{"train_info/time_between_train_steps": 0.005905628204345703, "step": 868} +{"info/global_step": 869, "train_info/time_within_train_step": 27.907679557800293, "step": 869} +{"train_info/time_between_train_steps": 0.01545858383178711, "step": 869} +{"info/global_step": 870, "train_info/time_within_train_step": 27.974718809127808, "step": 870} +{"train_info/time_between_train_steps": 0.005877494812011719, "step": 870} +{"info/global_step": 871, "train_info/time_within_train_step": 27.861714601516724, "step": 871} +{"train_info/time_between_train_steps": 0.005886554718017578, "step": 871} +{"info/global_step": 872, "train_info/time_within_train_step": 28.008573055267334, "step": 872} +{"train_info/time_between_train_steps": 0.007912397384643555, "step": 872} +{"info/global_step": 873, "train_info/time_within_train_step": 27.79374408721924, "step": 873} +{"train_info/time_between_train_steps": 0.005482673645019531, "step": 873} +{"info/global_step": 874, "train_info/time_within_train_step": 27.91011095046997, "step": 874} +{"train_info/time_between_train_steps": 0.005650043487548828, "step": 874} +{"info/global_step": 875, "train_info/time_within_train_step": 27.78339195251465, "step": 875} +{"train_info/time_between_train_steps": 0.006807088851928711, "step": 875} +{"info/global_step": 876, "train_info/time_within_train_step": 27.818821907043457, "step": 876} +{"train_info/time_between_train_steps": 0.014635562896728516, "step": 876} +{"info/global_step": 877, "train_info/time_within_train_step": 27.76896572113037, "step": 877} +{"train_info/time_between_train_steps": 0.005738019943237305, "step": 877} +{"info/global_step": 878, "train_info/time_within_train_step": 27.810237169265747, "step": 878} +{"train_info/time_between_train_steps": 0.005358219146728516, "step": 878} +{"info/global_step": 879, "train_info/time_within_train_step": 27.763688564300537, "step": 879} +{"train_info/time_between_train_steps": 0.005795001983642578, "step": 879} +{"info/global_step": 880, "train_info/time_within_train_step": 27.821563720703125, "step": 880} +{"train_info/time_between_train_steps": 0.005510807037353516, "step": 880} +{"info/global_step": 881, "train_info/time_within_train_step": 27.778247117996216, "step": 881} +{"train_info/time_between_train_steps": 0.005541086196899414, "step": 881} +{"info/global_step": 882, "train_info/time_within_train_step": 27.77432894706726, "step": 882} +{"train_info/time_between_train_steps": 0.005815744400024414, "step": 882} +{"info/global_step": 883, "train_info/time_within_train_step": 27.820680856704712, "step": 883} +{"train_info/time_between_train_steps": 0.005471706390380859, "step": 883} +{"info/global_step": 884, "train_info/time_within_train_step": 27.785038948059082, "step": 884} +{"train_info/time_between_train_steps": 0.010393142700195312, "step": 884} +{"info/global_step": 885, "train_info/time_within_train_step": 27.798377990722656, "step": 885} +{"train_info/time_between_train_steps": 0.0056726932525634766, "step": 885} +{"info/global_step": 886, "train_info/time_within_train_step": 27.833245038986206, "step": 886} +{"train_info/time_between_train_steps": 0.011050224304199219, "step": 886} +{"info/global_step": 887, "train_info/time_within_train_step": 27.80659055709839, "step": 887} +{"train_info/time_between_train_steps": 0.0056531429290771484, "step": 887} +{"info/global_step": 888, "train_info/time_within_train_step": 27.861560344696045, "step": 888} +{"train_info/time_between_train_steps": 0.011479854583740234, "step": 888} +{"info/global_step": 889, "train_info/time_within_train_step": 27.81890630722046, "step": 889} +{"train_info/time_between_train_steps": 0.005685091018676758, "step": 889} +{"info/global_step": 890, "train_info/time_within_train_step": 27.945695400238037, "step": 890} +{"train_info/time_between_train_steps": 0.006162166595458984, "step": 890} +{"info/global_step": 891, "train_info/time_within_train_step": 28.01691198348999, "step": 891} +{"train_info/time_between_train_steps": 0.011899709701538086, "step": 891} +{"train_info/time_between_train_steps": 3.8590147495269775, "step": 891} +{"info/global_step": 892, "train_info/time_within_train_step": 27.763765335083008, "step": 892} +{"train_info/time_between_train_steps": 0.006989002227783203, "step": 892} +{"info/global_step": 893, "train_info/time_within_train_step": 27.929440021514893, "step": 893} +{"train_info/time_between_train_steps": 0.005747556686401367, "step": 893} +{"info/global_step": 894, "train_info/time_within_train_step": 27.823565244674683, "step": 894} +{"train_info/time_between_train_steps": 0.005970001220703125, "step": 894} +{"info/global_step": 895, "train_info/time_within_train_step": 27.900076150894165, "step": 895} +{"train_info/time_between_train_steps": 0.016175031661987305, "step": 895} +{"info/global_step": 896, "train_info/time_within_train_step": 27.760072708129883, "step": 896} +{"train_info/time_between_train_steps": 0.005960941314697266, "step": 896} +{"info/global_step": 897, "train_info/time_within_train_step": 28.035971641540527, "step": 897} +{"train_info/time_between_train_steps": 0.006029367446899414, "step": 897} +{"info/global_step": 898, "train_info/time_within_train_step": 27.787559747695923, "step": 898} +{"train_info/time_between_train_steps": 0.005639076232910156, "step": 898} +{"info/global_step": 899, "train_info/time_within_train_step": 27.80404758453369, "step": 899} +{"train_info/time_between_train_steps": 0.005438327789306641, "step": 899} +{"info/global_step": 900, "train_info/time_within_train_step": 27.944325923919678, "step": 900} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733979568, "_runtime": 25303}, "step": 900} +{"logs": {"train/loss": 3.6848, "train/learning_rate": 0.00016666666666666666, "train/epoch": 33.01, "_timestamp": 1733979568, "_runtime": 25303}, "step": 900} +{"train_info/time_between_train_steps": 2.3651604652404785, "step": 900} +{"info/global_step": 901, "train_info/time_within_train_step": 27.826740026474, "step": 901} +{"train_info/time_between_train_steps": 0.005471229553222656, "step": 901} +{"info/global_step": 902, "train_info/time_within_train_step": 27.751277208328247, "step": 902} +{"train_info/time_between_train_steps": 0.0058248043060302734, "step": 902} +{"info/global_step": 903, "train_info/time_within_train_step": 27.78578805923462, "step": 903} +{"train_info/time_between_train_steps": 0.005447864532470703, "step": 903} +{"info/global_step": 904, "train_info/time_within_train_step": 27.82868194580078, "step": 904} +{"train_info/time_between_train_steps": 0.005497932434082031, "step": 904} +{"info/global_step": 905, "train_info/time_within_train_step": 27.750911951065063, "step": 905} +{"train_info/time_between_train_steps": 0.005751132965087891, "step": 905} +{"info/global_step": 906, "train_info/time_within_train_step": 27.84987163543701, "step": 906} +{"train_info/time_between_train_steps": 0.005645751953125, "step": 906} +{"info/global_step": 907, "train_info/time_within_train_step": 27.806477785110474, "step": 907} +{"train_info/time_between_train_steps": 0.0055620670318603516, "step": 907} +{"info/global_step": 908, "train_info/time_within_train_step": 27.767679929733276, "step": 908} +{"train_info/time_between_train_steps": 0.005764484405517578, "step": 908} +{"info/global_step": 909, "train_info/time_within_train_step": 27.7494900226593, "step": 909} +{"train_info/time_between_train_steps": 0.005703449249267578, "step": 909} +{"info/global_step": 910, "train_info/time_within_train_step": 27.784656524658203, "step": 910} +{"train_info/time_between_train_steps": 0.011375904083251953, "step": 910} +{"info/global_step": 911, "train_info/time_within_train_step": 27.786499977111816, "step": 911} +{"train_info/time_between_train_steps": 0.00559544563293457, "step": 911} +{"info/global_step": 912, "train_info/time_within_train_step": 27.749433755874634, "step": 912} +{"train_info/time_between_train_steps": 0.005602121353149414, "step": 912} +{"info/global_step": 913, "train_info/time_within_train_step": 27.84768843650818, "step": 913} +{"train_info/time_between_train_steps": 0.005673408508300781, "step": 913} +{"info/global_step": 914, "train_info/time_within_train_step": 28.03378963470459, "step": 914} +{"train_info/time_between_train_steps": 0.005536079406738281, "step": 914} +{"info/global_step": 915, "train_info/time_within_train_step": 27.77123475074768, "step": 915} +{"train_info/time_between_train_steps": 0.0056378841400146484, "step": 915} +{"info/global_step": 916, "train_info/time_within_train_step": 27.799684286117554, "step": 916} +{"train_info/time_between_train_steps": 0.006000995635986328, "step": 916} +{"info/global_step": 917, "train_info/time_within_train_step": 27.820759773254395, "step": 917} +{"train_info/time_between_train_steps": 0.01688385009765625, "step": 917} +{"info/global_step": 918, "train_info/time_within_train_step": 27.845252990722656, "step": 918} +{"train_info/time_between_train_steps": 0.013770341873168945, "step": 918} +{"train_info/time_between_train_steps": 3.9513113498687744, "step": 918} +{"info/global_step": 919, "train_info/time_within_train_step": 27.761454105377197, "step": 919} +{"train_info/time_between_train_steps": 0.0054285526275634766, "step": 919} +{"info/global_step": 920, "train_info/time_within_train_step": 27.916847467422485, "step": 920} +{"train_info/time_between_train_steps": 0.005374431610107422, "step": 920} +{"info/global_step": 921, "train_info/time_within_train_step": 27.8961660861969, "step": 921} +{"train_info/time_between_train_steps": 0.011715173721313477, "step": 921} +{"info/global_step": 922, "train_info/time_within_train_step": 28.08725380897522, "step": 922} +{"train_info/time_between_train_steps": 0.005803823471069336, "step": 922} +{"info/global_step": 923, "train_info/time_within_train_step": 27.754432916641235, "step": 923} +{"train_info/time_between_train_steps": 0.005637645721435547, "step": 923} +{"info/global_step": 924, "train_info/time_within_train_step": 27.848323822021484, "step": 924} +{"train_info/time_between_train_steps": 0.0056400299072265625, "step": 924} +{"info/global_step": 925, "train_info/time_within_train_step": 27.782323122024536, "step": 925} +{"train_info/time_between_train_steps": 0.0059049129486083984, "step": 925} +{"info/global_step": 926, "train_info/time_within_train_step": 27.761478662490845, "step": 926} +{"train_info/time_between_train_steps": 0.005444765090942383, "step": 926} +{"info/global_step": 927, "train_info/time_within_train_step": 27.796438694000244, "step": 927} +{"train_info/time_between_train_steps": 0.0054473876953125, "step": 927} +{"info/global_step": 928, "train_info/time_within_train_step": 27.74899697303772, "step": 928} +{"train_info/time_between_train_steps": 0.005558013916015625, "step": 928} +{"info/global_step": 929, "train_info/time_within_train_step": 27.789191484451294, "step": 929} +{"train_info/time_between_train_steps": 0.005755901336669922, "step": 929} +{"info/global_step": 930, "train_info/time_within_train_step": 27.799302577972412, "step": 930} +{"train_info/time_between_train_steps": 0.005442142486572266, "step": 930} +{"info/global_step": 931, "train_info/time_within_train_step": 27.769543409347534, "step": 931} +{"train_info/time_between_train_steps": 0.005939483642578125, "step": 931} +{"info/global_step": 932, "train_info/time_within_train_step": 27.73770546913147, "step": 932} +{"train_info/time_between_train_steps": 0.005350828170776367, "step": 932} +{"info/global_step": 933, "train_info/time_within_train_step": 27.74488878250122, "step": 933} +{"train_info/time_between_train_steps": 0.005631208419799805, "step": 933} +{"info/global_step": 934, "train_info/time_within_train_step": 27.75939106941223, "step": 934} +{"train_info/time_between_train_steps": 0.005460977554321289, "step": 934} +{"info/global_step": 935, "train_info/time_within_train_step": 27.758890628814697, "step": 935} +{"train_info/time_between_train_steps": 0.005507707595825195, "step": 935} +{"info/global_step": 936, "train_info/time_within_train_step": 27.75447726249695, "step": 936} +{"train_info/time_between_train_steps": 0.005679130554199219, "step": 936} +{"info/global_step": 937, "train_info/time_within_train_step": 27.855801343917847, "step": 937} +{"train_info/time_between_train_steps": 0.005527019500732422, "step": 937} +{"info/global_step": 938, "train_info/time_within_train_step": 27.756415128707886, "step": 938} +{"train_info/time_between_train_steps": 0.00552678108215332, "step": 938} +{"info/global_step": 939, "train_info/time_within_train_step": 27.757009267807007, "step": 939} +{"train_info/time_between_train_steps": 0.005850791931152344, "step": 939} +{"info/global_step": 940, "train_info/time_within_train_step": 27.752610445022583, "step": 940} +{"train_info/time_between_train_steps": 0.005517482757568359, "step": 940} +{"info/global_step": 941, "train_info/time_within_train_step": 27.81917643547058, "step": 941} +{"train_info/time_between_train_steps": 0.0057621002197265625, "step": 941} +{"info/global_step": 942, "train_info/time_within_train_step": 27.847766160964966, "step": 942} +{"train_info/time_between_train_steps": 0.005880594253540039, "step": 942} +{"info/global_step": 943, "train_info/time_within_train_step": 27.890689849853516, "step": 943} +{"train_info/time_between_train_steps": 0.010678291320800781, "step": 943} +{"info/global_step": 944, "train_info/time_within_train_step": 27.923900365829468, "step": 944} +{"train_info/time_between_train_steps": 0.006108760833740234, "step": 944} +{"info/global_step": 945, "train_info/time_within_train_step": 27.87129259109497, "step": 945} +{"train_info/time_between_train_steps": 0.006628513336181641, "step": 945} +{"train_info/time_between_train_steps": 4.112321376800537, "step": 945} +{"info/global_step": 946, "train_info/time_within_train_step": 27.91118550300598, "step": 946} +{"train_info/time_between_train_steps": 0.0058689117431640625, "step": 946} +{"info/global_step": 947, "train_info/time_within_train_step": 27.91859197616577, "step": 947} +{"train_info/time_between_train_steps": 0.0056688785552978516, "step": 947} +{"info/global_step": 948, "train_info/time_within_train_step": 27.79960608482361, "step": 948} +{"train_info/time_between_train_steps": 0.005806922912597656, "step": 948} +{"info/global_step": 949, "train_info/time_within_train_step": 27.96781039237976, "step": 949} +{"train_info/time_between_train_steps": 0.007173299789428711, "step": 949} +{"info/global_step": 950, "train_info/time_within_train_step": 27.81957697868347, "step": 950} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733980970, "_runtime": 26705}, "step": 950} +{"logs": {"train/loss": 3.6253, "train/learning_rate": 0.0001388888888888889, "train/epoch": 35.0, "_timestamp": 1733980970, "_runtime": 26705}, "step": 950} +{"train_info/time_between_train_steps": 0.008095502853393555, "step": 950} +{"info/global_step": 951, "train_info/time_within_train_step": 27.94313097000122, "step": 951} +{"train_info/time_between_train_steps": 0.005827665328979492, "step": 951} +{"info/global_step": 952, "train_info/time_within_train_step": 27.786142587661743, "step": 952} +{"train_info/time_between_train_steps": 0.0056498050689697266, "step": 952} +{"info/global_step": 953, "train_info/time_within_train_step": 28.0016667842865, "step": 953} +{"train_info/time_between_train_steps": 0.011221885681152344, "step": 953} +{"info/global_step": 954, "train_info/time_within_train_step": 27.795602321624756, "step": 954} +{"train_info/time_between_train_steps": 0.005688905715942383, "step": 954} +{"info/global_step": 955, "train_info/time_within_train_step": 27.87404227256775, "step": 955} +{"train_info/time_between_train_steps": 0.010772466659545898, "step": 955} +{"info/global_step": 956, "train_info/time_within_train_step": 27.7628333568573, "step": 956} +{"train_info/time_between_train_steps": 0.005584716796875, "step": 956} +{"info/global_step": 957, "train_info/time_within_train_step": 27.784483194351196, "step": 957} +{"train_info/time_between_train_steps": 0.016120433807373047, "step": 957} +{"info/global_step": 958, "train_info/time_within_train_step": 27.799997329711914, "step": 958} +{"train_info/time_between_train_steps": 0.011032342910766602, "step": 958} +{"info/global_step": 959, "train_info/time_within_train_step": 27.787557363510132, "step": 959} +{"train_info/time_between_train_steps": 0.005862236022949219, "step": 959} +{"info/global_step": 960, "train_info/time_within_train_step": 27.786972284317017, "step": 960} +{"train_info/time_between_train_steps": 0.005498647689819336, "step": 960} +{"info/global_step": 961, "train_info/time_within_train_step": 27.944000720977783, "step": 961} +{"train_info/time_between_train_steps": 0.005506753921508789, "step": 961} +{"info/global_step": 962, "train_info/time_within_train_step": 27.84106731414795, "step": 962} +{"train_info/time_between_train_steps": 0.005843162536621094, "step": 962} +{"info/global_step": 963, "train_info/time_within_train_step": 27.805113077163696, "step": 963} +{"train_info/time_between_train_steps": 0.005448341369628906, "step": 963} +{"info/global_step": 964, "train_info/time_within_train_step": 27.75080180168152, "step": 964} +{"train_info/time_between_train_steps": 0.0055658817291259766, "step": 964} +{"info/global_step": 965, "train_info/time_within_train_step": 27.779449224472046, "step": 965} +{"train_info/time_between_train_steps": 0.00568842887878418, "step": 965} +{"info/global_step": 966, "train_info/time_within_train_step": 27.80232310295105, "step": 966} +{"train_info/time_between_train_steps": 0.005553722381591797, "step": 966} +{"info/global_step": 967, "train_info/time_within_train_step": 27.80024480819702, "step": 967} +{"train_info/time_between_train_steps": 0.0120697021484375, "step": 967} +{"info/global_step": 968, "train_info/time_within_train_step": 27.93117380142212, "step": 968} +{"train_info/time_between_train_steps": 0.005769252777099609, "step": 968} +{"info/global_step": 969, "train_info/time_within_train_step": 27.95982313156128, "step": 969} +{"train_info/time_between_train_steps": 0.00567936897277832, "step": 969} +{"info/global_step": 970, "train_info/time_within_train_step": 27.837175846099854, "step": 970} +{"train_info/time_between_train_steps": 0.0060024261474609375, "step": 970} +{"info/global_step": 971, "train_info/time_within_train_step": 27.791756868362427, "step": 971} +{"train_info/time_between_train_steps": 0.011195898056030273, "step": 971} +{"info/global_step": 972, "train_info/time_within_train_step": 27.835659980773926, "step": 972} +{"train_info/time_between_train_steps": 0.01206207275390625, "step": 972} +{"train_info/time_between_train_steps": 3.814680576324463, "step": 972} +{"info/global_step": 973, "train_info/time_within_train_step": 27.824971437454224, "step": 973} +{"train_info/time_between_train_steps": 0.005348682403564453, "step": 973} +{"info/global_step": 974, "train_info/time_within_train_step": 28.311994075775146, "step": 974} +{"train_info/time_between_train_steps": 0.016877174377441406, "step": 974} +{"info/global_step": 975, "train_info/time_within_train_step": 27.85230779647827, "step": 975} +{"train_info/time_between_train_steps": 0.005459785461425781, "step": 975} +{"info/global_step": 976, "train_info/time_within_train_step": 27.967336893081665, "step": 976} +{"train_info/time_between_train_steps": 0.005897045135498047, "step": 976} +{"info/global_step": 977, "train_info/time_within_train_step": 27.773027896881104, "step": 977} +{"train_info/time_between_train_steps": 0.011378765106201172, "step": 977} +{"info/global_step": 978, "train_info/time_within_train_step": 27.914435148239136, "step": 978} +{"train_info/time_between_train_steps": 0.005921602249145508, "step": 978} +{"info/global_step": 979, "train_info/time_within_train_step": 27.896020889282227, "step": 979} +{"train_info/time_between_train_steps": 0.006151676177978516, "step": 979} +{"info/global_step": 980, "train_info/time_within_train_step": 27.837136030197144, "step": 980} +{"train_info/time_between_train_steps": 0.0052793025970458984, "step": 980} +{"info/global_step": 981, "train_info/time_within_train_step": 27.806023120880127, "step": 981} +{"train_info/time_between_train_steps": 0.005556583404541016, "step": 981} +{"info/global_step": 982, "train_info/time_within_train_step": 27.84927272796631, "step": 982} +{"train_info/time_between_train_steps": 0.0056209564208984375, "step": 982} +{"info/global_step": 983, "train_info/time_within_train_step": 27.847092151641846, "step": 983} +{"train_info/time_between_train_steps": 0.00799250602722168, "step": 983} +{"info/global_step": 984, "train_info/time_within_train_step": 27.89302682876587, "step": 984} +{"train_info/time_between_train_steps": 0.005615234375, "step": 984} +{"info/global_step": 985, "train_info/time_within_train_step": 27.860260486602783, "step": 985} +{"train_info/time_between_train_steps": 0.008976221084594727, "step": 985} +{"info/global_step": 986, "train_info/time_within_train_step": 27.834789991378784, "step": 986} +{"train_info/time_between_train_steps": 0.014803171157836914, "step": 986} +{"info/global_step": 987, "train_info/time_within_train_step": 27.781277418136597, "step": 987} +{"train_info/time_between_train_steps": 0.0057544708251953125, "step": 987} +{"info/global_step": 988, "train_info/time_within_train_step": 27.770607233047485, "step": 988} +{"train_info/time_between_train_steps": 0.005663156509399414, "step": 988} +{"info/global_step": 989, "train_info/time_within_train_step": 27.848011255264282, "step": 989} +{"train_info/time_between_train_steps": 0.010007381439208984, "step": 989} +{"info/global_step": 990, "train_info/time_within_train_step": 27.799304008483887, "step": 990} +{"train_info/time_between_train_steps": 0.005700588226318359, "step": 990} +{"info/global_step": 991, "train_info/time_within_train_step": 27.80947971343994, "step": 991} +{"train_info/time_between_train_steps": 0.005646467208862305, "step": 991} +{"info/global_step": 992, "train_info/time_within_train_step": 27.926143884658813, "step": 992} +{"train_info/time_between_train_steps": 0.005566596984863281, "step": 992} +{"info/global_step": 993, "train_info/time_within_train_step": 27.903642654418945, "step": 993} +{"train_info/time_between_train_steps": 0.011169910430908203, "step": 993} +{"info/global_step": 994, "train_info/time_within_train_step": 27.835837364196777, "step": 994} +{"train_info/time_between_train_steps": 0.005730390548706055, "step": 994} +{"info/global_step": 995, "train_info/time_within_train_step": 27.85700798034668, "step": 995} +{"train_info/time_between_train_steps": 0.00580143928527832, "step": 995} +{"info/global_step": 996, "train_info/time_within_train_step": 27.785881519317627, "step": 996} +{"train_info/time_between_train_steps": 0.014570951461791992, "step": 996} +{"info/global_step": 997, "train_info/time_within_train_step": 27.791209936141968, "step": 997} +{"train_info/time_between_train_steps": 0.011801004409790039, "step": 997} +{"info/global_step": 998, "train_info/time_within_train_step": 27.823012113571167, "step": 998} +{"train_info/time_between_train_steps": 0.00631260871887207, "step": 998} +{"info/global_step": 999, "train_info/time_within_train_step": 27.905210971832275, "step": 999} +{"train_info/time_between_train_steps": 0.006749629974365234, "step": 999} +{"train_info/time_between_train_steps": 3.730462074279785, "step": 999} +{"info/global_step": 1000, "train_info/time_within_train_step": 27.754495859146118, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1733982370, "_runtime": 28105}, "step": 1000} +{"logs": {"train/loss": 3.5729, "train/learning_rate": 0.00011111111111111109, "train/epoch": 37.0, "_timestamp": 1733982370, "_runtime": 28105}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27358.0, "train_info/memory_max_reserved": 27358.0, "_timestamp": 1733982372, "_runtime": 28107}, "step": 1000} +{"logs": {"eval/loss": 4.759269714355469, "eval/runtime": 1.889, "eval/samples_per_second": 50.291, "eval/steps_per_second": 3.176, "train/epoch": 37.0, "_timestamp": 1733982372, "_runtime": 28107}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27358.0, "train_info/memory_max_reserved": 27358.0, "_timestamp": 1733982372, "_runtime": 28107}, "step": 1000} +{"logs": {"eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 4.759269714355469, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 116.66069914897214, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 1.889, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 50.291, "train/epoch": 37.0, "_timestamp": 1733982372, "_runtime": 28107}, "step": 1000} +{"train_info/time_between_train_steps": 4.270559310913086, "step": 1000} +{"info/global_step": 1001, "train_info/time_within_train_step": 27.995242834091187, "step": 1001} +{"train_info/time_between_train_steps": 0.005694866180419922, "step": 1001} +{"info/global_step": 1002, "train_info/time_within_train_step": 27.809717655181885, "step": 1002} +{"train_info/time_between_train_steps": 0.005667448043823242, "step": 1002} +{"info/global_step": 1003, "train_info/time_within_train_step": 27.920193910598755, "step": 1003} +{"train_info/time_between_train_steps": 0.005629062652587891, "step": 1003} +{"info/global_step": 1004, "train_info/time_within_train_step": 27.813717365264893, "step": 1004} +{"train_info/time_between_train_steps": 0.006155252456665039, "step": 1004} +{"info/global_step": 1005, "train_info/time_within_train_step": 28.089826107025146, "step": 1005} +{"train_info/time_between_train_steps": 0.006066083908081055, "step": 1005} +{"info/global_step": 1006, "train_info/time_within_train_step": 27.814254760742188, "step": 1006} +{"train_info/time_between_train_steps": 0.011294841766357422, "step": 1006} +{"info/global_step": 1007, "train_info/time_within_train_step": 27.81173849105835, "step": 1007} +{"train_info/time_between_train_steps": 0.010996818542480469, "step": 1007} +{"info/global_step": 1008, "train_info/time_within_train_step": 27.770312547683716, "step": 1008} +{"train_info/time_between_train_steps": 0.005673646926879883, "step": 1008} +{"info/global_step": 1009, "train_info/time_within_train_step": 27.946099758148193, "step": 1009} +{"train_info/time_between_train_steps": 0.010179996490478516, "step": 1009} +{"info/global_step": 1010, "train_info/time_within_train_step": 27.823083639144897, "step": 1010} +{"train_info/time_between_train_steps": 0.005520343780517578, "step": 1010} +{"info/global_step": 1011, "train_info/time_within_train_step": 27.76725673675537, "step": 1011} +{"train_info/time_between_train_steps": 0.005625486373901367, "step": 1011} +{"info/global_step": 1012, "train_info/time_within_train_step": 27.785743951797485, "step": 1012} +{"train_info/time_between_train_steps": 0.005523681640625, "step": 1012} +{"info/global_step": 1013, "train_info/time_within_train_step": 27.93231773376465, "step": 1013} +{"train_info/time_between_train_steps": 0.005749225616455078, "step": 1013} +{"info/global_step": 1014, "train_info/time_within_train_step": 27.837672472000122, "step": 1014} +{"train_info/time_between_train_steps": 0.005746126174926758, "step": 1014} +{"info/global_step": 1015, "train_info/time_within_train_step": 27.989002466201782, "step": 1015} +{"train_info/time_between_train_steps": 0.005631923675537109, "step": 1015} +{"info/global_step": 1016, "train_info/time_within_train_step": 27.845052003860474, "step": 1016} +{"train_info/time_between_train_steps": 0.005861997604370117, "step": 1016} +{"info/global_step": 1017, "train_info/time_within_train_step": 27.76713466644287, "step": 1017} +{"train_info/time_between_train_steps": 0.008850336074829102, "step": 1017} +{"info/global_step": 1018, "train_info/time_within_train_step": 27.75248408317566, "step": 1018} +{"train_info/time_between_train_steps": 0.005593299865722656, "step": 1018} +{"info/global_step": 1019, "train_info/time_within_train_step": 27.770108222961426, "step": 1019} +{"train_info/time_between_train_steps": 0.005778074264526367, "step": 1019} +{"info/global_step": 1020, "train_info/time_within_train_step": 27.811972618103027, "step": 1020} +{"train_info/time_between_train_steps": 0.01109623908996582, "step": 1020} +{"info/global_step": 1021, "train_info/time_within_train_step": 27.87199115753174, "step": 1021} +{"train_info/time_between_train_steps": 0.011275768280029297, "step": 1021} +{"info/global_step": 1022, "train_info/time_within_train_step": 27.818883895874023, "step": 1022} +{"train_info/time_between_train_steps": 0.005600690841674805, "step": 1022} +{"info/global_step": 1023, "train_info/time_within_train_step": 27.898505210876465, "step": 1023} +{"train_info/time_between_train_steps": 0.011115550994873047, "step": 1023} +{"info/global_step": 1024, "train_info/time_within_train_step": 27.81491756439209, "step": 1024} +{"train_info/time_between_train_steps": 0.005883693695068359, "step": 1024} +{"info/global_step": 1025, "train_info/time_within_train_step": 27.83967423439026, "step": 1025} +{"train_info/time_between_train_steps": 0.0063822269439697266, "step": 1025} +{"info/global_step": 1026, "train_info/time_within_train_step": 27.899770975112915, "step": 1026} +{"train_info/time_between_train_steps": 0.016177654266357422, "step": 1026} +{"train_info/time_between_train_steps": 4.089317321777344, "step": 1026} +{"info/global_step": 1027, "train_info/time_within_train_step": 27.807170629501343, "step": 1027} +{"train_info/time_between_train_steps": 0.005300998687744141, "step": 1027} +{"info/global_step": 1028, "train_info/time_within_train_step": 28.01859450340271, "step": 1028} +{"train_info/time_between_train_steps": 0.010863780975341797, "step": 1028} +{"info/global_step": 1029, "train_info/time_within_train_step": 27.822676181793213, "step": 1029} +{"train_info/time_between_train_steps": 0.005780696868896484, "step": 1029} +{"info/global_step": 1030, "train_info/time_within_train_step": 28.013246774673462, "step": 1030} +{"train_info/time_between_train_steps": 0.014957427978515625, "step": 1030} +{"info/global_step": 1031, "train_info/time_within_train_step": 27.928428173065186, "step": 1031} +{"train_info/time_between_train_steps": 0.005656003952026367, "step": 1031} +{"info/global_step": 1032, "train_info/time_within_train_step": 27.97158980369568, "step": 1032} +{"train_info/time_between_train_steps": 0.011444091796875, "step": 1032} +{"info/global_step": 1033, "train_info/time_within_train_step": 27.79109764099121, "step": 1033} +{"train_info/time_between_train_steps": 0.011420726776123047, "step": 1033} +{"info/global_step": 1034, "train_info/time_within_train_step": 27.800132274627686, "step": 1034} +{"train_info/time_between_train_steps": 0.011264562606811523, "step": 1034} +{"info/global_step": 1035, "train_info/time_within_train_step": 27.798561811447144, "step": 1035} +{"train_info/time_between_train_steps": 0.01115274429321289, "step": 1035} +{"info/global_step": 1036, "train_info/time_within_train_step": 27.82014226913452, "step": 1036} +{"train_info/time_between_train_steps": 0.011128425598144531, "step": 1036} +{"info/global_step": 1037, "train_info/time_within_train_step": 27.772883653640747, "step": 1037} +{"train_info/time_between_train_steps": 0.005518198013305664, "step": 1037} +{"info/global_step": 1038, "train_info/time_within_train_step": 27.79349660873413, "step": 1038} +{"train_info/time_between_train_steps": 0.005652666091918945, "step": 1038} +{"info/global_step": 1039, "train_info/time_within_train_step": 27.775066137313843, "step": 1039} +{"train_info/time_between_train_steps": 0.005465984344482422, "step": 1039} +{"info/global_step": 1040, "train_info/time_within_train_step": 27.76570415496826, "step": 1040} +{"train_info/time_between_train_steps": 0.005632877349853516, "step": 1040} +{"info/global_step": 1041, "train_info/time_within_train_step": 27.802711248397827, "step": 1041} +{"train_info/time_between_train_steps": 0.011259794235229492, "step": 1041} +{"info/global_step": 1042, "train_info/time_within_train_step": 27.8129723072052, "step": 1042} +{"train_info/time_between_train_steps": 0.010419845581054688, "step": 1042} +{"info/global_step": 1043, "train_info/time_within_train_step": 27.90432643890381, "step": 1043} +{"train_info/time_between_train_steps": 0.006994962692260742, "step": 1043} +{"info/global_step": 1044, "train_info/time_within_train_step": 27.83004927635193, "step": 1044} +{"train_info/time_between_train_steps": 0.0057146549224853516, "step": 1044} +{"info/global_step": 1045, "train_info/time_within_train_step": 27.865925788879395, "step": 1045} +{"train_info/time_between_train_steps": 0.005558013916015625, "step": 1045} +{"info/global_step": 1046, "train_info/time_within_train_step": 27.942863941192627, "step": 1046} +{"train_info/time_between_train_steps": 0.005510807037353516, "step": 1046} +{"info/global_step": 1047, "train_info/time_within_train_step": 27.81254506111145, "step": 1047} +{"train_info/time_between_train_steps": 0.005590915679931641, "step": 1047} +{"info/global_step": 1048, "train_info/time_within_train_step": 27.76069664955139, "step": 1048} +{"train_info/time_between_train_steps": 0.005530118942260742, "step": 1048} +{"info/global_step": 1049, "train_info/time_within_train_step": 27.77637553215027, "step": 1049} +{"train_info/time_between_train_steps": 0.005734443664550781, "step": 1049} +{"info/global_step": 1050, "train_info/time_within_train_step": 27.87224769592285, "step": 1050} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27358.0, "train_info/memory_max_reserved": 27358.0, "_timestamp": 1733983772, "_runtime": 29507}, "step": 1050} +{"logs": {"train/loss": 3.5259, "train/learning_rate": 8.333333333333333e-05, "train/epoch": 38.02, "_timestamp": 1733983772, "_runtime": 29507}, "step": 1050} +{"train_info/time_between_train_steps": 0.007847785949707031, "step": 1050} +{"info/global_step": 1051, "train_info/time_within_train_step": 27.796337842941284, "step": 1051} +{"train_info/time_between_train_steps": 0.006181478500366211, "step": 1051} +{"info/global_step": 1052, "train_info/time_within_train_step": 27.792522192001343, "step": 1052} +{"train_info/time_between_train_steps": 0.006178140640258789, "step": 1052} +{"info/global_step": 1053, "train_info/time_within_train_step": 27.808168649673462, "step": 1053} +{"train_info/time_between_train_steps": 0.012247085571289062, "step": 1053} +{"train_info/time_between_train_steps": 4.2220940589904785, "step": 1053} +{"info/global_step": 1054, "train_info/time_within_train_step": 27.867274045944214, "step": 1054} +{"train_info/time_between_train_steps": 0.006281614303588867, "step": 1054} +{"info/global_step": 1055, "train_info/time_within_train_step": 27.973175525665283, "step": 1055} +{"train_info/time_between_train_steps": 0.006030559539794922, "step": 1055} +{"info/global_step": 1056, "train_info/time_within_train_step": 27.838530778884888, "step": 1056} +{"train_info/time_between_train_steps": 0.005834341049194336, "step": 1056} +{"info/global_step": 1057, "train_info/time_within_train_step": 27.955368280410767, "step": 1057} +{"train_info/time_between_train_steps": 0.00591731071472168, "step": 1057} +{"info/global_step": 1058, "train_info/time_within_train_step": 27.81548047065735, "step": 1058} +{"train_info/time_between_train_steps": 0.005775928497314453, "step": 1058} +{"info/global_step": 1059, "train_info/time_within_train_step": 27.977562427520752, "step": 1059} +{"train_info/time_between_train_steps": 0.005824565887451172, "step": 1059} +{"info/global_step": 1060, "train_info/time_within_train_step": 27.89479899406433, "step": 1060} +{"train_info/time_between_train_steps": 0.007362842559814453, "step": 1060} +{"info/global_step": 1061, "train_info/time_within_train_step": 27.86912727355957, "step": 1061} +{"train_info/time_between_train_steps": 0.005696535110473633, "step": 1061} +{"info/global_step": 1062, "train_info/time_within_train_step": 27.929607629776, "step": 1062} +{"train_info/time_between_train_steps": 0.005651950836181641, "step": 1062} +{"info/global_step": 1063, "train_info/time_within_train_step": 27.878100633621216, "step": 1063} +{"train_info/time_between_train_steps": 0.0059223175048828125, "step": 1063} +{"info/global_step": 1064, "train_info/time_within_train_step": 27.858256101608276, "step": 1064} +{"train_info/time_between_train_steps": 0.0054166316986083984, "step": 1064} +{"info/global_step": 1065, "train_info/time_within_train_step": 27.761894941329956, "step": 1065} +{"train_info/time_between_train_steps": 0.005430936813354492, "step": 1065} +{"info/global_step": 1066, "train_info/time_within_train_step": 27.78595757484436, "step": 1066} +{"train_info/time_between_train_steps": 0.011023998260498047, "step": 1066} +{"info/global_step": 1067, "train_info/time_within_train_step": 27.916205644607544, "step": 1067} +{"train_info/time_between_train_steps": 0.0054819583892822266, "step": 1067} +{"info/global_step": 1068, "train_info/time_within_train_step": 27.845035552978516, "step": 1068} +{"train_info/time_between_train_steps": 0.0056684017181396484, "step": 1068} +{"info/global_step": 1069, "train_info/time_within_train_step": 27.762741565704346, "step": 1069} +{"train_info/time_between_train_steps": 0.0054607391357421875, "step": 1069} +{"info/global_step": 1070, "train_info/time_within_train_step": 27.83403754234314, "step": 1070} +{"train_info/time_between_train_steps": 0.005534172058105469, "step": 1070} +{"info/global_step": 1071, "train_info/time_within_train_step": 27.834861755371094, "step": 1071} +{"train_info/time_between_train_steps": 0.005757331848144531, "step": 1071} +{"info/global_step": 1072, "train_info/time_within_train_step": 27.800857543945312, "step": 1072} +{"train_info/time_between_train_steps": 0.005563020706176758, "step": 1072} +{"info/global_step": 1073, "train_info/time_within_train_step": 27.79031753540039, "step": 1073} +{"train_info/time_between_train_steps": 0.005605936050415039, "step": 1073} +{"info/global_step": 1074, "train_info/time_within_train_step": 27.94700312614441, "step": 1074} +{"train_info/time_between_train_steps": 0.010573387145996094, "step": 1074} +{"info/global_step": 1075, "train_info/time_within_train_step": 27.77216625213623, "step": 1075} +{"train_info/time_between_train_steps": 0.0056667327880859375, "step": 1075} +{"info/global_step": 1076, "train_info/time_within_train_step": 27.77913761138916, "step": 1076} +{"train_info/time_between_train_steps": 0.0058650970458984375, "step": 1076} +{"info/global_step": 1077, "train_info/time_within_train_step": 27.991021871566772, "step": 1077} +{"train_info/time_between_train_steps": 0.005654096603393555, "step": 1077} +{"info/global_step": 1078, "train_info/time_within_train_step": 27.78515338897705, "step": 1078} +{"train_info/time_between_train_steps": 0.0060045719146728516, "step": 1078} +{"info/global_step": 1079, "train_info/time_within_train_step": 27.80515766143799, "step": 1079} +{"train_info/time_between_train_steps": 0.006131649017333984, "step": 1079} +{"info/global_step": 1080, "train_info/time_within_train_step": 27.84235644340515, "step": 1080} +{"train_info/time_between_train_steps": 0.00646209716796875, "step": 1080} +{"train_info/time_between_train_steps": 3.7308194637298584, "step": 1080} +{"info/global_step": 1081, "train_info/time_within_train_step": 27.81313920021057, "step": 1081} +{"train_info/time_between_train_steps": 0.00586247444152832, "step": 1081} +{"info/global_step": 1082, "train_info/time_within_train_step": 27.992904663085938, "step": 1082} +{"train_info/time_between_train_steps": 0.005772113800048828, "step": 1082} +{"info/global_step": 1083, "train_info/time_within_train_step": 27.820512533187866, "step": 1083} +{"train_info/time_between_train_steps": 0.005872488021850586, "step": 1083} +{"info/global_step": 1084, "train_info/time_within_train_step": 27.92610239982605, "step": 1084} +{"train_info/time_between_train_steps": 0.005923032760620117, "step": 1084} +{"info/global_step": 1085, "train_info/time_within_train_step": 27.854179859161377, "step": 1085} +{"train_info/time_between_train_steps": 0.015104532241821289, "step": 1085} +{"info/global_step": 1086, "train_info/time_within_train_step": 27.986329793930054, "step": 1086} +{"train_info/time_between_train_steps": 0.005759239196777344, "step": 1086} +{"info/global_step": 1087, "train_info/time_within_train_step": 27.794819355010986, "step": 1087} +{"train_info/time_between_train_steps": 0.005736351013183594, "step": 1087} +{"info/global_step": 1088, "train_info/time_within_train_step": 27.805453300476074, "step": 1088} +{"train_info/time_between_train_steps": 0.005552053451538086, "step": 1088} +{"info/global_step": 1089, "train_info/time_within_train_step": 27.895743370056152, "step": 1089} +{"train_info/time_between_train_steps": 0.016681671142578125, "step": 1089} +{"info/global_step": 1090, "train_info/time_within_train_step": 27.787564754486084, "step": 1090} +{"train_info/time_between_train_steps": 0.005448579788208008, "step": 1090} +{"info/global_step": 1091, "train_info/time_within_train_step": 27.788697957992554, "step": 1091} +{"train_info/time_between_train_steps": 0.00580906867980957, "step": 1091} +{"info/global_step": 1092, "train_info/time_within_train_step": 27.922556161880493, "step": 1092} +{"train_info/time_between_train_steps": 0.01622486114501953, "step": 1092} +{"info/global_step": 1093, "train_info/time_within_train_step": 27.941775798797607, "step": 1093} +{"train_info/time_between_train_steps": 0.005452394485473633, "step": 1093} +{"info/global_step": 1094, "train_info/time_within_train_step": 27.787893295288086, "step": 1094} +{"train_info/time_between_train_steps": 0.005621910095214844, "step": 1094} +{"info/global_step": 1095, "train_info/time_within_train_step": 27.824621200561523, "step": 1095} +{"train_info/time_between_train_steps": 0.005579710006713867, "step": 1095} +{"info/global_step": 1096, "train_info/time_within_train_step": 27.85096001625061, "step": 1096} +{"train_info/time_between_train_steps": 0.005674839019775391, "step": 1096} +{"info/global_step": 1097, "train_info/time_within_train_step": 27.932958126068115, "step": 1097} +{"train_info/time_between_train_steps": 0.005693912506103516, "step": 1097} +{"info/global_step": 1098, "train_info/time_within_train_step": 27.76370930671692, "step": 1098} +{"train_info/time_between_train_steps": 0.005490779876708984, "step": 1098} +{"info/global_step": 1099, "train_info/time_within_train_step": 27.81460928916931, "step": 1099} +{"train_info/time_between_train_steps": 0.016130924224853516, "step": 1099} +{"info/global_step": 1100, "train_info/time_within_train_step": 27.78128719329834, "step": 1100} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27358.0, "train_info/memory_max_reserved": 27358.0, "_timestamp": 1733985173, "_runtime": 30908}, "step": 1100} +{"logs": {"train/loss": 3.4892, "train/learning_rate": 5.5555555555555545e-05, "train/epoch": 40.02, "_timestamp": 1733985173, "_runtime": 30908}, "step": 1100} +{"train_info/time_between_train_steps": 2.3319990634918213, "step": 1100} +{"info/global_step": 1101, "train_info/time_within_train_step": 27.75087833404541, "step": 1101} +{"train_info/time_between_train_steps": 0.006059885025024414, "step": 1101} +{"info/global_step": 1102, "train_info/time_within_train_step": 27.851832151412964, "step": 1102} +{"train_info/time_between_train_steps": 0.006042003631591797, "step": 1102} +{"info/global_step": 1103, "train_info/time_within_train_step": 27.83518695831299, "step": 1103} +{"train_info/time_between_train_steps": 0.005586147308349609, "step": 1103} +{"info/global_step": 1104, "train_info/time_within_train_step": 27.76811408996582, "step": 1104} +{"train_info/time_between_train_steps": 0.005693912506103516, "step": 1104} +{"info/global_step": 1105, "train_info/time_within_train_step": 27.76597809791565, "step": 1105} +{"train_info/time_between_train_steps": 0.006223201751708984, "step": 1105} +{"info/global_step": 1106, "train_info/time_within_train_step": 27.77742838859558, "step": 1106} +{"train_info/time_between_train_steps": 0.006186723709106445, "step": 1106} +{"info/global_step": 1107, "train_info/time_within_train_step": 27.786975860595703, "step": 1107} +{"train_info/time_between_train_steps": 0.006331205368041992, "step": 1107} +{"train_info/time_between_train_steps": 3.9702529907226562, "step": 1107} +{"info/global_step": 1108, "train_info/time_within_train_step": 27.795350313186646, "step": 1108} +{"train_info/time_between_train_steps": 0.005340576171875, "step": 1108} +{"info/global_step": 1109, "train_info/time_within_train_step": 28.13563323020935, "step": 1109} +{"train_info/time_between_train_steps": 0.005912303924560547, "step": 1109} +{"info/global_step": 1110, "train_info/time_within_train_step": 27.925907850265503, "step": 1110} +{"train_info/time_between_train_steps": 0.01604175567626953, "step": 1110} +{"info/global_step": 1111, "train_info/time_within_train_step": 28.045093297958374, "step": 1111} +{"train_info/time_between_train_steps": 0.005803585052490234, "step": 1111} +{"info/global_step": 1112, "train_info/time_within_train_step": 27.80987858772278, "step": 1112} +{"train_info/time_between_train_steps": 0.005898475646972656, "step": 1112} +{"info/global_step": 1113, "train_info/time_within_train_step": 27.94865655899048, "step": 1113} +{"train_info/time_between_train_steps": 0.006080150604248047, "step": 1113} +{"info/global_step": 1114, "train_info/time_within_train_step": 27.75092077255249, "step": 1114} +{"train_info/time_between_train_steps": 0.005625724792480469, "step": 1114} +{"info/global_step": 1115, "train_info/time_within_train_step": 27.794381380081177, "step": 1115} +{"train_info/time_between_train_steps": 0.005744457244873047, "step": 1115} +{"info/global_step": 1116, "train_info/time_within_train_step": 27.797726154327393, "step": 1116} +{"train_info/time_between_train_steps": 0.007055759429931641, "step": 1116} +{"info/global_step": 1117, "train_info/time_within_train_step": 27.763160467147827, "step": 1117} +{"train_info/time_between_train_steps": 0.0053942203521728516, "step": 1117} +{"info/global_step": 1118, "train_info/time_within_train_step": 27.74544095993042, "step": 1118} +{"train_info/time_between_train_steps": 0.005698442459106445, "step": 1118} +{"info/global_step": 1119, "train_info/time_within_train_step": 27.77744221687317, "step": 1119} +{"train_info/time_between_train_steps": 0.0054378509521484375, "step": 1119} +{"info/global_step": 1120, "train_info/time_within_train_step": 27.74810004234314, "step": 1120} +{"train_info/time_between_train_steps": 0.005382537841796875, "step": 1120} +{"info/global_step": 1121, "train_info/time_within_train_step": 27.76371121406555, "step": 1121} +{"train_info/time_between_train_steps": 0.01132965087890625, "step": 1121} +{"info/global_step": 1122, "train_info/time_within_train_step": 27.776427507400513, "step": 1122} +{"train_info/time_between_train_steps": 0.005455493927001953, "step": 1122} +{"info/global_step": 1123, "train_info/time_within_train_step": 27.781070232391357, "step": 1123} +{"train_info/time_between_train_steps": 0.005517244338989258, "step": 1123} +{"info/global_step": 1124, "train_info/time_within_train_step": 27.896593809127808, "step": 1124} +{"train_info/time_between_train_steps": 0.00564265251159668, "step": 1124} +{"info/global_step": 1125, "train_info/time_within_train_step": 27.882596015930176, "step": 1125} +{"train_info/time_between_train_steps": 0.0055234432220458984, "step": 1125} +{"info/global_step": 1126, "train_info/time_within_train_step": 27.900678157806396, "step": 1126} +{"train_info/time_between_train_steps": 0.010793685913085938, "step": 1126} +{"info/global_step": 1127, "train_info/time_within_train_step": 27.82077383995056, "step": 1127} +{"train_info/time_between_train_steps": 0.005585908889770508, "step": 1127} +{"info/global_step": 1128, "train_info/time_within_train_step": 27.957876205444336, "step": 1128} +{"train_info/time_between_train_steps": 0.005426645278930664, "step": 1128} +{"info/global_step": 1129, "train_info/time_within_train_step": 27.797732830047607, "step": 1129} +{"train_info/time_between_train_steps": 0.005764007568359375, "step": 1129} +{"info/global_step": 1130, "train_info/time_within_train_step": 27.84912872314453, "step": 1130} +{"train_info/time_between_train_steps": 0.0056726932525634766, "step": 1130} +{"info/global_step": 1131, "train_info/time_within_train_step": 27.790923595428467, "step": 1131} +{"train_info/time_between_train_steps": 0.005601644515991211, "step": 1131} +{"info/global_step": 1132, "train_info/time_within_train_step": 27.790396451950073, "step": 1132} +{"train_info/time_between_train_steps": 0.006213665008544922, "step": 1132} +{"info/global_step": 1133, "train_info/time_within_train_step": 27.900676012039185, "step": 1133} +{"train_info/time_between_train_steps": 0.006802082061767578, "step": 1133} +{"info/global_step": 1134, "train_info/time_within_train_step": 27.809464931488037, "step": 1134} +{"train_info/time_between_train_steps": 0.00674891471862793, "step": 1134} +{"train_info/time_between_train_steps": 4.133435487747192, "step": 1134} +{"info/global_step": 1135, "train_info/time_within_train_step": 27.832208156585693, "step": 1135} +{"train_info/time_between_train_steps": 0.00615692138671875, "step": 1135} +{"info/global_step": 1136, "train_info/time_within_train_step": 27.962125539779663, "step": 1136} +{"train_info/time_between_train_steps": 0.005820274353027344, "step": 1136} +{"info/global_step": 1137, "train_info/time_within_train_step": 27.76046848297119, "step": 1137} +{"train_info/time_between_train_steps": 0.005844593048095703, "step": 1137} +{"info/global_step": 1138, "train_info/time_within_train_step": 27.949135065078735, "step": 1138} +{"train_info/time_between_train_steps": 0.0057871341705322266, "step": 1138} +{"info/global_step": 1139, "train_info/time_within_train_step": 27.779052257537842, "step": 1139} +{"train_info/time_between_train_steps": 0.006087064743041992, "step": 1139} +{"info/global_step": 1140, "train_info/time_within_train_step": 28.04791021347046, "step": 1140} +{"train_info/time_between_train_steps": 0.011382341384887695, "step": 1140} +{"info/global_step": 1141, "train_info/time_within_train_step": 27.7733371257782, "step": 1141} +{"train_info/time_between_train_steps": 0.006116390228271484, "step": 1141} +{"info/global_step": 1142, "train_info/time_within_train_step": 27.820142030715942, "step": 1142} +{"train_info/time_between_train_steps": 0.010863065719604492, "step": 1142} +{"info/global_step": 1143, "train_info/time_within_train_step": 27.813059091567993, "step": 1143} +{"train_info/time_between_train_steps": 0.005370616912841797, "step": 1143} +{"info/global_step": 1144, "train_info/time_within_train_step": 27.862078189849854, "step": 1144} +{"train_info/time_between_train_steps": 0.010494232177734375, "step": 1144} +{"info/global_step": 1145, "train_info/time_within_train_step": 27.81110453605652, "step": 1145} +{"train_info/time_between_train_steps": 0.005684614181518555, "step": 1145} +{"info/global_step": 1146, "train_info/time_within_train_step": 27.865697622299194, "step": 1146} +{"train_info/time_between_train_steps": 0.0054399967193603516, "step": 1146} +{"info/global_step": 1147, "train_info/time_within_train_step": 27.98774027824402, "step": 1147} +{"train_info/time_between_train_steps": 0.010679483413696289, "step": 1147} +{"info/global_step": 1148, "train_info/time_within_train_step": 28.024198055267334, "step": 1148} +{"train_info/time_between_train_steps": 0.005543231964111328, "step": 1148} +{"info/global_step": 1149, "train_info/time_within_train_step": 27.750635862350464, "step": 1149} +{"train_info/time_between_train_steps": 0.005536079406738281, "step": 1149} +{"info/global_step": 1150, "train_info/time_within_train_step": 27.795249462127686, "step": 1150} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27358.0, "train_info/memory_max_reserved": 27358.0, "_timestamp": 1733986576, "_runtime": 32311}, "step": 1150} +{"logs": {"train/loss": 3.4568, "train/learning_rate": 2.7777777777777772e-05, "train/epoch": 42.01, "_timestamp": 1733986576, "_runtime": 32311}, "step": 1150} +{"train_info/time_between_train_steps": 0.007745981216430664, "step": 1150} +{"info/global_step": 1151, "train_info/time_within_train_step": 27.781107902526855, "step": 1151} +{"train_info/time_between_train_steps": 0.005528926849365234, "step": 1151} +{"info/global_step": 1152, "train_info/time_within_train_step": 27.752641201019287, "step": 1152} +{"train_info/time_between_train_steps": 0.005796194076538086, "step": 1152} +{"info/global_step": 1153, "train_info/time_within_train_step": 27.949127912521362, "step": 1153} +{"train_info/time_between_train_steps": 0.0056383609771728516, "step": 1153} +{"info/global_step": 1154, "train_info/time_within_train_step": 27.79070782661438, "step": 1154} +{"train_info/time_between_train_steps": 0.005788087844848633, "step": 1154} +{"info/global_step": 1155, "train_info/time_within_train_step": 27.894047021865845, "step": 1155} +{"train_info/time_between_train_steps": 0.0056765079498291016, "step": 1155} +{"info/global_step": 1156, "train_info/time_within_train_step": 27.80353832244873, "step": 1156} +{"train_info/time_between_train_steps": 0.005829572677612305, "step": 1156} +{"info/global_step": 1157, "train_info/time_within_train_step": 27.83844256401062, "step": 1157} +{"train_info/time_between_train_steps": 0.005713939666748047, "step": 1157} +{"info/global_step": 1158, "train_info/time_within_train_step": 27.873066663742065, "step": 1158} +{"train_info/time_between_train_steps": 0.007023334503173828, "step": 1158} +{"info/global_step": 1159, "train_info/time_within_train_step": 27.84999966621399, "step": 1159} +{"train_info/time_between_train_steps": 0.006000995635986328, "step": 1159} +{"info/global_step": 1160, "train_info/time_within_train_step": 27.851211547851562, "step": 1160} +{"train_info/time_between_train_steps": 0.00638890266418457, "step": 1160} +{"info/global_step": 1161, "train_info/time_within_train_step": 27.90030312538147, "step": 1161} +{"train_info/time_between_train_steps": 0.0073184967041015625, "step": 1161} +{"train_info/time_between_train_steps": 3.746920347213745, "step": 1161} +{"info/global_step": 1162, "train_info/time_within_train_step": 27.7810640335083, "step": 1162} +{"train_info/time_between_train_steps": 0.0054590702056884766, "step": 1162} +{"info/global_step": 1163, "train_info/time_within_train_step": 27.97341275215149, "step": 1163} +{"train_info/time_between_train_steps": 0.01687335968017578, "step": 1163} +{"info/global_step": 1164, "train_info/time_within_train_step": 27.921112298965454, "step": 1164} +{"train_info/time_between_train_steps": 0.005980968475341797, "step": 1164} +{"info/global_step": 1165, "train_info/time_within_train_step": 27.924252033233643, "step": 1165} +{"train_info/time_between_train_steps": 0.005735874176025391, "step": 1165} +{"info/global_step": 1166, "train_info/time_within_train_step": 27.791826725006104, "step": 1166} +{"train_info/time_between_train_steps": 0.0058171749114990234, "step": 1166} +{"info/global_step": 1167, "train_info/time_within_train_step": 27.9296817779541, "step": 1167} +{"train_info/time_between_train_steps": 0.005837440490722656, "step": 1167} +{"info/global_step": 1168, "train_info/time_within_train_step": 27.785434007644653, "step": 1168} +{"train_info/time_between_train_steps": 0.005753517150878906, "step": 1168} +{"info/global_step": 1169, "train_info/time_within_train_step": 27.882189512252808, "step": 1169} +{"train_info/time_between_train_steps": 0.005677700042724609, "step": 1169} +{"info/global_step": 1170, "train_info/time_within_train_step": 27.847328186035156, "step": 1170} +{"train_info/time_between_train_steps": 0.005584001541137695, "step": 1170} +{"info/global_step": 1171, "train_info/time_within_train_step": 27.913699865341187, "step": 1171} +{"train_info/time_between_train_steps": 0.005475521087646484, "step": 1171} +{"info/global_step": 1172, "train_info/time_within_train_step": 27.833008289337158, "step": 1172} +{"train_info/time_between_train_steps": 0.014480352401733398, "step": 1172} +{"info/global_step": 1173, "train_info/time_within_train_step": 27.76047444343567, "step": 1173} +{"train_info/time_between_train_steps": 0.006471395492553711, "step": 1173} +{"info/global_step": 1174, "train_info/time_within_train_step": 27.77593159675598, "step": 1174} +{"train_info/time_between_train_steps": 0.005530595779418945, "step": 1174} +{"info/global_step": 1175, "train_info/time_within_train_step": 27.763898372650146, "step": 1175} +{"train_info/time_between_train_steps": 0.005554676055908203, "step": 1175} +{"info/global_step": 1176, "train_info/time_within_train_step": 27.829833984375, "step": 1176} +{"train_info/time_between_train_steps": 0.0104217529296875, "step": 1176} +{"info/global_step": 1177, "train_info/time_within_train_step": 27.761313915252686, "step": 1177} +{"train_info/time_between_train_steps": 0.005485057830810547, "step": 1177} +{"info/global_step": 1178, "train_info/time_within_train_step": 27.77226686477661, "step": 1178} +{"train_info/time_between_train_steps": 0.005646944046020508, "step": 1178} +{"info/global_step": 1179, "train_info/time_within_train_step": 27.742661714553833, "step": 1179} +{"train_info/time_between_train_steps": 0.006305217742919922, "step": 1179} +{"info/global_step": 1180, "train_info/time_within_train_step": 27.74558663368225, "step": 1180} +{"train_info/time_between_train_steps": 0.005479574203491211, "step": 1180} +{"info/global_step": 1181, "train_info/time_within_train_step": 27.761241674423218, "step": 1181} +{"train_info/time_between_train_steps": 0.005755424499511719, "step": 1181} +{"info/global_step": 1182, "train_info/time_within_train_step": 27.930809497833252, "step": 1182} +{"train_info/time_between_train_steps": 0.01627802848815918, "step": 1182} +{"info/global_step": 1183, "train_info/time_within_train_step": 27.813397645950317, "step": 1183} +{"train_info/time_between_train_steps": 0.011143684387207031, "step": 1183} +{"info/global_step": 1184, "train_info/time_within_train_step": 27.796833515167236, "step": 1184} +{"train_info/time_between_train_steps": 0.00598454475402832, "step": 1184} +{"info/global_step": 1185, "train_info/time_within_train_step": 27.777621507644653, "step": 1185} +{"train_info/time_between_train_steps": 0.005972862243652344, "step": 1185} +{"info/global_step": 1186, "train_info/time_within_train_step": 27.780861616134644, "step": 1186} +{"train_info/time_between_train_steps": 0.006151676177978516, "step": 1186} +{"info/global_step": 1187, "train_info/time_within_train_step": 27.949281215667725, "step": 1187} +{"train_info/time_between_train_steps": 0.006082296371459961, "step": 1187} +{"info/global_step": 1188, "train_info/time_within_train_step": 27.819469928741455, "step": 1188} +{"train_info/time_between_train_steps": 0.0066678524017333984, "step": 1188} +{"train_info/time_between_train_steps": 3.9506983757019043, "step": 1188} +{"info/global_step": 1189, "train_info/time_within_train_step": 27.77939462661743, "step": 1189} +{"train_info/time_between_train_steps": 0.010751724243164062, "step": 1189} +{"info/global_step": 1190, "train_info/time_within_train_step": 27.931954622268677, "step": 1190} +{"train_info/time_between_train_steps": 0.005698204040527344, "step": 1190} +{"info/global_step": 1191, "train_info/time_within_train_step": 27.798295736312866, "step": 1191} +{"train_info/time_between_train_steps": 0.005669116973876953, "step": 1191} +{"info/global_step": 1192, "train_info/time_within_train_step": 27.97522258758545, "step": 1192} +{"train_info/time_between_train_steps": 0.005994319915771484, "step": 1192} +{"info/global_step": 1193, "train_info/time_within_train_step": 27.87484622001648, "step": 1193} +{"train_info/time_between_train_steps": 0.018180370330810547, "step": 1193} +{"info/global_step": 1194, "train_info/time_within_train_step": 27.968724966049194, "step": 1194} +{"train_info/time_between_train_steps": 0.00589442253112793, "step": 1194} +{"info/global_step": 1195, "train_info/time_within_train_step": 27.828015327453613, "step": 1195} +{"train_info/time_between_train_steps": 0.011261463165283203, "step": 1195} +{"info/global_step": 1196, "train_info/time_within_train_step": 28.00460457801819, "step": 1196} +{"train_info/time_between_train_steps": 0.0052950382232666016, "step": 1196} +{"info/global_step": 1197, "train_info/time_within_train_step": 27.83181095123291, "step": 1197} +{"train_info/time_between_train_steps": 0.01028585433959961, "step": 1197} +{"info/global_step": 1198, "train_info/time_within_train_step": 27.96950936317444, "step": 1198} +{"train_info/time_between_train_steps": 0.0057375431060791016, "step": 1198} +{"info/global_step": 1199, "train_info/time_within_train_step": 27.77337074279785, "step": 1199} +{"train_info/time_between_train_steps": 0.0057637691497802734, "step": 1199} +{"info/global_step": 1200, "train_info/time_within_train_step": 27.865653038024902, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27358.0, "train_info/memory_max_reserved": 27358.0, "_timestamp": 1733987976, "_runtime": 33711}, "step": 1200} +{"logs": {"train/loss": 3.4341, "train/learning_rate": 0.0, "train/epoch": 44.01, "_timestamp": 1733987976, "_runtime": 33711}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27358.0, "train_info/memory_max_reserved": 27358.0, "_timestamp": 1733987979, "_runtime": 33714}, "step": 1200} +{"logs": {"train/train_runtime": 33715.0423, "train/train_samples_per_second": 18.223, "train/train_steps_per_second": 0.036, "train/total_flos": 3.21282592210944e+17, "train/train_loss": 4.613435382048289, "train/epoch": 44.01, "_timestamp": 1733987979, "_runtime": 33714}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4453125, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27358.0, "train_info/memory_max_reserved": 27358.0, "_timestamp": 1733987982, "_runtime": 33717}, "step": 1200} +{"logs": {"eval/loss": 4.769293308258057, "eval/runtime": 1.871, "eval/samples_per_second": 50.774, "eval/steps_per_second": 3.207, "train/epoch": 44.01, "_timestamp": 1733987982, "_runtime": 33717}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4453125, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27358.0, "train_info/memory_max_reserved": 27358.0, "_timestamp": 1733987982, "_runtime": 33717}, "step": 1200} +{"logs": {"eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 4.769293308258057, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 117.83593884443988, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 1.871, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 50.774, "train/epoch": 44.01, "_timestamp": 1733987982, "_runtime": 33717}, "step": 1200} diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..f25dd905bcc86435e1bc14ccadadeae3729379ee --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb3ebd8b69678d14514b0ee8d7b72d1ed3060fd70c512258919e84411d5db15f +size 510396521 diff --git a/shuffle_control_ru_RU_randinit_seed53.log b/shuffle_control_ru_RU_randinit_seed53.log new file mode 100755 index 0000000000000000000000000000000000000000..5cbebf71eb351d0daf7448c9fbcd65ff1ec89953 --- /dev/null +++ b/shuffle_control_ru_RU_randinit_seed53.log @@ -0,0 +1,121 @@ +|=>> 12/11 [22:56:56] - mistral - INFO :: Starting Run: shuffle_control_ru_RU_randinit_seed53... +|=>> 12/11 [22:56:56] - mistral - INFO :: Setting Random Seed to 53! +|=>> 12/11 [22:56:56] - mistral - INFO :: Building Tokenize and Initializing `gpt2-small` via AutoModel/AutoConfig... +|=>> 12/11 [22:56:56] - mistral - INFO :: Using Configs For Model From: /local/xiulyang/mission-impossible-language-models/mistral/conf/models/gpt2-small-50257-RU.json ... +|=>> 12/11 [22:56:56] - mistral.models.auto - INFO :: Building Hugging Face GPT2Config from provided configs: {'activation_function': 'gelu_new', 'architectures': ['GPT2LMHeadModel'], 'attn_pdrop': 0.1, 'embd_pdrop': 0.1, 'eos_token_id': 2, 'bos_token_id': 1, 'initializer_range': 0.02, 'layer_norm_epsilon': 1e-05, 'model_type': 'gpt2', 'n_ctx': 1024, 'n_embd': 768, 'n_head': 12, 'n_inner': None, 'n_layer': 12, 'n_positions': 1024, 'reorder_and_upcast_attn': True, 'resid_pdrop': 0.1, 'scale_attn_by_inverse_layer_idx': True, 'scale_attn_weights': True, 'summary_activation': None, 'summary_first_dropout': 0.2, 'summary_proj_to_labels': True, 'summary_type': 'cls_index', 'summary_use_proj': True, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 1024}}, 'torch_dtype': 'float32', 'transformers_version': '4.35.2', 'use_cache': False, 'vocab_size': 50257} ... +|=>> 12/11 [22:56:56] - mistral.models.auto - INFO :: Fetching Hugging Face [Fast] AutoTokenizer for Model: `gpt2`... +|=>> 12/11 [22:56:56] - mistral.models.auto - INFO :: Using a Pretokenized Dataset +|=>> 12/11 [22:56:56] - mistral.models.auto - INFO :: Initializing Custom GPT-2 Model from Configuration: `gpt2`... +|=>> 12/11 [22:57:00] - mistral - INFO :: Setting Training Arguments from Quinfig... +|=>> 12/11 [22:57:00] - mistral.args.training - INFO :: Setting Gradient Accumulation Steps = `64` [BSZ: 512 World Size: 1 Device BSZ: 8] +|=>> 12/11 [22:57:00] - mistral - INFO :: Downloading and Preprocessing Dataset `/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py`... +|=>> 12/11 [22:57:00] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Generating examples from = /local/xiulyang/mission-impossible-language-models/data/multilingual/multilingual_data_perturbed/shuffle_control_ru/train +|=>> 12/11 [22:57:01] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Total sentences: 1022785 +|=>> 12/11 [22:57:01] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Loading pre-tokenized data +|=>> 12/11 [22:57:06] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 12/11 [22:57:07] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 12/11 [22:57:07] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 12/11 [22:57:10] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Generating examples from = /local/xiulyang/mission-impossible-language-models/data/multilingual/multilingual_data_perturbed/shuffle_control_ru/dev +|=>> 12/11 [22:57:10] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Total sentences: 5060 +|=>> 12/11 [22:57:10] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Loading pre-tokenized data +|=>> 12/11 [22:57:10] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 12/11 [22:57:10] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 12/11 [22:57:10] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 12/11 [22:57:10] - mistral.corpora.auto - INFO :: Building Tokenized Indexed Dataset for {dataset_id}/{dataset_name}... +|=>> 12/11 [22:57:10] - mistral.corpora.auto - INFO :: Building Indexed Dataset for train +|=>> 12/11 [22:57:40] - mistral.corpora.auto - INFO :: Building Indexed Dataset for validation +|=>> 12/11 [22:57:40] - mistral - INFO :: Initializing Model Trainer... +|=>> 12/11 [22:57:40] - mistral - INFO :: Training Arguments: TrainingArguments( +_n_gpu=1, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +bf16=False, +bf16_full_eval=False, +data_seed=53, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=IntervalStrategy.STEPS, +fp16=True, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +gradient_accumulation_steps=64, +gradient_checkpointing=False, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_model_id=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0006, +length_column_name=length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=logs, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=50, +logging_strategy=IntervalStrategy.STEPS, +lr_scheduler_type=SchedulerType.LINEAR, +max_grad_norm=1.0, +max_steps=1200, +metric_for_best_model=None, +mp_parameters=, +no_cuda=False, +num_train_epochs=3.0, +optim=OptimizerNames.ADAMW_HF, +output_dir=//local/xiulyang/babylm_models/shuffle_control_ru_RU_randinit/babylm_shuffle_control_ru_RU_randinit_seed53/runs/shuffle_control_ru_RU_randinit_seed53, +overwrite_output_dir=False, +past_index=-1, +per_device_eval_batch_size=16, +per_device_train_batch_size=8, +prediction_loss_only=True, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +remove_unused_columns=True, +report_to=[], +resume_from_checkpoint=None, +run_name=shuffle_control_ru_RU_randinit_seed53, +save_on_each_node=False, +save_steps=1000, +save_strategy=IntervalStrategy.STEPS, +save_total_limit=None, +seed=53, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=120, +weight_decay=0.1, +xpu_backend=None, +) +|=>> 12/11 [22:57:40] - mistral.core.callbacks - INFO :: Setting W&B Project: xiulin-yang-compling +|=>> 12/11 [22:57:44] - mistral - INFO :: Training... +|=>> 12/11 [22:57:44] - mistral.core.callbacks - INFO :: Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" +|=>> 12/12 [08:19:40] - mistral - INFO :: ...and that's all folks! +|=>> 12/12 [08:19:40] - mistral - INFO :: Running final evaluation... diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f68d66eaa09f7732c3d230d37ae591e8f32f4f8 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27836d7e56d49d56f7b1cb7eedcf9f3729d59e5c4fa7930d4fee7671376c78c4 +size 3183