diff --git a/checkpoint-0/config.json b/checkpoint-0/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-0/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-0/pytorch_model.bin b/checkpoint-0/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..e62f1c2a73e204dd71fd38dbd4de66ee58a3ae92 --- /dev/null +++ b/checkpoint-0/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4794ea0b9a495e87fc34e316efe1b585527d6e3d15e4d093362ab24ee74ab3f +size 510396521 diff --git a/checkpoint-0/special_tokens_map.json b/checkpoint-0/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-0/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-0/tokenizer_config.json b/checkpoint-0/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-0/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-0/training_args.bin b/checkpoint-0/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..bca3042c603d146089deb3309dc6c6828df7acbf --- /dev/null +++ b/checkpoint-0/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b2014dac66e89fff48441e3acbe29cace7e30db0b4b50068d0d17e60e27a36 +size 3183 diff --git a/checkpoint-100/config.json b/checkpoint-100/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..ca0fa0aa308d19c5a856f504a62a0a132882e31e --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c72b4fe9f4e2d75a1511e9b9206598b3d136bf3dc4ad13cb5693e33b64cb0cf6 +size 995603825 diff --git a/checkpoint-100/pytorch_model.bin b/checkpoint-100/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..24993c7bc765b1a61ffc90022f21e1e87a3debd6 --- /dev/null +++ b/checkpoint-100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38bd00175ed8d41bf6b694f9341b75ca404ad2fb5682eeb33af2554097cd01f4 +size 510396521 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..65a2799dcde15365bafa49d230ae1d6381f39dbb --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c24e465139c080042c0f27df7ee536fb049a45dc538a82bba8d898500e2cfe6c +size 14567 diff --git a/checkpoint-100/scaler.pt b/checkpoint-100/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..45cc4a33e17645cb0ed4a911b11c77cb2e7ce7f3 --- /dev/null +++ b/checkpoint-100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a3423b2fe42f204bc8fe2c666ff379f9fd753a0f13613064a5e71e86b519e8 +size 559 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..595eec9094e91b3eb24c2de88c461df2c22026ab --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e9b9d31d11c624d89b0c04ad496adf4b5addd3e703848d2583972c703e8da6 +size 623 diff --git a/checkpoint-100/special_tokens_map.json b/checkpoint-100/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-100/tokenizer_config.json b/checkpoint-100/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..c3702b3716dc18dc8df0ccbc5a6862a93d027763 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,34 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.013333333333333, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0201, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 7.8456, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.4866, + "step": 100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.6757871828992e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..bca3042c603d146089deb3309dc6c6828df7acbf --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b2014dac66e89fff48441e3acbe29cace7e30db0b4b50068d0d17e60e27a36 +size 3183 diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..aa8367523f62d6512436d2895e24500b567e8b8d --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:217fe7732bcb7c8dec914ae2514552dc0ca462bff395a4616d55ea406441c99e +size 995604017 diff --git a/checkpoint-1000/pytorch_model.bin b/checkpoint-1000/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..1c9ae8f69ae09c74982250cc1dd5e3959fa993f1 --- /dev/null +++ b/checkpoint-1000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73b5773f0cc4a040ae0527469327ed539241aa01ff4fdffed45f81fbbab8010d +size 510396521 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..3dc011be1d093ffe25ed9dd21510b4acc309f224 --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:520973adee116dc95952002de5a7f96c5369839c6e26c013255c8159a5272629 +size 14567 diff --git a/checkpoint-1000/scaler.pt b/checkpoint-1000/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..b04695d3a30e4bab2b78883d9c849c25c37ef7d7 --- /dev/null +++ b/checkpoint-1000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f810fc7b695697c440d8985f6042b4ba23a9e1027604c265718b518ca29f1b2b +size 559 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..2a616899591a36b66ac0bd1ceeb324087c181a2a --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0691206f4bd9ca409d6e7104087a4e0eb05df8f8f555a400f6ecc532edba52d8 +size 623 diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..e8edc8215698f86df2be6603c9781cfdccfc8add --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,158 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 35.016666666666666, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0201, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 7.8456, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.4866, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 4.9816, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 4.7462, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 4.6079, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 4.4553, + "step": 300 + }, + { + "epoch": 12.01, + "learning_rate": 0.00047222222222222224, + "loss": 4.2711, + "step": 350 + }, + { + "epoch": 14.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.0704, + "step": 400 + }, + { + "epoch": 16.0, + "learning_rate": 0.00041666666666666664, + "loss": 3.9149, + "step": 450 + }, + { + "epoch": 17.02, + "learning_rate": 0.00038888888888888887, + "loss": 3.7895, + "step": 500 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003611111111111111, + "loss": 3.6784, + "step": 550 + }, + { + "epoch": 21.01, + "learning_rate": 0.0003333333333333333, + "loss": 3.5851, + "step": 600 + }, + { + "epoch": 23.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.4989, + "step": 650 + }, + { + "epoch": 24.02, + "learning_rate": 0.0002777777777777778, + "loss": 3.4216, + "step": 700 + }, + { + "epoch": 26.02, + "learning_rate": 0.00025, + "loss": 3.3544, + "step": 750 + }, + { + "epoch": 28.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.2914, + "step": 800 + }, + { + "epoch": 30.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.2331, + "step": 850 + }, + { + "epoch": 32.0, + "learning_rate": 0.00016666666666666666, + "loss": 3.1797, + "step": 900 + }, + { + "epoch": 33.02, + "learning_rate": 0.0001388888888888889, + "loss": 3.1315, + "step": 950 + }, + { + "epoch": 35.02, + "learning_rate": 0.00011111111111111109, + "loss": 3.0876, + "step": 1000 + }, + { + "epoch": 35.02, + "eval_loss": 4.186110973358154, + "eval_runtime": 4.1625, + "eval_samples_per_second": 48.769, + "eval_steps_per_second": 3.123, + "step": 1000 + }, + { + "epoch": 35.02, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 4.186110973358154, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 65.76652520256718, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 4.1625, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 48.769, + "step": 1000 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.6758133121024e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..bca3042c603d146089deb3309dc6c6828df7acbf --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b2014dac66e89fff48441e3acbe29cace7e30db0b4b50068d0d17e60e27a36 +size 3183 diff --git a/checkpoint-1100/config.json b/checkpoint-1100/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-1100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-1100/optimizer.pt b/checkpoint-1100/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..87869757a19ee96f29f568ebc35efb8e9e2d219e --- /dev/null +++ b/checkpoint-1100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f902e92f67ed4958661c1d2b59a40bcaf5e401b3679867885dfcd8da370a2f99 +size 995604017 diff --git a/checkpoint-1100/pytorch_model.bin b/checkpoint-1100/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..e0b48e6d8d3f965e74dbab99b991ba8a43e8f510 --- /dev/null +++ b/checkpoint-1100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2a78fffc7847c25f3e48b73a71e951309479ace9751d80d5cbc5623ab605707 +size 510396521 diff --git a/checkpoint-1100/rng_state.pth b/checkpoint-1100/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..835232f7356a88572653f8aba01bb5c1cce7db5b --- /dev/null +++ b/checkpoint-1100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e9b5f5bcff132523e6783398262c35bc3d3bdadfce0237853d84370a0d4e70 +size 14567 diff --git a/checkpoint-1100/scaler.pt b/checkpoint-1100/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..37bf049fbd5fd721203bf0238edc8ff67dbd8f94 --- /dev/null +++ b/checkpoint-1100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb16c30b686aa43e110b0d33f9d46bf3127b7124542ca8dc34831233d4675a0 +size 559 diff --git a/checkpoint-1100/scheduler.pt b/checkpoint-1100/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..c158169ec0bb09620952d544fd4b4edea0cc9cf4 --- /dev/null +++ b/checkpoint-1100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1b7713e4bb40428f29080b7d08d4a52f779ac863737861e4724292b2cf6c59 +size 623 diff --git a/checkpoint-1100/special_tokens_map.json b/checkpoint-1100/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1100/tokenizer_config.json b/checkpoint-1100/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1100/trainer_state.json b/checkpoint-1100/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..d0ba979b55eec772f8d72d9956b468ea0c895d4a --- /dev/null +++ b/checkpoint-1100/trainer_state.json @@ -0,0 +1,170 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 39.00666666666667, + "global_step": 1100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0201, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 7.8456, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.4866, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 4.9816, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 4.7462, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 4.6079, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 4.4553, + "step": 300 + }, + { + "epoch": 12.01, + "learning_rate": 0.00047222222222222224, + "loss": 4.2711, + "step": 350 + }, + { + "epoch": 14.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.0704, + "step": 400 + }, + { + "epoch": 16.0, + "learning_rate": 0.00041666666666666664, + "loss": 3.9149, + "step": 450 + }, + { + "epoch": 17.02, + "learning_rate": 0.00038888888888888887, + "loss": 3.7895, + "step": 500 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003611111111111111, + "loss": 3.6784, + "step": 550 + }, + { + "epoch": 21.01, + "learning_rate": 0.0003333333333333333, + "loss": 3.5851, + "step": 600 + }, + { + "epoch": 23.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.4989, + "step": 650 + }, + { + "epoch": 24.02, + "learning_rate": 0.0002777777777777778, + "loss": 3.4216, + "step": 700 + }, + { + "epoch": 26.02, + "learning_rate": 0.00025, + "loss": 3.3544, + "step": 750 + }, + { + "epoch": 28.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.2914, + "step": 800 + }, + { + "epoch": 30.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.2331, + "step": 850 + }, + { + "epoch": 32.0, + "learning_rate": 0.00016666666666666666, + "loss": 3.1797, + "step": 900 + }, + { + "epoch": 33.02, + "learning_rate": 0.0001388888888888889, + "loss": 3.1315, + "step": 950 + }, + { + "epoch": 35.02, + "learning_rate": 0.00011111111111111109, + "loss": 3.0876, + "step": 1000 + }, + { + "epoch": 35.02, + "eval_loss": 4.186110973358154, + "eval_runtime": 4.1625, + "eval_samples_per_second": 48.769, + "eval_steps_per_second": 3.123, + "step": 1000 + }, + { + "epoch": 35.02, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 4.186110973358154, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 65.76652520256718, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 4.1625, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 48.769, + "step": 1000 + }, + { + "epoch": 37.01, + "learning_rate": 8.333333333333333e-05, + "loss": 3.0482, + "step": 1050 + }, + { + "epoch": 39.01, + "learning_rate": 5.5555555555555545e-05, + "loss": 3.013, + "step": 1100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.94339725623296e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1100/training_args.bin b/checkpoint-1100/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..bca3042c603d146089deb3309dc6c6828df7acbf --- /dev/null +++ b/checkpoint-1100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b2014dac66e89fff48441e3acbe29cace7e30db0b4b50068d0d17e60e27a36 +size 3183 diff --git a/checkpoint-1200/config.json b/checkpoint-1200/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-1200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-1200/optimizer.pt b/checkpoint-1200/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..828f7e6f5e4876098bed9c7c54eed19c745afdcf --- /dev/null +++ b/checkpoint-1200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36fd08377f15685acb3124149a4e5c514122ff06dcfeeaa6a7620c358073438d +size 995604017 diff --git a/checkpoint-1200/pytorch_model.bin b/checkpoint-1200/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..1435f70551d88361bd2e84bd9cb3dac06518f183 --- /dev/null +++ b/checkpoint-1200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fd243ed4a838b0199d67b4f4d0099095fd65044f10e04d0d01e4d02edbfddc9 +size 510396521 diff --git a/checkpoint-1200/rng_state.pth b/checkpoint-1200/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..8d0eb57cecbebd8ec1c4819badb527bb03e18276 --- /dev/null +++ b/checkpoint-1200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c550b3b88c995ffb3e43da1e82c6747a8ddc9130936c98f4d879e200cbaf123 +size 14567 diff --git a/checkpoint-1200/scaler.pt b/checkpoint-1200/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..8953dddccbefc4703c09dcda27d83c15add2bade --- /dev/null +++ b/checkpoint-1200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c7277eaca0850ae3e9b6790b3d002d820169cce0671185e672c28c8ae8e056 +size 559 diff --git a/checkpoint-1200/scheduler.pt b/checkpoint-1200/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..310d39c17fc616a9c83286ed00f0f4cefba9f5df --- /dev/null +++ b/checkpoint-1200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:935a8fb09a6e9698d9894853b05e181b3f56098deaaecddde08e55f06bf000c4 +size 623 diff --git a/checkpoint-1200/special_tokens_map.json b/checkpoint-1200/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1200/tokenizer_config.json b/checkpoint-1200/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1200/trainer_state.json b/checkpoint-1200/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..dbd7e92a41d6024394f0fd866e6c97c7f83f5143 --- /dev/null +++ b/checkpoint-1200/trainer_state.json @@ -0,0 +1,182 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 42.02, + "global_step": 1200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0201, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 7.8456, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.4866, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 4.9816, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 4.7462, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 4.6079, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 4.4553, + "step": 300 + }, + { + "epoch": 12.01, + "learning_rate": 0.00047222222222222224, + "loss": 4.2711, + "step": 350 + }, + { + "epoch": 14.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.0704, + "step": 400 + }, + { + "epoch": 16.0, + "learning_rate": 0.00041666666666666664, + "loss": 3.9149, + "step": 450 + }, + { + "epoch": 17.02, + "learning_rate": 0.00038888888888888887, + "loss": 3.7895, + "step": 500 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003611111111111111, + "loss": 3.6784, + "step": 550 + }, + { + "epoch": 21.01, + "learning_rate": 0.0003333333333333333, + "loss": 3.5851, + "step": 600 + }, + { + "epoch": 23.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.4989, + "step": 650 + }, + { + "epoch": 24.02, + "learning_rate": 0.0002777777777777778, + "loss": 3.4216, + "step": 700 + }, + { + "epoch": 26.02, + "learning_rate": 0.00025, + "loss": 3.3544, + "step": 750 + }, + { + "epoch": 28.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.2914, + "step": 800 + }, + { + "epoch": 30.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.2331, + "step": 850 + }, + { + "epoch": 32.0, + "learning_rate": 0.00016666666666666666, + "loss": 3.1797, + "step": 900 + }, + { + "epoch": 33.02, + "learning_rate": 0.0001388888888888889, + "loss": 3.1315, + "step": 950 + }, + { + "epoch": 35.02, + "learning_rate": 0.00011111111111111109, + "loss": 3.0876, + "step": 1000 + }, + { + "epoch": 35.02, + "eval_loss": 4.186110973358154, + "eval_runtime": 4.1625, + "eval_samples_per_second": 48.769, + "eval_steps_per_second": 3.123, + "step": 1000 + }, + { + "epoch": 35.02, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 4.186110973358154, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 65.76652520256718, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 4.1625, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 48.769, + "step": 1000 + }, + { + "epoch": 37.01, + "learning_rate": 8.333333333333333e-05, + "loss": 3.0482, + "step": 1050 + }, + { + "epoch": 39.01, + "learning_rate": 5.5555555555555545e-05, + "loss": 3.013, + "step": 1100 + }, + { + "epoch": 41.0, + "learning_rate": 2.7777777777777772e-05, + "loss": 2.9831, + "step": 1150 + }, + { + "epoch": 42.02, + "learning_rate": 0.0, + "loss": 2.9595, + "step": 1200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 3.21097597452288e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1200/training_args.bin b/checkpoint-1200/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..bca3042c603d146089deb3309dc6c6828df7acbf --- /dev/null +++ b/checkpoint-1200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b2014dac66e89fff48441e3acbe29cace7e30db0b4b50068d0d17e60e27a36 +size 3183 diff --git a/checkpoint-200/config.json b/checkpoint-200/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..5a8049517fe248393740769155a565302e9eea8b --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fe8a28c9cab6577fee7add744ecb39c3c32efd78b25e7145aa00cb20d473bae +size 995603825 diff --git a/checkpoint-200/pytorch_model.bin b/checkpoint-200/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..6ebc2f5b4ddc4a4bb5509eb5b3cd6d3268fe302f --- /dev/null +++ b/checkpoint-200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:335095dfa1ae4f880d5c1a3e63b68bfc0ebcf7cb1776b6222f77c4467c735e94 +size 510396521 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..eccc97527b064b9e61e740a0a5faf9a397c79844 --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28b91001a38068cf6afcdf4abd79620606c7030484896612674e3c89ff077696 +size 14567 diff --git a/checkpoint-200/scaler.pt b/checkpoint-200/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..365b52ebf376498237a843f6d7332e5a49b14902 --- /dev/null +++ b/checkpoint-200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb6982c29cd162f49aeb531674acf574eccd46a8f556bec596040d7c3b95200a +size 559 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..36cb8da739c80a63971a62f06f781c40ac0fceb2 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a992625ada6d884e508ff9392d16738b4a4163147f8fcbf9f46be82ecae9888 +size 623 diff --git a/checkpoint-200/special_tokens_map.json b/checkpoint-200/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-200/tokenizer_config.json b/checkpoint-200/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..5fbda3b513d3d379cce51e16c5161b3ed25c5ad1 --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,46 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.003333333333333, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0201, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 7.8456, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.4866, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 4.9816, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 4.7462, + "step": 200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 5.3516266242048e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..bca3042c603d146089deb3309dc6c6828df7acbf --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b2014dac66e89fff48441e3acbe29cace7e30db0b4b50068d0d17e60e27a36 +size 3183 diff --git a/checkpoint-300/config.json b/checkpoint-300/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-300/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..318367248a8ac77c16c5d7f23f592338120ff98b --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1883d1df7b236b122fd1f4bd914445c41211955a7e9b3b73338094142bdee4eb +size 995604017 diff --git a/checkpoint-300/pytorch_model.bin b/checkpoint-300/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..1b66de7ae027a3620de649007cc8182c814cccae --- /dev/null +++ b/checkpoint-300/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:771f34f6ce867e5bf65f1e4d53749ffd74bda5ea7b18e376fdbd7a5dc422ee34 +size 510396521 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..d8e792bf1c32554d6f6460cabefa697b98db20ff --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4dfde358ec09ea14ac2ceeeb150fb1ecb4a3d78981c4cd2e858711f8a928e55 +size 14567 diff --git a/checkpoint-300/scaler.pt b/checkpoint-300/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..01066cf4761ea9d2f7962f5181762f7b08690b79 --- /dev/null +++ b/checkpoint-300/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0967b9f865f16344c55f5ccc3cf7d6e8e97ca61dda304e931ca6bad130f48dd1 +size 559 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..681987f21e79cd4685aac32a5e6b74341e25d936 --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1db899b266916f792a0898ceb27a87eaf76647f10c29cc0c13ce22f12a12efd +size 623 diff --git a/checkpoint-300/special_tokens_map.json b/checkpoint-300/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-300/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-300/tokenizer_config.json b/checkpoint-300/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-300/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..58ed275fa6d41a87c84289f5ae2188bccf4bcdcd --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,58 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.016666666666667, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0201, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 7.8456, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.4866, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 4.9816, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 4.7462, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 4.6079, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 4.4553, + "step": 300 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 8.027413807104e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..bca3042c603d146089deb3309dc6c6828df7acbf --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b2014dac66e89fff48441e3acbe29cace7e30db0b4b50068d0d17e60e27a36 +size 3183 diff --git a/checkpoint-400/config.json b/checkpoint-400/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-400/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..05e2cfd43e2a97490615b1be7a9246216903567f --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aba3a7aa823336283303b284e58070d9f9789de0ea02db6d4c4dd6659a35219 +size 995604017 diff --git a/checkpoint-400/pytorch_model.bin b/checkpoint-400/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..e99a504eee11776dec96071c53fd23c9161e36c7 --- /dev/null +++ b/checkpoint-400/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:339ac9a75ecfc69d0ef00c262de7c87871cd5c5b69280f3075c296402720f999 +size 510396521 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..097de19620a0d6c720c27334051d1b70c514013a --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9538a3e330e9b15cd4bb2830ad1ed33e94ee30ece72c6c9c772b5213b731dc17 +size 14567 diff --git a/checkpoint-400/scaler.pt b/checkpoint-400/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..9c7aef4199e98d81152810b661dbaffc01963383 --- /dev/null +++ b/checkpoint-400/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476e510c8ea7edbd2b51d1e76a4e037820a5639381c0d8b5d32dafa492795a1e +size 559 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..967673cfc91836d239beb5a7ede06992232ba309 --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db087d678047b5c346bbb8511936612c1fdf223c6fd70321e97369bc31ed76a8 +size 623 diff --git a/checkpoint-400/special_tokens_map.json b/checkpoint-400/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-400/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-400/tokenizer_config.json b/checkpoint-400/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-400/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..0abe2dad9d6a73c1305860613dc6a28ad4821557 --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,70 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 14.006666666666666, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0201, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 7.8456, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.4866, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 4.9816, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 4.7462, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 4.6079, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 4.4553, + "step": 300 + }, + { + "epoch": 12.01, + "learning_rate": 0.00047222222222222224, + "loss": 4.2711, + "step": 350 + }, + { + "epoch": 14.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.0704, + "step": 400 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.07032532484096e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..bca3042c603d146089deb3309dc6c6828df7acbf --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b2014dac66e89fff48441e3acbe29cace7e30db0b4b50068d0d17e60e27a36 +size 3183 diff --git a/checkpoint-500/config.json b/checkpoint-500/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-500/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..e20d6c0073bfdf1395c138cb434138f2d861bfa2 --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fbca283a5a2ac9dc18a1526e9eb33d6a60de08a26b2d50592c91f1a4a8d4cc9 +size 995604017 diff --git a/checkpoint-500/pytorch_model.bin b/checkpoint-500/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..62d8aa0026e3ddd560e087e5292fb83fb9439cce --- /dev/null +++ b/checkpoint-500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb3aab099650efa31622aa013fcaff57590699e15b38c3918c5b4d8dc06ad83f +size 510396521 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..3b0734d7d21edb04901152c14444bcaf28f846db --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e232b1e8c239a6fcdec702104c79892f7f7b5d51d31003c309c1f9ae6f5c2cc9 +size 14567 diff --git a/checkpoint-500/scaler.pt b/checkpoint-500/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..e8b96c9c2837f2c95b1d07d4fc3f245f9ad1ef62 --- /dev/null +++ b/checkpoint-500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa4c7be44c959599b8b43bb9bc3371e9e4e5bbc5758b3ab5afcccfda3e72e67 +size 559 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..77b958181a5b47b022b96b38a6207f274a1b6604 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026fae4a90d56c24de94b10dfa7a75b6ba4e43bd5c1a3fdb2d77356b81cd6f8a +size 623 diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..5c296c998369ba178e737c7316e820aa3d2101e9 --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,82 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 17.02, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0201, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 7.8456, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.4866, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 4.9816, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 4.7462, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 4.6079, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 4.4553, + "step": 300 + }, + { + "epoch": 12.01, + "learning_rate": 0.00047222222222222224, + "loss": 4.2711, + "step": 350 + }, + { + "epoch": 14.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.0704, + "step": 400 + }, + { + "epoch": 16.0, + "learning_rate": 0.00041666666666666664, + "loss": 3.9149, + "step": 450 + }, + { + "epoch": 17.02, + "learning_rate": 0.00038888888888888887, + "loss": 3.7895, + "step": 500 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.33790404313088e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..bca3042c603d146089deb3309dc6c6828df7acbf --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b2014dac66e89fff48441e3acbe29cace7e30db0b4b50068d0d17e60e27a36 +size 3183 diff --git a/checkpoint-600/config.json b/checkpoint-600/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-600/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..138fe7c1cf91521ef242b7dbeceedc3770124463 --- /dev/null +++ b/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab7f52926904f524f9e6888ca28f7cf856d7a14b5defc48e7eea107fe2a3abf9 +size 995604017 diff --git a/checkpoint-600/pytorch_model.bin b/checkpoint-600/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..6294685758010cdf961d2d06d76427eb9b16248e --- /dev/null +++ b/checkpoint-600/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7912375a9ba588d1897b618ca89e58c0ad32a70e778421f62a2dfb384dfaf7a5 +size 510396521 diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..158725b0796dcaaa9d3e7f755f2e3b9085ac6f53 --- /dev/null +++ b/checkpoint-600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa894d6912c078268238ffe889ab4cbcb77041e764329082f03d1ac3e2ed41bb +size 14567 diff --git a/checkpoint-600/scaler.pt b/checkpoint-600/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..f95b3e36da01561ec333a83ee8419ad225633e06 --- /dev/null +++ b/checkpoint-600/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8415b86bbce347c0df306b84a695add049c2a3b2d0b6f4dda3bf036d341150 +size 559 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..62e5359badd619e7dad2c51d98fa8043d9948f0b --- /dev/null +++ b/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700940432b1c2117248896e2ce5a58d93c051d92ea97707f74d76bf1ef24deee +size 623 diff --git a/checkpoint-600/special_tokens_map.json b/checkpoint-600/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-600/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-600/tokenizer_config.json b/checkpoint-600/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-600/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..4dde1315d394dec5843d18e0737b4ff6b064afc6 --- /dev/null +++ b/checkpoint-600/trainer_state.json @@ -0,0 +1,94 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 21.01, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0201, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 7.8456, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.4866, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 4.9816, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 4.7462, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 4.6079, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 4.4553, + "step": 300 + }, + { + "epoch": 12.01, + "learning_rate": 0.00047222222222222224, + "loss": 4.2711, + "step": 350 + }, + { + "epoch": 14.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.0704, + "step": 400 + }, + { + "epoch": 16.0, + "learning_rate": 0.00041666666666666664, + "loss": 3.9149, + "step": 450 + }, + { + "epoch": 17.02, + "learning_rate": 0.00038888888888888887, + "loss": 3.7895, + "step": 500 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003611111111111111, + "loss": 3.6784, + "step": 550 + }, + { + "epoch": 21.01, + "learning_rate": 0.0003333333333333333, + "loss": 3.5851, + "step": 600 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.60548798726144e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..bca3042c603d146089deb3309dc6c6828df7acbf --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b2014dac66e89fff48441e3acbe29cace7e30db0b4b50068d0d17e60e27a36 +size 3183 diff --git a/checkpoint-700/config.json b/checkpoint-700/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-700/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..b1fd940d711cc838ae05396488a7a231d7b3fad3 --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcf544ff79f52f6841d28089fec3ea45d1cd32153342a152f872bd57c51dd59f +size 995604017 diff --git a/checkpoint-700/pytorch_model.bin b/checkpoint-700/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..007c533339af647ff243c35ca1619d1a7415f2b7 --- /dev/null +++ b/checkpoint-700/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65cadbac6209c9ff2d200b2e04600c6e3c318de1db198b64775a46d2d914a0f0 +size 510396521 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..a991b64ebce24a22bb1e6ec5a81ca807b4ae18a4 --- /dev/null +++ b/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4f7f974a51ca09a1978959556b8a39950b0ecf442efc05c55df9a24a45a624f +size 14567 diff --git a/checkpoint-700/scaler.pt b/checkpoint-700/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..12f2b8ec834e54a2bd7cfdd0e07b0c6e125b6490 --- /dev/null +++ b/checkpoint-700/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb213daf5cce18a5f92167ca14da9df084d907f2b9796efc4666630f312b58c +size 559 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..42408b6253265af34ae78746144fbba9316e0d7e --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d4910fd408e002ebeff50d62bfb043dcae5ef658777d0c3ee4a3bbb515ec15 +size 623 diff --git a/checkpoint-700/special_tokens_map.json b/checkpoint-700/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-700/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-700/tokenizer_config.json b/checkpoint-700/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-700/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..4a9a5a502f34e5da81c9771bb71b886aa3ac8445 --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,106 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 24.023333333333333, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0201, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 7.8456, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.4866, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 4.9816, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 4.7462, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 4.6079, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 4.4553, + "step": 300 + }, + { + "epoch": 12.01, + "learning_rate": 0.00047222222222222224, + "loss": 4.2711, + "step": 350 + }, + { + "epoch": 14.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.0704, + "step": 400 + }, + { + "epoch": 16.0, + "learning_rate": 0.00041666666666666664, + "loss": 3.9149, + "step": 450 + }, + { + "epoch": 17.02, + "learning_rate": 0.00038888888888888887, + "loss": 3.7895, + "step": 500 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003611111111111111, + "loss": 3.6784, + "step": 550 + }, + { + "epoch": 21.01, + "learning_rate": 0.0003333333333333333, + "loss": 3.5851, + "step": 600 + }, + { + "epoch": 23.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.4989, + "step": 650 + }, + { + "epoch": 24.02, + "learning_rate": 0.0002777777777777778, + "loss": 3.4216, + "step": 700 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.87306670555136e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..bca3042c603d146089deb3309dc6c6828df7acbf --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b2014dac66e89fff48441e3acbe29cace7e30db0b4b50068d0d17e60e27a36 +size 3183 diff --git a/checkpoint-800/config.json b/checkpoint-800/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-800/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..69415beb52498d979c594b086c87ad5256ded94c --- /dev/null +++ b/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1983b4e5333562549c8fe0dba4ad1dd2babf4f4536a6cbcb110c9876f7ab6e9 +size 995604017 diff --git a/checkpoint-800/pytorch_model.bin b/checkpoint-800/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..b6955e224c4da6f4cab71c474a8f9ed7335dbb8d --- /dev/null +++ b/checkpoint-800/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbb9ce763a5715ca699e4dde7bf7c9a865421d52a340bda70f17de7d92201a07 +size 510396521 diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..2e08a16b0b964546979d871dadc261614c243e46 --- /dev/null +++ b/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92b9e79cf31c5e840671def3cbf6c9ade89ed13b7257061fd9d827f08d941c90 +size 14567 diff --git a/checkpoint-800/scaler.pt b/checkpoint-800/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..b3c73372264156b02df8dada2192ee3c96dd5fc4 --- /dev/null +++ b/checkpoint-800/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2074cdcefbaa0a39f736d6b0f7bf018c350d49e85648bc8accc4f756ad816e +size 559 diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..75713dbce3771306ca00343ecc497c4f19a01d03 --- /dev/null +++ b/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27fb255c84833fb6ab5d93679cb236a569de9a1c4f805f72a2f60a2bc7c7499 +size 623 diff --git a/checkpoint-800/special_tokens_map.json b/checkpoint-800/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-800/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-800/tokenizer_config.json b/checkpoint-800/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-800/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..a47c2ff2b03d2cd3b39bd2da9a3ec1045bfc16e1 --- /dev/null +++ b/checkpoint-800/trainer_state.json @@ -0,0 +1,118 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 28.013333333333332, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0201, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 7.8456, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.4866, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 4.9816, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 4.7462, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 4.6079, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 4.4553, + "step": 300 + }, + { + "epoch": 12.01, + "learning_rate": 0.00047222222222222224, + "loss": 4.2711, + "step": 350 + }, + { + "epoch": 14.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.0704, + "step": 400 + }, + { + "epoch": 16.0, + "learning_rate": 0.00041666666666666664, + "loss": 3.9149, + "step": 450 + }, + { + "epoch": 17.02, + "learning_rate": 0.00038888888888888887, + "loss": 3.7895, + "step": 500 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003611111111111111, + "loss": 3.6784, + "step": 550 + }, + { + "epoch": 21.01, + "learning_rate": 0.0003333333333333333, + "loss": 3.5851, + "step": 600 + }, + { + "epoch": 23.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.4989, + "step": 650 + }, + { + "epoch": 24.02, + "learning_rate": 0.0002777777777777778, + "loss": 3.4216, + "step": 700 + }, + { + "epoch": 26.02, + "learning_rate": 0.00025, + "loss": 3.3544, + "step": 750 + }, + { + "epoch": 28.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.2914, + "step": 800 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.14065064968192e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..bca3042c603d146089deb3309dc6c6828df7acbf --- /dev/null +++ b/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b2014dac66e89fff48441e3acbe29cace7e30db0b4b50068d0d17e60e27a36 +size 3183 diff --git a/checkpoint-900/config.json b/checkpoint-900/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-900/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..57acbb5bea42eaf1144c6e570796e98e96cd25c6 --- /dev/null +++ b/checkpoint-900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b7117ecfed7715a0fe8ed6fc78ea605c30d82951e28561bb9b1586fa93197d3 +size 995604017 diff --git a/checkpoint-900/pytorch_model.bin b/checkpoint-900/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..067dad5d82bb238b5f5fc125d8716414c8175ea2 --- /dev/null +++ b/checkpoint-900/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84bee9fd07e244594e0c298870fb656bbd753bf3ab48dea1f803babc2c255bd5 +size 510396521 diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..0786364e4fee3ee54b8c5060336d6cbbceec51bd --- /dev/null +++ b/checkpoint-900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e0e48b831ea7aaac3bc8b7090cba6fc69d3d3f61ce22b9a49587c2b3b55466e +size 14567 diff --git a/checkpoint-900/scaler.pt b/checkpoint-900/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..be54cb13c777bc6feccb478ff218e7e21fad482a --- /dev/null +++ b/checkpoint-900/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8695f57df923e22b943b0b0f2b9cc7007008e80b53ccee275b3a35963fe67e9 +size 559 diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..539d7c83ea252818dc9cbffac08cf340bb05a454 --- /dev/null +++ b/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cb09e5db72772a15094286e93cbb61d745d9b63863703cf53da0bcb9827821 +size 623 diff --git a/checkpoint-900/special_tokens_map.json b/checkpoint-900/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-900/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-900/tokenizer_config.json b/checkpoint-900/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-900/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..42998ffae27bb5fd20f0a96729e52023eb1dffbd --- /dev/null +++ b/checkpoint-900/trainer_state.json @@ -0,0 +1,130 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 32.00333333333333, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.0201, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 7.8456, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.4866, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 4.9816, + "step": 150 + }, + { + "epoch": 7.0, + "learning_rate": 0.0005555555555555556, + "loss": 4.7462, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 4.6079, + "step": 250 + }, + { + "epoch": 10.02, + "learning_rate": 0.0005, + "loss": 4.4553, + "step": 300 + }, + { + "epoch": 12.01, + "learning_rate": 0.00047222222222222224, + "loss": 4.2711, + "step": 350 + }, + { + "epoch": 14.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.0704, + "step": 400 + }, + { + "epoch": 16.0, + "learning_rate": 0.00041666666666666664, + "loss": 3.9149, + "step": 450 + }, + { + "epoch": 17.02, + "learning_rate": 0.00038888888888888887, + "loss": 3.7895, + "step": 500 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003611111111111111, + "loss": 3.6784, + "step": 550 + }, + { + "epoch": 21.01, + "learning_rate": 0.0003333333333333333, + "loss": 3.5851, + "step": 600 + }, + { + "epoch": 23.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.4989, + "step": 650 + }, + { + "epoch": 24.02, + "learning_rate": 0.0002777777777777778, + "loss": 3.4216, + "step": 700 + }, + { + "epoch": 26.02, + "learning_rate": 0.00025, + "loss": 3.3544, + "step": 750 + }, + { + "epoch": 28.01, + "learning_rate": 0.00022222222222222218, + "loss": 3.2914, + "step": 800 + }, + { + "epoch": 30.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.2331, + "step": 850 + }, + { + "epoch": 32.0, + "learning_rate": 0.00016666666666666666, + "loss": 3.1797, + "step": 900 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.40823459381248e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..bca3042c603d146089deb3309dc6c6828df7acbf --- /dev/null +++ b/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b2014dac66e89fff48441e3acbe29cace7e30db0b4b50068d0d17e60e27a36 +size 3183 diff --git a/config.json b/config.json new file mode 100755 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/metrics.json b/metrics.json new file mode 100755 index 0000000000000000000000000000000000000000..2aa59f1846dfcc74d9d5918e4df7f762eeed55fe --- /dev/null +++ b/metrics.json @@ -0,0 +1,2503 @@ +{"num_parameters": 124439808, "trainable_parameters": 124439808, "step": 0} +{"train_info/time_between_train_steps": 3.6052422523498535, "step": 0} +{"info/global_step": 1, "train_info/time_within_train_step": 28.306174278259277, "step": 1} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 19761.71484375, "train_info/memory_reserved": 22624.0, "train_info/memory_max_reserved": 22624.0, "_timestamp": 1740823057, "_runtime": 54}, "step": 1} +{"logs": {"train/loss": 11.0201, "train/learning_rate": 4.9999999999999996e-06, "train/epoch": 0.0, "_timestamp": 1740823057, "_runtime": 54}, "step": 1} +{"train_info/time_between_train_steps": 0.026265859603881836, "step": 1} +{"info/global_step": 2, "train_info/time_within_train_step": 27.915833711624146, "step": 2} +{"train_info/time_between_train_steps": 0.005372762680053711, "step": 2} +{"info/global_step": 3, "train_info/time_within_train_step": 27.78085207939148, "step": 3} +{"train_info/time_between_train_steps": 0.005351543426513672, "step": 3} +{"info/global_step": 4, "train_info/time_within_train_step": 27.92499089241028, "step": 4} +{"train_info/time_between_train_steps": 0.005388975143432617, "step": 4} +{"info/global_step": 5, "train_info/time_within_train_step": 27.810957193374634, "step": 5} +{"train_info/time_between_train_steps": 0.005454540252685547, "step": 5} +{"info/global_step": 6, "train_info/time_within_train_step": 27.89962077140808, "step": 6} +{"train_info/time_between_train_steps": 0.0056951045989990234, "step": 6} +{"info/global_step": 7, "train_info/time_within_train_step": 27.821972131729126, "step": 7} +{"train_info/time_between_train_steps": 0.005143404006958008, "step": 7} +{"info/global_step": 8, "train_info/time_within_train_step": 27.822343349456787, "step": 8} +{"train_info/time_between_train_steps": 0.005264997482299805, "step": 8} +{"info/global_step": 9, "train_info/time_within_train_step": 27.793473482131958, "step": 9} +{"train_info/time_between_train_steps": 0.004975557327270508, "step": 9} +{"info/global_step": 10, "train_info/time_within_train_step": 27.72933840751648, "step": 10} +{"train_info/time_between_train_steps": 0.005141019821166992, "step": 10} +{"info/global_step": 11, "train_info/time_within_train_step": 27.71853280067444, "step": 11} +{"train_info/time_between_train_steps": 0.0051043033599853516, "step": 11} +{"info/global_step": 12, "train_info/time_within_train_step": 27.773264169692993, "step": 12} +{"train_info/time_between_train_steps": 0.005042314529418945, "step": 12} +{"info/global_step": 13, "train_info/time_within_train_step": 27.72661542892456, "step": 13} +{"train_info/time_between_train_steps": 0.005316972732543945, "step": 13} +{"info/global_step": 14, "train_info/time_within_train_step": 27.753652095794678, "step": 14} +{"train_info/time_between_train_steps": 0.005014896392822266, "step": 14} +{"info/global_step": 15, "train_info/time_within_train_step": 27.7544949054718, "step": 15} +{"train_info/time_between_train_steps": 0.0053632259368896484, "step": 15} +{"info/global_step": 16, "train_info/time_within_train_step": 27.815172910690308, "step": 16} +{"train_info/time_between_train_steps": 0.010040044784545898, "step": 16} +{"info/global_step": 17, "train_info/time_within_train_step": 27.78718066215515, "step": 17} +{"train_info/time_between_train_steps": 0.010289907455444336, "step": 17} +{"info/global_step": 18, "train_info/time_within_train_step": 27.785253047943115, "step": 18} +{"train_info/time_between_train_steps": 0.010444402694702148, "step": 18} +{"info/global_step": 19, "train_info/time_within_train_step": 27.760518789291382, "step": 19} +{"train_info/time_between_train_steps": 0.005131959915161133, "step": 19} +{"info/global_step": 20, "train_info/time_within_train_step": 27.78843903541565, "step": 20} +{"train_info/time_between_train_steps": 0.00519251823425293, "step": 20} +{"info/global_step": 21, "train_info/time_within_train_step": 27.719042539596558, "step": 21} +{"train_info/time_between_train_steps": 0.010291576385498047, "step": 21} +{"info/global_step": 22, "train_info/time_within_train_step": 27.756887197494507, "step": 22} +{"train_info/time_between_train_steps": 0.010204076766967773, "step": 22} +{"info/global_step": 23, "train_info/time_within_train_step": 27.71753764152527, "step": 23} +{"train_info/time_between_train_steps": 0.005140781402587891, "step": 23} +{"info/global_step": 24, "train_info/time_within_train_step": 27.735695123672485, "step": 24} +{"train_info/time_between_train_steps": 0.005156040191650391, "step": 24} +{"info/global_step": 25, "train_info/time_within_train_step": 27.76127028465271, "step": 25} +{"train_info/time_between_train_steps": 0.005408048629760742, "step": 25} +{"info/global_step": 26, "train_info/time_within_train_step": 27.740350008010864, "step": 26} +{"train_info/time_between_train_steps": 0.005242109298706055, "step": 26} +{"info/global_step": 27, "train_info/time_within_train_step": 27.7558012008667, "step": 27} +{"train_info/time_between_train_steps": 0.005690097808837891, "step": 27} +{"info/global_step": 28, "train_info/time_within_train_step": 27.789472341537476, "step": 28} +{"train_info/time_between_train_steps": 0.001615762710571289, "step": 28} +{"train_info/time_between_train_steps": 3.4457578659057617, "step": 28} +{"info/global_step": 29, "train_info/time_within_train_step": 27.713420152664185, "step": 29} +{"train_info/time_between_train_steps": 0.007556915283203125, "step": 29} +{"info/global_step": 30, "train_info/time_within_train_step": 27.908380270004272, "step": 30} +{"train_info/time_between_train_steps": 0.0057833194732666016, "step": 30} +{"info/global_step": 31, "train_info/time_within_train_step": 27.735467195510864, "step": 31} +{"train_info/time_between_train_steps": 0.005136013031005859, "step": 31} +{"info/global_step": 32, "train_info/time_within_train_step": 28.017489194869995, "step": 32} +{"train_info/time_between_train_steps": 0.005509376525878906, "step": 32} +{"info/global_step": 33, "train_info/time_within_train_step": 27.76132345199585, "step": 33} +{"train_info/time_between_train_steps": 0.0057773590087890625, "step": 33} +{"info/global_step": 34, "train_info/time_within_train_step": 27.90687084197998, "step": 34} +{"train_info/time_between_train_steps": 0.005280971527099609, "step": 34} +{"info/global_step": 35, "train_info/time_within_train_step": 27.772091388702393, "step": 35} +{"train_info/time_between_train_steps": 0.0057337284088134766, "step": 35} +{"info/global_step": 36, "train_info/time_within_train_step": 28.846611976623535, "step": 36} +{"train_info/time_between_train_steps": 0.0055141448974609375, "step": 36} +{"info/global_step": 37, "train_info/time_within_train_step": 27.728565216064453, "step": 37} +{"train_info/time_between_train_steps": 0.00514984130859375, "step": 37} +{"info/global_step": 38, "train_info/time_within_train_step": 27.711262464523315, "step": 38} +{"train_info/time_between_train_steps": 0.005135059356689453, "step": 38} +{"info/global_step": 39, "train_info/time_within_train_step": 27.71011972427368, "step": 39} +{"train_info/time_between_train_steps": 0.005075931549072266, "step": 39} +{"info/global_step": 40, "train_info/time_within_train_step": 27.74156641960144, "step": 40} +{"train_info/time_between_train_steps": 0.005389690399169922, "step": 40} +{"info/global_step": 41, "train_info/time_within_train_step": 27.698498249053955, "step": 41} +{"train_info/time_between_train_steps": 0.005129814147949219, "step": 41} +{"info/global_step": 42, "train_info/time_within_train_step": 27.699381589889526, "step": 42} +{"train_info/time_between_train_steps": 0.005113840103149414, "step": 42} +{"info/global_step": 43, "train_info/time_within_train_step": 27.71120262145996, "step": 43} +{"train_info/time_between_train_steps": 0.005162715911865234, "step": 43} +{"info/global_step": 44, "train_info/time_within_train_step": 27.691535711288452, "step": 44} +{"train_info/time_between_train_steps": 0.005071401596069336, "step": 44} +{"info/global_step": 45, "train_info/time_within_train_step": 27.700800895690918, "step": 45} +{"train_info/time_between_train_steps": 0.005290031433105469, "step": 45} +{"info/global_step": 46, "train_info/time_within_train_step": 27.697463274002075, "step": 46} +{"train_info/time_between_train_steps": 0.005062103271484375, "step": 46} +{"info/global_step": 47, "train_info/time_within_train_step": 27.78357458114624, "step": 47} +{"train_info/time_between_train_steps": 0.004996299743652344, "step": 47} +{"info/global_step": 48, "train_info/time_within_train_step": 27.747618436813354, "step": 48} +{"train_info/time_between_train_steps": 0.005247354507446289, "step": 48} +{"info/global_step": 49, "train_info/time_within_train_step": 27.717334747314453, "step": 49} +{"train_info/time_between_train_steps": 0.005135297775268555, "step": 49} +{"info/global_step": 50, "train_info/time_within_train_step": 27.74836015701294, "step": 50} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740824425, "_runtime": 1422}, "step": 50} +{"logs": {"train/loss": 7.8456, "train/learning_rate": 0.00025, "train/epoch": 1.02, "_timestamp": 1740824425, "_runtime": 1422}, "step": 50} +{"train_info/time_between_train_steps": 0.027113676071166992, "step": 50} +{"info/global_step": 51, "train_info/time_within_train_step": 27.708496809005737, "step": 51} +{"train_info/time_between_train_steps": 0.005156993865966797, "step": 51} +{"info/global_step": 52, "train_info/time_within_train_step": 27.722633361816406, "step": 52} +{"train_info/time_between_train_steps": 0.005395650863647461, "step": 52} +{"info/global_step": 53, "train_info/time_within_train_step": 27.73313069343567, "step": 53} +{"train_info/time_between_train_steps": 0.005349397659301758, "step": 53} +{"info/global_step": 54, "train_info/time_within_train_step": 27.710838317871094, "step": 54} +{"train_info/time_between_train_steps": 0.0055239200592041016, "step": 54} +{"info/global_step": 55, "train_info/time_within_train_step": 27.778504133224487, "step": 55} +{"train_info/time_between_train_steps": 0.0059168338775634766, "step": 55} +{"info/global_step": 56, "train_info/time_within_train_step": 27.73719882965088, "step": 56} +{"train_info/time_between_train_steps": 0.0016849040985107422, "step": 56} +{"train_info/time_between_train_steps": 3.5913023948669434, "step": 56} +{"info/global_step": 57, "train_info/time_within_train_step": 27.739450931549072, "step": 57} +{"train_info/time_between_train_steps": 0.005236148834228516, "step": 57} +{"info/global_step": 58, "train_info/time_within_train_step": 27.878474712371826, "step": 58} +{"train_info/time_between_train_steps": 0.005525827407836914, "step": 58} +{"info/global_step": 59, "train_info/time_within_train_step": 27.718663930892944, "step": 59} +{"train_info/time_between_train_steps": 0.00513458251953125, "step": 59} +{"info/global_step": 60, "train_info/time_within_train_step": 27.830882787704468, "step": 60} +{"train_info/time_between_train_steps": 0.010393142700195312, "step": 60} +{"info/global_step": 61, "train_info/time_within_train_step": 27.73845863342285, "step": 61} +{"train_info/time_between_train_steps": 0.0055844783782958984, "step": 61} +{"info/global_step": 62, "train_info/time_within_train_step": 27.83816647529602, "step": 62} +{"train_info/time_between_train_steps": 0.005357027053833008, "step": 62} +{"info/global_step": 63, "train_info/time_within_train_step": 27.873485803604126, "step": 63} +{"train_info/time_between_train_steps": 0.005503177642822266, "step": 63} +{"info/global_step": 64, "train_info/time_within_train_step": 27.803720951080322, "step": 64} +{"train_info/time_between_train_steps": 0.010053157806396484, "step": 64} +{"info/global_step": 65, "train_info/time_within_train_step": 27.70879364013672, "step": 65} +{"train_info/time_between_train_steps": 0.010048151016235352, "step": 65} +{"info/global_step": 66, "train_info/time_within_train_step": 27.708519458770752, "step": 66} +{"train_info/time_between_train_steps": 0.005057334899902344, "step": 66} +{"info/global_step": 67, "train_info/time_within_train_step": 27.690303802490234, "step": 67} +{"train_info/time_between_train_steps": 0.0050356388092041016, "step": 67} +{"info/global_step": 68, "train_info/time_within_train_step": 27.719375371932983, "step": 68} +{"train_info/time_between_train_steps": 0.005095243453979492, "step": 68} +{"info/global_step": 69, "train_info/time_within_train_step": 27.690860271453857, "step": 69} +{"train_info/time_between_train_steps": 0.005024433135986328, "step": 69} +{"info/global_step": 70, "train_info/time_within_train_step": 27.699382305145264, "step": 70} +{"train_info/time_between_train_steps": 0.005037546157836914, "step": 70} +{"info/global_step": 71, "train_info/time_within_train_step": 27.697866916656494, "step": 71} +{"train_info/time_between_train_steps": 0.0050661563873291016, "step": 71} +{"info/global_step": 72, "train_info/time_within_train_step": 27.72448468208313, "step": 72} +{"train_info/time_between_train_steps": 0.004959821701049805, "step": 72} +{"info/global_step": 73, "train_info/time_within_train_step": 27.692836046218872, "step": 73} +{"train_info/time_between_train_steps": 0.005204677581787109, "step": 73} +{"info/global_step": 74, "train_info/time_within_train_step": 27.711772918701172, "step": 74} +{"train_info/time_between_train_steps": 0.004963397979736328, "step": 74} +{"info/global_step": 75, "train_info/time_within_train_step": 27.71631145477295, "step": 75} +{"train_info/time_between_train_steps": 0.005097389221191406, "step": 75} +{"info/global_step": 76, "train_info/time_within_train_step": 27.694385528564453, "step": 76} +{"train_info/time_between_train_steps": 0.005004167556762695, "step": 76} +{"info/global_step": 77, "train_info/time_within_train_step": 27.711626291275024, "step": 77} +{"train_info/time_between_train_steps": 0.0051975250244140625, "step": 77} +{"info/global_step": 78, "train_info/time_within_train_step": 27.708457469940186, "step": 78} +{"train_info/time_between_train_steps": 0.005167484283447266, "step": 78} +{"info/global_step": 79, "train_info/time_within_train_step": 27.79439353942871, "step": 79} +{"train_info/time_between_train_steps": 0.004893064498901367, "step": 79} +{"info/global_step": 80, "train_info/time_within_train_step": 27.70039200782776, "step": 80} +{"train_info/time_between_train_steps": 0.005194187164306641, "step": 80} +{"info/global_step": 81, "train_info/time_within_train_step": 27.74448800086975, "step": 81} +{"train_info/time_between_train_steps": 0.0050563812255859375, "step": 81} +{"info/global_step": 82, "train_info/time_within_train_step": 27.70373225212097, "step": 82} +{"train_info/time_between_train_steps": 0.0051724910736083984, "step": 82} +{"info/global_step": 83, "train_info/time_within_train_step": 27.731984615325928, "step": 83} +{"train_info/time_between_train_steps": 0.005581855773925781, "step": 83} +{"info/global_step": 84, "train_info/time_within_train_step": 27.735081911087036, "step": 84} +{"train_info/time_between_train_steps": 0.0013937950134277344, "step": 84} +{"train_info/time_between_train_steps": 3.905012607574463, "step": 84} +{"info/global_step": 85, "train_info/time_within_train_step": 27.743048429489136, "step": 85} +{"train_info/time_between_train_steps": 0.00503849983215332, "step": 85} +{"info/global_step": 86, "train_info/time_within_train_step": 27.814284563064575, "step": 86} +{"train_info/time_between_train_steps": 0.009536504745483398, "step": 86} +{"info/global_step": 87, "train_info/time_within_train_step": 27.787994861602783, "step": 87} +{"train_info/time_between_train_steps": 0.005285501480102539, "step": 87} +{"info/global_step": 88, "train_info/time_within_train_step": 27.844220876693726, "step": 88} +{"train_info/time_between_train_steps": 0.005364656448364258, "step": 88} +{"info/global_step": 89, "train_info/time_within_train_step": 27.70785665512085, "step": 89} +{"train_info/time_between_train_steps": 0.005304098129272461, "step": 89} +{"info/global_step": 90, "train_info/time_within_train_step": 27.85020661354065, "step": 90} +{"train_info/time_between_train_steps": 0.0052797794342041016, "step": 90} +{"info/global_step": 91, "train_info/time_within_train_step": 27.702523946762085, "step": 91} +{"train_info/time_between_train_steps": 0.0053751468658447266, "step": 91} +{"info/global_step": 92, "train_info/time_within_train_step": 27.793845176696777, "step": 92} +{"train_info/time_between_train_steps": 0.00528717041015625, "step": 92} +{"info/global_step": 93, "train_info/time_within_train_step": 27.755812644958496, "step": 93} +{"train_info/time_between_train_steps": 0.004891872406005859, "step": 93} +{"info/global_step": 94, "train_info/time_within_train_step": 27.804841995239258, "step": 94} +{"train_info/time_between_train_steps": 0.005080699920654297, "step": 94} +{"info/global_step": 95, "train_info/time_within_train_step": 27.688257217407227, "step": 95} +{"train_info/time_between_train_steps": 0.009843587875366211, "step": 95} +{"info/global_step": 96, "train_info/time_within_train_step": 27.68920874595642, "step": 96} +{"train_info/time_between_train_steps": 0.005047798156738281, "step": 96} +{"info/global_step": 97, "train_info/time_within_train_step": 27.692580699920654, "step": 97} +{"train_info/time_between_train_steps": 0.0050640106201171875, "step": 97} +{"info/global_step": 98, "train_info/time_within_train_step": 27.737404108047485, "step": 98} +{"train_info/time_between_train_steps": 0.005018472671508789, "step": 98} +{"info/global_step": 99, "train_info/time_within_train_step": 27.69377565383911, "step": 99} +{"train_info/time_between_train_steps": 0.005136728286743164, "step": 99} +{"info/global_step": 100, "train_info/time_within_train_step": 27.69162917137146, "step": 100} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740825821, "_runtime": 2818}, "step": 100} +{"logs": {"train/loss": 5.4866, "train/learning_rate": 0.0005, "train/epoch": 3.01, "_timestamp": 1740825821, "_runtime": 2818}, "step": 100} +{"train_info/time_between_train_steps": 39.56464099884033, "step": 100} +{"info/global_step": 101, "train_info/time_within_train_step": 27.739362716674805, "step": 101} +{"train_info/time_between_train_steps": 0.0050618648529052734, "step": 101} +{"info/global_step": 102, "train_info/time_within_train_step": 27.685760021209717, "step": 102} +{"train_info/time_between_train_steps": 0.0051877498626708984, "step": 102} +{"info/global_step": 103, "train_info/time_within_train_step": 27.6888370513916, "step": 103} +{"train_info/time_between_train_steps": 0.004950046539306641, "step": 103} +{"info/global_step": 104, "train_info/time_within_train_step": 27.719547748565674, "step": 104} +{"train_info/time_between_train_steps": 0.005281686782836914, "step": 104} +{"info/global_step": 105, "train_info/time_within_train_step": 27.75596570968628, "step": 105} +{"train_info/time_between_train_steps": 0.005055904388427734, "step": 105} +{"info/global_step": 106, "train_info/time_within_train_step": 27.704564094543457, "step": 106} +{"train_info/time_between_train_steps": 0.005119800567626953, "step": 106} +{"info/global_step": 107, "train_info/time_within_train_step": 27.705071449279785, "step": 107} +{"train_info/time_between_train_steps": 0.009870767593383789, "step": 107} +{"info/global_step": 108, "train_info/time_within_train_step": 27.701624393463135, "step": 108} +{"train_info/time_between_train_steps": 0.010206460952758789, "step": 108} +{"info/global_step": 109, "train_info/time_within_train_step": 27.748356819152832, "step": 109} +{"train_info/time_between_train_steps": 0.010223388671875, "step": 109} +{"info/global_step": 110, "train_info/time_within_train_step": 27.822093725204468, "step": 110} +{"train_info/time_between_train_steps": 0.005299568176269531, "step": 110} +{"info/global_step": 111, "train_info/time_within_train_step": 27.725512981414795, "step": 111} +{"train_info/time_between_train_steps": 0.0062062740325927734, "step": 111} +{"info/global_step": 112, "train_info/time_within_train_step": 27.73716640472412, "step": 112} +{"train_info/time_between_train_steps": 0.0015895366668701172, "step": 112} +{"train_info/time_between_train_steps": 3.351865291595459, "step": 112} +{"info/global_step": 113, "train_info/time_within_train_step": 27.719728231430054, "step": 113} +{"train_info/time_between_train_steps": 0.005052089691162109, "step": 113} +{"info/global_step": 114, "train_info/time_within_train_step": 27.84002685546875, "step": 114} +{"train_info/time_between_train_steps": 0.005414724349975586, "step": 114} +{"info/global_step": 115, "train_info/time_within_train_step": 27.718138456344604, "step": 115} +{"train_info/time_between_train_steps": 0.0051021575927734375, "step": 115} +{"info/global_step": 116, "train_info/time_within_train_step": 27.881017923355103, "step": 116} +{"train_info/time_between_train_steps": 0.0052814483642578125, "step": 116} +{"info/global_step": 117, "train_info/time_within_train_step": 27.737507343292236, "step": 117} +{"train_info/time_between_train_steps": 0.005729198455810547, "step": 117} +{"info/global_step": 118, "train_info/time_within_train_step": 27.92425513267517, "step": 118} +{"train_info/time_between_train_steps": 0.0062408447265625, "step": 118} +{"info/global_step": 119, "train_info/time_within_train_step": 27.743521213531494, "step": 119} +{"train_info/time_between_train_steps": 0.010673999786376953, "step": 119} +{"info/global_step": 120, "train_info/time_within_train_step": 27.829177618026733, "step": 120} +{"train_info/time_between_train_steps": 0.0054552555084228516, "step": 120} +{"info/global_step": 121, "train_info/time_within_train_step": 27.744171619415283, "step": 121} +{"train_info/time_between_train_steps": 0.00502324104309082, "step": 121} +{"info/global_step": 122, "train_info/time_within_train_step": 27.69818663597107, "step": 122} +{"train_info/time_between_train_steps": 0.005121707916259766, "step": 122} +{"info/global_step": 123, "train_info/time_within_train_step": 27.726377248764038, "step": 123} +{"train_info/time_between_train_steps": 0.005019664764404297, "step": 123} +{"info/global_step": 124, "train_info/time_within_train_step": 27.716321229934692, "step": 124} +{"train_info/time_between_train_steps": 0.0054857730865478516, "step": 124} +{"info/global_step": 125, "train_info/time_within_train_step": 27.809871435165405, "step": 125} +{"train_info/time_between_train_steps": 0.004968404769897461, "step": 125} +{"info/global_step": 126, "train_info/time_within_train_step": 27.70608377456665, "step": 126} +{"train_info/time_between_train_steps": 0.00508880615234375, "step": 126} +{"info/global_step": 127, "train_info/time_within_train_step": 27.724413871765137, "step": 127} +{"train_info/time_between_train_steps": 0.005246400833129883, "step": 127} +{"info/global_step": 128, "train_info/time_within_train_step": 27.713305234909058, "step": 128} +{"train_info/time_between_train_steps": 0.004941463470458984, "step": 128} +{"info/global_step": 129, "train_info/time_within_train_step": 27.74606680870056, "step": 129} +{"train_info/time_between_train_steps": 0.005195140838623047, "step": 129} +{"info/global_step": 130, "train_info/time_within_train_step": 27.696228504180908, "step": 130} +{"train_info/time_between_train_steps": 0.004984617233276367, "step": 130} +{"info/global_step": 131, "train_info/time_within_train_step": 27.711559057235718, "step": 131} +{"train_info/time_between_train_steps": 0.005118131637573242, "step": 131} +{"info/global_step": 132, "train_info/time_within_train_step": 27.697635889053345, "step": 132} +{"train_info/time_between_train_steps": 0.005171775817871094, "step": 132} +{"info/global_step": 133, "train_info/time_within_train_step": 27.69790029525757, "step": 133} +{"train_info/time_between_train_steps": 0.005095720291137695, "step": 133} +{"info/global_step": 134, "train_info/time_within_train_step": 27.703453540802002, "step": 134} +{"train_info/time_between_train_steps": 0.005269050598144531, "step": 134} +{"info/global_step": 135, "train_info/time_within_train_step": 27.733638286590576, "step": 135} +{"train_info/time_between_train_steps": 0.013822078704833984, "step": 135} +{"info/global_step": 136, "train_info/time_within_train_step": 27.72551441192627, "step": 136} +{"train_info/time_between_train_steps": 0.005263328552246094, "step": 136} +{"info/global_step": 137, "train_info/time_within_train_step": 27.70936155319214, "step": 137} +{"train_info/time_between_train_steps": 0.005396366119384766, "step": 137} +{"info/global_step": 138, "train_info/time_within_train_step": 27.715844869613647, "step": 138} +{"train_info/time_between_train_steps": 0.00537109375, "step": 138} +{"info/global_step": 139, "train_info/time_within_train_step": 29.003594398498535, "step": 139} +{"train_info/time_between_train_steps": 0.006037473678588867, "step": 139} +{"info/global_step": 140, "train_info/time_within_train_step": 28.019713401794434, "step": 140} +{"train_info/time_between_train_steps": 0.0019190311431884766, "step": 140} +{"train_info/time_between_train_steps": 6.14596700668335, "step": 140} +{"info/global_step": 141, "train_info/time_within_train_step": 29.582706928253174, "step": 141} +{"train_info/time_between_train_steps": 0.005754232406616211, "step": 141} +{"info/global_step": 142, "train_info/time_within_train_step": 27.858425617218018, "step": 142} +{"train_info/time_between_train_steps": 0.005230903625488281, "step": 142} +{"info/global_step": 143, "train_info/time_within_train_step": 27.74598979949951, "step": 143} +{"train_info/time_between_train_steps": 0.005360603332519531, "step": 143} +{"info/global_step": 144, "train_info/time_within_train_step": 27.89507246017456, "step": 144} +{"train_info/time_between_train_steps": 0.005273103713989258, "step": 144} +{"info/global_step": 145, "train_info/time_within_train_step": 27.70950174331665, "step": 145} +{"train_info/time_between_train_steps": 0.005227327346801758, "step": 145} +{"info/global_step": 146, "train_info/time_within_train_step": 27.92802095413208, "step": 146} +{"train_info/time_between_train_steps": 0.005249977111816406, "step": 146} +{"info/global_step": 147, "train_info/time_within_train_step": 27.71375823020935, "step": 147} +{"train_info/time_between_train_steps": 0.01006317138671875, "step": 147} +{"info/global_step": 148, "train_info/time_within_train_step": 27.833178758621216, "step": 148} +{"train_info/time_between_train_steps": 0.005327701568603516, "step": 148} +{"info/global_step": 149, "train_info/time_within_train_step": 27.737983465194702, "step": 149} +{"train_info/time_between_train_steps": 0.010071754455566406, "step": 149} +{"info/global_step": 150, "train_info/time_within_train_step": 27.772772073745728, "step": 150} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740827264, "_runtime": 4261}, "step": 150} +{"logs": {"train/loss": 4.9816, "train/learning_rate": 0.0005833333333333333, "train/epoch": 5.01, "_timestamp": 1740827264, "_runtime": 4261}, "step": 150} +{"train_info/time_between_train_steps": 0.025712251663208008, "step": 150} +{"info/global_step": 151, "train_info/time_within_train_step": 27.697516679763794, "step": 151} +{"train_info/time_between_train_steps": 0.0059549808502197266, "step": 151} +{"info/global_step": 152, "train_info/time_within_train_step": 27.70101237297058, "step": 152} +{"train_info/time_between_train_steps": 0.005465984344482422, "step": 152} +{"info/global_step": 153, "train_info/time_within_train_step": 27.790162801742554, "step": 153} +{"train_info/time_between_train_steps": 0.004935026168823242, "step": 153} +{"info/global_step": 154, "train_info/time_within_train_step": 27.70830488204956, "step": 154} +{"train_info/time_between_train_steps": 0.00510406494140625, "step": 154} +{"info/global_step": 155, "train_info/time_within_train_step": 27.708121061325073, "step": 155} +{"train_info/time_between_train_steps": 0.005185365676879883, "step": 155} +{"info/global_step": 156, "train_info/time_within_train_step": 27.75645160675049, "step": 156} +{"train_info/time_between_train_steps": 0.005307674407958984, "step": 156} +{"info/global_step": 157, "train_info/time_within_train_step": 27.828431129455566, "step": 157} +{"train_info/time_between_train_steps": 0.005241870880126953, "step": 157} +{"info/global_step": 158, "train_info/time_within_train_step": 27.71593689918518, "step": 158} +{"train_info/time_between_train_steps": 0.0049974918365478516, "step": 158} +{"info/global_step": 159, "train_info/time_within_train_step": 27.75526213645935, "step": 159} +{"train_info/time_between_train_steps": 0.0053441524505615234, "step": 159} +{"info/global_step": 160, "train_info/time_within_train_step": 27.69250798225403, "step": 160} +{"train_info/time_between_train_steps": 0.005082130432128906, "step": 160} +{"info/global_step": 161, "train_info/time_within_train_step": 27.699613571166992, "step": 161} +{"train_info/time_between_train_steps": 0.005092620849609375, "step": 161} +{"info/global_step": 162, "train_info/time_within_train_step": 27.700764179229736, "step": 162} +{"train_info/time_between_train_steps": 0.0051593780517578125, "step": 162} +{"info/global_step": 163, "train_info/time_within_train_step": 27.71697211265564, "step": 163} +{"train_info/time_between_train_steps": 0.005063295364379883, "step": 163} +{"info/global_step": 164, "train_info/time_within_train_step": 27.70611310005188, "step": 164} +{"train_info/time_between_train_steps": 0.005463600158691406, "step": 164} +{"info/global_step": 165, "train_info/time_within_train_step": 27.70601511001587, "step": 165} +{"train_info/time_between_train_steps": 0.005258798599243164, "step": 165} +{"info/global_step": 166, "train_info/time_within_train_step": 27.857712745666504, "step": 166} +{"train_info/time_between_train_steps": 0.005395174026489258, "step": 166} +{"info/global_step": 167, "train_info/time_within_train_step": 27.75184154510498, "step": 167} +{"train_info/time_between_train_steps": 0.0062105655670166016, "step": 167} +{"info/global_step": 168, "train_info/time_within_train_step": 27.746352434158325, "step": 168} +{"train_info/time_between_train_steps": 0.0017669200897216797, "step": 168} +{"train_info/time_between_train_steps": 3.5875701904296875, "step": 168} +{"info/global_step": 169, "train_info/time_within_train_step": 27.764508485794067, "step": 169} +{"train_info/time_between_train_steps": 0.005332469940185547, "step": 169} +{"info/global_step": 170, "train_info/time_within_train_step": 27.857368230819702, "step": 170} +{"train_info/time_between_train_steps": 0.005234479904174805, "step": 170} +{"info/global_step": 171, "train_info/time_within_train_step": 27.70790410041809, "step": 171} +{"train_info/time_between_train_steps": 0.005182981491088867, "step": 171} +{"info/global_step": 172, "train_info/time_within_train_step": 27.964198350906372, "step": 172} +{"train_info/time_between_train_steps": 0.0052967071533203125, "step": 172} +{"info/global_step": 173, "train_info/time_within_train_step": 27.700034618377686, "step": 173} +{"train_info/time_between_train_steps": 0.0053255558013916016, "step": 173} +{"info/global_step": 174, "train_info/time_within_train_step": 27.89145064353943, "step": 174} +{"train_info/time_between_train_steps": 0.005527496337890625, "step": 174} +{"info/global_step": 175, "train_info/time_within_train_step": 27.800476789474487, "step": 175} +{"train_info/time_between_train_steps": 0.005186319351196289, "step": 175} +{"info/global_step": 176, "train_info/time_within_train_step": 27.820345878601074, "step": 176} +{"train_info/time_between_train_steps": 0.0053577423095703125, "step": 176} +{"info/global_step": 177, "train_info/time_within_train_step": 27.73871612548828, "step": 177} +{"train_info/time_between_train_steps": 0.0050525665283203125, "step": 177} +{"info/global_step": 178, "train_info/time_within_train_step": 27.694300651550293, "step": 178} +{"train_info/time_between_train_steps": 0.005120515823364258, "step": 178} +{"info/global_step": 179, "train_info/time_within_train_step": 27.697869539260864, "step": 179} +{"train_info/time_between_train_steps": 0.0049741268157958984, "step": 179} +{"info/global_step": 180, "train_info/time_within_train_step": 27.727925062179565, "step": 180} +{"train_info/time_between_train_steps": 0.005095481872558594, "step": 180} +{"info/global_step": 181, "train_info/time_within_train_step": 27.686463832855225, "step": 181} +{"train_info/time_between_train_steps": 0.009554862976074219, "step": 181} +{"info/global_step": 182, "train_info/time_within_train_step": 27.690966844558716, "step": 182} +{"train_info/time_between_train_steps": 0.009850502014160156, "step": 182} +{"info/global_step": 183, "train_info/time_within_train_step": 27.705030918121338, "step": 183} +{"train_info/time_between_train_steps": 0.004876136779785156, "step": 183} +{"info/global_step": 184, "train_info/time_within_train_step": 27.686469078063965, "step": 184} +{"train_info/time_between_train_steps": 0.0049970149993896484, "step": 184} +{"info/global_step": 185, "train_info/time_within_train_step": 27.68359875679016, "step": 185} +{"train_info/time_between_train_steps": 0.005041360855102539, "step": 185} +{"info/global_step": 186, "train_info/time_within_train_step": 27.697149515151978, "step": 186} +{"train_info/time_between_train_steps": 0.004897356033325195, "step": 186} +{"info/global_step": 187, "train_info/time_within_train_step": 27.725845336914062, "step": 187} +{"train_info/time_between_train_steps": 0.0050122737884521484, "step": 187} +{"info/global_step": 188, "train_info/time_within_train_step": 27.77428388595581, "step": 188} +{"train_info/time_between_train_steps": 0.004947185516357422, "step": 188} +{"info/global_step": 189, "train_info/time_within_train_step": 27.71755814552307, "step": 189} +{"train_info/time_between_train_steps": 0.005152702331542969, "step": 189} +{"info/global_step": 190, "train_info/time_within_train_step": 27.687400817871094, "step": 190} +{"train_info/time_between_train_steps": 0.005053281784057617, "step": 190} +{"info/global_step": 191, "train_info/time_within_train_step": 27.72167706489563, "step": 191} +{"train_info/time_between_train_steps": 0.0050122737884521484, "step": 191} +{"info/global_step": 192, "train_info/time_within_train_step": 27.84461283683777, "step": 192} +{"train_info/time_between_train_steps": 0.0052525997161865234, "step": 192} +{"info/global_step": 193, "train_info/time_within_train_step": 27.716827630996704, "step": 193} +{"train_info/time_between_train_steps": 0.0051233768463134766, "step": 193} +{"info/global_step": 194, "train_info/time_within_train_step": 27.722903966903687, "step": 194} +{"train_info/time_between_train_steps": 0.005633831024169922, "step": 194} +{"info/global_step": 195, "train_info/time_within_train_step": 27.789586544036865, "step": 195} +{"train_info/time_between_train_steps": 0.010677099227905273, "step": 195} +{"info/global_step": 196, "train_info/time_within_train_step": 27.761637926101685, "step": 196} +{"train_info/time_between_train_steps": 0.0015959739685058594, "step": 196} +{"train_info/time_between_train_steps": 3.2239444255828857, "step": 196} +{"info/global_step": 197, "train_info/time_within_train_step": 27.748108863830566, "step": 197} +{"train_info/time_between_train_steps": 0.005095243453979492, "step": 197} +{"info/global_step": 198, "train_info/time_within_train_step": 27.81316900253296, "step": 198} +{"train_info/time_between_train_steps": 0.004945278167724609, "step": 198} +{"info/global_step": 199, "train_info/time_within_train_step": 27.70173740386963, "step": 199} +{"train_info/time_between_train_steps": 0.0054607391357421875, "step": 199} +{"info/global_step": 200, "train_info/time_within_train_step": 27.841782331466675, "step": 200} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740828659, "_runtime": 5656}, "step": 200} +{"logs": {"train/loss": 4.7462, "train/learning_rate": 0.0005555555555555556, "train/epoch": 7.0, "_timestamp": 1740828659, "_runtime": 5656}, "step": 200} +{"train_info/time_between_train_steps": 40.940735816955566, "step": 200} +{"info/global_step": 201, "train_info/time_within_train_step": 27.71708393096924, "step": 201} +{"train_info/time_between_train_steps": 0.0051424503326416016, "step": 201} +{"info/global_step": 202, "train_info/time_within_train_step": 27.882899045944214, "step": 202} +{"train_info/time_between_train_steps": 0.0052754878997802734, "step": 202} +{"info/global_step": 203, "train_info/time_within_train_step": 27.712076663970947, "step": 203} +{"train_info/time_between_train_steps": 0.00529170036315918, "step": 203} +{"info/global_step": 204, "train_info/time_within_train_step": 27.89966893196106, "step": 204} +{"train_info/time_between_train_steps": 0.005300998687744141, "step": 204} +{"info/global_step": 205, "train_info/time_within_train_step": 27.808109045028687, "step": 205} +{"train_info/time_between_train_steps": 0.009169816970825195, "step": 205} +{"info/global_step": 206, "train_info/time_within_train_step": 27.69108271598816, "step": 206} +{"train_info/time_between_train_steps": 0.00972127914428711, "step": 206} +{"info/global_step": 207, "train_info/time_within_train_step": 27.704325437545776, "step": 207} +{"train_info/time_between_train_steps": 0.005242347717285156, "step": 207} +{"info/global_step": 208, "train_info/time_within_train_step": 27.692498445510864, "step": 208} +{"train_info/time_between_train_steps": 0.009759664535522461, "step": 208} +{"info/global_step": 209, "train_info/time_within_train_step": 27.705087900161743, "step": 209} +{"train_info/time_between_train_steps": 0.00518035888671875, "step": 209} +{"info/global_step": 210, "train_info/time_within_train_step": 27.710387468338013, "step": 210} +{"train_info/time_between_train_steps": 0.005004405975341797, "step": 210} +{"info/global_step": 211, "train_info/time_within_train_step": 27.739455699920654, "step": 211} +{"train_info/time_between_train_steps": 0.005051851272583008, "step": 211} +{"info/global_step": 212, "train_info/time_within_train_step": 27.69718313217163, "step": 212} +{"train_info/time_between_train_steps": 0.005100250244140625, "step": 212} +{"info/global_step": 213, "train_info/time_within_train_step": 27.699547290802002, "step": 213} +{"train_info/time_between_train_steps": 0.005053281784057617, "step": 213} +{"info/global_step": 214, "train_info/time_within_train_step": 27.711347818374634, "step": 214} +{"train_info/time_between_train_steps": 0.005253791809082031, "step": 214} +{"info/global_step": 215, "train_info/time_within_train_step": 27.686366319656372, "step": 215} +{"train_info/time_between_train_steps": 0.004966020584106445, "step": 215} +{"info/global_step": 216, "train_info/time_within_train_step": 27.684874057769775, "step": 216} +{"train_info/time_between_train_steps": 0.005079030990600586, "step": 216} +{"info/global_step": 217, "train_info/time_within_train_step": 27.701635122299194, "step": 217} +{"train_info/time_between_train_steps": 0.005189657211303711, "step": 217} +{"info/global_step": 218, "train_info/time_within_train_step": 27.724916219711304, "step": 218} +{"train_info/time_between_train_steps": 0.00496363639831543, "step": 218} +{"info/global_step": 219, "train_info/time_within_train_step": 27.78380012512207, "step": 219} +{"train_info/time_between_train_steps": 0.005084514617919922, "step": 219} +{"info/global_step": 220, "train_info/time_within_train_step": 27.71561884880066, "step": 220} +{"train_info/time_between_train_steps": 0.005199432373046875, "step": 220} +{"info/global_step": 221, "train_info/time_within_train_step": 27.699029445648193, "step": 221} +{"train_info/time_between_train_steps": 0.005192279815673828, "step": 221} +{"info/global_step": 222, "train_info/time_within_train_step": 27.76396107673645, "step": 222} +{"train_info/time_between_train_steps": 0.005364179611206055, "step": 222} +{"info/global_step": 223, "train_info/time_within_train_step": 27.743305921554565, "step": 223} +{"train_info/time_between_train_steps": 0.005518198013305664, "step": 223} +{"info/global_step": 224, "train_info/time_within_train_step": 27.780107736587524, "step": 224} +{"train_info/time_between_train_steps": 0.0017504692077636719, "step": 224} +{"train_info/time_between_train_steps": 3.3490002155303955, "step": 224} +{"info/global_step": 225, "train_info/time_within_train_step": 27.714378356933594, "step": 225} +{"train_info/time_between_train_steps": 0.004884958267211914, "step": 225} +{"info/global_step": 226, "train_info/time_within_train_step": 27.890945434570312, "step": 226} +{"train_info/time_between_train_steps": 0.00515294075012207, "step": 226} +{"info/global_step": 227, "train_info/time_within_train_step": 27.723843812942505, "step": 227} +{"train_info/time_between_train_steps": 0.009637594223022461, "step": 227} +{"info/global_step": 228, "train_info/time_within_train_step": 27.818846464157104, "step": 228} +{"train_info/time_between_train_steps": 0.005101919174194336, "step": 228} +{"info/global_step": 229, "train_info/time_within_train_step": 27.6976056098938, "step": 229} +{"train_info/time_between_train_steps": 0.005288124084472656, "step": 229} +{"info/global_step": 230, "train_info/time_within_train_step": 27.8833167552948, "step": 230} +{"train_info/time_between_train_steps": 0.0051403045654296875, "step": 230} +{"info/global_step": 231, "train_info/time_within_train_step": 27.71744966506958, "step": 231} +{"train_info/time_between_train_steps": 0.0050733089447021484, "step": 231} +{"info/global_step": 232, "train_info/time_within_train_step": 27.793073892593384, "step": 232} +{"train_info/time_between_train_steps": 0.005218029022216797, "step": 232} +{"info/global_step": 233, "train_info/time_within_train_step": 27.722352266311646, "step": 233} +{"train_info/time_between_train_steps": 0.004911184310913086, "step": 233} +{"info/global_step": 234, "train_info/time_within_train_step": 27.68429136276245, "step": 234} +{"train_info/time_between_train_steps": 0.005029916763305664, "step": 234} +{"info/global_step": 235, "train_info/time_within_train_step": 27.791194200515747, "step": 235} +{"train_info/time_between_train_steps": 0.005021333694458008, "step": 235} +{"info/global_step": 236, "train_info/time_within_train_step": 27.685101747512817, "step": 236} +{"train_info/time_between_train_steps": 0.004984617233276367, "step": 236} +{"info/global_step": 237, "train_info/time_within_train_step": 27.68641185760498, "step": 237} +{"train_info/time_between_train_steps": 0.005111217498779297, "step": 237} +{"info/global_step": 238, "train_info/time_within_train_step": 27.69443988800049, "step": 238} +{"train_info/time_between_train_steps": 0.004939079284667969, "step": 238} +{"info/global_step": 239, "train_info/time_within_train_step": 27.684537887573242, "step": 239} +{"train_info/time_between_train_steps": 0.004969596862792969, "step": 239} +{"info/global_step": 240, "train_info/time_within_train_step": 27.691861867904663, "step": 240} +{"train_info/time_between_train_steps": 0.005124807357788086, "step": 240} +{"info/global_step": 241, "train_info/time_within_train_step": 27.741676807403564, "step": 241} +{"train_info/time_between_train_steps": 0.004942655563354492, "step": 241} +{"info/global_step": 242, "train_info/time_within_train_step": 27.68910026550293, "step": 242} +{"train_info/time_between_train_steps": 0.0050313472747802734, "step": 242} +{"info/global_step": 243, "train_info/time_within_train_step": 27.74131488800049, "step": 243} +{"train_info/time_between_train_steps": 0.0049283504486083984, "step": 243} +{"info/global_step": 244, "train_info/time_within_train_step": 27.87916374206543, "step": 244} +{"train_info/time_between_train_steps": 0.005243778228759766, "step": 244} +{"info/global_step": 245, "train_info/time_within_train_step": 29.432770252227783, "step": 245} +{"train_info/time_between_train_steps": 0.005312204360961914, "step": 245} +{"info/global_step": 246, "train_info/time_within_train_step": 28.10598134994507, "step": 246} +{"train_info/time_between_train_steps": 0.005156040191650391, "step": 246} +{"info/global_step": 247, "train_info/time_within_train_step": 27.724334001541138, "step": 247} +{"train_info/time_between_train_steps": 0.0052928924560546875, "step": 247} +{"info/global_step": 248, "train_info/time_within_train_step": 27.7454936504364, "step": 248} +{"train_info/time_between_train_steps": 0.0050585269927978516, "step": 248} +{"info/global_step": 249, "train_info/time_within_train_step": 27.694144248962402, "step": 249} +{"train_info/time_between_train_steps": 0.005307674407958984, "step": 249} +{"info/global_step": 250, "train_info/time_within_train_step": 27.85146427154541, "step": 250} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740830096, "_runtime": 7093}, "step": 250} +{"logs": {"train/loss": 4.6079, "train/learning_rate": 0.0005277777777777777, "train/epoch": 8.02, "_timestamp": 1740830096, "_runtime": 7093}, "step": 250} +{"train_info/time_between_train_steps": 0.02572011947631836, "step": 250} +{"info/global_step": 251, "train_info/time_within_train_step": 27.712528467178345, "step": 251} +{"train_info/time_between_train_steps": 0.0055768489837646484, "step": 251} +{"info/global_step": 252, "train_info/time_within_train_step": 27.730660915374756, "step": 252} +{"train_info/time_between_train_steps": 0.0016086101531982422, "step": 252} +{"train_info/time_between_train_steps": 3.501786231994629, "step": 252} +{"info/global_step": 253, "train_info/time_within_train_step": 27.690714120864868, "step": 253} +{"train_info/time_between_train_steps": 0.004881858825683594, "step": 253} +{"info/global_step": 254, "train_info/time_within_train_step": 27.845524787902832, "step": 254} +{"train_info/time_between_train_steps": 0.00501561164855957, "step": 254} +{"info/global_step": 255, "train_info/time_within_train_step": 27.73601770401001, "step": 255} +{"train_info/time_between_train_steps": 0.005357503890991211, "step": 255} +{"info/global_step": 256, "train_info/time_within_train_step": 27.86463952064514, "step": 256} +{"train_info/time_between_train_steps": 0.00513768196105957, "step": 256} +{"info/global_step": 257, "train_info/time_within_train_step": 27.696871519088745, "step": 257} +{"train_info/time_between_train_steps": 0.005297422409057617, "step": 257} +{"info/global_step": 258, "train_info/time_within_train_step": 27.86182427406311, "step": 258} +{"train_info/time_between_train_steps": 0.005403280258178711, "step": 258} +{"info/global_step": 259, "train_info/time_within_train_step": 27.700433492660522, "step": 259} +{"train_info/time_between_train_steps": 0.00514674186706543, "step": 259} +{"info/global_step": 260, "train_info/time_within_train_step": 27.790852546691895, "step": 260} +{"train_info/time_between_train_steps": 0.005325794219970703, "step": 260} +{"info/global_step": 261, "train_info/time_within_train_step": 27.71757674217224, "step": 261} +{"train_info/time_between_train_steps": 0.004836559295654297, "step": 261} +{"info/global_step": 262, "train_info/time_within_train_step": 27.693193435668945, "step": 262} +{"train_info/time_between_train_steps": 0.004954338073730469, "step": 262} +{"info/global_step": 263, "train_info/time_within_train_step": 27.740575075149536, "step": 263} +{"train_info/time_between_train_steps": 0.005039215087890625, "step": 263} +{"info/global_step": 264, "train_info/time_within_train_step": 27.690110445022583, "step": 264} +{"train_info/time_between_train_steps": 0.0049512386322021484, "step": 264} +{"info/global_step": 265, "train_info/time_within_train_step": 27.689056873321533, "step": 265} +{"train_info/time_between_train_steps": 0.005091667175292969, "step": 265} +{"info/global_step": 266, "train_info/time_within_train_step": 27.78900384902954, "step": 266} +{"train_info/time_between_train_steps": 0.005105018615722656, "step": 266} +{"info/global_step": 267, "train_info/time_within_train_step": 27.712255716323853, "step": 267} +{"train_info/time_between_train_steps": 0.00504755973815918, "step": 267} +{"info/global_step": 268, "train_info/time_within_train_step": 27.700186729431152, "step": 268} +{"train_info/time_between_train_steps": 0.004971742630004883, "step": 268} +{"info/global_step": 269, "train_info/time_within_train_step": 27.694979906082153, "step": 269} +{"train_info/time_between_train_steps": 0.004935741424560547, "step": 269} +{"info/global_step": 270, "train_info/time_within_train_step": 27.69291043281555, "step": 270} +{"train_info/time_between_train_steps": 0.005109548568725586, "step": 270} +{"info/global_step": 271, "train_info/time_within_train_step": 27.6983425617218, "step": 271} +{"train_info/time_between_train_steps": 0.0048961639404296875, "step": 271} +{"info/global_step": 272, "train_info/time_within_train_step": 27.688483476638794, "step": 272} +{"train_info/time_between_train_steps": 0.00505375862121582, "step": 272} +{"info/global_step": 273, "train_info/time_within_train_step": 27.745879888534546, "step": 273} +{"train_info/time_between_train_steps": 0.005034208297729492, "step": 273} +{"info/global_step": 274, "train_info/time_within_train_step": 27.700404167175293, "step": 274} +{"train_info/time_between_train_steps": 0.0049784183502197266, "step": 274} +{"info/global_step": 275, "train_info/time_within_train_step": 27.69112777709961, "step": 275} +{"train_info/time_between_train_steps": 0.0050547122955322266, "step": 275} +{"info/global_step": 276, "train_info/time_within_train_step": 27.69839882850647, "step": 276} +{"train_info/time_between_train_steps": 0.005109310150146484, "step": 276} +{"info/global_step": 277, "train_info/time_within_train_step": 27.694045066833496, "step": 277} +{"train_info/time_between_train_steps": 0.00542902946472168, "step": 277} +{"info/global_step": 278, "train_info/time_within_train_step": 27.762952089309692, "step": 278} +{"train_info/time_between_train_steps": 0.005435943603515625, "step": 278} +{"info/global_step": 279, "train_info/time_within_train_step": 27.718945741653442, "step": 279} +{"train_info/time_between_train_steps": 0.0056111812591552734, "step": 279} +{"info/global_step": 280, "train_info/time_within_train_step": 27.747326374053955, "step": 280} +{"train_info/time_between_train_steps": 0.0015499591827392578, "step": 280} +{"train_info/time_between_train_steps": 3.1902458667755127, "step": 280} +{"info/global_step": 281, "train_info/time_within_train_step": 27.705312728881836, "step": 281} +{"train_info/time_between_train_steps": 0.009455680847167969, "step": 281} +{"info/global_step": 282, "train_info/time_within_train_step": 27.92928194999695, "step": 282} +{"train_info/time_between_train_steps": 0.005140781402587891, "step": 282} +{"info/global_step": 283, "train_info/time_within_train_step": 27.72032904624939, "step": 283} +{"train_info/time_between_train_steps": 0.010077238082885742, "step": 283} +{"info/global_step": 284, "train_info/time_within_train_step": 27.867970943450928, "step": 284} +{"train_info/time_between_train_steps": 0.009922266006469727, "step": 284} +{"info/global_step": 285, "train_info/time_within_train_step": 27.708299160003662, "step": 285} +{"train_info/time_between_train_steps": 0.01024770736694336, "step": 285} +{"info/global_step": 286, "train_info/time_within_train_step": 27.845473051071167, "step": 286} +{"train_info/time_between_train_steps": 0.010071992874145508, "step": 286} +{"info/global_step": 287, "train_info/time_within_train_step": 27.701050281524658, "step": 287} +{"train_info/time_between_train_steps": 0.00995016098022461, "step": 287} +{"info/global_step": 288, "train_info/time_within_train_step": 27.795931577682495, "step": 288} +{"train_info/time_between_train_steps": 0.009820222854614258, "step": 288} +{"info/global_step": 289, "train_info/time_within_train_step": 27.72023367881775, "step": 289} +{"train_info/time_between_train_steps": 0.004887104034423828, "step": 289} +{"info/global_step": 290, "train_info/time_within_train_step": 27.690797328948975, "step": 290} +{"train_info/time_between_train_steps": 0.0050432682037353516, "step": 290} +{"info/global_step": 291, "train_info/time_within_train_step": 27.754319429397583, "step": 291} +{"train_info/time_between_train_steps": 0.005100727081298828, "step": 291} +{"info/global_step": 292, "train_info/time_within_train_step": 27.69392681121826, "step": 292} +{"train_info/time_between_train_steps": 0.0050280094146728516, "step": 292} +{"info/global_step": 293, "train_info/time_within_train_step": 27.68929147720337, "step": 293} +{"train_info/time_between_train_steps": 0.00507664680480957, "step": 293} +{"info/global_step": 294, "train_info/time_within_train_step": 27.687732934951782, "step": 294} +{"train_info/time_between_train_steps": 0.004940509796142578, "step": 294} +{"info/global_step": 295, "train_info/time_within_train_step": 27.7191424369812, "step": 295} +{"train_info/time_between_train_steps": 0.005344390869140625, "step": 295} +{"info/global_step": 296, "train_info/time_within_train_step": 27.69671082496643, "step": 296} +{"train_info/time_between_train_steps": 0.005946159362792969, "step": 296} +{"info/global_step": 297, "train_info/time_within_train_step": 27.820070505142212, "step": 297} +{"train_info/time_between_train_steps": 0.005114078521728516, "step": 297} +{"info/global_step": 298, "train_info/time_within_train_step": 27.704684019088745, "step": 298} +{"train_info/time_between_train_steps": 0.005087137222290039, "step": 298} +{"info/global_step": 299, "train_info/time_within_train_step": 27.69574213027954, "step": 299} +{"train_info/time_between_train_steps": 0.005105018615722656, "step": 299} +{"info/global_step": 300, "train_info/time_within_train_step": 27.712485551834106, "step": 300} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740831491, "_runtime": 8488}, "step": 300} +{"logs": {"train/loss": 4.4553, "train/learning_rate": 0.0005, "train/epoch": 10.02, "_timestamp": 1740831491, "_runtime": 8488}, "step": 300} +{"train_info/time_between_train_steps": 42.45728898048401, "step": 300} +{"info/global_step": 301, "train_info/time_within_train_step": 27.67371439933777, "step": 301} +{"train_info/time_between_train_steps": 0.0046541690826416016, "step": 301} +{"info/global_step": 302, "train_info/time_within_train_step": 27.692644834518433, "step": 302} +{"train_info/time_between_train_steps": 0.009370565414428711, "step": 302} +{"info/global_step": 303, "train_info/time_within_train_step": 27.701410055160522, "step": 303} +{"train_info/time_between_train_steps": 0.009828329086303711, "step": 303} +{"info/global_step": 304, "train_info/time_within_train_step": 27.698275089263916, "step": 304} +{"train_info/time_between_train_steps": 0.005104780197143555, "step": 304} +{"info/global_step": 305, "train_info/time_within_train_step": 27.701502799987793, "step": 305} +{"train_info/time_between_train_steps": 0.0055997371673583984, "step": 305} +{"info/global_step": 306, "train_info/time_within_train_step": 27.729403495788574, "step": 306} +{"train_info/time_between_train_steps": 0.005167961120605469, "step": 306} +{"info/global_step": 307, "train_info/time_within_train_step": 27.716084718704224, "step": 307} +{"train_info/time_between_train_steps": 0.005940914154052734, "step": 307} +{"info/global_step": 308, "train_info/time_within_train_step": 27.738102912902832, "step": 308} +{"train_info/time_between_train_steps": 0.002666473388671875, "step": 308} +{"train_info/time_between_train_steps": 3.181494951248169, "step": 308} +{"info/global_step": 309, "train_info/time_within_train_step": 27.737369298934937, "step": 309} +{"train_info/time_between_train_steps": 0.006779193878173828, "step": 309} +{"info/global_step": 310, "train_info/time_within_train_step": 27.837159872055054, "step": 310} +{"train_info/time_between_train_steps": 0.005101203918457031, "step": 310} +{"info/global_step": 311, "train_info/time_within_train_step": 27.741724491119385, "step": 311} +{"train_info/time_between_train_steps": 0.005343198776245117, "step": 311} +{"info/global_step": 312, "train_info/time_within_train_step": 27.814943313598633, "step": 312} +{"train_info/time_between_train_steps": 0.005086660385131836, "step": 312} +{"info/global_step": 313, "train_info/time_within_train_step": 27.856441020965576, "step": 313} +{"train_info/time_between_train_steps": 0.0052149295806884766, "step": 313} +{"info/global_step": 314, "train_info/time_within_train_step": 27.827441215515137, "step": 314} +{"train_info/time_between_train_steps": 0.00519561767578125, "step": 314} +{"info/global_step": 315, "train_info/time_within_train_step": 27.754327535629272, "step": 315} +{"train_info/time_between_train_steps": 0.005276918411254883, "step": 315} +{"info/global_step": 316, "train_info/time_within_train_step": 27.793094158172607, "step": 316} +{"train_info/time_between_train_steps": 0.00510716438293457, "step": 316} +{"info/global_step": 317, "train_info/time_within_train_step": 27.70595645904541, "step": 317} +{"train_info/time_between_train_steps": 0.004888296127319336, "step": 317} +{"info/global_step": 318, "train_info/time_within_train_step": 27.692731142044067, "step": 318} +{"train_info/time_between_train_steps": 0.004965066909790039, "step": 318} +{"info/global_step": 319, "train_info/time_within_train_step": 27.682445764541626, "step": 319} +{"train_info/time_between_train_steps": 0.0049436092376708984, "step": 319} +{"info/global_step": 320, "train_info/time_within_train_step": 27.68649649620056, "step": 320} +{"train_info/time_between_train_steps": 0.004987478256225586, "step": 320} +{"info/global_step": 321, "train_info/time_within_train_step": 27.688776969909668, "step": 321} +{"train_info/time_between_train_steps": 0.004988431930541992, "step": 321} +{"info/global_step": 322, "train_info/time_within_train_step": 27.70352578163147, "step": 322} +{"train_info/time_between_train_steps": 0.004880666732788086, "step": 322} +{"info/global_step": 323, "train_info/time_within_train_step": 27.689570903778076, "step": 323} +{"train_info/time_between_train_steps": 0.005070209503173828, "step": 323} +{"info/global_step": 324, "train_info/time_within_train_step": 27.73066234588623, "step": 324} +{"train_info/time_between_train_steps": 0.004934787750244141, "step": 324} +{"info/global_step": 325, "train_info/time_within_train_step": 27.687833309173584, "step": 325} +{"train_info/time_between_train_steps": 0.005039691925048828, "step": 325} +{"info/global_step": 326, "train_info/time_within_train_step": 27.736400604248047, "step": 326} +{"train_info/time_between_train_steps": 0.004967451095581055, "step": 326} +{"info/global_step": 327, "train_info/time_within_train_step": 27.696091175079346, "step": 327} +{"train_info/time_between_train_steps": 0.004912614822387695, "step": 327} +{"info/global_step": 328, "train_info/time_within_train_step": 27.841203689575195, "step": 328} +{"train_info/time_between_train_steps": 0.0050792694091796875, "step": 328} +{"info/global_step": 329, "train_info/time_within_train_step": 27.688955783843994, "step": 329} +{"train_info/time_between_train_steps": 0.005010843276977539, "step": 329} +{"info/global_step": 330, "train_info/time_within_train_step": 27.68764567375183, "step": 330} +{"train_info/time_between_train_steps": 0.005306243896484375, "step": 330} +{"info/global_step": 331, "train_info/time_within_train_step": 27.69805884361267, "step": 331} +{"train_info/time_between_train_steps": 0.005178928375244141, "step": 331} +{"info/global_step": 332, "train_info/time_within_train_step": 27.74022388458252, "step": 332} +{"train_info/time_between_train_steps": 0.005496501922607422, "step": 332} +{"info/global_step": 333, "train_info/time_within_train_step": 27.700398921966553, "step": 333} +{"train_info/time_between_train_steps": 0.008987665176391602, "step": 333} +{"info/global_step": 334, "train_info/time_within_train_step": 27.703734159469604, "step": 334} +{"train_info/time_between_train_steps": 0.005474567413330078, "step": 334} +{"info/global_step": 335, "train_info/time_within_train_step": 27.722431659698486, "step": 335} +{"train_info/time_between_train_steps": 0.005728244781494141, "step": 335} +{"info/global_step": 336, "train_info/time_within_train_step": 27.75047254562378, "step": 336} +{"train_info/time_between_train_steps": 0.002797365188598633, "step": 336} +{"train_info/time_between_train_steps": 3.465125560760498, "step": 336} +{"info/global_step": 337, "train_info/time_within_train_step": 27.753882884979248, "step": 337} +{"train_info/time_between_train_steps": 0.004895210266113281, "step": 337} +{"info/global_step": 338, "train_info/time_within_train_step": 27.841928720474243, "step": 338} +{"train_info/time_between_train_steps": 0.005143880844116211, "step": 338} +{"info/global_step": 339, "train_info/time_within_train_step": 27.697590827941895, "step": 339} +{"train_info/time_between_train_steps": 0.0052182674407958984, "step": 339} +{"info/global_step": 340, "train_info/time_within_train_step": 27.832372665405273, "step": 340} +{"train_info/time_between_train_steps": 0.00513148307800293, "step": 340} +{"info/global_step": 341, "train_info/time_within_train_step": 27.759871006011963, "step": 341} +{"train_info/time_between_train_steps": 0.00538325309753418, "step": 341} +{"info/global_step": 342, "train_info/time_within_train_step": 27.842289686203003, "step": 342} +{"train_info/time_between_train_steps": 0.0050661563873291016, "step": 342} +{"info/global_step": 343, "train_info/time_within_train_step": 27.698862075805664, "step": 343} +{"train_info/time_between_train_steps": 0.005149364471435547, "step": 343} +{"info/global_step": 344, "train_info/time_within_train_step": 27.88767957687378, "step": 344} +{"train_info/time_between_train_steps": 0.005190372467041016, "step": 344} +{"info/global_step": 345, "train_info/time_within_train_step": 27.757140159606934, "step": 345} +{"train_info/time_between_train_steps": 0.0048406124114990234, "step": 345} +{"info/global_step": 346, "train_info/time_within_train_step": 27.69309091567993, "step": 346} +{"train_info/time_between_train_steps": 0.004973411560058594, "step": 346} +{"info/global_step": 347, "train_info/time_within_train_step": 27.70459747314453, "step": 347} +{"train_info/time_between_train_steps": 0.0049991607666015625, "step": 347} +{"info/global_step": 348, "train_info/time_within_train_step": 27.698131322860718, "step": 348} +{"train_info/time_between_train_steps": 0.004997730255126953, "step": 348} +{"info/global_step": 349, "train_info/time_within_train_step": 27.847985982894897, "step": 349} +{"train_info/time_between_train_steps": 0.005676984786987305, "step": 349} +{"info/global_step": 350, "train_info/time_within_train_step": 29.799797534942627, "step": 350} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740832932, "_runtime": 9929}, "step": 350} +{"logs": {"train/loss": 4.2711, "train/learning_rate": 0.00047222222222222224, "train/epoch": 12.01, "_timestamp": 1740832932, "_runtime": 9929}, "step": 350} +{"train_info/time_between_train_steps": 0.47991466522216797, "step": 350} +{"info/global_step": 351, "train_info/time_within_train_step": 28.08719253540039, "step": 351} +{"train_info/time_between_train_steps": 0.005764961242675781, "step": 351} +{"info/global_step": 352, "train_info/time_within_train_step": 27.699890851974487, "step": 352} +{"train_info/time_between_train_steps": 0.004907369613647461, "step": 352} +{"info/global_step": 353, "train_info/time_within_train_step": 27.68859338760376, "step": 353} +{"train_info/time_between_train_steps": 0.00976252555847168, "step": 353} +{"info/global_step": 354, "train_info/time_within_train_step": 27.711734294891357, "step": 354} +{"train_info/time_between_train_steps": 0.009714841842651367, "step": 354} +{"info/global_step": 355, "train_info/time_within_train_step": 27.687541484832764, "step": 355} +{"train_info/time_between_train_steps": 0.009588479995727539, "step": 355} +{"info/global_step": 356, "train_info/time_within_train_step": 27.69487237930298, "step": 356} +{"train_info/time_between_train_steps": 0.005003929138183594, "step": 356} +{"info/global_step": 357, "train_info/time_within_train_step": 27.694591283798218, "step": 357} +{"train_info/time_between_train_steps": 0.005086660385131836, "step": 357} +{"info/global_step": 358, "train_info/time_within_train_step": 27.701341152191162, "step": 358} +{"train_info/time_between_train_steps": 0.0049631595611572266, "step": 358} +{"info/global_step": 359, "train_info/time_within_train_step": 27.69906210899353, "step": 359} +{"train_info/time_between_train_steps": 0.0050678253173828125, "step": 359} +{"info/global_step": 360, "train_info/time_within_train_step": 27.803274631500244, "step": 360} +{"train_info/time_between_train_steps": 0.00503993034362793, "step": 360} +{"info/global_step": 361, "train_info/time_within_train_step": 27.710896968841553, "step": 361} +{"train_info/time_between_train_steps": 0.005178928375244141, "step": 361} +{"info/global_step": 362, "train_info/time_within_train_step": 27.71125555038452, "step": 362} +{"train_info/time_between_train_steps": 0.005258083343505859, "step": 362} +{"info/global_step": 363, "train_info/time_within_train_step": 27.705116748809814, "step": 363} +{"train_info/time_between_train_steps": 0.006430387496948242, "step": 363} +{"info/global_step": 364, "train_info/time_within_train_step": 27.738272190093994, "step": 364} +{"train_info/time_between_train_steps": 0.0015239715576171875, "step": 364} +{"train_info/time_between_train_steps": 3.6479740142822266, "step": 364} +{"info/global_step": 365, "train_info/time_within_train_step": 27.70964813232422, "step": 365} +{"train_info/time_between_train_steps": 0.004936933517456055, "step": 365} +{"info/global_step": 366, "train_info/time_within_train_step": 27.834609508514404, "step": 366} +{"train_info/time_between_train_steps": 0.0052149295806884766, "step": 366} +{"info/global_step": 367, "train_info/time_within_train_step": 27.774315357208252, "step": 367} +{"train_info/time_between_train_steps": 0.005227804183959961, "step": 367} +{"info/global_step": 368, "train_info/time_within_train_step": 27.81905221939087, "step": 368} +{"train_info/time_between_train_steps": 0.005124568939208984, "step": 368} +{"info/global_step": 369, "train_info/time_within_train_step": 27.83138632774353, "step": 369} +{"train_info/time_between_train_steps": 0.0053637027740478516, "step": 369} +{"info/global_step": 370, "train_info/time_within_train_step": 27.855085134506226, "step": 370} +{"train_info/time_between_train_steps": 0.0052759647369384766, "step": 370} +{"info/global_step": 371, "train_info/time_within_train_step": 27.712387084960938, "step": 371} +{"train_info/time_between_train_steps": 0.005076885223388672, "step": 371} +{"info/global_step": 372, "train_info/time_within_train_step": 27.79671597480774, "step": 372} +{"train_info/time_between_train_steps": 0.005141735076904297, "step": 372} +{"info/global_step": 373, "train_info/time_within_train_step": 27.709895133972168, "step": 373} +{"train_info/time_between_train_steps": 0.004842996597290039, "step": 373} +{"info/global_step": 374, "train_info/time_within_train_step": 27.821279048919678, "step": 374} +{"train_info/time_between_train_steps": 0.004971504211425781, "step": 374} +{"info/global_step": 375, "train_info/time_within_train_step": 27.83532452583313, "step": 375} +{"train_info/time_between_train_steps": 0.005066871643066406, "step": 375} +{"info/global_step": 376, "train_info/time_within_train_step": 27.710305213928223, "step": 376} +{"train_info/time_between_train_steps": 0.0048596858978271484, "step": 376} +{"info/global_step": 377, "train_info/time_within_train_step": 27.69676685333252, "step": 377} +{"train_info/time_between_train_steps": 0.009778261184692383, "step": 377} +{"info/global_step": 378, "train_info/time_within_train_step": 27.684163570404053, "step": 378} +{"train_info/time_between_train_steps": 0.0096893310546875, "step": 378} +{"info/global_step": 379, "train_info/time_within_train_step": 27.686808109283447, "step": 379} +{"train_info/time_between_train_steps": 0.009704828262329102, "step": 379} +{"info/global_step": 380, "train_info/time_within_train_step": 27.711195945739746, "step": 380} +{"train_info/time_between_train_steps": 0.009671926498413086, "step": 380} +{"info/global_step": 381, "train_info/time_within_train_step": 27.694191455841064, "step": 381} +{"train_info/time_between_train_steps": 0.0049800872802734375, "step": 381} +{"info/global_step": 382, "train_info/time_within_train_step": 27.700822591781616, "step": 382} +{"train_info/time_between_train_steps": 0.005026817321777344, "step": 382} +{"info/global_step": 383, "train_info/time_within_train_step": 27.692628383636475, "step": 383} +{"train_info/time_between_train_steps": 0.0048944950103759766, "step": 383} +{"info/global_step": 384, "train_info/time_within_train_step": 27.70743751525879, "step": 384} +{"train_info/time_between_train_steps": 0.0050201416015625, "step": 384} +{"info/global_step": 385, "train_info/time_within_train_step": 27.69071912765503, "step": 385} +{"train_info/time_between_train_steps": 0.005132198333740234, "step": 385} +{"info/global_step": 386, "train_info/time_within_train_step": 27.69153904914856, "step": 386} +{"train_info/time_between_train_steps": 0.004994392395019531, "step": 386} +{"info/global_step": 387, "train_info/time_within_train_step": 27.698445558547974, "step": 387} +{"train_info/time_between_train_steps": 0.005151033401489258, "step": 387} +{"info/global_step": 388, "train_info/time_within_train_step": 27.716333866119385, "step": 388} +{"train_info/time_between_train_steps": 0.005112648010253906, "step": 388} +{"info/global_step": 389, "train_info/time_within_train_step": 27.70372200012207, "step": 389} +{"train_info/time_between_train_steps": 0.005332231521606445, "step": 389} +{"info/global_step": 390, "train_info/time_within_train_step": 27.737510919570923, "step": 390} +{"train_info/time_between_train_steps": 0.005438804626464844, "step": 390} +{"info/global_step": 391, "train_info/time_within_train_step": 27.80538272857666, "step": 391} +{"train_info/time_between_train_steps": 0.005414009094238281, "step": 391} +{"info/global_step": 392, "train_info/time_within_train_step": 27.731926202774048, "step": 392} +{"train_info/time_between_train_steps": 0.0017037391662597656, "step": 392} +{"train_info/time_between_train_steps": 3.209066152572632, "step": 392} +{"info/global_step": 393, "train_info/time_within_train_step": 27.721611738204956, "step": 393} +{"train_info/time_between_train_steps": 0.00492095947265625, "step": 393} +{"info/global_step": 394, "train_info/time_within_train_step": 27.8265118598938, "step": 394} +{"train_info/time_between_train_steps": 0.0050220489501953125, "step": 394} +{"info/global_step": 395, "train_info/time_within_train_step": 27.710840225219727, "step": 395} +{"train_info/time_between_train_steps": 0.005392313003540039, "step": 395} +{"info/global_step": 396, "train_info/time_within_train_step": 27.835167407989502, "step": 396} +{"train_info/time_between_train_steps": 0.005084991455078125, "step": 396} +{"info/global_step": 397, "train_info/time_within_train_step": 27.749477863311768, "step": 397} +{"train_info/time_between_train_steps": 0.00522613525390625, "step": 397} +{"info/global_step": 398, "train_info/time_within_train_step": 27.886828184127808, "step": 398} +{"train_info/time_between_train_steps": 0.005320072174072266, "step": 398} +{"info/global_step": 399, "train_info/time_within_train_step": 27.743941068649292, "step": 399} +{"train_info/time_between_train_steps": 0.005156040191650391, "step": 399} +{"info/global_step": 400, "train_info/time_within_train_step": 27.80372428894043, "step": 400} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740834328, "_runtime": 11325}, "step": 400} +{"logs": {"train/loss": 4.0704, "train/learning_rate": 0.00044444444444444436, "train/epoch": 14.01, "_timestamp": 1740834328, "_runtime": 11325}, "step": 400} +{"train_info/time_between_train_steps": 45.29352426528931, "step": 400} +{"info/global_step": 401, "train_info/time_within_train_step": 27.712905883789062, "step": 401} +{"train_info/time_between_train_steps": 0.004780292510986328, "step": 401} +{"info/global_step": 402, "train_info/time_within_train_step": 27.703795194625854, "step": 402} +{"train_info/time_between_train_steps": 0.009914875030517578, "step": 402} +{"info/global_step": 403, "train_info/time_within_train_step": 27.694003343582153, "step": 403} +{"train_info/time_between_train_steps": 0.00501561164855957, "step": 403} +{"info/global_step": 404, "train_info/time_within_train_step": 27.69199538230896, "step": 404} +{"train_info/time_between_train_steps": 0.004891395568847656, "step": 404} +{"info/global_step": 405, "train_info/time_within_train_step": 27.694244861602783, "step": 405} +{"train_info/time_between_train_steps": 0.0050373077392578125, "step": 405} +{"info/global_step": 406, "train_info/time_within_train_step": 27.68876552581787, "step": 406} +{"train_info/time_between_train_steps": 0.004998445510864258, "step": 406} +{"info/global_step": 407, "train_info/time_within_train_step": 27.79495120048523, "step": 407} +{"train_info/time_between_train_steps": 0.00504612922668457, "step": 407} +{"info/global_step": 408, "train_info/time_within_train_step": 27.950071573257446, "step": 408} +{"train_info/time_between_train_steps": 0.005013465881347656, "step": 408} +{"info/global_step": 409, "train_info/time_within_train_step": 27.698648691177368, "step": 409} +{"train_info/time_between_train_steps": 0.005040884017944336, "step": 409} +{"info/global_step": 410, "train_info/time_within_train_step": 27.701534032821655, "step": 410} +{"train_info/time_between_train_steps": 0.0051212310791015625, "step": 410} +{"info/global_step": 411, "train_info/time_within_train_step": 27.701975107192993, "step": 411} +{"train_info/time_between_train_steps": 0.004912376403808594, "step": 411} +{"info/global_step": 412, "train_info/time_within_train_step": 27.705368757247925, "step": 412} +{"train_info/time_between_train_steps": 0.005023002624511719, "step": 412} +{"info/global_step": 413, "train_info/time_within_train_step": 27.711755514144897, "step": 413} +{"train_info/time_between_train_steps": 0.005048513412475586, "step": 413} +{"info/global_step": 414, "train_info/time_within_train_step": 27.691945552825928, "step": 414} +{"train_info/time_between_train_steps": 0.00501251220703125, "step": 414} +{"info/global_step": 415, "train_info/time_within_train_step": 27.725512266159058, "step": 415} +{"train_info/time_between_train_steps": 0.005190849304199219, "step": 415} +{"info/global_step": 416, "train_info/time_within_train_step": 27.69051766395569, "step": 416} +{"train_info/time_between_train_steps": 0.005151033401489258, "step": 416} +{"info/global_step": 417, "train_info/time_within_train_step": 27.758514165878296, "step": 417} +{"train_info/time_between_train_steps": 0.005203723907470703, "step": 417} +{"info/global_step": 418, "train_info/time_within_train_step": 27.7173969745636, "step": 418} +{"train_info/time_between_train_steps": 0.0053293704986572266, "step": 418} +{"info/global_step": 419, "train_info/time_within_train_step": 27.75711941719055, "step": 419} +{"train_info/time_between_train_steps": 0.005949497222900391, "step": 419} +{"info/global_step": 420, "train_info/time_within_train_step": 27.74432110786438, "step": 420} +{"train_info/time_between_train_steps": 0.0015535354614257812, "step": 420} +{"train_info/time_between_train_steps": 3.4924557209014893, "step": 420} +{"info/global_step": 421, "train_info/time_within_train_step": 27.68761682510376, "step": 421} +{"train_info/time_between_train_steps": 0.004868268966674805, "step": 421} +{"info/global_step": 422, "train_info/time_within_train_step": 28.047622680664062, "step": 422} +{"train_info/time_between_train_steps": 0.007210731506347656, "step": 422} +{"info/global_step": 423, "train_info/time_within_train_step": 27.75974440574646, "step": 423} +{"train_info/time_between_train_steps": 0.005342245101928711, "step": 423} +{"info/global_step": 424, "train_info/time_within_train_step": 27.82738471031189, "step": 424} +{"train_info/time_between_train_steps": 0.005090475082397461, "step": 424} +{"info/global_step": 425, "train_info/time_within_train_step": 27.69625759124756, "step": 425} +{"train_info/time_between_train_steps": 0.005227565765380859, "step": 425} +{"info/global_step": 426, "train_info/time_within_train_step": 27.851099967956543, "step": 426} +{"train_info/time_between_train_steps": 0.0052490234375, "step": 426} +{"info/global_step": 427, "train_info/time_within_train_step": 27.71845030784607, "step": 427} +{"train_info/time_between_train_steps": 0.009924888610839844, "step": 427} +{"info/global_step": 428, "train_info/time_within_train_step": 27.87037491798401, "step": 428} +{"train_info/time_between_train_steps": 0.005197048187255859, "step": 428} +{"info/global_step": 429, "train_info/time_within_train_step": 27.695905685424805, "step": 429} +{"train_info/time_between_train_steps": 0.005017757415771484, "step": 429} +{"info/global_step": 430, "train_info/time_within_train_step": 27.6898455619812, "step": 430} +{"train_info/time_between_train_steps": 0.0049669742584228516, "step": 430} +{"info/global_step": 431, "train_info/time_within_train_step": 27.688291311264038, "step": 431} +{"train_info/time_between_train_steps": 0.0050048828125, "step": 431} +{"info/global_step": 432, "train_info/time_within_train_step": 27.728540897369385, "step": 432} +{"train_info/time_between_train_steps": 0.004919528961181641, "step": 432} +{"info/global_step": 433, "train_info/time_within_train_step": 27.70131230354309, "step": 433} +{"train_info/time_between_train_steps": 0.005013942718505859, "step": 433} +{"info/global_step": 434, "train_info/time_within_train_step": 27.814404487609863, "step": 434} +{"train_info/time_between_train_steps": 0.009632349014282227, "step": 434} +{"info/global_step": 435, "train_info/time_within_train_step": 27.689112663269043, "step": 435} +{"train_info/time_between_train_steps": 0.004969120025634766, "step": 435} +{"info/global_step": 436, "train_info/time_within_train_step": 27.703641414642334, "step": 436} +{"train_info/time_between_train_steps": 0.004956960678100586, "step": 436} +{"info/global_step": 437, "train_info/time_within_train_step": 27.695871114730835, "step": 437} +{"train_info/time_between_train_steps": 0.005060911178588867, "step": 437} +{"info/global_step": 438, "train_info/time_within_train_step": 27.81069827079773, "step": 438} +{"train_info/time_between_train_steps": 0.005212306976318359, "step": 438} +{"info/global_step": 439, "train_info/time_within_train_step": 27.69150996208191, "step": 439} +{"train_info/time_between_train_steps": 0.004812002182006836, "step": 439} +{"info/global_step": 440, "train_info/time_within_train_step": 27.689276218414307, "step": 440} +{"train_info/time_between_train_steps": 0.005367279052734375, "step": 440} +{"info/global_step": 441, "train_info/time_within_train_step": 27.69016981124878, "step": 441} +{"train_info/time_between_train_steps": 0.005397319793701172, "step": 441} +{"info/global_step": 442, "train_info/time_within_train_step": 27.6893367767334, "step": 442} +{"train_info/time_between_train_steps": 0.004914283752441406, "step": 442} +{"info/global_step": 443, "train_info/time_within_train_step": 27.697376251220703, "step": 443} +{"train_info/time_between_train_steps": 0.005065441131591797, "step": 443} +{"info/global_step": 444, "train_info/time_within_train_step": 27.702757358551025, "step": 444} +{"train_info/time_between_train_steps": 0.005169391632080078, "step": 444} +{"info/global_step": 445, "train_info/time_within_train_step": 27.699671983718872, "step": 445} +{"train_info/time_between_train_steps": 0.005212306976318359, "step": 445} +{"info/global_step": 446, "train_info/time_within_train_step": 27.73615789413452, "step": 446} +{"train_info/time_between_train_steps": 0.005381345748901367, "step": 446} +{"info/global_step": 447, "train_info/time_within_train_step": 27.74904465675354, "step": 447} +{"train_info/time_between_train_steps": 0.005526065826416016, "step": 447} +{"info/global_step": 448, "train_info/time_within_train_step": 27.732484817504883, "step": 448} +{"train_info/time_between_train_steps": 0.0015878677368164062, "step": 448} +{"train_info/time_between_train_steps": 3.5285868644714355, "step": 448} +{"info/global_step": 449, "train_info/time_within_train_step": 27.684348344802856, "step": 449} +{"train_info/time_between_train_steps": 0.004837751388549805, "step": 449} +{"info/global_step": 450, "train_info/time_within_train_step": 27.813748121261597, "step": 450} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740835768, "_runtime": 12765}, "step": 450} +{"logs": {"train/loss": 3.9149, "train/learning_rate": 0.00041666666666666664, "train/epoch": 16.0, "_timestamp": 1740835768, "_runtime": 12765}, "step": 450} +{"train_info/time_between_train_steps": 0.025011539459228516, "step": 450} +{"info/global_step": 451, "train_info/time_within_train_step": 27.75791573524475, "step": 451} +{"train_info/time_between_train_steps": 0.0054357051849365234, "step": 451} +{"info/global_step": 452, "train_info/time_within_train_step": 27.861753940582275, "step": 452} +{"train_info/time_between_train_steps": 0.005484819412231445, "step": 452} +{"info/global_step": 453, "train_info/time_within_train_step": 29.83572554588318, "step": 453} +{"train_info/time_between_train_steps": 0.00588679313659668, "step": 453} +{"info/global_step": 454, "train_info/time_within_train_step": 28.138453245162964, "step": 454} +{"train_info/time_between_train_steps": 0.0056798458099365234, "step": 454} +{"info/global_step": 455, "train_info/time_within_train_step": 27.737321138381958, "step": 455} +{"train_info/time_between_train_steps": 0.005068063735961914, "step": 455} +{"info/global_step": 456, "train_info/time_within_train_step": 27.798051834106445, "step": 456} +{"train_info/time_between_train_steps": 0.0051419734954833984, "step": 456} +{"info/global_step": 457, "train_info/time_within_train_step": 27.71160125732422, "step": 457} +{"train_info/time_between_train_steps": 0.00489497184753418, "step": 457} +{"info/global_step": 458, "train_info/time_within_train_step": 27.741770029067993, "step": 458} +{"train_info/time_between_train_steps": 0.004968881607055664, "step": 458} +{"info/global_step": 459, "train_info/time_within_train_step": 27.69365882873535, "step": 459} +{"train_info/time_between_train_steps": 0.009770870208740234, "step": 459} +{"info/global_step": 460, "train_info/time_within_train_step": 27.74277424812317, "step": 460} +{"train_info/time_between_train_steps": 0.004888772964477539, "step": 460} +{"info/global_step": 461, "train_info/time_within_train_step": 27.699869632720947, "step": 461} +{"train_info/time_between_train_steps": 0.005124092102050781, "step": 461} +{"info/global_step": 462, "train_info/time_within_train_step": 27.691447019577026, "step": 462} +{"train_info/time_between_train_steps": 0.004918575286865234, "step": 462} +{"info/global_step": 463, "train_info/time_within_train_step": 27.70453977584839, "step": 463} +{"train_info/time_between_train_steps": 0.004973649978637695, "step": 463} +{"info/global_step": 464, "train_info/time_within_train_step": 27.696696758270264, "step": 464} +{"train_info/time_between_train_steps": 0.004971504211425781, "step": 464} +{"info/global_step": 465, "train_info/time_within_train_step": 27.710965394973755, "step": 465} +{"train_info/time_between_train_steps": 0.004988670349121094, "step": 465} +{"info/global_step": 466, "train_info/time_within_train_step": 27.738935470581055, "step": 466} +{"train_info/time_between_train_steps": 0.0050542354583740234, "step": 466} +{"info/global_step": 467, "train_info/time_within_train_step": 27.723322868347168, "step": 467} +{"train_info/time_between_train_steps": 0.004898548126220703, "step": 467} +{"info/global_step": 468, "train_info/time_within_train_step": 27.736276388168335, "step": 468} +{"train_info/time_between_train_steps": 0.005021095275878906, "step": 468} +{"info/global_step": 469, "train_info/time_within_train_step": 27.806195497512817, "step": 469} +{"train_info/time_between_train_steps": 0.00509953498840332, "step": 469} +{"info/global_step": 470, "train_info/time_within_train_step": 27.694531202316284, "step": 470} +{"train_info/time_between_train_steps": 0.005021095275878906, "step": 470} +{"info/global_step": 471, "train_info/time_within_train_step": 27.69517707824707, "step": 471} +{"train_info/time_between_train_steps": 0.005087614059448242, "step": 471} +{"info/global_step": 472, "train_info/time_within_train_step": 27.69912028312683, "step": 472} +{"train_info/time_between_train_steps": 0.005068540573120117, "step": 472} +{"info/global_step": 473, "train_info/time_within_train_step": 27.7019305229187, "step": 473} +{"train_info/time_between_train_steps": 0.005430698394775391, "step": 473} +{"info/global_step": 474, "train_info/time_within_train_step": 27.70891308784485, "step": 474} +{"train_info/time_between_train_steps": 0.0055124759674072266, "step": 474} +{"info/global_step": 475, "train_info/time_within_train_step": 27.71860647201538, "step": 475} +{"train_info/time_between_train_steps": 0.005529165267944336, "step": 475} +{"info/global_step": 476, "train_info/time_within_train_step": 27.73746109008789, "step": 476} +{"train_info/time_between_train_steps": 0.0014650821685791016, "step": 476} +{"train_info/time_between_train_steps": 3.2078146934509277, "step": 476} +{"info/global_step": 477, "train_info/time_within_train_step": 27.74687957763672, "step": 477} +{"train_info/time_between_train_steps": 0.004839897155761719, "step": 477} +{"info/global_step": 478, "train_info/time_within_train_step": 27.8090660572052, "step": 478} +{"train_info/time_between_train_steps": 0.004831075668334961, "step": 478} +{"info/global_step": 479, "train_info/time_within_train_step": 27.697497844696045, "step": 479} +{"train_info/time_between_train_steps": 0.0053081512451171875, "step": 479} +{"info/global_step": 480, "train_info/time_within_train_step": 27.842634201049805, "step": 480} +{"train_info/time_between_train_steps": 0.005189418792724609, "step": 480} +{"info/global_step": 481, "train_info/time_within_train_step": 27.74658513069153, "step": 481} +{"train_info/time_between_train_steps": 0.005194902420043945, "step": 481} +{"info/global_step": 482, "train_info/time_within_train_step": 27.88545536994934, "step": 482} +{"train_info/time_between_train_steps": 0.005296230316162109, "step": 482} +{"info/global_step": 483, "train_info/time_within_train_step": 27.732072353363037, "step": 483} +{"train_info/time_between_train_steps": 0.005217790603637695, "step": 483} +{"info/global_step": 484, "train_info/time_within_train_step": 27.82475733757019, "step": 484} +{"train_info/time_between_train_steps": 0.0074231624603271484, "step": 484} +{"info/global_step": 485, "train_info/time_within_train_step": 27.813594579696655, "step": 485} +{"train_info/time_between_train_steps": 0.00499725341796875, "step": 485} +{"info/global_step": 486, "train_info/time_within_train_step": 27.75372076034546, "step": 486} +{"train_info/time_between_train_steps": 0.004954099655151367, "step": 486} +{"info/global_step": 487, "train_info/time_within_train_step": 27.68913722038269, "step": 487} +{"train_info/time_between_train_steps": 0.005041837692260742, "step": 487} +{"info/global_step": 488, "train_info/time_within_train_step": 27.7503879070282, "step": 488} +{"train_info/time_between_train_steps": 0.005044698715209961, "step": 488} +{"info/global_step": 489, "train_info/time_within_train_step": 27.692918062210083, "step": 489} +{"train_info/time_between_train_steps": 0.0050585269927978516, "step": 489} +{"info/global_step": 490, "train_info/time_within_train_step": 27.72455620765686, "step": 490} +{"train_info/time_between_train_steps": 0.005028963088989258, "step": 490} +{"info/global_step": 491, "train_info/time_within_train_step": 27.70636773109436, "step": 491} +{"train_info/time_between_train_steps": 0.004918575286865234, "step": 491} +{"info/global_step": 492, "train_info/time_within_train_step": 27.69416618347168, "step": 492} +{"train_info/time_between_train_steps": 0.005094766616821289, "step": 492} +{"info/global_step": 493, "train_info/time_within_train_step": 27.69286847114563, "step": 493} +{"train_info/time_between_train_steps": 0.004991292953491211, "step": 493} +{"info/global_step": 494, "train_info/time_within_train_step": 27.731329441070557, "step": 494} +{"train_info/time_between_train_steps": 0.0049343109130859375, "step": 494} +{"info/global_step": 495, "train_info/time_within_train_step": 27.693562984466553, "step": 495} +{"train_info/time_between_train_steps": 0.005104780197143555, "step": 495} +{"info/global_step": 496, "train_info/time_within_train_step": 27.709129095077515, "step": 496} +{"train_info/time_between_train_steps": 0.005033016204833984, "step": 496} +{"info/global_step": 497, "train_info/time_within_train_step": 27.696277618408203, "step": 497} +{"train_info/time_between_train_steps": 0.005319356918334961, "step": 497} +{"info/global_step": 498, "train_info/time_within_train_step": 27.692534685134888, "step": 498} +{"train_info/time_between_train_steps": 0.004992008209228516, "step": 498} +{"info/global_step": 499, "train_info/time_within_train_step": 27.69730019569397, "step": 499} +{"train_info/time_between_train_steps": 0.0051839351654052734, "step": 499} +{"info/global_step": 500, "train_info/time_within_train_step": 27.793909788131714, "step": 500} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740837164, "_runtime": 14161}, "step": 500} +{"logs": {"train/loss": 3.7895, "train/learning_rate": 0.00038888888888888887, "train/epoch": 17.02, "_timestamp": 1740837164, "_runtime": 14161}, "step": 500} +{"train_info/time_between_train_steps": 41.23707699775696, "step": 500} +{"info/global_step": 501, "train_info/time_within_train_step": 27.708420753479004, "step": 501} +{"train_info/time_between_train_steps": 0.005089998245239258, "step": 501} +{"info/global_step": 502, "train_info/time_within_train_step": 27.703459978103638, "step": 502} +{"train_info/time_between_train_steps": 0.005524158477783203, "step": 502} +{"info/global_step": 503, "train_info/time_within_train_step": 27.71467089653015, "step": 503} +{"train_info/time_between_train_steps": 0.00559234619140625, "step": 503} +{"info/global_step": 504, "train_info/time_within_train_step": 27.736638069152832, "step": 504} +{"train_info/time_between_train_steps": 0.0014772415161132812, "step": 504} +{"train_info/time_between_train_steps": 3.347965955734253, "step": 504} +{"info/global_step": 505, "train_info/time_within_train_step": 27.687434434890747, "step": 505} +{"train_info/time_between_train_steps": 0.004825115203857422, "step": 505} +{"info/global_step": 506, "train_info/time_within_train_step": 27.87270998954773, "step": 506} +{"train_info/time_between_train_steps": 0.0048978328704833984, "step": 506} +{"info/global_step": 507, "train_info/time_within_train_step": 27.716115713119507, "step": 507} +{"train_info/time_between_train_steps": 0.005372524261474609, "step": 507} +{"info/global_step": 508, "train_info/time_within_train_step": 27.85343098640442, "step": 508} +{"train_info/time_between_train_steps": 0.005252838134765625, "step": 508} +{"info/global_step": 509, "train_info/time_within_train_step": 27.947285413742065, "step": 509} +{"train_info/time_between_train_steps": 0.005145072937011719, "step": 509} +{"info/global_step": 510, "train_info/time_within_train_step": 27.90312433242798, "step": 510} +{"train_info/time_between_train_steps": 0.009965896606445312, "step": 510} +{"info/global_step": 511, "train_info/time_within_train_step": 27.70726180076599, "step": 511} +{"train_info/time_between_train_steps": 0.005095005035400391, "step": 511} +{"info/global_step": 512, "train_info/time_within_train_step": 27.80394220352173, "step": 512} +{"train_info/time_between_train_steps": 0.0051691532135009766, "step": 512} +{"info/global_step": 513, "train_info/time_within_train_step": 27.707289695739746, "step": 513} +{"train_info/time_between_train_steps": 0.005063533782958984, "step": 513} +{"info/global_step": 514, "train_info/time_within_train_step": 27.715444803237915, "step": 514} +{"train_info/time_between_train_steps": 0.009322404861450195, "step": 514} +{"info/global_step": 515, "train_info/time_within_train_step": 27.704405069351196, "step": 515} +{"train_info/time_between_train_steps": 0.009781599044799805, "step": 515} +{"info/global_step": 516, "train_info/time_within_train_step": 27.784651279449463, "step": 516} +{"train_info/time_between_train_steps": 0.004861116409301758, "step": 516} +{"info/global_step": 517, "train_info/time_within_train_step": 27.70297122001648, "step": 517} +{"train_info/time_between_train_steps": 0.004987239837646484, "step": 517} +{"info/global_step": 518, "train_info/time_within_train_step": 27.68667459487915, "step": 518} +{"train_info/time_between_train_steps": 0.004973173141479492, "step": 518} +{"info/global_step": 519, "train_info/time_within_train_step": 27.703004121780396, "step": 519} +{"train_info/time_between_train_steps": 0.004912376403808594, "step": 519} +{"info/global_step": 520, "train_info/time_within_train_step": 27.685704946517944, "step": 520} +{"train_info/time_between_train_steps": 0.005001068115234375, "step": 520} +{"info/global_step": 521, "train_info/time_within_train_step": 27.71438479423523, "step": 521} +{"train_info/time_between_train_steps": 0.004984140396118164, "step": 521} +{"info/global_step": 522, "train_info/time_within_train_step": 27.696325302124023, "step": 522} +{"train_info/time_between_train_steps": 0.00490260124206543, "step": 522} +{"info/global_step": 523, "train_info/time_within_train_step": 27.713549852371216, "step": 523} +{"train_info/time_between_train_steps": 0.009686470031738281, "step": 523} +{"info/global_step": 524, "train_info/time_within_train_step": 27.689327001571655, "step": 524} +{"train_info/time_between_train_steps": 0.009623527526855469, "step": 524} +{"info/global_step": 525, "train_info/time_within_train_step": 27.688891887664795, "step": 525} +{"train_info/time_between_train_steps": 0.010039329528808594, "step": 525} +{"info/global_step": 526, "train_info/time_within_train_step": 27.68747353553772, "step": 526} +{"train_info/time_between_train_steps": 0.004967451095581055, "step": 526} +{"info/global_step": 527, "train_info/time_within_train_step": 27.68968653678894, "step": 527} +{"train_info/time_between_train_steps": 0.010191679000854492, "step": 527} +{"info/global_step": 528, "train_info/time_within_train_step": 27.692152500152588, "step": 528} +{"train_info/time_between_train_steps": 0.005052328109741211, "step": 528} +{"info/global_step": 529, "train_info/time_within_train_step": 27.69168496131897, "step": 529} +{"train_info/time_between_train_steps": 0.005286693572998047, "step": 529} +{"info/global_step": 530, "train_info/time_within_train_step": 27.699914693832397, "step": 530} +{"train_info/time_between_train_steps": 0.005428314208984375, "step": 530} +{"info/global_step": 531, "train_info/time_within_train_step": 27.803982973098755, "step": 531} +{"train_info/time_between_train_steps": 0.00555729866027832, "step": 531} +{"info/global_step": 532, "train_info/time_within_train_step": 27.736003160476685, "step": 532} +{"train_info/time_between_train_steps": 0.0015761852264404297, "step": 532} +{"train_info/time_between_train_steps": 3.536268472671509, "step": 532} +{"info/global_step": 533, "train_info/time_within_train_step": 27.69612407684326, "step": 533} +{"train_info/time_between_train_steps": 0.004822254180908203, "step": 533} +{"info/global_step": 534, "train_info/time_within_train_step": 27.831682920455933, "step": 534} +{"train_info/time_between_train_steps": 0.004901409149169922, "step": 534} +{"info/global_step": 535, "train_info/time_within_train_step": 27.71023964881897, "step": 535} +{"train_info/time_between_train_steps": 0.005272626876831055, "step": 535} +{"info/global_step": 536, "train_info/time_within_train_step": 27.828911781311035, "step": 536} +{"train_info/time_between_train_steps": 0.005223274230957031, "step": 536} +{"info/global_step": 537, "train_info/time_within_train_step": 27.69343900680542, "step": 537} +{"train_info/time_between_train_steps": 0.005048513412475586, "step": 537} +{"info/global_step": 538, "train_info/time_within_train_step": 27.895780324935913, "step": 538} +{"train_info/time_between_train_steps": 0.005361795425415039, "step": 538} +{"info/global_step": 539, "train_info/time_within_train_step": 27.70144486427307, "step": 539} +{"train_info/time_between_train_steps": 0.005017757415771484, "step": 539} +{"info/global_step": 540, "train_info/time_within_train_step": 27.745589017868042, "step": 540} +{"train_info/time_between_train_steps": 0.005102872848510742, "step": 540} +{"info/global_step": 541, "train_info/time_within_train_step": 27.70478916168213, "step": 541} +{"train_info/time_between_train_steps": 0.004939079284667969, "step": 541} +{"info/global_step": 542, "train_info/time_within_train_step": 27.711983680725098, "step": 542} +{"train_info/time_between_train_steps": 0.004856586456298828, "step": 542} +{"info/global_step": 543, "train_info/time_within_train_step": 27.69168758392334, "step": 543} +{"train_info/time_between_train_steps": 0.0051190853118896484, "step": 543} +{"info/global_step": 544, "train_info/time_within_train_step": 27.690138339996338, "step": 544} +{"train_info/time_between_train_steps": 0.0049076080322265625, "step": 544} +{"info/global_step": 545, "train_info/time_within_train_step": 27.698413133621216, "step": 545} +{"train_info/time_between_train_steps": 0.0049285888671875, "step": 545} +{"info/global_step": 546, "train_info/time_within_train_step": 27.6901113986969, "step": 546} +{"train_info/time_between_train_steps": 0.005093097686767578, "step": 546} +{"info/global_step": 547, "train_info/time_within_train_step": 27.78780722618103, "step": 547} +{"train_info/time_between_train_steps": 0.0049626827239990234, "step": 547} +{"info/global_step": 548, "train_info/time_within_train_step": 27.68016242980957, "step": 548} +{"train_info/time_between_train_steps": 0.004949331283569336, "step": 548} +{"info/global_step": 549, "train_info/time_within_train_step": 27.6899254322052, "step": 549} +{"train_info/time_between_train_steps": 0.0050258636474609375, "step": 549} +{"info/global_step": 550, "train_info/time_within_train_step": 27.680280208587646, "step": 550} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740838600, "_runtime": 15597}, "step": 550} +{"logs": {"train/loss": 3.6784, "train/learning_rate": 0.0003611111111111111, "train/epoch": 19.02, "_timestamp": 1740838600, "_runtime": 15597}, "step": 550} +{"train_info/time_between_train_steps": 0.025004863739013672, "step": 550} +{"info/global_step": 551, "train_info/time_within_train_step": 27.68845558166504, "step": 551} +{"train_info/time_between_train_steps": 0.005024909973144531, "step": 551} +{"info/global_step": 552, "train_info/time_within_train_step": 27.70281147956848, "step": 552} +{"train_info/time_between_train_steps": 0.0049304962158203125, "step": 552} +{"info/global_step": 553, "train_info/time_within_train_step": 27.810566902160645, "step": 553} +{"train_info/time_between_train_steps": 0.004975080490112305, "step": 553} +{"info/global_step": 554, "train_info/time_within_train_step": 27.694202661514282, "step": 554} +{"train_info/time_between_train_steps": 0.005084991455078125, "step": 554} +{"info/global_step": 555, "train_info/time_within_train_step": 27.69926404953003, "step": 555} +{"train_info/time_between_train_steps": 0.006163597106933594, "step": 555} +{"info/global_step": 556, "train_info/time_within_train_step": 27.71111798286438, "step": 556} +{"train_info/time_between_train_steps": 0.00528264045715332, "step": 556} +{"info/global_step": 557, "train_info/time_within_train_step": 28.323028326034546, "step": 557} +{"train_info/time_between_train_steps": 0.005532026290893555, "step": 557} +{"info/global_step": 558, "train_info/time_within_train_step": 29.448145151138306, "step": 558} +{"train_info/time_between_train_steps": 0.005807399749755859, "step": 558} +{"info/global_step": 559, "train_info/time_within_train_step": 28.21396040916443, "step": 559} +{"train_info/time_between_train_steps": 0.006059408187866211, "step": 559} +{"info/global_step": 560, "train_info/time_within_train_step": 28.12070393562317, "step": 560} +{"train_info/time_between_train_steps": 0.0013773441314697266, "step": 560} +{"train_info/time_between_train_steps": 3.225266456604004, "step": 560} +{"info/global_step": 561, "train_info/time_within_train_step": 27.69726800918579, "step": 561} +{"train_info/time_between_train_steps": 0.005209445953369141, "step": 561} +{"info/global_step": 562, "train_info/time_within_train_step": 27.846566200256348, "step": 562} +{"train_info/time_between_train_steps": 0.005297183990478516, "step": 562} +{"info/global_step": 563, "train_info/time_within_train_step": 27.805835008621216, "step": 563} +{"train_info/time_between_train_steps": 0.005063056945800781, "step": 563} +{"info/global_step": 564, "train_info/time_within_train_step": 27.8647243976593, "step": 564} +{"train_info/time_between_train_steps": 0.005179643630981445, "step": 564} +{"info/global_step": 565, "train_info/time_within_train_step": 27.70156216621399, "step": 565} +{"train_info/time_between_train_steps": 0.006906032562255859, "step": 565} +{"info/global_step": 566, "train_info/time_within_train_step": 27.844183921813965, "step": 566} +{"train_info/time_between_train_steps": 0.005240678787231445, "step": 566} +{"info/global_step": 567, "train_info/time_within_train_step": 27.721083641052246, "step": 567} +{"train_info/time_between_train_steps": 0.005263328552246094, "step": 567} +{"info/global_step": 568, "train_info/time_within_train_step": 27.83459782600403, "step": 568} +{"train_info/time_between_train_steps": 0.007765054702758789, "step": 568} +{"info/global_step": 569, "train_info/time_within_train_step": 27.6952543258667, "step": 569} +{"train_info/time_between_train_steps": 0.005160331726074219, "step": 569} +{"info/global_step": 570, "train_info/time_within_train_step": 27.681498527526855, "step": 570} +{"train_info/time_between_train_steps": 0.004968404769897461, "step": 570} +{"info/global_step": 571, "train_info/time_within_train_step": 27.699301719665527, "step": 571} +{"train_info/time_between_train_steps": 0.00493168830871582, "step": 571} +{"info/global_step": 572, "train_info/time_within_train_step": 27.714843034744263, "step": 572} +{"train_info/time_between_train_steps": 0.005185842514038086, "step": 572} +{"info/global_step": 573, "train_info/time_within_train_step": 27.699862241744995, "step": 573} +{"train_info/time_between_train_steps": 0.005034685134887695, "step": 573} +{"info/global_step": 574, "train_info/time_within_train_step": 27.70215129852295, "step": 574} +{"train_info/time_between_train_steps": 0.004882335662841797, "step": 574} +{"info/global_step": 575, "train_info/time_within_train_step": 27.73314332962036, "step": 575} +{"train_info/time_between_train_steps": 0.0051424503326416016, "step": 575} +{"info/global_step": 576, "train_info/time_within_train_step": 27.68706727027893, "step": 576} +{"train_info/time_between_train_steps": 0.004930019378662109, "step": 576} +{"info/global_step": 577, "train_info/time_within_train_step": 27.68189024925232, "step": 577} +{"train_info/time_between_train_steps": 0.00532078742980957, "step": 577} +{"info/global_step": 578, "train_info/time_within_train_step": 27.774579763412476, "step": 578} +{"train_info/time_between_train_steps": 0.0049762725830078125, "step": 578} +{"info/global_step": 579, "train_info/time_within_train_step": 27.71674156188965, "step": 579} +{"train_info/time_between_train_steps": 0.004934072494506836, "step": 579} +{"info/global_step": 580, "train_info/time_within_train_step": 27.690786838531494, "step": 580} +{"train_info/time_between_train_steps": 0.005055665969848633, "step": 580} +{"info/global_step": 581, "train_info/time_within_train_step": 27.704076051712036, "step": 581} +{"train_info/time_between_train_steps": 0.005099773406982422, "step": 581} +{"info/global_step": 582, "train_info/time_within_train_step": 27.708842992782593, "step": 582} +{"train_info/time_between_train_steps": 0.005192995071411133, "step": 582} +{"info/global_step": 583, "train_info/time_within_train_step": 27.75750470161438, "step": 583} +{"train_info/time_between_train_steps": 0.005022764205932617, "step": 583} +{"info/global_step": 584, "train_info/time_within_train_step": 27.69664978981018, "step": 584} +{"train_info/time_between_train_steps": 0.006125926971435547, "step": 584} +{"info/global_step": 585, "train_info/time_within_train_step": 27.698200225830078, "step": 585} +{"train_info/time_between_train_steps": 0.00537872314453125, "step": 585} +{"info/global_step": 586, "train_info/time_within_train_step": 27.7758948802948, "step": 586} +{"train_info/time_between_train_steps": 0.005289554595947266, "step": 586} +{"info/global_step": 587, "train_info/time_within_train_step": 27.710065126419067, "step": 587} +{"train_info/time_between_train_steps": 0.0056722164154052734, "step": 587} +{"info/global_step": 588, "train_info/time_within_train_step": 27.744730710983276, "step": 588} +{"train_info/time_between_train_steps": 0.0014796257019042969, "step": 588} +{"train_info/time_between_train_steps": 3.575528383255005, "step": 588} +{"info/global_step": 589, "train_info/time_within_train_step": 27.700137853622437, "step": 589} +{"train_info/time_between_train_steps": 0.005125284194946289, "step": 589} +{"info/global_step": 590, "train_info/time_within_train_step": 27.904836177825928, "step": 590} +{"train_info/time_between_train_steps": 0.005290031433105469, "step": 590} +{"info/global_step": 591, "train_info/time_within_train_step": 27.728494882583618, "step": 591} +{"train_info/time_between_train_steps": 0.0051343441009521484, "step": 591} +{"info/global_step": 592, "train_info/time_within_train_step": 27.8601655960083, "step": 592} +{"train_info/time_between_train_steps": 0.005112886428833008, "step": 592} +{"info/global_step": 593, "train_info/time_within_train_step": 27.70261287689209, "step": 593} +{"train_info/time_between_train_steps": 0.00537419319152832, "step": 593} +{"info/global_step": 594, "train_info/time_within_train_step": 27.98085117340088, "step": 594} +{"train_info/time_between_train_steps": 0.005124330520629883, "step": 594} +{"info/global_step": 595, "train_info/time_within_train_step": 27.707832098007202, "step": 595} +{"train_info/time_between_train_steps": 0.010184288024902344, "step": 595} +{"info/global_step": 596, "train_info/time_within_train_step": 27.784398555755615, "step": 596} +{"train_info/time_between_train_steps": 0.010283708572387695, "step": 596} +{"info/global_step": 597, "train_info/time_within_train_step": 27.694567680358887, "step": 597} +{"train_info/time_between_train_steps": 0.004965066909790039, "step": 597} +{"info/global_step": 598, "train_info/time_within_train_step": 27.686442852020264, "step": 598} +{"train_info/time_between_train_steps": 0.00506138801574707, "step": 598} +{"info/global_step": 599, "train_info/time_within_train_step": 27.680354356765747, "step": 599} +{"train_info/time_between_train_steps": 0.004934072494506836, "step": 599} +{"info/global_step": 600, "train_info/time_within_train_step": 27.701460123062134, "step": 600} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740840000, "_runtime": 16997}, "step": 600} +{"logs": {"train/loss": 3.5851, "train/learning_rate": 0.0003333333333333333, "train/epoch": 21.01, "_timestamp": 1740840000, "_runtime": 16997}, "step": 600} +{"train_info/time_between_train_steps": 100.35306906700134, "step": 600} +{"info/global_step": 601, "train_info/time_within_train_step": 27.711413860321045, "step": 601} +{"train_info/time_between_train_steps": 0.005037069320678711, "step": 601} +{"info/global_step": 602, "train_info/time_within_train_step": 27.688926935195923, "step": 602} +{"train_info/time_between_train_steps": 0.0047910213470458984, "step": 602} +{"info/global_step": 603, "train_info/time_within_train_step": 27.70055055618286, "step": 603} +{"train_info/time_between_train_steps": 0.005080699920654297, "step": 603} +{"info/global_step": 604, "train_info/time_within_train_step": 27.69854474067688, "step": 604} +{"train_info/time_between_train_steps": 0.004879474639892578, "step": 604} +{"info/global_step": 605, "train_info/time_within_train_step": 27.68789577484131, "step": 605} +{"train_info/time_between_train_steps": 0.005242824554443359, "step": 605} +{"info/global_step": 606, "train_info/time_within_train_step": 27.717498302459717, "step": 606} +{"train_info/time_between_train_steps": 0.005006551742553711, "step": 606} +{"info/global_step": 607, "train_info/time_within_train_step": 27.690204858779907, "step": 607} +{"train_info/time_between_train_steps": 0.004933357238769531, "step": 607} +{"info/global_step": 608, "train_info/time_within_train_step": 27.73018741607666, "step": 608} +{"train_info/time_between_train_steps": 0.00510859489440918, "step": 608} +{"info/global_step": 609, "train_info/time_within_train_step": 27.687421083450317, "step": 609} +{"train_info/time_between_train_steps": 0.00502467155456543, "step": 609} +{"info/global_step": 610, "train_info/time_within_train_step": 27.77945637702942, "step": 610} +{"train_info/time_between_train_steps": 0.009390830993652344, "step": 610} +{"info/global_step": 611, "train_info/time_within_train_step": 27.68695640563965, "step": 611} +{"train_info/time_between_train_steps": 0.0049321651458740234, "step": 611} +{"info/global_step": 612, "train_info/time_within_train_step": 27.708086252212524, "step": 612} +{"train_info/time_between_train_steps": 0.005377531051635742, "step": 612} +{"info/global_step": 613, "train_info/time_within_train_step": 27.701955318450928, "step": 613} +{"train_info/time_between_train_steps": 0.01031494140625, "step": 613} +{"info/global_step": 614, "train_info/time_within_train_step": 27.705931425094604, "step": 614} +{"train_info/time_between_train_steps": 0.010334491729736328, "step": 614} +{"info/global_step": 615, "train_info/time_within_train_step": 27.71450114250183, "step": 615} +{"train_info/time_between_train_steps": 0.01049041748046875, "step": 615} +{"info/global_step": 616, "train_info/time_within_train_step": 27.726925373077393, "step": 616} +{"train_info/time_between_train_steps": 0.0024871826171875, "step": 616} +{"train_info/time_between_train_steps": 3.6564712524414062, "step": 616} +{"info/global_step": 617, "train_info/time_within_train_step": 27.703296184539795, "step": 617} +{"train_info/time_between_train_steps": 0.009639978408813477, "step": 617} +{"info/global_step": 618, "train_info/time_within_train_step": 27.853147506713867, "step": 618} +{"train_info/time_between_train_steps": 0.005210399627685547, "step": 618} +{"info/global_step": 619, "train_info/time_within_train_step": 27.716031789779663, "step": 619} +{"train_info/time_between_train_steps": 0.005639553070068359, "step": 619} +{"info/global_step": 620, "train_info/time_within_train_step": 27.875967502593994, "step": 620} +{"train_info/time_between_train_steps": 0.005225181579589844, "step": 620} +{"info/global_step": 621, "train_info/time_within_train_step": 27.70564365386963, "step": 621} +{"train_info/time_between_train_steps": 0.00538325309753418, "step": 621} +{"info/global_step": 622, "train_info/time_within_train_step": 27.847106456756592, "step": 622} +{"train_info/time_between_train_steps": 0.00519871711730957, "step": 622} +{"info/global_step": 623, "train_info/time_within_train_step": 27.751660346984863, "step": 623} +{"train_info/time_between_train_steps": 0.005437612533569336, "step": 623} +{"info/global_step": 624, "train_info/time_within_train_step": 27.78173065185547, "step": 624} +{"train_info/time_between_train_steps": 0.005098581314086914, "step": 624} +{"info/global_step": 625, "train_info/time_within_train_step": 27.81395387649536, "step": 625} +{"train_info/time_between_train_steps": 0.005114555358886719, "step": 625} +{"info/global_step": 626, "train_info/time_within_train_step": 27.69040298461914, "step": 626} +{"train_info/time_between_train_steps": 0.004914999008178711, "step": 626} +{"info/global_step": 627, "train_info/time_within_train_step": 27.7145938873291, "step": 627} +{"train_info/time_between_train_steps": 0.005115985870361328, "step": 627} +{"info/global_step": 628, "train_info/time_within_train_step": 27.716265439987183, "step": 628} +{"train_info/time_between_train_steps": 0.00509333610534668, "step": 628} +{"info/global_step": 629, "train_info/time_within_train_step": 27.694745779037476, "step": 629} +{"train_info/time_between_train_steps": 0.004877328872680664, "step": 629} +{"info/global_step": 630, "train_info/time_within_train_step": 27.715574264526367, "step": 630} +{"train_info/time_between_train_steps": 0.005080699920654297, "step": 630} +{"info/global_step": 631, "train_info/time_within_train_step": 27.711655616760254, "step": 631} +{"train_info/time_between_train_steps": 0.0050220489501953125, "step": 631} +{"info/global_step": 632, "train_info/time_within_train_step": 27.69781494140625, "step": 632} +{"train_info/time_between_train_steps": 0.004985809326171875, "step": 632} +{"info/global_step": 633, "train_info/time_within_train_step": 27.706093549728394, "step": 633} +{"train_info/time_between_train_steps": 0.00512385368347168, "step": 633} +{"info/global_step": 634, "train_info/time_within_train_step": 27.720401525497437, "step": 634} +{"train_info/time_between_train_steps": 0.004889726638793945, "step": 634} +{"info/global_step": 635, "train_info/time_within_train_step": 27.709187269210815, "step": 635} +{"train_info/time_between_train_steps": 0.005094766616821289, "step": 635} +{"info/global_step": 636, "train_info/time_within_train_step": 27.699150323867798, "step": 636} +{"train_info/time_between_train_steps": 0.005000591278076172, "step": 636} +{"info/global_step": 637, "train_info/time_within_train_step": 27.703221321105957, "step": 637} +{"train_info/time_between_train_steps": 0.005290508270263672, "step": 637} +{"info/global_step": 638, "train_info/time_within_train_step": 27.690943717956543, "step": 638} +{"train_info/time_between_train_steps": 0.009346485137939453, "step": 638} +{"info/global_step": 639, "train_info/time_within_train_step": 27.703498125076294, "step": 639} +{"train_info/time_between_train_steps": 0.00534367561340332, "step": 639} +{"info/global_step": 640, "train_info/time_within_train_step": 27.746168613433838, "step": 640} +{"train_info/time_between_train_steps": 0.0053250789642333984, "step": 640} +{"info/global_step": 641, "train_info/time_within_train_step": 27.797264337539673, "step": 641} +{"train_info/time_between_train_steps": 0.005201578140258789, "step": 641} +{"info/global_step": 642, "train_info/time_within_train_step": 27.71025586128235, "step": 642} +{"train_info/time_between_train_steps": 0.005465269088745117, "step": 642} +{"info/global_step": 643, "train_info/time_within_train_step": 27.709912061691284, "step": 643} +{"train_info/time_between_train_steps": 0.005545139312744141, "step": 643} +{"info/global_step": 644, "train_info/time_within_train_step": 27.730528831481934, "step": 644} +{"train_info/time_between_train_steps": 0.0017633438110351562, "step": 644} +{"train_info/time_between_train_steps": 3.247823715209961, "step": 644} +{"info/global_step": 645, "train_info/time_within_train_step": 27.70371699333191, "step": 645} +{"train_info/time_between_train_steps": 0.004847526550292969, "step": 645} +{"info/global_step": 646, "train_info/time_within_train_step": 27.79657530784607, "step": 646} +{"train_info/time_between_train_steps": 0.004849433898925781, "step": 646} +{"info/global_step": 647, "train_info/time_within_train_step": 27.685652494430542, "step": 647} +{"train_info/time_between_train_steps": 0.00504612922668457, "step": 647} +{"info/global_step": 648, "train_info/time_within_train_step": 27.846961975097656, "step": 648} +{"train_info/time_between_train_steps": 0.005275726318359375, "step": 648} +{"info/global_step": 649, "train_info/time_within_train_step": 27.70909571647644, "step": 649} +{"train_info/time_between_train_steps": 0.005120754241943359, "step": 649} +{"info/global_step": 650, "train_info/time_within_train_step": 27.843520164489746, "step": 650} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740841496, "_runtime": 18493}, "step": 650} +{"logs": {"train/loss": 3.4989, "train/learning_rate": 0.00030555555555555555, "train/epoch": 23.0, "_timestamp": 1740841496, "_runtime": 18493}, "step": 650} +{"train_info/time_between_train_steps": 0.025977611541748047, "step": 650} +{"info/global_step": 651, "train_info/time_within_train_step": 27.805676698684692, "step": 651} +{"train_info/time_between_train_steps": 0.010377883911132812, "step": 651} +{"info/global_step": 652, "train_info/time_within_train_step": 27.872225522994995, "step": 652} +{"train_info/time_between_train_steps": 0.005066871643066406, "step": 652} +{"info/global_step": 653, "train_info/time_within_train_step": 27.711646795272827, "step": 653} +{"train_info/time_between_train_steps": 0.005052089691162109, "step": 653} +{"info/global_step": 654, "train_info/time_within_train_step": 27.69183921813965, "step": 654} +{"train_info/time_between_train_steps": 0.0048236846923828125, "step": 654} +{"info/global_step": 655, "train_info/time_within_train_step": 27.694700956344604, "step": 655} +{"train_info/time_between_train_steps": 0.005633354187011719, "step": 655} +{"info/global_step": 656, "train_info/time_within_train_step": 27.789692401885986, "step": 656} +{"train_info/time_between_train_steps": 0.005006313323974609, "step": 656} +{"info/global_step": 657, "train_info/time_within_train_step": 27.694444179534912, "step": 657} +{"train_info/time_between_train_steps": 0.00490117073059082, "step": 657} +{"info/global_step": 658, "train_info/time_within_train_step": 27.69456195831299, "step": 658} +{"train_info/time_between_train_steps": 0.00503849983215332, "step": 658} +{"info/global_step": 659, "train_info/time_within_train_step": 27.71712350845337, "step": 659} +{"train_info/time_between_train_steps": 0.004912614822387695, "step": 659} +{"info/global_step": 660, "train_info/time_within_train_step": 27.85288667678833, "step": 660} +{"train_info/time_between_train_steps": 0.0054018497467041016, "step": 660} +{"info/global_step": 661, "train_info/time_within_train_step": 28.12901210784912, "step": 661} +{"train_info/time_between_train_steps": 0.005361795425415039, "step": 661} +{"info/global_step": 662, "train_info/time_within_train_step": 29.29439687728882, "step": 662} +{"train_info/time_between_train_steps": 0.0049817562103271484, "step": 662} +{"info/global_step": 663, "train_info/time_within_train_step": 27.697261571884155, "step": 663} +{"train_info/time_between_train_steps": 0.005024909973144531, "step": 663} +{"info/global_step": 664, "train_info/time_within_train_step": 27.69074034690857, "step": 664} +{"train_info/time_between_train_steps": 0.00496673583984375, "step": 664} +{"info/global_step": 665, "train_info/time_within_train_step": 27.68789315223694, "step": 665} +{"train_info/time_between_train_steps": 0.0050852298736572266, "step": 665} +{"info/global_step": 666, "train_info/time_within_train_step": 27.696208477020264, "step": 666} +{"train_info/time_between_train_steps": 0.0050580501556396484, "step": 666} +{"info/global_step": 667, "train_info/time_within_train_step": 27.703442811965942, "step": 667} +{"train_info/time_between_train_steps": 0.005002737045288086, "step": 667} +{"info/global_step": 668, "train_info/time_within_train_step": 27.744688272476196, "step": 668} +{"train_info/time_between_train_steps": 0.005229949951171875, "step": 668} +{"info/global_step": 669, "train_info/time_within_train_step": 27.696346044540405, "step": 669} +{"train_info/time_between_train_steps": 0.005968570709228516, "step": 669} +{"info/global_step": 670, "train_info/time_within_train_step": 27.704741716384888, "step": 670} +{"train_info/time_between_train_steps": 0.005435943603515625, "step": 670} +{"info/global_step": 671, "train_info/time_within_train_step": 27.71406126022339, "step": 671} +{"train_info/time_between_train_steps": 0.005460262298583984, "step": 671} +{"info/global_step": 672, "train_info/time_within_train_step": 27.831825256347656, "step": 672} +{"train_info/time_between_train_steps": 0.0016260147094726562, "step": 672} +{"train_info/time_between_train_steps": 3.359981060028076, "step": 672} +{"info/global_step": 673, "train_info/time_within_train_step": 27.701510429382324, "step": 673} +{"train_info/time_between_train_steps": 0.004807949066162109, "step": 673} +{"info/global_step": 674, "train_info/time_within_train_step": 27.78240394592285, "step": 674} +{"train_info/time_between_train_steps": 0.0048329830169677734, "step": 674} +{"info/global_step": 675, "train_info/time_within_train_step": 27.680402278900146, "step": 675} +{"train_info/time_between_train_steps": 0.005074262619018555, "step": 675} +{"info/global_step": 676, "train_info/time_within_train_step": 27.833250284194946, "step": 676} +{"train_info/time_between_train_steps": 0.0052490234375, "step": 676} +{"info/global_step": 677, "train_info/time_within_train_step": 27.752604722976685, "step": 677} +{"train_info/time_between_train_steps": 0.0051386356353759766, "step": 677} +{"info/global_step": 678, "train_info/time_within_train_step": 27.856308698654175, "step": 678} +{"train_info/time_between_train_steps": 0.005135297775268555, "step": 678} +{"info/global_step": 679, "train_info/time_within_train_step": 27.692993879318237, "step": 679} +{"train_info/time_between_train_steps": 0.005125999450683594, "step": 679} +{"info/global_step": 680, "train_info/time_within_train_step": 27.743289947509766, "step": 680} +{"train_info/time_between_train_steps": 0.005023002624511719, "step": 680} +{"info/global_step": 681, "train_info/time_within_train_step": 27.712548971176147, "step": 681} +{"train_info/time_between_train_steps": 0.004980802536010742, "step": 681} +{"info/global_step": 682, "train_info/time_within_train_step": 27.69251585006714, "step": 682} +{"train_info/time_between_train_steps": 0.004841804504394531, "step": 682} +{"info/global_step": 683, "train_info/time_within_train_step": 27.690463542938232, "step": 683} +{"train_info/time_between_train_steps": 0.005207538604736328, "step": 683} +{"info/global_step": 684, "train_info/time_within_train_step": 27.69432759284973, "step": 684} +{"train_info/time_between_train_steps": 0.0051457881927490234, "step": 684} +{"info/global_step": 685, "train_info/time_within_train_step": 27.750369787216187, "step": 685} +{"train_info/time_between_train_steps": 0.004860877990722656, "step": 685} +{"info/global_step": 686, "train_info/time_within_train_step": 27.69981837272644, "step": 686} +{"train_info/time_between_train_steps": 0.009666681289672852, "step": 686} +{"info/global_step": 687, "train_info/time_within_train_step": 27.71483588218689, "step": 687} +{"train_info/time_between_train_steps": 0.004995584487915039, "step": 687} +{"info/global_step": 688, "train_info/time_within_train_step": 27.84404683113098, "step": 688} +{"train_info/time_between_train_steps": 0.01027822494506836, "step": 688} +{"info/global_step": 689, "train_info/time_within_train_step": 27.713651418685913, "step": 689} +{"train_info/time_between_train_steps": 0.004959583282470703, "step": 689} +{"info/global_step": 690, "train_info/time_within_train_step": 27.74330186843872, "step": 690} +{"train_info/time_between_train_steps": 0.004926443099975586, "step": 690} +{"info/global_step": 691, "train_info/time_within_train_step": 27.696632862091064, "step": 691} +{"train_info/time_between_train_steps": 0.005063772201538086, "step": 691} +{"info/global_step": 692, "train_info/time_within_train_step": 27.694790363311768, "step": 692} +{"train_info/time_between_train_steps": 0.004886627197265625, "step": 692} +{"info/global_step": 693, "train_info/time_within_train_step": 27.721911430358887, "step": 693} +{"train_info/time_between_train_steps": 0.005141496658325195, "step": 693} +{"info/global_step": 694, "train_info/time_within_train_step": 27.692262411117554, "step": 694} +{"train_info/time_between_train_steps": 0.005394935607910156, "step": 694} +{"info/global_step": 695, "train_info/time_within_train_step": 27.71251940727234, "step": 695} +{"train_info/time_between_train_steps": 0.00503230094909668, "step": 695} +{"info/global_step": 696, "train_info/time_within_train_step": 27.76023244857788, "step": 696} +{"train_info/time_between_train_steps": 0.0054473876953125, "step": 696} +{"info/global_step": 697, "train_info/time_within_train_step": 27.720767498016357, "step": 697} +{"train_info/time_between_train_steps": 0.005074739456176758, "step": 697} +{"info/global_step": 698, "train_info/time_within_train_step": 27.71663212776184, "step": 698} +{"train_info/time_between_train_steps": 0.005479335784912109, "step": 698} +{"info/global_step": 699, "train_info/time_within_train_step": 27.71782898902893, "step": 699} +{"train_info/time_between_train_steps": 0.0056993961334228516, "step": 699} +{"info/global_step": 700, "train_info/time_within_train_step": 27.78555989265442, "step": 700} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740842890, "_runtime": 19887}, "step": 700} +{"logs": {"train/loss": 3.4216, "train/learning_rate": 0.0002777777777777778, "train/epoch": 24.02, "_timestamp": 1740842890, "_runtime": 19887}, "step": 700} +{"train_info/time_between_train_steps": 42.80957746505737, "step": 700} +{"train_info/time_between_train_steps": 46.27500247955322, "step": 700} +{"info/global_step": 701, "train_info/time_within_train_step": 27.686132192611694, "step": 701} +{"train_info/time_between_train_steps": 0.00487208366394043, "step": 701} +{"info/global_step": 702, "train_info/time_within_train_step": 27.80207586288452, "step": 702} +{"train_info/time_between_train_steps": 0.0048334598541259766, "step": 702} +{"info/global_step": 703, "train_info/time_within_train_step": 27.793689489364624, "step": 703} +{"train_info/time_between_train_steps": 0.005223989486694336, "step": 703} +{"info/global_step": 704, "train_info/time_within_train_step": 27.861220121383667, "step": 704} +{"train_info/time_between_train_steps": 0.005254268646240234, "step": 704} +{"info/global_step": 705, "train_info/time_within_train_step": 27.714876890182495, "step": 705} +{"train_info/time_between_train_steps": 0.005085945129394531, "step": 705} +{"info/global_step": 706, "train_info/time_within_train_step": 27.869492530822754, "step": 706} +{"train_info/time_between_train_steps": 0.005203723907470703, "step": 706} +{"info/global_step": 707, "train_info/time_within_train_step": 27.77540135383606, "step": 707} +{"train_info/time_between_train_steps": 0.005069732666015625, "step": 707} +{"info/global_step": 708, "train_info/time_within_train_step": 27.75020694732666, "step": 708} +{"train_info/time_between_train_steps": 0.010298728942871094, "step": 708} +{"info/global_step": 709, "train_info/time_within_train_step": 27.72922110557556, "step": 709} +{"train_info/time_between_train_steps": 0.005120515823364258, "step": 709} +{"info/global_step": 710, "train_info/time_within_train_step": 27.709623098373413, "step": 710} +{"train_info/time_between_train_steps": 0.0048580169677734375, "step": 710} +{"info/global_step": 711, "train_info/time_within_train_step": 27.69423818588257, "step": 711} +{"train_info/time_between_train_steps": 0.005027055740356445, "step": 711} +{"info/global_step": 712, "train_info/time_within_train_step": 27.750043630599976, "step": 712} +{"train_info/time_between_train_steps": 0.005093812942504883, "step": 712} +{"info/global_step": 713, "train_info/time_within_train_step": 27.73405623435974, "step": 713} +{"train_info/time_between_train_steps": 0.00513458251953125, "step": 713} +{"info/global_step": 714, "train_info/time_within_train_step": 27.723658561706543, "step": 714} +{"train_info/time_between_train_steps": 0.005033731460571289, "step": 714} +{"info/global_step": 715, "train_info/time_within_train_step": 27.713316917419434, "step": 715} +{"train_info/time_between_train_steps": 0.0049288272857666016, "step": 715} +{"info/global_step": 716, "train_info/time_within_train_step": 27.715816259384155, "step": 716} +{"train_info/time_between_train_steps": 0.004959821701049805, "step": 716} +{"info/global_step": 717, "train_info/time_within_train_step": 27.7034592628479, "step": 717} +{"train_info/time_between_train_steps": 0.005287647247314453, "step": 717} +{"info/global_step": 718, "train_info/time_within_train_step": 27.70335555076599, "step": 718} +{"train_info/time_between_train_steps": 0.004913330078125, "step": 718} +{"info/global_step": 719, "train_info/time_within_train_step": 27.80866289138794, "step": 719} +{"train_info/time_between_train_steps": 0.005023479461669922, "step": 719} +{"info/global_step": 720, "train_info/time_within_train_step": 27.746705293655396, "step": 720} +{"train_info/time_between_train_steps": 0.004984140396118164, "step": 720} +{"info/global_step": 721, "train_info/time_within_train_step": 27.72645592689514, "step": 721} +{"train_info/time_between_train_steps": 0.005197048187255859, "step": 721} +{"info/global_step": 722, "train_info/time_within_train_step": 27.724870204925537, "step": 722} +{"train_info/time_between_train_steps": 0.005125761032104492, "step": 722} +{"info/global_step": 723, "train_info/time_within_train_step": 27.71535563468933, "step": 723} +{"train_info/time_between_train_steps": 0.004895210266113281, "step": 723} +{"info/global_step": 724, "train_info/time_within_train_step": 27.721309423446655, "step": 724} +{"train_info/time_between_train_steps": 0.00533747673034668, "step": 724} +{"info/global_step": 725, "train_info/time_within_train_step": 27.70637035369873, "step": 725} +{"train_info/time_between_train_steps": 0.005324602127075195, "step": 725} +{"info/global_step": 726, "train_info/time_within_train_step": 27.727813720703125, "step": 726} +{"train_info/time_between_train_steps": 0.005333423614501953, "step": 726} +{"info/global_step": 727, "train_info/time_within_train_step": 27.773075819015503, "step": 727} +{"train_info/time_between_train_steps": 0.005458354949951172, "step": 727} +{"info/global_step": 728, "train_info/time_within_train_step": 27.75032114982605, "step": 728} +{"train_info/time_between_train_steps": 0.002264738082885742, "step": 728} +{"train_info/time_between_train_steps": 3.215179920196533, "step": 728} +{"info/global_step": 729, "train_info/time_within_train_step": 27.733758687973022, "step": 729} +{"train_info/time_between_train_steps": 0.010220527648925781, "step": 729} +{"info/global_step": 730, "train_info/time_within_train_step": 27.844091415405273, "step": 730} +{"train_info/time_between_train_steps": 0.010209083557128906, "step": 730} +{"info/global_step": 731, "train_info/time_within_train_step": 27.69576644897461, "step": 731} +{"train_info/time_between_train_steps": 0.005034923553466797, "step": 731} +{"info/global_step": 732, "train_info/time_within_train_step": 27.809707641601562, "step": 732} +{"train_info/time_between_train_steps": 0.0051457881927490234, "step": 732} +{"info/global_step": 733, "train_info/time_within_train_step": 27.750197887420654, "step": 733} +{"train_info/time_between_train_steps": 0.005448341369628906, "step": 733} +{"info/global_step": 734, "train_info/time_within_train_step": 27.953293085098267, "step": 734} +{"train_info/time_between_train_steps": 0.010109901428222656, "step": 734} +{"info/global_step": 735, "train_info/time_within_train_step": 27.756603956222534, "step": 735} +{"train_info/time_between_train_steps": 0.01025390625, "step": 735} +{"info/global_step": 736, "train_info/time_within_train_step": 27.794487714767456, "step": 736} +{"train_info/time_between_train_steps": 0.005143642425537109, "step": 736} +{"info/global_step": 737, "train_info/time_within_train_step": 27.696928024291992, "step": 737} +{"train_info/time_between_train_steps": 0.004910707473754883, "step": 737} +{"info/global_step": 738, "train_info/time_within_train_step": 27.68697190284729, "step": 738} +{"train_info/time_between_train_steps": 0.0049991607666015625, "step": 738} +{"info/global_step": 739, "train_info/time_within_train_step": 27.689237117767334, "step": 739} +{"train_info/time_between_train_steps": 0.0049686431884765625, "step": 739} +{"info/global_step": 740, "train_info/time_within_train_step": 27.695965051651, "step": 740} +{"train_info/time_between_train_steps": 0.005018711090087891, "step": 740} +{"info/global_step": 741, "train_info/time_within_train_step": 27.728337049484253, "step": 741} +{"train_info/time_between_train_steps": 0.0050563812255859375, "step": 741} +{"info/global_step": 742, "train_info/time_within_train_step": 27.69136333465576, "step": 742} +{"train_info/time_between_train_steps": 0.005014657974243164, "step": 742} +{"info/global_step": 743, "train_info/time_within_train_step": 27.69124746322632, "step": 743} +{"train_info/time_between_train_steps": 0.0051729679107666016, "step": 743} +{"info/global_step": 744, "train_info/time_within_train_step": 27.74429225921631, "step": 744} +{"train_info/time_between_train_steps": 0.004896640777587891, "step": 744} +{"info/global_step": 745, "train_info/time_within_train_step": 27.69081974029541, "step": 745} +{"train_info/time_between_train_steps": 0.005044460296630859, "step": 745} +{"info/global_step": 746, "train_info/time_within_train_step": 27.693158626556396, "step": 746} +{"train_info/time_between_train_steps": 0.005045652389526367, "step": 746} +{"info/global_step": 747, "train_info/time_within_train_step": 27.68879508972168, "step": 747} +{"train_info/time_between_train_steps": 0.004929065704345703, "step": 747} +{"info/global_step": 748, "train_info/time_within_train_step": 27.687434911727905, "step": 748} +{"train_info/time_between_train_steps": 0.005268573760986328, "step": 748} +{"info/global_step": 749, "train_info/time_within_train_step": 27.70016574859619, "step": 749} +{"train_info/time_between_train_steps": 0.005064964294433594, "step": 749} +{"info/global_step": 750, "train_info/time_within_train_step": 27.793599843978882, "step": 750} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740844328, "_runtime": 21325}, "step": 750} +{"logs": {"train/loss": 3.3544, "train/learning_rate": 0.00025, "train/epoch": 26.02, "_timestamp": 1740844328, "_runtime": 21325}, "step": 750} +{"train_info/time_between_train_steps": 0.025327205657958984, "step": 750} +{"info/global_step": 751, "train_info/time_within_train_step": 27.742754697799683, "step": 751} +{"train_info/time_between_train_steps": 0.005155324935913086, "step": 751} +{"info/global_step": 752, "train_info/time_within_train_step": 27.69778561592102, "step": 752} +{"train_info/time_between_train_steps": 0.005048036575317383, "step": 752} +{"info/global_step": 753, "train_info/time_within_train_step": 27.72203040122986, "step": 753} +{"train_info/time_between_train_steps": 0.0052568912506103516, "step": 753} +{"info/global_step": 754, "train_info/time_within_train_step": 27.72691798210144, "step": 754} +{"train_info/time_between_train_steps": 0.00520777702331543, "step": 754} +{"info/global_step": 755, "train_info/time_within_train_step": 27.714696884155273, "step": 755} +{"train_info/time_between_train_steps": 0.0055658817291259766, "step": 755} +{"info/global_step": 756, "train_info/time_within_train_step": 27.726507425308228, "step": 756} +{"train_info/time_between_train_steps": 0.0015163421630859375, "step": 756} +{"train_info/time_between_train_steps": 3.3411803245544434, "step": 756} +{"info/global_step": 757, "train_info/time_within_train_step": 27.73341941833496, "step": 757} +{"train_info/time_between_train_steps": 0.004923343658447266, "step": 757} +{"info/global_step": 758, "train_info/time_within_train_step": 27.8407781124115, "step": 758} +{"train_info/time_between_train_steps": 0.005113363265991211, "step": 758} +{"info/global_step": 759, "train_info/time_within_train_step": 27.695552825927734, "step": 759} +{"train_info/time_between_train_steps": 0.005135297775268555, "step": 759} +{"info/global_step": 760, "train_info/time_within_train_step": 27.811612606048584, "step": 760} +{"train_info/time_between_train_steps": 0.005088090896606445, "step": 760} +{"info/global_step": 761, "train_info/time_within_train_step": 27.748504877090454, "step": 761} +{"train_info/time_between_train_steps": 0.005246162414550781, "step": 761} +{"info/global_step": 762, "train_info/time_within_train_step": 27.839649438858032, "step": 762} +{"train_info/time_between_train_steps": 0.0052144527435302734, "step": 762} +{"info/global_step": 763, "train_info/time_within_train_step": 33.540178298950195, "step": 763} +{"train_info/time_between_train_steps": 0.005487203598022461, "step": 763} +{"info/global_step": 764, "train_info/time_within_train_step": 28.95550513267517, "step": 764} +{"train_info/time_between_train_steps": 0.0054547786712646484, "step": 764} +{"info/global_step": 765, "train_info/time_within_train_step": 33.252726793289185, "step": 765} +{"train_info/time_between_train_steps": 0.004976511001586914, "step": 765} +{"info/global_step": 766, "train_info/time_within_train_step": 28.858247995376587, "step": 766} +{"train_info/time_between_train_steps": 0.005213260650634766, "step": 766} +{"info/global_step": 767, "train_info/time_within_train_step": 33.02670884132385, "step": 767} +{"train_info/time_between_train_steps": 0.005475521087646484, "step": 767} +{"info/global_step": 768, "train_info/time_within_train_step": 28.70181965827942, "step": 768} +{"train_info/time_between_train_steps": 0.005223989486694336, "step": 768} +{"info/global_step": 769, "train_info/time_within_train_step": 33.05906128883362, "step": 769} +{"train_info/time_between_train_steps": 0.00528717041015625, "step": 769} +{"info/global_step": 770, "train_info/time_within_train_step": 27.693272590637207, "step": 770} +{"train_info/time_between_train_steps": 0.0049285888671875, "step": 770} +{"info/global_step": 771, "train_info/time_within_train_step": 27.718508005142212, "step": 771} +{"train_info/time_between_train_steps": 0.0054798126220703125, "step": 771} +{"info/global_step": 772, "train_info/time_within_train_step": 27.695959091186523, "step": 772} +{"train_info/time_between_train_steps": 0.004892587661743164, "step": 772} +{"info/global_step": 773, "train_info/time_within_train_step": 27.698535680770874, "step": 773} +{"train_info/time_between_train_steps": 0.00495147705078125, "step": 773} +{"info/global_step": 774, "train_info/time_within_train_step": 27.702802181243896, "step": 774} +{"train_info/time_between_train_steps": 0.005091190338134766, "step": 774} +{"info/global_step": 775, "train_info/time_within_train_step": 27.69405508041382, "step": 775} +{"train_info/time_between_train_steps": 0.0048940181732177734, "step": 775} +{"info/global_step": 776, "train_info/time_within_train_step": 27.706730604171753, "step": 776} +{"train_info/time_between_train_steps": 0.0050699710845947266, "step": 776} +{"info/global_step": 777, "train_info/time_within_train_step": 27.71089482307434, "step": 777} +{"train_info/time_between_train_steps": 0.005059957504272461, "step": 777} +{"info/global_step": 778, "train_info/time_within_train_step": 27.683895587921143, "step": 778} +{"train_info/time_between_train_steps": 0.0050089359283447266, "step": 778} +{"info/global_step": 779, "train_info/time_within_train_step": 27.715999364852905, "step": 779} +{"train_info/time_between_train_steps": 0.0050580501556396484, "step": 779} +{"info/global_step": 780, "train_info/time_within_train_step": 27.687727212905884, "step": 780} +{"train_info/time_between_train_steps": 0.005111217498779297, "step": 780} +{"info/global_step": 781, "train_info/time_within_train_step": 27.795575618743896, "step": 781} +{"train_info/time_between_train_steps": 0.005473613739013672, "step": 781} +{"info/global_step": 782, "train_info/time_within_train_step": 27.70793581008911, "step": 782} +{"train_info/time_between_train_steps": 0.005246162414550781, "step": 782} +{"info/global_step": 783, "train_info/time_within_train_step": 27.711445808410645, "step": 783} +{"train_info/time_between_train_steps": 0.0057337284088134766, "step": 783} +{"info/global_step": 784, "train_info/time_within_train_step": 27.747435569763184, "step": 784} +{"train_info/time_between_train_steps": 0.0015790462493896484, "step": 784} +{"train_info/time_between_train_steps": 3.836681365966797, "step": 784} +{"info/global_step": 785, "train_info/time_within_train_step": 27.681520223617554, "step": 785} +{"train_info/time_between_train_steps": 0.0048978328704833984, "step": 785} +{"info/global_step": 786, "train_info/time_within_train_step": 27.845381259918213, "step": 786} +{"train_info/time_between_train_steps": 0.005904197692871094, "step": 786} +{"info/global_step": 787, "train_info/time_within_train_step": 27.724011421203613, "step": 787} +{"train_info/time_between_train_steps": 0.005448102951049805, "step": 787} +{"info/global_step": 788, "train_info/time_within_train_step": 27.869181871414185, "step": 788} +{"train_info/time_between_train_steps": 0.005153179168701172, "step": 788} +{"info/global_step": 789, "train_info/time_within_train_step": 27.711547136306763, "step": 789} +{"train_info/time_between_train_steps": 0.005251169204711914, "step": 789} +{"info/global_step": 790, "train_info/time_within_train_step": 27.85970449447632, "step": 790} +{"train_info/time_between_train_steps": 0.005303382873535156, "step": 790} +{"info/global_step": 791, "train_info/time_within_train_step": 27.69825506210327, "step": 791} +{"train_info/time_between_train_steps": 0.005128622055053711, "step": 791} +{"info/global_step": 792, "train_info/time_within_train_step": 27.847865343093872, "step": 792} +{"train_info/time_between_train_steps": 0.005214691162109375, "step": 792} +{"info/global_step": 793, "train_info/time_within_train_step": 27.70948886871338, "step": 793} +{"train_info/time_between_train_steps": 0.004800558090209961, "step": 793} +{"info/global_step": 794, "train_info/time_within_train_step": 27.73362112045288, "step": 794} +{"train_info/time_between_train_steps": 0.0049207210540771484, "step": 794} +{"info/global_step": 795, "train_info/time_within_train_step": 27.695247411727905, "step": 795} +{"train_info/time_between_train_steps": 0.005029201507568359, "step": 795} +{"info/global_step": 796, "train_info/time_within_train_step": 27.702844619750977, "step": 796} +{"train_info/time_between_train_steps": 0.004879474639892578, "step": 796} +{"info/global_step": 797, "train_info/time_within_train_step": 27.825533390045166, "step": 797} +{"train_info/time_between_train_steps": 0.007612705230712891, "step": 797} +{"info/global_step": 798, "train_info/time_within_train_step": 27.68595862388611, "step": 798} +{"train_info/time_between_train_steps": 0.0049016475677490234, "step": 798} +{"info/global_step": 799, "train_info/time_within_train_step": 27.742519855499268, "step": 799} +{"train_info/time_between_train_steps": 0.004983425140380859, "step": 799} +{"info/global_step": 800, "train_info/time_within_train_step": 27.700145959854126, "step": 800} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740845762, "_runtime": 22759}, "step": 800} +{"logs": {"train/loss": 3.2914, "train/learning_rate": 0.00022222222222222218, "train/epoch": 28.01, "_timestamp": 1740845762, "_runtime": 22759}, "step": 800} +{"train_info/time_between_train_steps": 94.39965748786926, "step": 800} +{"info/global_step": 801, "train_info/time_within_train_step": 28.13985323905945, "step": 801} +{"train_info/time_between_train_steps": 0.014551639556884766, "step": 801} +{"info/global_step": 802, "train_info/time_within_train_step": 28.122788906097412, "step": 802} +{"train_info/time_between_train_steps": 0.008404731750488281, "step": 802} +{"info/global_step": 803, "train_info/time_within_train_step": 27.965886116027832, "step": 803} +{"train_info/time_between_train_steps": 0.009660720825195312, "step": 803} +{"info/global_step": 804, "train_info/time_within_train_step": 27.871659517288208, "step": 804} +{"train_info/time_between_train_steps": 0.005012035369873047, "step": 804} +{"info/global_step": 805, "train_info/time_within_train_step": 27.695966958999634, "step": 805} +{"train_info/time_between_train_steps": 0.0052258968353271484, "step": 805} +{"info/global_step": 806, "train_info/time_within_train_step": 27.7021746635437, "step": 806} +{"train_info/time_between_train_steps": 0.005218982696533203, "step": 806} +{"info/global_step": 807, "train_info/time_within_train_step": 27.700650215148926, "step": 807} +{"train_info/time_between_train_steps": 0.0067291259765625, "step": 807} +{"info/global_step": 808, "train_info/time_within_train_step": 27.722511053085327, "step": 808} +{"train_info/time_between_train_steps": 0.00989389419555664, "step": 808} +{"info/global_step": 809, "train_info/time_within_train_step": 27.89733648300171, "step": 809} +{"train_info/time_between_train_steps": 0.015107393264770508, "step": 809} +{"info/global_step": 810, "train_info/time_within_train_step": 27.745254278182983, "step": 810} +{"train_info/time_between_train_steps": 0.005595207214355469, "step": 810} +{"info/global_step": 811, "train_info/time_within_train_step": 27.742340087890625, "step": 811} +{"train_info/time_between_train_steps": 0.005656003952026367, "step": 811} +{"info/global_step": 812, "train_info/time_within_train_step": 27.7336847782135, "step": 812} +{"train_info/time_between_train_steps": 0.002470254898071289, "step": 812} +{"train_info/time_between_train_steps": 3.2342209815979004, "step": 812} +{"info/global_step": 813, "train_info/time_within_train_step": 27.81101083755493, "step": 813} +{"train_info/time_between_train_steps": 0.004800558090209961, "step": 813} +{"info/global_step": 814, "train_info/time_within_train_step": 27.80008292198181, "step": 814} +{"train_info/time_between_train_steps": 0.004840373992919922, "step": 814} +{"info/global_step": 815, "train_info/time_within_train_step": 27.724828481674194, "step": 815} +{"train_info/time_between_train_steps": 0.0052490234375, "step": 815} +{"info/global_step": 816, "train_info/time_within_train_step": 27.85899019241333, "step": 816} +{"train_info/time_between_train_steps": 0.005253791809082031, "step": 816} +{"info/global_step": 817, "train_info/time_within_train_step": 27.698028564453125, "step": 817} +{"train_info/time_between_train_steps": 0.0051229000091552734, "step": 817} +{"info/global_step": 818, "train_info/time_within_train_step": 27.866198778152466, "step": 818} +{"train_info/time_between_train_steps": 0.005264759063720703, "step": 818} +{"info/global_step": 819, "train_info/time_within_train_step": 27.758527755737305, "step": 819} +{"train_info/time_between_train_steps": 0.005087614059448242, "step": 819} +{"info/global_step": 820, "train_info/time_within_train_step": 27.801769256591797, "step": 820} +{"train_info/time_between_train_steps": 0.005095958709716797, "step": 820} +{"info/global_step": 821, "train_info/time_within_train_step": 27.70434284210205, "step": 821} +{"train_info/time_between_train_steps": 0.004932880401611328, "step": 821} +{"info/global_step": 822, "train_info/time_within_train_step": 27.68131422996521, "step": 822} +{"train_info/time_between_train_steps": 0.00504755973815918, "step": 822} +{"info/global_step": 823, "train_info/time_within_train_step": 27.725656032562256, "step": 823} +{"train_info/time_between_train_steps": 0.0051271915435791016, "step": 823} +{"info/global_step": 824, "train_info/time_within_train_step": 27.776959657669067, "step": 824} +{"train_info/time_between_train_steps": 0.005007028579711914, "step": 824} +{"info/global_step": 825, "train_info/time_within_train_step": 27.709770679473877, "step": 825} +{"train_info/time_between_train_steps": 0.005064725875854492, "step": 825} +{"info/global_step": 826, "train_info/time_within_train_step": 27.70147728919983, "step": 826} +{"train_info/time_between_train_steps": 0.005063772201538086, "step": 826} +{"info/global_step": 827, "train_info/time_within_train_step": 27.693557739257812, "step": 827} +{"train_info/time_between_train_steps": 0.004915952682495117, "step": 827} +{"info/global_step": 828, "train_info/time_within_train_step": 27.948241472244263, "step": 828} +{"train_info/time_between_train_steps": 0.014736413955688477, "step": 828} +{"info/global_step": 829, "train_info/time_within_train_step": 28.093982219696045, "step": 829} +{"train_info/time_between_train_steps": 0.014746427536010742, "step": 829} +{"info/global_step": 830, "train_info/time_within_train_step": 28.14142608642578, "step": 830} +{"train_info/time_between_train_steps": 0.01008296012878418, "step": 830} +{"info/global_step": 831, "train_info/time_within_train_step": 27.764824151992798, "step": 831} +{"train_info/time_between_train_steps": 0.01476907730102539, "step": 831} +{"info/global_step": 832, "train_info/time_within_train_step": 27.722601413726807, "step": 832} +{"train_info/time_between_train_steps": 0.004932880401611328, "step": 832} +{"info/global_step": 833, "train_info/time_within_train_step": 27.68748116493225, "step": 833} +{"train_info/time_between_train_steps": 0.005114555358886719, "step": 833} +{"info/global_step": 834, "train_info/time_within_train_step": 27.718976736068726, "step": 834} +{"train_info/time_between_train_steps": 0.004959583282470703, "step": 834} +{"info/global_step": 835, "train_info/time_within_train_step": 27.69181489944458, "step": 835} +{"train_info/time_between_train_steps": 0.005152463912963867, "step": 835} +{"info/global_step": 836, "train_info/time_within_train_step": 27.689839363098145, "step": 836} +{"train_info/time_between_train_steps": 0.00518798828125, "step": 836} +{"info/global_step": 837, "train_info/time_within_train_step": 27.691020727157593, "step": 837} +{"train_info/time_between_train_steps": 0.0051097869873046875, "step": 837} +{"info/global_step": 838, "train_info/time_within_train_step": 27.699669361114502, "step": 838} +{"train_info/time_between_train_steps": 0.005372524261474609, "step": 838} +{"info/global_step": 839, "train_info/time_within_train_step": 27.702691316604614, "step": 839} +{"train_info/time_between_train_steps": 0.005443096160888672, "step": 839} +{"info/global_step": 840, "train_info/time_within_train_step": 27.73092293739319, "step": 840} +{"train_info/time_between_train_steps": 0.0015017986297607422, "step": 840} +{"train_info/time_between_train_steps": 3.251333713531494, "step": 840} +{"info/global_step": 841, "train_info/time_within_train_step": 27.74283504486084, "step": 841} +{"train_info/time_between_train_steps": 0.004883766174316406, "step": 841} +{"info/global_step": 842, "train_info/time_within_train_step": 27.799066305160522, "step": 842} +{"train_info/time_between_train_steps": 0.0048351287841796875, "step": 842} +{"info/global_step": 843, "train_info/time_within_train_step": 27.721273183822632, "step": 843} +{"train_info/time_between_train_steps": 0.005238056182861328, "step": 843} +{"info/global_step": 844, "train_info/time_within_train_step": 27.89963698387146, "step": 844} +{"train_info/time_between_train_steps": 0.0051403045654296875, "step": 844} +{"info/global_step": 845, "train_info/time_within_train_step": 27.74312663078308, "step": 845} +{"train_info/time_between_train_steps": 0.005268096923828125, "step": 845} +{"info/global_step": 846, "train_info/time_within_train_step": 27.872466564178467, "step": 846} +{"train_info/time_between_train_steps": 0.0052661895751953125, "step": 846} +{"info/global_step": 847, "train_info/time_within_train_step": 27.69567322731018, "step": 847} +{"train_info/time_between_train_steps": 0.005102634429931641, "step": 847} +{"info/global_step": 848, "train_info/time_within_train_step": 27.77864718437195, "step": 848} +{"train_info/time_between_train_steps": 0.005409955978393555, "step": 848} +{"info/global_step": 849, "train_info/time_within_train_step": 27.702211618423462, "step": 849} +{"train_info/time_between_train_steps": 0.004860877990722656, "step": 849} +{"info/global_step": 850, "train_info/time_within_train_step": 27.83606243133545, "step": 850} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740847254, "_runtime": 24251}, "step": 850} +{"logs": {"train/loss": 3.2331, "train/learning_rate": 0.00019444444444444443, "train/epoch": 30.01, "_timestamp": 1740847254, "_runtime": 24251}, "step": 850} +{"train_info/time_between_train_steps": 0.036550283432006836, "step": 850} +{"info/global_step": 851, "train_info/time_within_train_step": 27.82843589782715, "step": 851} +{"train_info/time_between_train_steps": 0.005017757415771484, "step": 851} +{"info/global_step": 852, "train_info/time_within_train_step": 27.678449153900146, "step": 852} +{"train_info/time_between_train_steps": 0.004865407943725586, "step": 852} +{"info/global_step": 853, "train_info/time_within_train_step": 27.690021753311157, "step": 853} +{"train_info/time_between_train_steps": 0.0049741268157958984, "step": 853} +{"info/global_step": 854, "train_info/time_within_train_step": 27.689231157302856, "step": 854} +{"train_info/time_between_train_steps": 0.0050067901611328125, "step": 854} +{"info/global_step": 855, "train_info/time_within_train_step": 27.681582927703857, "step": 855} +{"train_info/time_between_train_steps": 0.004889249801635742, "step": 855} +{"info/global_step": 856, "train_info/time_within_train_step": 27.68628454208374, "step": 856} +{"train_info/time_between_train_steps": 0.004981040954589844, "step": 856} +{"info/global_step": 857, "train_info/time_within_train_step": 27.688113689422607, "step": 857} +{"train_info/time_between_train_steps": 0.005015850067138672, "step": 857} +{"info/global_step": 858, "train_info/time_within_train_step": 27.688170909881592, "step": 858} +{"train_info/time_between_train_steps": 0.0076541900634765625, "step": 858} +{"info/global_step": 859, "train_info/time_within_train_step": 27.78089165687561, "step": 859} +{"train_info/time_between_train_steps": 0.010062694549560547, "step": 859} +{"info/global_step": 860, "train_info/time_within_train_step": 27.689239263534546, "step": 860} +{"train_info/time_between_train_steps": 0.0049285888671875, "step": 860} +{"info/global_step": 861, "train_info/time_within_train_step": 27.70326566696167, "step": 861} +{"train_info/time_between_train_steps": 0.005106925964355469, "step": 861} +{"info/global_step": 862, "train_info/time_within_train_step": 27.739277362823486, "step": 862} +{"train_info/time_between_train_steps": 0.00507807731628418, "step": 862} +{"info/global_step": 863, "train_info/time_within_train_step": 27.704433917999268, "step": 863} +{"train_info/time_between_train_steps": 0.005089998245239258, "step": 863} +{"info/global_step": 864, "train_info/time_within_train_step": 27.744089365005493, "step": 864} +{"train_info/time_between_train_steps": 0.004979610443115234, "step": 864} +{"info/global_step": 865, "train_info/time_within_train_step": 28.12311553955078, "step": 865} +{"train_info/time_between_train_steps": 0.015137195587158203, "step": 865} +{"info/global_step": 866, "train_info/time_within_train_step": 27.713617086410522, "step": 866} +{"train_info/time_between_train_steps": 0.005377054214477539, "step": 866} +{"info/global_step": 867, "train_info/time_within_train_step": 27.706388473510742, "step": 867} +{"train_info/time_between_train_steps": 0.005738973617553711, "step": 867} +{"info/global_step": 868, "train_info/time_within_train_step": 27.72342586517334, "step": 868} +{"train_info/time_between_train_steps": 0.001535177230834961, "step": 868} +{"train_info/time_between_train_steps": 3.4963676929473877, "step": 868} +{"info/global_step": 869, "train_info/time_within_train_step": 27.68919014930725, "step": 869} +{"train_info/time_between_train_steps": 0.0048253536224365234, "step": 869} +{"info/global_step": 870, "train_info/time_within_train_step": 27.898721933364868, "step": 870} +{"train_info/time_between_train_steps": 0.005372285842895508, "step": 870} +{"info/global_step": 871, "train_info/time_within_train_step": 28.83330273628235, "step": 871} +{"train_info/time_between_train_steps": 0.0054531097412109375, "step": 871} +{"info/global_step": 872, "train_info/time_within_train_step": 28.075735330581665, "step": 872} +{"train_info/time_between_train_steps": 0.00539398193359375, "step": 872} +{"info/global_step": 873, "train_info/time_within_train_step": 27.711052179336548, "step": 873} +{"train_info/time_between_train_steps": 0.005063295364379883, "step": 873} +{"info/global_step": 874, "train_info/time_within_train_step": 27.86789846420288, "step": 874} +{"train_info/time_between_train_steps": 0.0053369998931884766, "step": 874} +{"info/global_step": 875, "train_info/time_within_train_step": 27.811037302017212, "step": 875} +{"train_info/time_between_train_steps": 0.005094051361083984, "step": 875} +{"info/global_step": 876, "train_info/time_within_train_step": 27.77582311630249, "step": 876} +{"train_info/time_between_train_steps": 0.005152225494384766, "step": 876} +{"info/global_step": 877, "train_info/time_within_train_step": 27.697401523590088, "step": 877} +{"train_info/time_between_train_steps": 0.005255699157714844, "step": 877} +{"info/global_step": 878, "train_info/time_within_train_step": 27.674520254135132, "step": 878} +{"train_info/time_between_train_steps": 0.009647846221923828, "step": 878} +{"info/global_step": 879, "train_info/time_within_train_step": 27.907585382461548, "step": 879} +{"train_info/time_between_train_steps": 0.00508427619934082, "step": 879} +{"info/global_step": 880, "train_info/time_within_train_step": 27.68650722503662, "step": 880} +{"train_info/time_between_train_steps": 0.005213737487792969, "step": 880} +{"info/global_step": 881, "train_info/time_within_train_step": 27.68570852279663, "step": 881} +{"train_info/time_between_train_steps": 0.005051136016845703, "step": 881} +{"info/global_step": 882, "train_info/time_within_train_step": 27.68306303024292, "step": 882} +{"train_info/time_between_train_steps": 0.0061800479888916016, "step": 882} +{"info/global_step": 883, "train_info/time_within_train_step": 27.684018850326538, "step": 883} +{"train_info/time_between_train_steps": 0.0051555633544921875, "step": 883} +{"info/global_step": 884, "train_info/time_within_train_step": 27.68821096420288, "step": 884} +{"train_info/time_between_train_steps": 0.005286693572998047, "step": 884} +{"info/global_step": 885, "train_info/time_within_train_step": 27.689230918884277, "step": 885} +{"train_info/time_between_train_steps": 0.004991054534912109, "step": 885} +{"info/global_step": 886, "train_info/time_within_train_step": 27.734349489212036, "step": 886} +{"train_info/time_between_train_steps": 0.004942655563354492, "step": 886} +{"info/global_step": 887, "train_info/time_within_train_step": 27.706080436706543, "step": 887} +{"train_info/time_between_train_steps": 0.005006551742553711, "step": 887} +{"info/global_step": 888, "train_info/time_within_train_step": 27.684258937835693, "step": 888} +{"train_info/time_between_train_steps": 0.004907846450805664, "step": 888} +{"info/global_step": 889, "train_info/time_within_train_step": 27.70798349380493, "step": 889} +{"train_info/time_between_train_steps": 0.0051877498626708984, "step": 889} +{"info/global_step": 890, "train_info/time_within_train_step": 27.685368537902832, "step": 890} +{"train_info/time_between_train_steps": 0.004994869232177734, "step": 890} +{"info/global_step": 891, "train_info/time_within_train_step": 27.78022313117981, "step": 891} +{"train_info/time_between_train_steps": 0.005023479461669922, "step": 891} +{"info/global_step": 892, "train_info/time_within_train_step": 27.743633270263672, "step": 892} +{"train_info/time_between_train_steps": 0.005236387252807617, "step": 892} +{"info/global_step": 893, "train_info/time_within_train_step": 27.693583488464355, "step": 893} +{"train_info/time_between_train_steps": 0.005113840103149414, "step": 893} +{"info/global_step": 894, "train_info/time_within_train_step": 27.70254397392273, "step": 894} +{"train_info/time_between_train_steps": 0.005496025085449219, "step": 894} +{"info/global_step": 895, "train_info/time_within_train_step": 27.707682847976685, "step": 895} +{"train_info/time_between_train_steps": 0.0054781436920166016, "step": 895} +{"info/global_step": 896, "train_info/time_within_train_step": 27.725488424301147, "step": 896} +{"train_info/time_between_train_steps": 0.0014967918395996094, "step": 896} +{"train_info/time_between_train_steps": 3.2299065589904785, "step": 896} +{"info/global_step": 897, "train_info/time_within_train_step": 27.926234006881714, "step": 897} +{"train_info/time_between_train_steps": 0.004820346832275391, "step": 897} +{"info/global_step": 898, "train_info/time_within_train_step": 27.845701456069946, "step": 898} +{"train_info/time_between_train_steps": 0.004870414733886719, "step": 898} +{"info/global_step": 899, "train_info/time_within_train_step": 27.68794083595276, "step": 899} +{"train_info/time_between_train_steps": 0.006990671157836914, "step": 899} +{"info/global_step": 900, "train_info/time_within_train_step": 27.82821798324585, "step": 900} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740848652, "_runtime": 25649}, "step": 900} +{"logs": {"train/loss": 3.1797, "train/learning_rate": 0.00016666666666666666, "train/epoch": 32.0, "_timestamp": 1740848652, "_runtime": 25649}, "step": 900} +{"train_info/time_between_train_steps": 36.7504825592041, "step": 900} +{"info/global_step": 901, "train_info/time_within_train_step": 27.696646451950073, "step": 901} +{"train_info/time_between_train_steps": 0.005101680755615234, "step": 901} +{"info/global_step": 902, "train_info/time_within_train_step": 27.862414360046387, "step": 902} +{"train_info/time_between_train_steps": 0.005360603332519531, "step": 902} +{"info/global_step": 903, "train_info/time_within_train_step": 27.707786798477173, "step": 903} +{"train_info/time_between_train_steps": 0.005030393600463867, "step": 903} +{"info/global_step": 904, "train_info/time_within_train_step": 27.787480354309082, "step": 904} +{"train_info/time_between_train_steps": 0.0051920413970947266, "step": 904} +{"info/global_step": 905, "train_info/time_within_train_step": 27.729715824127197, "step": 905} +{"train_info/time_between_train_steps": 0.005019187927246094, "step": 905} +{"info/global_step": 906, "train_info/time_within_train_step": 27.838914155960083, "step": 906} +{"train_info/time_between_train_steps": 0.004898548126220703, "step": 906} +{"info/global_step": 907, "train_info/time_within_train_step": 28.04639220237732, "step": 907} +{"train_info/time_between_train_steps": 0.01497507095336914, "step": 907} +{"info/global_step": 908, "train_info/time_within_train_step": 27.793696403503418, "step": 908} +{"train_info/time_between_train_steps": 0.005037546157836914, "step": 908} +{"info/global_step": 909, "train_info/time_within_train_step": 27.698598384857178, "step": 909} +{"train_info/time_between_train_steps": 0.004995584487915039, "step": 909} +{"info/global_step": 910, "train_info/time_within_train_step": 27.69609761238098, "step": 910} +{"train_info/time_between_train_steps": 0.0049779415130615234, "step": 910} +{"info/global_step": 911, "train_info/time_within_train_step": 27.68033242225647, "step": 911} +{"train_info/time_between_train_steps": 0.004877805709838867, "step": 911} +{"info/global_step": 912, "train_info/time_within_train_step": 27.687843322753906, "step": 912} +{"train_info/time_between_train_steps": 0.0049266815185546875, "step": 912} +{"info/global_step": 913, "train_info/time_within_train_step": 27.695974826812744, "step": 913} +{"train_info/time_between_train_steps": 0.00497126579284668, "step": 913} +{"info/global_step": 914, "train_info/time_within_train_step": 27.693917989730835, "step": 914} +{"train_info/time_between_train_steps": 0.004924297332763672, "step": 914} +{"info/global_step": 915, "train_info/time_within_train_step": 27.728986740112305, "step": 915} +{"train_info/time_between_train_steps": 0.0049550533294677734, "step": 915} +{"info/global_step": 916, "train_info/time_within_train_step": 27.68251609802246, "step": 916} +{"train_info/time_between_train_steps": 0.004915952682495117, "step": 916} +{"info/global_step": 917, "train_info/time_within_train_step": 27.687108039855957, "step": 917} +{"train_info/time_between_train_steps": 0.00992131233215332, "step": 917} +{"info/global_step": 918, "train_info/time_within_train_step": 27.688096284866333, "step": 918} +{"train_info/time_between_train_steps": 0.0049326419830322266, "step": 918} +{"info/global_step": 919, "train_info/time_within_train_step": 27.7058367729187, "step": 919} +{"train_info/time_between_train_steps": 0.0050394535064697266, "step": 919} +{"info/global_step": 920, "train_info/time_within_train_step": 27.7077693939209, "step": 920} +{"train_info/time_between_train_steps": 0.0051572322845458984, "step": 920} +{"info/global_step": 921, "train_info/time_within_train_step": 27.74327039718628, "step": 921} +{"train_info/time_between_train_steps": 0.005089998245239258, "step": 921} +{"info/global_step": 922, "train_info/time_within_train_step": 27.794602394104004, "step": 922} +{"train_info/time_between_train_steps": 0.005474567413330078, "step": 922} +{"info/global_step": 923, "train_info/time_within_train_step": 27.707391262054443, "step": 923} +{"train_info/time_between_train_steps": 0.0057010650634765625, "step": 923} +{"info/global_step": 924, "train_info/time_within_train_step": 27.72404980659485, "step": 924} +{"train_info/time_between_train_steps": 0.0015201568603515625, "step": 924} +{"train_info/time_between_train_steps": 3.1931982040405273, "step": 924} +{"info/global_step": 925, "train_info/time_within_train_step": 27.731209754943848, "step": 925} +{"train_info/time_between_train_steps": 0.004831552505493164, "step": 925} +{"info/global_step": 926, "train_info/time_within_train_step": 27.806808710098267, "step": 926} +{"train_info/time_between_train_steps": 0.004918575286865234, "step": 926} +{"info/global_step": 927, "train_info/time_within_train_step": 27.707465887069702, "step": 927} +{"train_info/time_between_train_steps": 0.005331993103027344, "step": 927} +{"info/global_step": 928, "train_info/time_within_train_step": 27.83900785446167, "step": 928} +{"train_info/time_between_train_steps": 0.005187511444091797, "step": 928} +{"info/global_step": 929, "train_info/time_within_train_step": 27.692662477493286, "step": 929} +{"train_info/time_between_train_steps": 0.005166530609130859, "step": 929} +{"info/global_step": 930, "train_info/time_within_train_step": 27.86037540435791, "step": 930} +{"train_info/time_between_train_steps": 0.005234479904174805, "step": 930} +{"info/global_step": 931, "train_info/time_within_train_step": 27.70640230178833, "step": 931} +{"train_info/time_between_train_steps": 0.0051076412200927734, "step": 931} +{"info/global_step": 932, "train_info/time_within_train_step": 27.82513427734375, "step": 932} +{"train_info/time_between_train_steps": 0.00518345832824707, "step": 932} +{"info/global_step": 933, "train_info/time_within_train_step": 27.69766092300415, "step": 933} +{"train_info/time_between_train_steps": 0.004933595657348633, "step": 933} +{"info/global_step": 934, "train_info/time_within_train_step": 27.683518409729004, "step": 934} +{"train_info/time_between_train_steps": 0.004869937896728516, "step": 934} +{"info/global_step": 935, "train_info/time_within_train_step": 27.680272817611694, "step": 935} +{"train_info/time_between_train_steps": 0.005054473876953125, "step": 935} +{"info/global_step": 936, "train_info/time_within_train_step": 27.733341693878174, "step": 936} +{"train_info/time_between_train_steps": 0.004926919937133789, "step": 936} +{"info/global_step": 937, "train_info/time_within_train_step": 27.80082631111145, "step": 937} +{"train_info/time_between_train_steps": 0.004988193511962891, "step": 937} +{"info/global_step": 938, "train_info/time_within_train_step": 27.707493543624878, "step": 938} +{"train_info/time_between_train_steps": 0.0050449371337890625, "step": 938} +{"info/global_step": 939, "train_info/time_within_train_step": 27.68591547012329, "step": 939} +{"train_info/time_between_train_steps": 0.00487208366394043, "step": 939} +{"info/global_step": 940, "train_info/time_within_train_step": 27.69546127319336, "step": 940} +{"train_info/time_between_train_steps": 0.004995584487915039, "step": 940} +{"info/global_step": 941, "train_info/time_within_train_step": 27.74024724960327, "step": 941} +{"train_info/time_between_train_steps": 0.004991292953491211, "step": 941} +{"info/global_step": 942, "train_info/time_within_train_step": 27.697037935256958, "step": 942} +{"train_info/time_between_train_steps": 0.006367683410644531, "step": 942} +{"info/global_step": 943, "train_info/time_within_train_step": 27.689974784851074, "step": 943} +{"train_info/time_between_train_steps": 0.0050237178802490234, "step": 943} +{"info/global_step": 944, "train_info/time_within_train_step": 27.686843872070312, "step": 944} +{"train_info/time_between_train_steps": 0.004937887191772461, "step": 944} +{"info/global_step": 945, "train_info/time_within_train_step": 27.687803268432617, "step": 945} +{"train_info/time_between_train_steps": 0.005457162857055664, "step": 945} +{"info/global_step": 946, "train_info/time_within_train_step": 27.69347095489502, "step": 946} +{"train_info/time_between_train_steps": 0.005015373229980469, "step": 946} +{"info/global_step": 947, "train_info/time_within_train_step": 27.723467350006104, "step": 947} +{"train_info/time_between_train_steps": 0.004984140396118164, "step": 947} +{"info/global_step": 948, "train_info/time_within_train_step": 27.693546533584595, "step": 948} +{"train_info/time_between_train_steps": 0.005261898040771484, "step": 948} +{"info/global_step": 949, "train_info/time_within_train_step": 27.709508180618286, "step": 949} +{"train_info/time_between_train_steps": 0.0051364898681640625, "step": 949} +{"info/global_step": 950, "train_info/time_within_train_step": 27.712969303131104, "step": 950} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740850080, "_runtime": 27077}, "step": 950} +{"logs": {"train/loss": 3.1315, "train/learning_rate": 0.0001388888888888889, "train/epoch": 33.02, "_timestamp": 1740850080, "_runtime": 27077}, "step": 950} +{"train_info/time_between_train_steps": 0.059062957763671875, "step": 950} +{"info/global_step": 951, "train_info/time_within_train_step": 27.747575283050537, "step": 951} +{"train_info/time_between_train_steps": 0.005606889724731445, "step": 951} +{"info/global_step": 952, "train_info/time_within_train_step": 27.732807636260986, "step": 952} +{"train_info/time_between_train_steps": 0.0016493797302246094, "step": 952} +{"train_info/time_between_train_steps": 3.4207675457000732, "step": 952} +{"info/global_step": 953, "train_info/time_within_train_step": 27.782695770263672, "step": 953} +{"train_info/time_between_train_steps": 0.004809379577636719, "step": 953} +{"info/global_step": 954, "train_info/time_within_train_step": 27.806196451187134, "step": 954} +{"train_info/time_between_train_steps": 0.004947662353515625, "step": 954} +{"info/global_step": 955, "train_info/time_within_train_step": 27.697638034820557, "step": 955} +{"train_info/time_between_train_steps": 0.005120038986206055, "step": 955} +{"info/global_step": 956, "train_info/time_within_train_step": 27.834882974624634, "step": 956} +{"train_info/time_between_train_steps": 0.005203962326049805, "step": 956} +{"info/global_step": 957, "train_info/time_within_train_step": 27.708462715148926, "step": 957} +{"train_info/time_between_train_steps": 0.005115032196044922, "step": 957} +{"info/global_step": 958, "train_info/time_within_train_step": 27.91404891014099, "step": 958} +{"train_info/time_between_train_steps": 0.0052700042724609375, "step": 958} +{"info/global_step": 959, "train_info/time_within_train_step": 27.72242546081543, "step": 959} +{"train_info/time_between_train_steps": 0.00516819953918457, "step": 959} +{"info/global_step": 960, "train_info/time_within_train_step": 27.828376531600952, "step": 960} +{"train_info/time_between_train_steps": 0.005114555358886719, "step": 960} +{"info/global_step": 961, "train_info/time_within_train_step": 27.70983600616455, "step": 961} +{"train_info/time_between_train_steps": 0.004974842071533203, "step": 961} +{"info/global_step": 962, "train_info/time_within_train_step": 27.682204961776733, "step": 962} +{"train_info/time_between_train_steps": 0.004896402359008789, "step": 962} +{"info/global_step": 963, "train_info/time_within_train_step": 27.702895164489746, "step": 963} +{"train_info/time_between_train_steps": 0.00503849983215332, "step": 963} +{"info/global_step": 964, "train_info/time_within_train_step": 27.73995018005371, "step": 964} +{"train_info/time_between_train_steps": 0.00488591194152832, "step": 964} +{"info/global_step": 965, "train_info/time_within_train_step": 27.688376903533936, "step": 965} +{"train_info/time_between_train_steps": 0.004965066909790039, "step": 965} +{"info/global_step": 966, "train_info/time_within_train_step": 27.691972970962524, "step": 966} +{"train_info/time_between_train_steps": 0.004977226257324219, "step": 966} +{"info/global_step": 967, "train_info/time_within_train_step": 27.69084644317627, "step": 967} +{"train_info/time_between_train_steps": 0.0049076080322265625, "step": 967} +{"info/global_step": 968, "train_info/time_within_train_step": 27.688697814941406, "step": 968} +{"train_info/time_between_train_steps": 0.004979848861694336, "step": 968} +{"info/global_step": 969, "train_info/time_within_train_step": 27.784724712371826, "step": 969} +{"train_info/time_between_train_steps": 0.005019187927246094, "step": 969} +{"info/global_step": 970, "train_info/time_within_train_step": 27.695117712020874, "step": 970} +{"train_info/time_between_train_steps": 0.004923582077026367, "step": 970} +{"info/global_step": 971, "train_info/time_within_train_step": 27.735679149627686, "step": 971} +{"train_info/time_between_train_steps": 0.005081892013549805, "step": 971} +{"info/global_step": 972, "train_info/time_within_train_step": 27.691805839538574, "step": 972} +{"train_info/time_between_train_steps": 0.004923820495605469, "step": 972} +{"info/global_step": 973, "train_info/time_within_train_step": 27.690202713012695, "step": 973} +{"train_info/time_between_train_steps": 0.00521540641784668, "step": 973} +{"info/global_step": 974, "train_info/time_within_train_step": 27.72216010093689, "step": 974} +{"train_info/time_between_train_steps": 0.005124807357788086, "step": 974} +{"info/global_step": 975, "train_info/time_within_train_step": 28.554298162460327, "step": 975} +{"train_info/time_between_train_steps": 0.005293607711791992, "step": 975} +{"info/global_step": 976, "train_info/time_within_train_step": 28.27941083908081, "step": 976} +{"train_info/time_between_train_steps": 0.005506992340087891, "step": 976} +{"info/global_step": 977, "train_info/time_within_train_step": 27.72171425819397, "step": 977} +{"train_info/time_between_train_steps": 0.005202054977416992, "step": 977} +{"info/global_step": 978, "train_info/time_within_train_step": 27.717140436172485, "step": 978} +{"train_info/time_between_train_steps": 0.005282402038574219, "step": 978} +{"info/global_step": 979, "train_info/time_within_train_step": 27.714349269866943, "step": 979} +{"train_info/time_between_train_steps": 0.0053501129150390625, "step": 979} +{"info/global_step": 980, "train_info/time_within_train_step": 27.71910810470581, "step": 980} +{"train_info/time_between_train_steps": 0.0024755001068115234, "step": 980} +{"train_info/time_between_train_steps": 3.564074754714966, "step": 980} +{"info/global_step": 981, "train_info/time_within_train_step": 27.7419593334198, "step": 981} +{"train_info/time_between_train_steps": 0.005309581756591797, "step": 981} +{"info/global_step": 982, "train_info/time_within_train_step": 27.831403017044067, "step": 982} +{"train_info/time_between_train_steps": 0.005192279815673828, "step": 982} +{"info/global_step": 983, "train_info/time_within_train_step": 27.691007375717163, "step": 983} +{"train_info/time_between_train_steps": 0.0050203800201416016, "step": 983} +{"info/global_step": 984, "train_info/time_within_train_step": 27.90992307662964, "step": 984} +{"train_info/time_between_train_steps": 0.0051386356353759766, "step": 984} +{"info/global_step": 985, "train_info/time_within_train_step": 27.711562156677246, "step": 985} +{"train_info/time_between_train_steps": 0.005242586135864258, "step": 985} +{"info/global_step": 986, "train_info/time_within_train_step": 27.86435580253601, "step": 986} +{"train_info/time_between_train_steps": 0.0051081180572509766, "step": 986} +{"info/global_step": 987, "train_info/time_within_train_step": 27.70202112197876, "step": 987} +{"train_info/time_between_train_steps": 0.005223274230957031, "step": 987} +{"info/global_step": 988, "train_info/time_within_train_step": 27.791363954544067, "step": 988} +{"train_info/time_between_train_steps": 0.005166530609130859, "step": 988} +{"info/global_step": 989, "train_info/time_within_train_step": 27.69692635536194, "step": 989} +{"train_info/time_between_train_steps": 0.004866600036621094, "step": 989} +{"info/global_step": 990, "train_info/time_within_train_step": 27.68099880218506, "step": 990} +{"train_info/time_between_train_steps": 0.005071163177490234, "step": 990} +{"info/global_step": 991, "train_info/time_within_train_step": 27.6854989528656, "step": 991} +{"train_info/time_between_train_steps": 0.004929065704345703, "step": 991} +{"info/global_step": 992, "train_info/time_within_train_step": 27.699721813201904, "step": 992} +{"train_info/time_between_train_steps": 0.005004405975341797, "step": 992} +{"info/global_step": 993, "train_info/time_within_train_step": 27.703612327575684, "step": 993} +{"train_info/time_between_train_steps": 0.005003929138183594, "step": 993} +{"info/global_step": 994, "train_info/time_within_train_step": 27.70893883705139, "step": 994} +{"train_info/time_between_train_steps": 0.0051763057708740234, "step": 994} +{"info/global_step": 995, "train_info/time_within_train_step": 27.68734908103943, "step": 995} +{"train_info/time_between_train_steps": 0.010283470153808594, "step": 995} +{"info/global_step": 996, "train_info/time_within_train_step": 27.748680114746094, "step": 996} +{"train_info/time_between_train_steps": 0.004876375198364258, "step": 996} +{"info/global_step": 997, "train_info/time_within_train_step": 27.699695348739624, "step": 997} +{"train_info/time_between_train_steps": 0.005532264709472656, "step": 997} +{"info/global_step": 998, "train_info/time_within_train_step": 27.702770471572876, "step": 998} +{"train_info/time_between_train_steps": 0.004969358444213867, "step": 998} +{"info/global_step": 999, "train_info/time_within_train_step": 27.733907222747803, "step": 999} +{"train_info/time_between_train_steps": 0.004960298538208008, "step": 999} +{"info/global_step": 1000, "train_info/time_within_train_step": 27.80186367034912, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22648.0, "train_info/memory_max_reserved": 22648.0, "_timestamp": 1740851478, "_runtime": 28475}, "step": 1000} +{"logs": {"train/loss": 3.0876, "train/learning_rate": 0.00011111111111111109, "train/epoch": 35.02, "_timestamp": 1740851478, "_runtime": 28475}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27358.0, "train_info/memory_max_reserved": 27358.0, "_timestamp": 1740851482, "_runtime": 28479}, "step": 1000} +{"logs": {"eval/loss": 4.186110973358154, "eval/runtime": 4.1625, "eval/samples_per_second": 48.769, "eval/steps_per_second": 3.123, "train/epoch": 35.02, "_timestamp": 1740851482, "_runtime": 28479}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27358.0, "train_info/memory_max_reserved": 27358.0, "_timestamp": 1740851482, "_runtime": 28479}, "step": 1000} +{"logs": {"eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 4.186110973358154, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 65.76652520256718, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 4.1625, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 48.769, "train/epoch": 35.02, "_timestamp": 1740851482, "_runtime": 28479}, "step": 1000} +{"train_info/time_between_train_steps": 44.458219051361084, "step": 1000} +{"info/global_step": 1001, "train_info/time_within_train_step": 27.682209014892578, "step": 1001} +{"train_info/time_between_train_steps": 0.00477910041809082, "step": 1001} +{"info/global_step": 1002, "train_info/time_within_train_step": 27.688666820526123, "step": 1002} +{"train_info/time_between_train_steps": 0.005029439926147461, "step": 1002} +{"info/global_step": 1003, "train_info/time_within_train_step": 27.68932843208313, "step": 1003} +{"train_info/time_between_train_steps": 0.005036830902099609, "step": 1003} +{"info/global_step": 1004, "train_info/time_within_train_step": 27.697763681411743, "step": 1004} +{"train_info/time_between_train_steps": 0.005048990249633789, "step": 1004} +{"info/global_step": 1005, "train_info/time_within_train_step": 27.699098587036133, "step": 1005} +{"train_info/time_between_train_steps": 0.00533747673034668, "step": 1005} +{"info/global_step": 1006, "train_info/time_within_train_step": 27.700459480285645, "step": 1006} +{"train_info/time_between_train_steps": 0.0051479339599609375, "step": 1006} +{"info/global_step": 1007, "train_info/time_within_train_step": 27.71489119529724, "step": 1007} +{"train_info/time_between_train_steps": 0.00582122802734375, "step": 1007} +{"info/global_step": 1008, "train_info/time_within_train_step": 27.723754405975342, "step": 1008} +{"train_info/time_between_train_steps": 0.0015871524810791016, "step": 1008} +{"train_info/time_between_train_steps": 3.201679229736328, "step": 1008} +{"info/global_step": 1009, "train_info/time_within_train_step": 27.72006845474243, "step": 1009} +{"train_info/time_between_train_steps": 0.004912853240966797, "step": 1009} +{"info/global_step": 1010, "train_info/time_within_train_step": 27.89464783668518, "step": 1010} +{"train_info/time_between_train_steps": 0.0051441192626953125, "step": 1010} +{"info/global_step": 1011, "train_info/time_within_train_step": 27.69727063179016, "step": 1011} +{"train_info/time_between_train_steps": 0.0072362422943115234, "step": 1011} +{"info/global_step": 1012, "train_info/time_within_train_step": 27.81343173980713, "step": 1012} +{"train_info/time_between_train_steps": 0.0051326751708984375, "step": 1012} +{"info/global_step": 1013, "train_info/time_within_train_step": 27.72498059272766, "step": 1013} +{"train_info/time_between_train_steps": 0.0076618194580078125, "step": 1013} +{"info/global_step": 1014, "train_info/time_within_train_step": 27.87170124053955, "step": 1014} +{"train_info/time_between_train_steps": 0.007405281066894531, "step": 1014} +{"info/global_step": 1015, "train_info/time_within_train_step": 27.700360536575317, "step": 1015} +{"train_info/time_between_train_steps": 0.005311727523803711, "step": 1015} +{"info/global_step": 1016, "train_info/time_within_train_step": 27.899414777755737, "step": 1016} +{"train_info/time_between_train_steps": 0.005490303039550781, "step": 1016} +{"info/global_step": 1017, "train_info/time_within_train_step": 27.7167010307312, "step": 1017} +{"train_info/time_between_train_steps": 0.004908084869384766, "step": 1017} +{"info/global_step": 1018, "train_info/time_within_train_step": 27.681734085083008, "step": 1018} +{"train_info/time_between_train_steps": 0.004971742630004883, "step": 1018} +{"info/global_step": 1019, "train_info/time_within_train_step": 27.680609703063965, "step": 1019} +{"train_info/time_between_train_steps": 0.005007028579711914, "step": 1019} +{"info/global_step": 1020, "train_info/time_within_train_step": 27.684012413024902, "step": 1020} +{"train_info/time_between_train_steps": 0.005164623260498047, "step": 1020} +{"info/global_step": 1021, "train_info/time_within_train_step": 27.6885986328125, "step": 1021} +{"train_info/time_between_train_steps": 0.004931926727294922, "step": 1021} +{"info/global_step": 1022, "train_info/time_within_train_step": 27.730257272720337, "step": 1022} +{"train_info/time_between_train_steps": 0.004884958267211914, "step": 1022} +{"info/global_step": 1023, "train_info/time_within_train_step": 27.685059785842896, "step": 1023} +{"train_info/time_between_train_steps": 0.005010843276977539, "step": 1023} +{"info/global_step": 1024, "train_info/time_within_train_step": 27.689316511154175, "step": 1024} +{"train_info/time_between_train_steps": 0.0049059391021728516, "step": 1024} +{"info/global_step": 1025, "train_info/time_within_train_step": 27.682685136795044, "step": 1025} +{"train_info/time_between_train_steps": 0.005030155181884766, "step": 1025} +{"info/global_step": 1026, "train_info/time_within_train_step": 27.686750888824463, "step": 1026} +{"train_info/time_between_train_steps": 0.00496220588684082, "step": 1026} +{"info/global_step": 1027, "train_info/time_within_train_step": 27.689253330230713, "step": 1027} +{"train_info/time_between_train_steps": 0.00490260124206543, "step": 1027} +{"info/global_step": 1028, "train_info/time_within_train_step": 27.688915252685547, "step": 1028} +{"train_info/time_between_train_steps": 0.005047798156738281, "step": 1028} +{"info/global_step": 1029, "train_info/time_within_train_step": 27.68831729888916, "step": 1029} +{"train_info/time_between_train_steps": 0.004980802536010742, "step": 1029} +{"info/global_step": 1030, "train_info/time_within_train_step": 27.722099781036377, "step": 1030} +{"train_info/time_between_train_steps": 0.0052945613861083984, "step": 1030} +{"info/global_step": 1031, "train_info/time_within_train_step": 27.782445192337036, "step": 1031} +{"train_info/time_between_train_steps": 0.00507807731628418, "step": 1031} +{"info/global_step": 1032, "train_info/time_within_train_step": 27.69851803779602, "step": 1032} +{"train_info/time_between_train_steps": 0.006116390228271484, "step": 1032} +{"info/global_step": 1033, "train_info/time_within_train_step": 27.70440125465393, "step": 1033} +{"train_info/time_between_train_steps": 0.0053446292877197266, "step": 1033} +{"info/global_step": 1034, "train_info/time_within_train_step": 27.7106511592865, "step": 1034} +{"train_info/time_between_train_steps": 0.005286455154418945, "step": 1034} +{"info/global_step": 1035, "train_info/time_within_train_step": 27.71437644958496, "step": 1035} +{"train_info/time_between_train_steps": 0.005686521530151367, "step": 1035} +{"info/global_step": 1036, "train_info/time_within_train_step": 27.720603227615356, "step": 1036} +{"train_info/time_between_train_steps": 0.0014905929565429688, "step": 1036} +{"train_info/time_between_train_steps": 3.3821921348571777, "step": 1036} +{"info/global_step": 1037, "train_info/time_within_train_step": 27.688034772872925, "step": 1037} +{"train_info/time_between_train_steps": 0.004883527755737305, "step": 1037} +{"info/global_step": 1038, "train_info/time_within_train_step": 27.895187377929688, "step": 1038} +{"train_info/time_between_train_steps": 0.005175590515136719, "step": 1038} +{"info/global_step": 1039, "train_info/time_within_train_step": 27.71996545791626, "step": 1039} +{"train_info/time_between_train_steps": 0.00524139404296875, "step": 1039} +{"info/global_step": 1040, "train_info/time_within_train_step": 27.818210124969482, "step": 1040} +{"train_info/time_between_train_steps": 0.005188703536987305, "step": 1040} +{"info/global_step": 1041, "train_info/time_within_train_step": 27.84148621559143, "step": 1041} +{"train_info/time_between_train_steps": 0.005277872085571289, "step": 1041} +{"info/global_step": 1042, "train_info/time_within_train_step": 27.84837508201599, "step": 1042} +{"train_info/time_between_train_steps": 0.005153179168701172, "step": 1042} +{"info/global_step": 1043, "train_info/time_within_train_step": 27.70326805114746, "step": 1043} +{"train_info/time_between_train_steps": 0.005298137664794922, "step": 1043} +{"info/global_step": 1044, "train_info/time_within_train_step": 27.80687117576599, "step": 1044} +{"train_info/time_between_train_steps": 0.005156993865966797, "step": 1044} +{"info/global_step": 1045, "train_info/time_within_train_step": 27.805809497833252, "step": 1045} +{"train_info/time_between_train_steps": 0.004723787307739258, "step": 1045} +{"info/global_step": 1046, "train_info/time_within_train_step": 27.79381537437439, "step": 1046} +{"train_info/time_between_train_steps": 0.009708166122436523, "step": 1046} +{"info/global_step": 1047, "train_info/time_within_train_step": 27.86529564857483, "step": 1047} +{"train_info/time_between_train_steps": 0.0049800872802734375, "step": 1047} +{"info/global_step": 1048, "train_info/time_within_train_step": 27.792426586151123, "step": 1048} +{"train_info/time_between_train_steps": 0.009690523147583008, "step": 1048} +{"info/global_step": 1049, "train_info/time_within_train_step": 27.95285964012146, "step": 1049} +{"train_info/time_between_train_steps": 0.014765262603759766, "step": 1049} +{"info/global_step": 1050, "train_info/time_within_train_step": 28.076329469680786, "step": 1050} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27358.0, "train_info/memory_max_reserved": 27358.0, "_timestamp": 1740852918, "_runtime": 29915}, "step": 1050} +{"logs": {"train/loss": 3.0482, "train/learning_rate": 8.333333333333333e-05, "train/epoch": 37.01, "_timestamp": 1740852918, "_runtime": 29915}, "step": 1050} +{"train_info/time_between_train_steps": 0.036214590072631836, "step": 1050} +{"info/global_step": 1051, "train_info/time_within_train_step": 28.069995641708374, "step": 1051} +{"train_info/time_between_train_steps": 0.014528751373291016, "step": 1051} +{"info/global_step": 1052, "train_info/time_within_train_step": 28.147722721099854, "step": 1052} +{"train_info/time_between_train_steps": 0.009772062301635742, "step": 1052} +{"info/global_step": 1053, "train_info/time_within_train_step": 27.786362648010254, "step": 1053} +{"train_info/time_between_train_steps": 0.014534950256347656, "step": 1053} +{"info/global_step": 1054, "train_info/time_within_train_step": 27.758628368377686, "step": 1054} +{"train_info/time_between_train_steps": 0.005045890808105469, "step": 1054} +{"info/global_step": 1055, "train_info/time_within_train_step": 27.70936417579651, "step": 1055} +{"train_info/time_between_train_steps": 0.004974842071533203, "step": 1055} +{"info/global_step": 1056, "train_info/time_within_train_step": 27.691473245620728, "step": 1056} +{"train_info/time_between_train_steps": 0.005099296569824219, "step": 1056} +{"info/global_step": 1057, "train_info/time_within_train_step": 27.694730520248413, "step": 1057} +{"train_info/time_between_train_steps": 0.00516510009765625, "step": 1057} +{"info/global_step": 1058, "train_info/time_within_train_step": 27.69523334503174, "step": 1058} +{"train_info/time_between_train_steps": 0.004949092864990234, "step": 1058} +{"info/global_step": 1059, "train_info/time_within_train_step": 27.700270414352417, "step": 1059} +{"train_info/time_between_train_steps": 0.005108833312988281, "step": 1059} +{"info/global_step": 1060, "train_info/time_within_train_step": 27.73585796356201, "step": 1060} +{"train_info/time_between_train_steps": 0.005490303039550781, "step": 1060} +{"info/global_step": 1061, "train_info/time_within_train_step": 27.700231313705444, "step": 1061} +{"train_info/time_between_train_steps": 0.005438804626464844, "step": 1061} +{"info/global_step": 1062, "train_info/time_within_train_step": 27.80196762084961, "step": 1062} +{"train_info/time_between_train_steps": 0.005366802215576172, "step": 1062} +{"info/global_step": 1063, "train_info/time_within_train_step": 27.712748765945435, "step": 1063} +{"train_info/time_between_train_steps": 0.0056874752044677734, "step": 1063} +{"info/global_step": 1064, "train_info/time_within_train_step": 27.726613998413086, "step": 1064} +{"train_info/time_between_train_steps": 0.001569509506225586, "step": 1064} +{"train_info/time_between_train_steps": 3.5697879791259766, "step": 1064} +{"info/global_step": 1065, "train_info/time_within_train_step": 27.69693660736084, "step": 1065} +{"train_info/time_between_train_steps": 0.0048105716705322266, "step": 1065} +{"info/global_step": 1066, "train_info/time_within_train_step": 27.84015464782715, "step": 1066} +{"train_info/time_between_train_steps": 0.005271196365356445, "step": 1066} +{"info/global_step": 1067, "train_info/time_within_train_step": 27.721667528152466, "step": 1067} +{"train_info/time_between_train_steps": 0.005237579345703125, "step": 1067} +{"info/global_step": 1068, "train_info/time_within_train_step": 27.86017632484436, "step": 1068} +{"train_info/time_between_train_steps": 0.005182743072509766, "step": 1068} +{"info/global_step": 1069, "train_info/time_within_train_step": 27.717448711395264, "step": 1069} +{"train_info/time_between_train_steps": 0.005293130874633789, "step": 1069} +{"info/global_step": 1070, "train_info/time_within_train_step": 28.072216272354126, "step": 1070} +{"train_info/time_between_train_steps": 0.005232095718383789, "step": 1070} +{"info/global_step": 1071, "train_info/time_within_train_step": 27.697765588760376, "step": 1071} +{"train_info/time_between_train_steps": 0.005047321319580078, "step": 1071} +{"info/global_step": 1072, "train_info/time_within_train_step": 27.8132963180542, "step": 1072} +{"train_info/time_between_train_steps": 0.005236625671386719, "step": 1072} +{"info/global_step": 1073, "train_info/time_within_train_step": 27.696688652038574, "step": 1073} +{"train_info/time_between_train_steps": 0.004835367202758789, "step": 1073} +{"info/global_step": 1074, "train_info/time_within_train_step": 27.68324851989746, "step": 1074} +{"train_info/time_between_train_steps": 0.0049130916595458984, "step": 1074} +{"info/global_step": 1075, "train_info/time_within_train_step": 27.687253952026367, "step": 1075} +{"train_info/time_between_train_steps": 0.0050048828125, "step": 1075} +{"info/global_step": 1076, "train_info/time_within_train_step": 27.6890549659729, "step": 1076} +{"train_info/time_between_train_steps": 0.004887580871582031, "step": 1076} +{"info/global_step": 1077, "train_info/time_within_train_step": 27.715665102005005, "step": 1077} +{"train_info/time_between_train_steps": 0.005040407180786133, "step": 1077} +{"info/global_step": 1078, "train_info/time_within_train_step": 27.80728507041931, "step": 1078} +{"train_info/time_between_train_steps": 0.005106210708618164, "step": 1078} +{"info/global_step": 1079, "train_info/time_within_train_step": 28.77679753303528, "step": 1079} +{"train_info/time_between_train_steps": 0.0053708553314208984, "step": 1079} +{"info/global_step": 1080, "train_info/time_within_train_step": 28.126327991485596, "step": 1080} +{"train_info/time_between_train_steps": 0.005132198333740234, "step": 1080} +{"info/global_step": 1081, "train_info/time_within_train_step": 27.71656084060669, "step": 1081} +{"train_info/time_between_train_steps": 0.005047321319580078, "step": 1081} +{"info/global_step": 1082, "train_info/time_within_train_step": 27.7035870552063, "step": 1082} +{"train_info/time_between_train_steps": 0.005096912384033203, "step": 1082} +{"info/global_step": 1083, "train_info/time_within_train_step": 27.72219204902649, "step": 1083} +{"train_info/time_between_train_steps": 0.004885673522949219, "step": 1083} +{"info/global_step": 1084, "train_info/time_within_train_step": 27.703861474990845, "step": 1084} +{"train_info/time_between_train_steps": 0.005156040191650391, "step": 1084} +{"info/global_step": 1085, "train_info/time_within_train_step": 27.716259002685547, "step": 1085} +{"train_info/time_between_train_steps": 0.005019426345825195, "step": 1085} +{"info/global_step": 1086, "train_info/time_within_train_step": 27.703295469284058, "step": 1086} +{"train_info/time_between_train_steps": 0.005057573318481445, "step": 1086} +{"info/global_step": 1087, "train_info/time_within_train_step": 27.701242923736572, "step": 1087} +{"train_info/time_between_train_steps": 0.005071878433227539, "step": 1087} +{"info/global_step": 1088, "train_info/time_within_train_step": 27.723514795303345, "step": 1088} +{"train_info/time_between_train_steps": 0.005185842514038086, "step": 1088} +{"info/global_step": 1089, "train_info/time_within_train_step": 27.704746961593628, "step": 1089} +{"train_info/time_between_train_steps": 0.005425453186035156, "step": 1089} +{"info/global_step": 1090, "train_info/time_within_train_step": 27.726314544677734, "step": 1090} +{"train_info/time_between_train_steps": 0.005303621292114258, "step": 1090} +{"info/global_step": 1091, "train_info/time_within_train_step": 27.7271568775177, "step": 1091} +{"train_info/time_between_train_steps": 0.005582094192504883, "step": 1091} +{"info/global_step": 1092, "train_info/time_within_train_step": 27.750995874404907, "step": 1092} +{"train_info/time_between_train_steps": 0.0015506744384765625, "step": 1092} +{"train_info/time_between_train_steps": 3.162205696105957, "step": 1092} +{"info/global_step": 1093, "train_info/time_within_train_step": 27.745746612548828, "step": 1093} +{"train_info/time_between_train_steps": 0.014979839324951172, "step": 1093} +{"info/global_step": 1094, "train_info/time_within_train_step": 27.974875450134277, "step": 1094} +{"train_info/time_between_train_steps": 0.0052835941314697266, "step": 1094} +{"info/global_step": 1095, "train_info/time_within_train_step": 27.721551418304443, "step": 1095} +{"train_info/time_between_train_steps": 0.005198240280151367, "step": 1095} +{"info/global_step": 1096, "train_info/time_within_train_step": 27.841551303863525, "step": 1096} +{"train_info/time_between_train_steps": 0.005173921585083008, "step": 1096} +{"info/global_step": 1097, "train_info/time_within_train_step": 27.71935248374939, "step": 1097} +{"train_info/time_between_train_steps": 0.005525350570678711, "step": 1097} +{"info/global_step": 1098, "train_info/time_within_train_step": 27.8533718585968, "step": 1098} +{"train_info/time_between_train_steps": 0.0052492618560791016, "step": 1098} +{"info/global_step": 1099, "train_info/time_within_train_step": 27.723833799362183, "step": 1099} +{"train_info/time_between_train_steps": 0.00526881217956543, "step": 1099} +{"info/global_step": 1100, "train_info/time_within_train_step": 27.822957515716553, "step": 1100} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27358.0, "train_info/memory_max_reserved": 27358.0, "_timestamp": 1740854317, "_runtime": 31314}, "step": 1100} +{"logs": {"train/loss": 3.013, "train/learning_rate": 5.5555555555555545e-05, "train/epoch": 39.01, "_timestamp": 1740854317, "_runtime": 31314}, "step": 1100} +{"train_info/time_between_train_steps": 41.3849093914032, "step": 1100} +{"info/global_step": 1101, "train_info/time_within_train_step": 27.740947484970093, "step": 1101} +{"train_info/time_between_train_steps": 0.009146928787231445, "step": 1101} +{"info/global_step": 1102, "train_info/time_within_train_step": 27.689859867095947, "step": 1102} +{"train_info/time_between_train_steps": 0.00500798225402832, "step": 1102} +{"info/global_step": 1103, "train_info/time_within_train_step": 27.685712337493896, "step": 1103} +{"train_info/time_between_train_steps": 0.004959583282470703, "step": 1103} +{"info/global_step": 1104, "train_info/time_within_train_step": 27.682956218719482, "step": 1104} +{"train_info/time_between_train_steps": 0.004981279373168945, "step": 1104} +{"info/global_step": 1105, "train_info/time_within_train_step": 27.68664836883545, "step": 1105} +{"train_info/time_between_train_steps": 0.005033969879150391, "step": 1105} +{"info/global_step": 1106, "train_info/time_within_train_step": 27.69723677635193, "step": 1106} +{"train_info/time_between_train_steps": 0.0049343109130859375, "step": 1106} +{"info/global_step": 1107, "train_info/time_within_train_step": 27.687281370162964, "step": 1107} +{"train_info/time_between_train_steps": 0.005059957504272461, "step": 1107} +{"info/global_step": 1108, "train_info/time_within_train_step": 27.706772804260254, "step": 1108} +{"train_info/time_between_train_steps": 0.004835844039916992, "step": 1108} +{"info/global_step": 1109, "train_info/time_within_train_step": 27.778420448303223, "step": 1109} +{"train_info/time_between_train_steps": 0.00504755973815918, "step": 1109} +{"info/global_step": 1110, "train_info/time_within_train_step": 27.68857169151306, "step": 1110} +{"train_info/time_between_train_steps": 0.005010843276977539, "step": 1110} +{"info/global_step": 1111, "train_info/time_within_train_step": 27.684194564819336, "step": 1111} +{"train_info/time_between_train_steps": 0.004918336868286133, "step": 1111} +{"info/global_step": 1112, "train_info/time_within_train_step": 27.72703790664673, "step": 1112} +{"train_info/time_between_train_steps": 0.005143880844116211, "step": 1112} +{"info/global_step": 1113, "train_info/time_within_train_step": 27.691192626953125, "step": 1113} +{"train_info/time_between_train_steps": 0.005007505416870117, "step": 1113} +{"info/global_step": 1114, "train_info/time_within_train_step": 27.69254159927368, "step": 1114} +{"train_info/time_between_train_steps": 0.005085945129394531, "step": 1114} +{"info/global_step": 1115, "train_info/time_within_train_step": 27.691052198410034, "step": 1115} +{"train_info/time_between_train_steps": 0.009713172912597656, "step": 1115} +{"info/global_step": 1116, "train_info/time_within_train_step": 27.69023895263672, "step": 1116} +{"train_info/time_between_train_steps": 0.005019664764404297, "step": 1116} +{"info/global_step": 1117, "train_info/time_within_train_step": 27.697155237197876, "step": 1117} +{"train_info/time_between_train_steps": 0.0053179264068603516, "step": 1117} +{"info/global_step": 1118, "train_info/time_within_train_step": 27.725451707839966, "step": 1118} +{"train_info/time_between_train_steps": 0.0051915645599365234, "step": 1118} +{"info/global_step": 1119, "train_info/time_within_train_step": 27.7292902469635, "step": 1119} +{"train_info/time_between_train_steps": 0.005934953689575195, "step": 1119} +{"info/global_step": 1120, "train_info/time_within_train_step": 27.731342554092407, "step": 1120} +{"train_info/time_between_train_steps": 0.0014123916625976562, "step": 1120} +{"train_info/time_between_train_steps": 3.3873960971832275, "step": 1120} +{"info/global_step": 1121, "train_info/time_within_train_step": 27.732693195343018, "step": 1121} +{"train_info/time_between_train_steps": 0.005172252655029297, "step": 1121} +{"info/global_step": 1122, "train_info/time_within_train_step": 27.843481302261353, "step": 1122} +{"train_info/time_between_train_steps": 0.005308866500854492, "step": 1122} +{"info/global_step": 1123, "train_info/time_within_train_step": 27.699169397354126, "step": 1123} +{"train_info/time_between_train_steps": 0.005075693130493164, "step": 1123} +{"info/global_step": 1124, "train_info/time_within_train_step": 27.82655692100525, "step": 1124} +{"train_info/time_between_train_steps": 0.0052869319915771484, "step": 1124} +{"info/global_step": 1125, "train_info/time_within_train_step": 27.924537658691406, "step": 1125} +{"train_info/time_between_train_steps": 0.010896444320678711, "step": 1125} +{"info/global_step": 1126, "train_info/time_within_train_step": 27.82904601097107, "step": 1126} +{"train_info/time_between_train_steps": 0.005125284194946289, "step": 1126} +{"info/global_step": 1127, "train_info/time_within_train_step": 27.752511978149414, "step": 1127} +{"train_info/time_between_train_steps": 0.005548238754272461, "step": 1127} +{"info/global_step": 1128, "train_info/time_within_train_step": 27.792283058166504, "step": 1128} +{"train_info/time_between_train_steps": 0.005066633224487305, "step": 1128} +{"info/global_step": 1129, "train_info/time_within_train_step": 27.70381450653076, "step": 1129} +{"train_info/time_between_train_steps": 0.004973411560058594, "step": 1129} +{"info/global_step": 1130, "train_info/time_within_train_step": 27.690987825393677, "step": 1130} +{"train_info/time_between_train_steps": 0.005060672760009766, "step": 1130} +{"info/global_step": 1131, "train_info/time_within_train_step": 27.73878526687622, "step": 1131} +{"train_info/time_between_train_steps": 0.0049059391021728516, "step": 1131} +{"info/global_step": 1132, "train_info/time_within_train_step": 27.70161271095276, "step": 1132} +{"train_info/time_between_train_steps": 0.005078792572021484, "step": 1132} +{"info/global_step": 1133, "train_info/time_within_train_step": 27.71372675895691, "step": 1133} +{"train_info/time_between_train_steps": 0.004920482635498047, "step": 1133} +{"info/global_step": 1134, "train_info/time_within_train_step": 27.7087984085083, "step": 1134} +{"train_info/time_between_train_steps": 0.004998207092285156, "step": 1134} +{"info/global_step": 1135, "train_info/time_within_train_step": 27.685205698013306, "step": 1135} +{"train_info/time_between_train_steps": 0.005045890808105469, "step": 1135} +{"info/global_step": 1136, "train_info/time_within_train_step": 27.691391706466675, "step": 1136} +{"train_info/time_between_train_steps": 0.004934072494506836, "step": 1136} +{"info/global_step": 1137, "train_info/time_within_train_step": 27.69384765625, "step": 1137} +{"train_info/time_between_train_steps": 0.0050776004791259766, "step": 1137} +{"info/global_step": 1138, "train_info/time_within_train_step": 27.70546793937683, "step": 1138} +{"train_info/time_between_train_steps": 0.004965543746948242, "step": 1138} +{"info/global_step": 1139, "train_info/time_within_train_step": 27.702171802520752, "step": 1139} +{"train_info/time_between_train_steps": 0.004957437515258789, "step": 1139} +{"info/global_step": 1140, "train_info/time_within_train_step": 27.827261686325073, "step": 1140} +{"train_info/time_between_train_steps": 0.005076169967651367, "step": 1140} +{"info/global_step": 1141, "train_info/time_within_train_step": 27.702036380767822, "step": 1141} +{"train_info/time_between_train_steps": 0.004945278167724609, "step": 1141} +{"info/global_step": 1142, "train_info/time_within_train_step": 27.70251441001892, "step": 1142} +{"train_info/time_between_train_steps": 0.005093812942504883, "step": 1142} +{"info/global_step": 1143, "train_info/time_within_train_step": 27.71976137161255, "step": 1143} +{"train_info/time_between_train_steps": 0.007117271423339844, "step": 1143} +{"info/global_step": 1144, "train_info/time_within_train_step": 27.748441219329834, "step": 1144} +{"train_info/time_between_train_steps": 0.005579710006713867, "step": 1144} +{"info/global_step": 1145, "train_info/time_within_train_step": 27.712103843688965, "step": 1145} +{"train_info/time_between_train_steps": 0.00545501708984375, "step": 1145} +{"info/global_step": 1146, "train_info/time_within_train_step": 27.70959234237671, "step": 1146} +{"train_info/time_between_train_steps": 0.006072521209716797, "step": 1146} +{"info/global_step": 1147, "train_info/time_within_train_step": 27.711390018463135, "step": 1147} +{"train_info/time_between_train_steps": 0.0056798458099365234, "step": 1147} +{"info/global_step": 1148, "train_info/time_within_train_step": 27.72747778892517, "step": 1148} +{"train_info/time_between_train_steps": 0.0015532970428466797, "step": 1148} +{"train_info/time_between_train_steps": 3.562267780303955, "step": 1148} +{"info/global_step": 1149, "train_info/time_within_train_step": 27.718398809432983, "step": 1149} +{"train_info/time_between_train_steps": 0.0075321197509765625, "step": 1149} +{"info/global_step": 1150, "train_info/time_within_train_step": 27.851614236831665, "step": 1150} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27358.0, "train_info/memory_max_reserved": 27358.0, "_timestamp": 1740855753, "_runtime": 32750}, "step": 1150} +{"logs": {"train/loss": 2.9831, "train/learning_rate": 2.7777777777777772e-05, "train/epoch": 41.0, "_timestamp": 1740855753, "_runtime": 32750}, "step": 1150} +{"train_info/time_between_train_steps": 0.025563955307006836, "step": 1150} +{"info/global_step": 1151, "train_info/time_within_train_step": 27.708900451660156, "step": 1151} +{"train_info/time_between_train_steps": 0.0050547122955322266, "step": 1151} +{"info/global_step": 1152, "train_info/time_within_train_step": 27.83734703063965, "step": 1152} +{"train_info/time_between_train_steps": 0.005340099334716797, "step": 1152} +{"info/global_step": 1153, "train_info/time_within_train_step": 27.70962142944336, "step": 1153} +{"train_info/time_between_train_steps": 0.005469322204589844, "step": 1153} +{"info/global_step": 1154, "train_info/time_within_train_step": 27.84589457511902, "step": 1154} +{"train_info/time_between_train_steps": 0.004991292953491211, "step": 1154} +{"info/global_step": 1155, "train_info/time_within_train_step": 27.753668069839478, "step": 1155} +{"train_info/time_between_train_steps": 0.005449056625366211, "step": 1155} +{"info/global_step": 1156, "train_info/time_within_train_step": 27.858542680740356, "step": 1156} +{"train_info/time_between_train_steps": 0.007281064987182617, "step": 1156} +{"info/global_step": 1157, "train_info/time_within_train_step": 27.698307275772095, "step": 1157} +{"train_info/time_between_train_steps": 0.0053157806396484375, "step": 1157} +{"info/global_step": 1158, "train_info/time_within_train_step": 27.70074486732483, "step": 1158} +{"train_info/time_between_train_steps": 0.005322694778442383, "step": 1158} +{"info/global_step": 1159, "train_info/time_within_train_step": 27.73197364807129, "step": 1159} +{"train_info/time_between_train_steps": 0.004992961883544922, "step": 1159} +{"info/global_step": 1160, "train_info/time_within_train_step": 27.706050872802734, "step": 1160} +{"train_info/time_between_train_steps": 0.0050470829010009766, "step": 1160} +{"info/global_step": 1161, "train_info/time_within_train_step": 27.692108869552612, "step": 1161} +{"train_info/time_between_train_steps": 0.005074262619018555, "step": 1161} +{"info/global_step": 1162, "train_info/time_within_train_step": 27.728121995925903, "step": 1162} +{"train_info/time_between_train_steps": 0.005043983459472656, "step": 1162} +{"info/global_step": 1163, "train_info/time_within_train_step": 27.70032572746277, "step": 1163} +{"train_info/time_between_train_steps": 0.008037090301513672, "step": 1163} +{"info/global_step": 1164, "train_info/time_within_train_step": 27.699866771697998, "step": 1164} +{"train_info/time_between_train_steps": 0.004914999008178711, "step": 1164} +{"info/global_step": 1165, "train_info/time_within_train_step": 27.68434500694275, "step": 1165} +{"train_info/time_between_train_steps": 0.00508880615234375, "step": 1165} +{"info/global_step": 1166, "train_info/time_within_train_step": 27.70005774497986, "step": 1166} +{"train_info/time_between_train_steps": 0.005069732666015625, "step": 1166} +{"info/global_step": 1167, "train_info/time_within_train_step": 27.696506023406982, "step": 1167} +{"train_info/time_between_train_steps": 0.0050296783447265625, "step": 1167} +{"info/global_step": 1168, "train_info/time_within_train_step": 27.689997911453247, "step": 1168} +{"train_info/time_between_train_steps": 0.00497889518737793, "step": 1168} +{"info/global_step": 1169, "train_info/time_within_train_step": 27.689486026763916, "step": 1169} +{"train_info/time_between_train_steps": 0.005052089691162109, "step": 1169} +{"info/global_step": 1170, "train_info/time_within_train_step": 27.71967601776123, "step": 1170} +{"train_info/time_between_train_steps": 0.00509333610534668, "step": 1170} +{"info/global_step": 1171, "train_info/time_within_train_step": 27.68492817878723, "step": 1171} +{"train_info/time_between_train_steps": 0.006108760833740234, "step": 1171} +{"info/global_step": 1172, "train_info/time_within_train_step": 27.798471689224243, "step": 1172} +{"train_info/time_between_train_steps": 0.0052642822265625, "step": 1172} +{"info/global_step": 1173, "train_info/time_within_train_step": 27.724548816680908, "step": 1173} +{"train_info/time_between_train_steps": 0.005867481231689453, "step": 1173} +{"info/global_step": 1174, "train_info/time_within_train_step": 27.719910144805908, "step": 1174} +{"train_info/time_between_train_steps": 0.006205320358276367, "step": 1174} +{"info/global_step": 1175, "train_info/time_within_train_step": 27.73900294303894, "step": 1175} +{"train_info/time_between_train_steps": 0.005938053131103516, "step": 1175} +{"info/global_step": 1176, "train_info/time_within_train_step": 27.727696657180786, "step": 1176} +{"train_info/time_between_train_steps": 0.0015213489532470703, "step": 1176} +{"train_info/time_between_train_steps": 3.2666282653808594, "step": 1176} +{"info/global_step": 1177, "train_info/time_within_train_step": 27.710946321487427, "step": 1177} +{"train_info/time_between_train_steps": 0.0072307586669921875, "step": 1177} +{"info/global_step": 1178, "train_info/time_within_train_step": 27.846046447753906, "step": 1178} +{"train_info/time_between_train_steps": 0.007917165756225586, "step": 1178} +{"info/global_step": 1179, "train_info/time_within_train_step": 27.735076665878296, "step": 1179} +{"train_info/time_between_train_steps": 0.00687861442565918, "step": 1179} +{"info/global_step": 1180, "train_info/time_within_train_step": 27.83994483947754, "step": 1180} +{"train_info/time_between_train_steps": 0.005432605743408203, "step": 1180} +{"info/global_step": 1181, "train_info/time_within_train_step": 28.810654640197754, "step": 1181} +{"train_info/time_between_train_steps": 0.0059549808502197266, "step": 1181} +{"info/global_step": 1182, "train_info/time_within_train_step": 28.331612825393677, "step": 1182} +{"train_info/time_between_train_steps": 0.0055866241455078125, "step": 1182} +{"info/global_step": 1183, "train_info/time_within_train_step": 29.139237642288208, "step": 1183} +{"train_info/time_between_train_steps": 0.006341695785522461, "step": 1183} +{"info/global_step": 1184, "train_info/time_within_train_step": 27.82138752937317, "step": 1184} +{"train_info/time_between_train_steps": 0.006087303161621094, "step": 1184} +{"info/global_step": 1185, "train_info/time_within_train_step": 27.73844623565674, "step": 1185} +{"train_info/time_between_train_steps": 0.005465507507324219, "step": 1185} +{"info/global_step": 1186, "train_info/time_within_train_step": 27.700336933135986, "step": 1186} +{"train_info/time_between_train_steps": 0.009825468063354492, "step": 1186} +{"info/global_step": 1187, "train_info/time_within_train_step": 27.829964876174927, "step": 1187} +{"train_info/time_between_train_steps": 0.00507664680480957, "step": 1187} +{"info/global_step": 1188, "train_info/time_within_train_step": 27.70087432861328, "step": 1188} +{"train_info/time_between_train_steps": 0.015006065368652344, "step": 1188} +{"info/global_step": 1189, "train_info/time_within_train_step": 27.85968017578125, "step": 1189} +{"train_info/time_between_train_steps": 0.005117654800415039, "step": 1189} +{"info/global_step": 1190, "train_info/time_within_train_step": 27.71331214904785, "step": 1190} +{"train_info/time_between_train_steps": 0.005063056945800781, "step": 1190} +{"info/global_step": 1191, "train_info/time_within_train_step": 27.7104389667511, "step": 1191} +{"train_info/time_between_train_steps": 0.00611114501953125, "step": 1191} +{"info/global_step": 1192, "train_info/time_within_train_step": 27.70087242126465, "step": 1192} +{"train_info/time_between_train_steps": 0.009768486022949219, "step": 1192} +{"info/global_step": 1193, "train_info/time_within_train_step": 27.693838119506836, "step": 1193} +{"train_info/time_between_train_steps": 0.005204677581787109, "step": 1193} +{"info/global_step": 1194, "train_info/time_within_train_step": 27.700496196746826, "step": 1194} +{"train_info/time_between_train_steps": 0.005087852478027344, "step": 1194} +{"info/global_step": 1195, "train_info/time_within_train_step": 27.705958127975464, "step": 1195} +{"train_info/time_between_train_steps": 0.005042314529418945, "step": 1195} +{"info/global_step": 1196, "train_info/time_within_train_step": 27.740895986557007, "step": 1196} +{"train_info/time_between_train_steps": 0.0054547786712646484, "step": 1196} +{"info/global_step": 1197, "train_info/time_within_train_step": 27.71330213546753, "step": 1197} +{"train_info/time_between_train_steps": 0.00714111328125, "step": 1197} +{"info/global_step": 1198, "train_info/time_within_train_step": 27.709962844848633, "step": 1198} +{"train_info/time_between_train_steps": 0.0050868988037109375, "step": 1198} +{"info/global_step": 1199, "train_info/time_within_train_step": 27.83929991722107, "step": 1199} +{"train_info/time_between_train_steps": 0.00975656509399414, "step": 1199} +{"info/global_step": 1200, "train_info/time_within_train_step": 27.737732648849487, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27358.0, "train_info/memory_max_reserved": 27358.0, "_timestamp": 1740857150, "_runtime": 34147}, "step": 1200} +{"logs": {"train/loss": 2.9595, "train/learning_rate": 0.0, "train/epoch": 42.02, "_timestamp": 1740857150, "_runtime": 34147}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27358.0, "train_info/memory_max_reserved": 27358.0, "_timestamp": 1740857190, "_runtime": 34187}, "step": 1200} +{"logs": {"train/train_runtime": 34189.2249, "train/train_samples_per_second": 17.971, "train/train_steps_per_second": 0.035, "train/total_flos": 3.21097597452288e+17, "train/train_loss": 3.904088788032532, "train/epoch": 42.02, "_timestamp": 1740857190, "_runtime": 34187}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4453125, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27358.0, "train_info/memory_max_reserved": 27358.0, "_timestamp": 1740857209, "_runtime": 34206}, "step": 1200} +{"logs": {"eval/loss": 4.216039657592773, "eval/runtime": 4.0167, "eval/samples_per_second": 50.539, "eval/steps_per_second": 3.236, "train/epoch": 42.02, "_timestamp": 1740857209, "_runtime": 34206}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4453125, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27358.0, "train_info/memory_max_reserved": 27358.0, "_timestamp": 1740857209, "_runtime": 34206}, "step": 1200} +{"logs": {"eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 4.216039657592773, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 67.76458122205067, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 4.0167, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 50.539, "train/epoch": 42.02, "_timestamp": 1740857209, "_runtime": 34206}, "step": 1200} diff --git a/perturb_adj_num_np_det_en_EN_randinit_seed53.log b/perturb_adj_num_np_det_en_EN_randinit_seed53.log new file mode 100755 index 0000000000000000000000000000000000000000..ed85ba3abdd9d9d3c0219178fca3a2e286846b37 --- /dev/null +++ b/perturb_adj_num_np_det_en_EN_randinit_seed53.log @@ -0,0 +1,121 @@ +|=>> 03/01 [10:55:26] - mistral - INFO :: Starting Run: perturb_adj_num_np_det_en_EN_randinit_seed53... +|=>> 03/01 [10:55:26] - mistral - INFO :: Setting Random Seed to 53! +|=>> 03/01 [10:55:26] - mistral - INFO :: Building Tokenize and Initializing `gpt2-small` via AutoModel/AutoConfig... +|=>> 03/01 [10:55:26] - mistral - INFO :: Using Configs For Model From: /scratch/xiulyang/multilingual-LM/mistral/conf/models/gpt2-small-EN.json ... +|=>> 03/01 [10:55:26] - mistral.models.auto - INFO :: Building Hugging Face GPT2Config from provided configs: {'activation_function': 'gelu_new', 'architectures': ['GPT2LMHeadModel'], 'attn_pdrop': 0.1, 'bos_token_id': 50256, 'embd_pdrop': 0.1, 'eos_token_id': 50256, 'initializer_range': 0.02, 'layer_norm_epsilon': 1e-05, 'model_type': 'gpt2', 'n_ctx': 1024, 'n_embd': 768, 'n_head': 12, 'n_inner': None, 'n_layer': 12, 'n_positions': 1024, 'reorder_and_upcast_attn': True, 'resid_pdrop': 0.1, 'scale_attn_by_inverse_layer_idx': True, 'scale_attn_weights': True, 'summary_activation': None, 'summary_first_dropout': 0.2, 'summary_proj_to_labels': True, 'summary_type': 'cls_index', 'summary_use_proj': True, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 1024}}, 'torch_dtype': 'float32', 'transformers_version': '4.35.2', 'use_cache': False, 'vocab_size': 50257} ... +|=>> 03/01 [10:55:26] - mistral.models.auto - INFO :: Fetching Hugging Face [Fast] AutoTokenizer for Model: `gpt2`... +|=>> 03/01 [10:55:26] - mistral.models.auto - INFO :: Using a Pretokenized Dataset +|=>> 03/01 [10:55:26] - mistral.models.auto - INFO :: Initializing Custom GPT-2 Model from Configuration: `gpt2`... +|=>> 03/01 [10:55:30] - mistral - INFO :: Setting Training Arguments from Quinfig... +|=>> 03/01 [10:55:30] - mistral.args.training - INFO :: Setting Gradient Accumulation Steps = `64` [BSZ: 512 World Size: 1 Device BSZ: 8] +|=>> 03/01 [10:55:30] - mistral - INFO :: Downloading and Preprocessing Dataset `/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py`... +|=>> 03/01 [10:55:31] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Generating examples from = /scratch/xiulyang/multilingual-LM/data/multilingual/multilingual_data_perturbed/perturb_adj_num_np_det_en/train +|=>> 03/01 [10:55:32] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Total sentences: 1025689 +|=>> 03/01 [10:55:33] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Loading pre-tokenized data +|=>> 03/01 [10:55:37] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 03/01 [10:55:38] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 03/01 [10:55:39] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 03/01 [10:55:45] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Generating examples from = /scratch/xiulyang/multilingual-LM/data/multilingual/multilingual_data_perturbed/perturb_adj_num_np_det_en/dev +|=>> 03/01 [10:55:45] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Total sentences: 11410 +|=>> 03/01 [10:55:45] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Loading pre-tokenized data +|=>> 03/01 [10:55:45] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 03/01 [10:55:45] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 03/01 [10:55:45] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 03/01 [10:55:46] - mistral.corpora.auto - INFO :: Building Tokenized Indexed Dataset for {dataset_id}/{dataset_name}... +|=>> 03/01 [10:55:46] - mistral.corpora.auto - INFO :: Building Indexed Dataset for train +|=>> 03/01 [10:56:16] - mistral.corpora.auto - INFO :: Building Indexed Dataset for validation +|=>> 03/01 [10:56:17] - mistral - INFO :: Initializing Model Trainer... +|=>> 03/01 [10:56:17] - mistral - INFO :: Training Arguments: TrainingArguments( +_n_gpu=1, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +bf16=False, +bf16_full_eval=False, +data_seed=53, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=IntervalStrategy.STEPS, +fp16=True, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +gradient_accumulation_steps=64, +gradient_checkpointing=False, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_model_id=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0006, +length_column_name=length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=logs, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=50, +logging_strategy=IntervalStrategy.STEPS, +lr_scheduler_type=SchedulerType.LINEAR, +max_grad_norm=1.0, +max_steps=1200, +metric_for_best_model=None, +mp_parameters=, +no_cuda=False, +num_train_epochs=3.0, +optim=OptimizerNames.ADAMW_HF, +output_dir=//scratch/xiulyang/multilingual_models/perturb_adj_num_np_det_en_EN_randinit/babylm_perturb_adj_num_np_det_en_EN_randinit_seed53/runs/perturb_adj_num_np_det_en_EN_randinit_seed53, +overwrite_output_dir=False, +past_index=-1, +per_device_eval_batch_size=16, +per_device_train_batch_size=8, +prediction_loss_only=True, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +remove_unused_columns=True, +report_to=[], +resume_from_checkpoint=None, +run_name=perturb_adj_num_np_det_en_EN_randinit_seed53, +save_on_each_node=False, +save_steps=1000, +save_strategy=IntervalStrategy.STEPS, +save_total_limit=None, +seed=53, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=120, +weight_decay=0.1, +xpu_backend=None, +) +|=>> 03/01 [10:56:25] - mistral.core.callbacks - INFO :: Setting W&B Project: xiulin-yang-compling +|=>> 03/01 [10:56:41] - mistral - INFO :: Training... +|=>> 03/01 [10:56:41] - mistral.core.callbacks - INFO :: Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" +|=>> 03/01 [20:26:45] - mistral - INFO :: ...and that's all folks! +|=>> 03/01 [20:26:45] - mistral - INFO :: Running final evaluation... diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..1435f70551d88361bd2e84bd9cb3dac06518f183 --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fd243ed4a838b0199d67b4f4d0099095fd65044f10e04d0d01e4d02edbfddc9 +size 510396521 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/training_args.bin b/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..bca3042c603d146089deb3309dc6c6828df7acbf --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b2014dac66e89fff48441e3acbe29cace7e30db0b4b50068d0d17e60e27a36 +size 3183