diff --git a/checkpoint-0/config.json b/checkpoint-0/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-0/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-0/pytorch_model.bin b/checkpoint-0/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..061288474e4dc90b01d25ca0d912dbf2bc2cbece --- /dev/null +++ b/checkpoint-0/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4cb7ddeb977797468ae0b2e8a977bb66b33b53cf9ce0a85051cfaa03f3f32eb +size 420912233 diff --git a/checkpoint-0/special_tokens_map.json b/checkpoint-0/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-0/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-0/tokenizer_config.json b/checkpoint-0/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-0/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-0/training_args.bin b/checkpoint-0/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..38f8469260f0d284612437800574b1fc07760030 --- /dev/null +++ b/checkpoint-0/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1caf1e1a70f50539b082bfbf9af6ffc327e561ca075be9c4a29ed12fb0374e03 +size 3183 diff --git a/checkpoint-100/config.json b/checkpoint-100/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..0eb7640a25970a08c2926cdbe2aafe7b03795c2b --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3af4371aa6fd61d09ba12c46134cf5b9be05e7533f8327b2b9098fb64b9f0251 +size 816635249 diff --git a/checkpoint-100/pytorch_model.bin b/checkpoint-100/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..c48ff6523745cac467024f2bba34578489f35e7d --- /dev/null +++ b/checkpoint-100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:087b8a132ddac0100172c6dedf602814215fb38a43929e1d82a83ae40ff480ca +size 420912233 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..6f98aeaedf0bf1c1ee5b51d4af4122f17114b6f8 --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:429f1439286572a341c131a80fe56abdf695e63636a6ef5197b4fbad4af96f86 +size 14567 diff --git a/checkpoint-100/scaler.pt b/checkpoint-100/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..45cc4a33e17645cb0ed4a911b11c77cb2e7ce7f3 --- /dev/null +++ b/checkpoint-100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a3423b2fe42f204bc8fe2c666ff379f9fd753a0f13613064a5e71e86b519e8 +size 559 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..595eec9094e91b3eb24c2de88c461df2c22026ab --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e9b9d31d11c624d89b0c04ad496adf4b5addd3e703848d2583972c703e8da6 +size 623 diff --git a/checkpoint-100/special_tokens_map.json b/checkpoint-100/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-100/tokenizer_config.json b/checkpoint-100/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..5c9b625e8a3a9825072fd66ed9a3a83055b6bd8b --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,34 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0233333333333334, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.107, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9276, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9315, + "step": 100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.7044770480128e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..38f8469260f0d284612437800574b1fc07760030 --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1caf1e1a70f50539b082bfbf9af6ffc327e561ca075be9c4a29ed12fb0374e03 +size 3183 diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..17e12019de6f6ee1c9088a378dae87652a706ca1 --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c88794987710c6706168c9a5b7b16ef15f343958d816b9587b71c426f9fa656a +size 816635441 diff --git a/checkpoint-1000/pytorch_model.bin b/checkpoint-1000/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..908edf2ecc6c9f238122db1ea7c56de15e5a9a08 --- /dev/null +++ b/checkpoint-1000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63b4a5121176851db0ffce2cb7f285d04e0988275bf1b3417a7790c2bf6331c8 +size 420912233 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..9adf00148c4d5c827c600aaca89146c81d49a2e5 --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78a3fbbacd1aaf2944e9f296a88ca1e695d21957bf36c2326e94db9e95ba8ab3 +size 14567 diff --git a/checkpoint-1000/scaler.pt b/checkpoint-1000/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..b04695d3a30e4bab2b78883d9c849c25c37ef7d7 --- /dev/null +++ b/checkpoint-1000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f810fc7b695697c440d8985f6042b4ba23a9e1027604c265718b518ca29f1b2b +size 559 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..2a616899591a36b66ac0bd1ceeb324087c181a2a --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0691206f4bd9ca409d6e7104087a4e0eb05df8f8f555a400f6ecc532edba52d8 +size 623 diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..e5a883285a3bb457c0e29628223f0f9d48e24e84 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,158 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 27.023333333333333, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.107, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9276, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9315, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3684, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0546, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9329, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.83, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.6134, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.4876, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3228, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2223, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1724, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0668, + "step": 600 + }, + { + "epoch": 18.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.0338, + "step": 650 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002777777777777778, + "loss": 2.9462, + "step": 700 + }, + { + "epoch": 20.02, + "learning_rate": 0.00025, + "loss": 2.8965, + "step": 750 + }, + { + "epoch": 22.01, + "learning_rate": 0.00022222222222222218, + "loss": 2.8837, + "step": 800 + }, + { + "epoch": 23.02, + "learning_rate": 0.00019444444444444443, + "loss": 2.8106, + "step": 850 + }, + { + "epoch": 24.03, + "learning_rate": 0.00016666666666666666, + "loss": 2.7726, + "step": 900 + }, + { + "epoch": 26.01, + "learning_rate": 0.0001388888888888889, + "loss": 2.7668, + "step": 950 + }, + { + "epoch": 27.02, + "learning_rate": 0.00011111111111111109, + "loss": 2.7047, + "step": 1000 + }, + { + "epoch": 27.02, + "eval_loss": 3.4166903495788574, + "eval_runtime": 2.4384, + "eval_samples_per_second": 54.135, + "eval_steps_per_second": 3.691, + "step": 1000 + }, + { + "epoch": 27.02, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 3.4166903495788574, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 30.468408184611675, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 2.4384, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 54.135, + "step": 1000 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.71457337212928e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..38f8469260f0d284612437800574b1fc07760030 --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1caf1e1a70f50539b082bfbf9af6ffc327e561ca075be9c4a29ed12fb0374e03 +size 3183 diff --git a/checkpoint-1100/config.json b/checkpoint-1100/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-1100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-1100/optimizer.pt b/checkpoint-1100/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..5eb0ac727c64650c2ebc588dca133c09bce68f6a --- /dev/null +++ b/checkpoint-1100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26869eac86d3343dc9e1e6c10465560863ed78df4a5c19598882ed8f5361dc10 +size 816635441 diff --git a/checkpoint-1100/pytorch_model.bin b/checkpoint-1100/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..956a60b13bb238dce68e67973d632937c429c30e --- /dev/null +++ b/checkpoint-1100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ec24b2a082e9e94f17be0308182f3ccb2578dd90ddecd16d88094d7f44b65e5 +size 420912233 diff --git a/checkpoint-1100/rng_state.pth b/checkpoint-1100/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..8ce96e2122001cebd93ad9fd582622141b9d8923 --- /dev/null +++ b/checkpoint-1100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3897be86a20bc0fe33fe38889a6c032ca4561ef11b24b3dc72df4468fe8d416 +size 14567 diff --git a/checkpoint-1100/scaler.pt b/checkpoint-1100/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..37bf049fbd5fd721203bf0238edc8ff67dbd8f94 --- /dev/null +++ b/checkpoint-1100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb16c30b686aa43e110b0d33f9d46bf3127b7124542ca8dc34831233d4675a0 +size 559 diff --git a/checkpoint-1100/scheduler.pt b/checkpoint-1100/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..c158169ec0bb09620952d544fd4b4edea0cc9cf4 --- /dev/null +++ b/checkpoint-1100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1b7713e4bb40428f29080b7d08d4a52f779ac863737861e4724292b2cf6c59 +size 623 diff --git a/checkpoint-1100/special_tokens_map.json b/checkpoint-1100/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1100/tokenizer_config.json b/checkpoint-1100/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1100/trainer_state.json b/checkpoint-1100/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..114af8615e399f2584c3d710ec78b8e3c0c2925e --- /dev/null +++ b/checkpoint-1100/trainer_state.json @@ -0,0 +1,170 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 30.016666666666666, + "global_step": 1100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.107, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9276, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9315, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3684, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0546, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9329, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.83, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.6134, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.4876, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3228, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2223, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1724, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0668, + "step": 600 + }, + { + "epoch": 18.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.0338, + "step": 650 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002777777777777778, + "loss": 2.9462, + "step": 700 + }, + { + "epoch": 20.02, + "learning_rate": 0.00025, + "loss": 2.8965, + "step": 750 + }, + { + "epoch": 22.01, + "learning_rate": 0.00022222222222222218, + "loss": 2.8837, + "step": 800 + }, + { + "epoch": 23.02, + "learning_rate": 0.00019444444444444443, + "loss": 2.8106, + "step": 850 + }, + { + "epoch": 24.03, + "learning_rate": 0.00016666666666666666, + "loss": 2.7726, + "step": 900 + }, + { + "epoch": 26.01, + "learning_rate": 0.0001388888888888889, + "loss": 2.7668, + "step": 950 + }, + { + "epoch": 27.02, + "learning_rate": 0.00011111111111111109, + "loss": 2.7047, + "step": 1000 + }, + { + "epoch": 27.02, + "eval_loss": 3.4166903495788574, + "eval_runtime": 2.4384, + "eval_samples_per_second": 54.135, + "eval_steps_per_second": 3.691, + "step": 1000 + }, + { + "epoch": 27.02, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 3.4166903495788574, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 30.468408184611675, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 2.4384, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 54.135, + "step": 1000 + }, + { + "epoch": 29.0, + "learning_rate": 8.333333333333333e-05, + "loss": 2.7036, + "step": 1050 + }, + { + "epoch": 30.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 2.6476, + "step": 1100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.9864634089472e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1100/training_args.bin b/checkpoint-1100/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..38f8469260f0d284612437800574b1fc07760030 --- /dev/null +++ b/checkpoint-1100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1caf1e1a70f50539b082bfbf9af6ffc327e561ca075be9c4a29ed12fb0374e03 +size 3183 diff --git a/checkpoint-1200/config.json b/checkpoint-1200/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-1200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-1200/optimizer.pt b/checkpoint-1200/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..102f0771c1f8e2c78a41f03348a5f8ca1731eec4 --- /dev/null +++ b/checkpoint-1200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd3b906202ea19942b91613344b7002b7ac072723d58be3da794d3bfb4df4dd8 +size 816635441 diff --git a/checkpoint-1200/pytorch_model.bin b/checkpoint-1200/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..5cf8877bb352a529f8f9cd6ef07f9c62b0badbe7 --- /dev/null +++ b/checkpoint-1200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:663dffe78009dcb4795164dcc0048e6af772e40af06c3bbc3269d589096ef61b +size 420912233 diff --git a/checkpoint-1200/rng_state.pth b/checkpoint-1200/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..1f11b28978310c8717328766f6491b41f4484902 --- /dev/null +++ b/checkpoint-1200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e505bf671d3805c2cb34fbe70d1ccf7a95ffdd9d3cc96c5a79c2defe8a3019b +size 14567 diff --git a/checkpoint-1200/scaler.pt b/checkpoint-1200/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..8953dddccbefc4703c09dcda27d83c15add2bade --- /dev/null +++ b/checkpoint-1200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c7277eaca0850ae3e9b6790b3d002d820169cce0671185e672c28c8ae8e056 +size 559 diff --git a/checkpoint-1200/scheduler.pt b/checkpoint-1200/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..310d39c17fc616a9c83286ed00f0f4cefba9f5df --- /dev/null +++ b/checkpoint-1200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:935a8fb09a6e9698d9894853b05e181b3f56098deaaecddde08e55f06bf000c4 +size 623 diff --git a/checkpoint-1200/special_tokens_map.json b/checkpoint-1200/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1200/tokenizer_config.json b/checkpoint-1200/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1200/trainer_state.json b/checkpoint-1200/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..4880ee477ad7d1f665f245d5892f5950ee056a33 --- /dev/null +++ b/checkpoint-1200/trainer_state.json @@ -0,0 +1,182 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 33.01, + "global_step": 1200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.107, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9276, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9315, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3684, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0546, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9329, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.83, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.6134, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.4876, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3228, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2223, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1724, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0668, + "step": 600 + }, + { + "epoch": 18.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.0338, + "step": 650 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002777777777777778, + "loss": 2.9462, + "step": 700 + }, + { + "epoch": 20.02, + "learning_rate": 0.00025, + "loss": 2.8965, + "step": 750 + }, + { + "epoch": 22.01, + "learning_rate": 0.00022222222222222218, + "loss": 2.8837, + "step": 800 + }, + { + "epoch": 23.02, + "learning_rate": 0.00019444444444444443, + "loss": 2.8106, + "step": 850 + }, + { + "epoch": 24.03, + "learning_rate": 0.00016666666666666666, + "loss": 2.7726, + "step": 900 + }, + { + "epoch": 26.01, + "learning_rate": 0.0001388888888888889, + "loss": 2.7668, + "step": 950 + }, + { + "epoch": 27.02, + "learning_rate": 0.00011111111111111109, + "loss": 2.7047, + "step": 1000 + }, + { + "epoch": 27.02, + "eval_loss": 3.4166903495788574, + "eval_runtime": 2.4384, + "eval_samples_per_second": 54.135, + "eval_steps_per_second": 3.691, + "step": 1000 + }, + { + "epoch": 27.02, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 3.4166903495788574, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 30.468408184611675, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 2.4384, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 54.135, + "step": 1000 + }, + { + "epoch": 29.0, + "learning_rate": 8.333333333333333e-05, + "loss": 2.7036, + "step": 1050 + }, + { + "epoch": 30.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 2.6476, + "step": 1100 + }, + { + "epoch": 31.03, + "learning_rate": 2.7777777777777772e-05, + "loss": 2.6246, + "step": 1150 + }, + { + "epoch": 33.01, + "learning_rate": 0.0, + "loss": 2.6339, + "step": 1200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 3.25835344576512e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1200/training_args.bin b/checkpoint-1200/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..38f8469260f0d284612437800574b1fc07760030 --- /dev/null +++ b/checkpoint-1200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1caf1e1a70f50539b082bfbf9af6ffc327e561ca075be9c4a29ed12fb0374e03 +size 3183 diff --git a/checkpoint-200/config.json b/checkpoint-200/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..5a758c7053dcda1c856e7e92d5c4143473082586 --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39870001c129900d3acdc3819132695063bc72f3b16c6fa8298ec6d7a8281e91 +size 816635249 diff --git a/checkpoint-200/pytorch_model.bin b/checkpoint-200/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..cc9bfbcca61f8edb5033a385f923fb1aeb80e848 --- /dev/null +++ b/checkpoint-200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f263f5a1d105f61577f129b530ca59a501c9c660abdcfb501890a12c9c2d3158 +size 420912233 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..41e8f9ce856bdd1a496db4ff54ef010361c9c761 --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:944bf41a4a7dee4b7fa831032e4d90adc430c72bbcc8fe7bcdb7edc8cd275776 +size 14567 diff --git a/checkpoint-200/scaler.pt b/checkpoint-200/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..365b52ebf376498237a843f6d7332e5a49b14902 --- /dev/null +++ b/checkpoint-200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb6982c29cd162f49aeb531674acf574eccd46a8f556bec596040d7c3b95200a +size 559 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..36cb8da739c80a63971a62f06f781c40ac0fceb2 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a992625ada6d884e508ff9392d16738b4a4163147f8fcbf9f46be82ecae9888 +size 623 diff --git a/checkpoint-200/special_tokens_map.json b/checkpoint-200/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-200/tokenizer_config.json b/checkpoint-200/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..613d1791f29ae2952e8177d59ad3bd77355d3469 --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,46 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.016666666666667, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.107, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9276, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9315, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3684, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0546, + "step": 200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 5.423377416192e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..38f8469260f0d284612437800574b1fc07760030 --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1caf1e1a70f50539b082bfbf9af6ffc327e561ca075be9c4a29ed12fb0374e03 +size 3183 diff --git a/checkpoint-300/config.json b/checkpoint-300/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-300/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..3032e653335f0ec97a5f3895f1a311f1b529a0b2 --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85f992ad368b50984c5d9a21107535cf1fb6aed794494c9603b5d1f273f4ab54 +size 816635441 diff --git a/checkpoint-300/pytorch_model.bin b/checkpoint-300/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..b1b34aff5e68c43313365715b572edae25c21366 --- /dev/null +++ b/checkpoint-300/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:799f6f0e131587dfa7d5ab6b185baf9ffe9ab0aed5e6611f94d9d2db503f902b +size 420912233 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..8bc1688e48d0e195487d275c7f3529ceb678ade4 --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ee1f604df21172369363613f0db0214481402214b9cea77c1aa27085b6b9bbb +size 14567 diff --git a/checkpoint-300/scaler.pt b/checkpoint-300/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..01066cf4761ea9d2f7962f5181762f7b08690b79 --- /dev/null +++ b/checkpoint-300/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0967b9f865f16344c55f5ccc3cf7d6e8e97ca61dda304e931ca6bad130f48dd1 +size 559 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..681987f21e79cd4685aac32a5e6b74341e25d936 --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1db899b266916f792a0898ceb27a87eaf76647f10c29cc0c13ce22f12a12efd +size 623 diff --git a/checkpoint-300/special_tokens_map.json b/checkpoint-300/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-300/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-300/tokenizer_config.json b/checkpoint-300/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-300/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..d9b2cb0d944fa695f53a7e83818e06e758ee4f4b --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,58 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.01, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.107, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9276, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9315, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3684, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0546, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9329, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.83, + "step": 300 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 8.1422777843712e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..38f8469260f0d284612437800574b1fc07760030 --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1caf1e1a70f50539b082bfbf9af6ffc327e561ca075be9c4a29ed12fb0374e03 +size 3183 diff --git a/checkpoint-400/config.json b/checkpoint-400/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-400/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..f2046f6c76ab3643e1d033d8a339f5bcaaf9cc13 --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0ad291f0861c0a1ce954a64524331dafab87ac47a64e14640c94db514cfa212 +size 816635441 diff --git a/checkpoint-400/pytorch_model.bin b/checkpoint-400/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..813c4946c3187c123757034a8a504e4dbce5ed65 --- /dev/null +++ b/checkpoint-400/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e4999edc88caf6b8231743faaa5e16a1f8a108b8d333e1bd18f5ac50396505c +size 420912233 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..656eb3629791bbc4a543b7060b6f576c98ed7210 --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63c60e2a449faea85102f85f629d790cec0ac12105aa80971ceb5d021f481fc +size 14567 diff --git a/checkpoint-400/scaler.pt b/checkpoint-400/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..9c7aef4199e98d81152810b661dbaffc01963383 --- /dev/null +++ b/checkpoint-400/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476e510c8ea7edbd2b51d1e76a4e037820a5639381c0d8b5d32dafa492795a1e +size 559 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..967673cfc91836d239beb5a7ede06992232ba309 --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db087d678047b5c346bbb8511936612c1fdf223c6fd70321e97369bc31ed76a8 +size 623 diff --git a/checkpoint-400/special_tokens_map.json b/checkpoint-400/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-400/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-400/tokenizer_config.json b/checkpoint-400/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-400/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..388cffcac154054c1fd9ccf2c685c07e730eba9c --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,70 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 11.003333333333334, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.107, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9276, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9315, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3684, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0546, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9329, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.83, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.6134, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.4876, + "step": 400 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.08611781525504e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..38f8469260f0d284612437800574b1fc07760030 --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1caf1e1a70f50539b082bfbf9af6ffc327e561ca075be9c4a29ed12fb0374e03 +size 3183 diff --git a/checkpoint-500/config.json b/checkpoint-500/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-500/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..099d0ed9823a79db6ea85da90314184defdc2f1f --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d946e1e19eedc62cc03b5880723288ef3e25fe5cb7fb140a92926612ca0b396 +size 816635441 diff --git a/checkpoint-500/pytorch_model.bin b/checkpoint-500/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..2a3cb13ab633882024dff257ce2ad16667883bf4 --- /dev/null +++ b/checkpoint-500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c07c4b69ee8ea136dd0534b9d6820f0596c3cfca14411c75e07a580ba2d7c3a +size 420912233 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..4a870b4b065e9b75ee42f8a198bc344cc3abfa6b --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a06add09933911c01237e09c1f662f603cf3a101c5d31a823b856559be65276 +size 14567 diff --git a/checkpoint-500/scaler.pt b/checkpoint-500/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..e8b96c9c2837f2c95b1d07d4fc3f245f9ad1ef62 --- /dev/null +++ b/checkpoint-500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa4c7be44c959599b8b43bb9bc3371e9e4e5bbc5758b3ab5afcccfda3e72e67 +size 559 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..77b958181a5b47b022b96b38a6207f274a1b6604 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026fae4a90d56c24de94b10dfa7a75b6ba4e43bd5c1a3fdb2d77356b81cd6f8a +size 623 diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..4e32b7905c65301fd0016eea25e447f633651ece --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,82 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 13.026666666666667, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.107, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9276, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9315, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3684, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0546, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9329, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.83, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.6134, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.4876, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3228, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2223, + "step": 500 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.35656552005632e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..38f8469260f0d284612437800574b1fc07760030 --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1caf1e1a70f50539b082bfbf9af6ffc327e561ca075be9c4a29ed12fb0374e03 +size 3183 diff --git a/checkpoint-600/config.json b/checkpoint-600/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-600/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..d3dc5d71ed9056d5841df20bbff17a64ddaf89c9 --- /dev/null +++ b/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab106d8ad65eeb9ea34999285b4a50f2501a927ba4e1ef9441c79d959ca0a8b0 +size 816635441 diff --git a/checkpoint-600/pytorch_model.bin b/checkpoint-600/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..26a70721c51b1ad292ce7ab70ee6e286993edb85 --- /dev/null +++ b/checkpoint-600/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd5aaf2f641a9ab8417f0fbe9679942f3d2e8cbe81bc508c771978774c7dd8c0 +size 420912233 diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..8367b85391181b57a22f8241ee62545f931782e3 --- /dev/null +++ b/checkpoint-600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:897df2de52e73bba3930d4abea2afd4470eeef03b1c4425aa80b8d0942752e27 +size 14567 diff --git a/checkpoint-600/scaler.pt b/checkpoint-600/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..f95b3e36da01561ec333a83ee8419ad225633e06 --- /dev/null +++ b/checkpoint-600/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8415b86bbce347c0df306b84a695add049c2a3b2d0b6f4dda3bf036d341150 +size 559 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..62e5359badd619e7dad2c51d98fa8043d9948f0b --- /dev/null +++ b/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700940432b1c2117248896e2ce5a58d93c051d92ea97707f74d76bf1ef24deee +size 623 diff --git a/checkpoint-600/special_tokens_map.json b/checkpoint-600/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-600/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-600/tokenizer_config.json b/checkpoint-600/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-600/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..41037bee5f935c1c89bdcb92cc47a4d60d212544 --- /dev/null +++ b/checkpoint-600/trainer_state.json @@ -0,0 +1,94 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 16.02, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.107, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9276, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9315, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3684, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0546, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9329, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.83, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.6134, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.4876, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3228, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2223, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1724, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0668, + "step": 600 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.62845555687424e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..38f8469260f0d284612437800574b1fc07760030 --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1caf1e1a70f50539b082bfbf9af6ffc327e561ca075be9c4a29ed12fb0374e03 +size 3183 diff --git a/checkpoint-700/config.json b/checkpoint-700/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-700/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..a6f74f7c54a55d2edcce1376ee6f5bc943ad6dc8 --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:183a3815089adf2350e3d09b54a449af9a222f31e968e50e69fd44c8cea0b074 +size 816635441 diff --git a/checkpoint-700/pytorch_model.bin b/checkpoint-700/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..6130e804462ec472a04513afdea862c536727551 --- /dev/null +++ b/checkpoint-700/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5028d95b20afc34dbd33971bd8aa59c50797a6d90e6ff2820fac58aa18dc1237 +size 420912233 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..ad60c888307225718091fe5cc81e82d1a4343b31 --- /dev/null +++ b/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9482d521f3196b82e6659c824c4061fa88600a60ea25a7b1821ca1cd1d4e68eb +size 14567 diff --git a/checkpoint-700/scaler.pt b/checkpoint-700/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..12f2b8ec834e54a2bd7cfdd0e07b0c6e125b6490 --- /dev/null +++ b/checkpoint-700/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb213daf5cce18a5f92167ca14da9df084d907f2b9796efc4666630f312b58c +size 559 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..42408b6253265af34ae78746144fbba9316e0d7e --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d4910fd408e002ebeff50d62bfb043dcae5ef658777d0c3ee4a3bbb515ec15 +size 623 diff --git a/checkpoint-700/special_tokens_map.json b/checkpoint-700/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-700/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-700/tokenizer_config.json b/checkpoint-700/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-700/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..08187a422360226b724d1bef8c2a25651513c9da --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,106 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 19.013333333333332, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.107, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9276, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9315, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3684, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0546, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9329, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.83, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.6134, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.4876, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3228, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2223, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1724, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0668, + "step": 600 + }, + { + "epoch": 18.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.0338, + "step": 650 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002777777777777778, + "loss": 2.9462, + "step": 700 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.90034559369216e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..38f8469260f0d284612437800574b1fc07760030 --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1caf1e1a70f50539b082bfbf9af6ffc327e561ca075be9c4a29ed12fb0374e03 +size 3183 diff --git a/checkpoint-800/config.json b/checkpoint-800/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-800/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..7a51b44284badc9a8d0b2d2d69724dbce52d9585 --- /dev/null +++ b/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e09e1abe5af0dbab3bdbf63b04ec82969fef452129774d4efb93b6dea77d49e5 +size 816635441 diff --git a/checkpoint-800/pytorch_model.bin b/checkpoint-800/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..b74ce1fd53334a081346ec83da00626ef1b54006 --- /dev/null +++ b/checkpoint-800/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:810b5b5609baa7525999784116fcc89a4b4835506c2ed134e424aa855bf6a842 +size 420912233 diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..e077710d6130cb85eebb1bd035b9ae9c7e204c6b --- /dev/null +++ b/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69adae0c7f1a82356f32db190c9359810440b99e36efd33da71d6976e62a29cf +size 14567 diff --git a/checkpoint-800/scaler.pt b/checkpoint-800/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..b3c73372264156b02df8dada2192ee3c96dd5fc4 --- /dev/null +++ b/checkpoint-800/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2074cdcefbaa0a39f736d6b0f7bf018c350d49e85648bc8accc4f756ad816e +size 559 diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..75713dbce3771306ca00343ecc497c4f19a01d03 --- /dev/null +++ b/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27fb255c84833fb6ab5d93679cb236a569de9a1c4f805f72a2f60a2bc7c7499 +size 623 diff --git a/checkpoint-800/special_tokens_map.json b/checkpoint-800/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-800/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-800/tokenizer_config.json b/checkpoint-800/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-800/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..e5fed8735977ea4c9a209207d2a1820f4d3f949e --- /dev/null +++ b/checkpoint-800/trainer_state.json @@ -0,0 +1,118 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 22.006666666666668, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.107, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9276, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9315, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3684, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0546, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9329, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.83, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.6134, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.4876, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3228, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2223, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1724, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0668, + "step": 600 + }, + { + "epoch": 18.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.0338, + "step": 650 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002777777777777778, + "loss": 2.9462, + "step": 700 + }, + { + "epoch": 20.02, + "learning_rate": 0.00025, + "loss": 2.8965, + "step": 750 + }, + { + "epoch": 22.01, + "learning_rate": 0.00022222222222222218, + "loss": 2.8837, + "step": 800 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.17223563051008e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..38f8469260f0d284612437800574b1fc07760030 --- /dev/null +++ b/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1caf1e1a70f50539b082bfbf9af6ffc327e561ca075be9c4a29ed12fb0374e03 +size 3183 diff --git a/checkpoint-900/config.json b/checkpoint-900/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-900/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..654fae6d65aa9b8b774c40b7ad583a05f4eddb12 --- /dev/null +++ b/checkpoint-900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:452d7c615bb19a0388dcfe802857f2b1834c1a18ba973546763dad7ec509afc3 +size 816635441 diff --git a/checkpoint-900/pytorch_model.bin b/checkpoint-900/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..da89f30b06bc21ebb9e14a4e03b9129ad2598768 --- /dev/null +++ b/checkpoint-900/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80dc95f873abd786b51970229a43e91dc95586731979a2711ecd15b5f28c7f23 +size 420912233 diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..e05e48489e7eea50b2b03864965ecb2d97f5e825 --- /dev/null +++ b/checkpoint-900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57452310eeb031b5cf43b7b2270438fb3c626e97a4a41975fc57487f710d8978 +size 14567 diff --git a/checkpoint-900/scaler.pt b/checkpoint-900/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..be54cb13c777bc6feccb478ff218e7e21fad482a --- /dev/null +++ b/checkpoint-900/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8695f57df923e22b943b0b0f2b9cc7007008e80b53ccee275b3a35963fe67e9 +size 559 diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..539d7c83ea252818dc9cbffac08cf340bb05a454 --- /dev/null +++ b/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cb09e5db72772a15094286e93cbb61d745d9b63863703cf53da0bcb9827821 +size 623 diff --git a/checkpoint-900/special_tokens_map.json b/checkpoint-900/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-900/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-900/tokenizer_config.json b/checkpoint-900/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-900/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..4b65f23c776444d4ce695d0b362f072773e32d07 --- /dev/null +++ b/checkpoint-900/trainer_state.json @@ -0,0 +1,130 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 24.03, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.107, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9276, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9315, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3684, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0546, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9329, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.83, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.6134, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.4876, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3228, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2223, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1724, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0668, + "step": 600 + }, + { + "epoch": 18.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.0338, + "step": 650 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002777777777777778, + "loss": 2.9462, + "step": 700 + }, + { + "epoch": 20.02, + "learning_rate": 0.00025, + "loss": 2.8965, + "step": 750 + }, + { + "epoch": 22.01, + "learning_rate": 0.00022222222222222218, + "loss": 2.8837, + "step": 800 + }, + { + "epoch": 23.02, + "learning_rate": 0.00019444444444444443, + "loss": 2.8106, + "step": 850 + }, + { + "epoch": 24.03, + "learning_rate": 0.00016666666666666666, + "loss": 2.7726, + "step": 900 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.44268333531136e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..38f8469260f0d284612437800574b1fc07760030 --- /dev/null +++ b/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1caf1e1a70f50539b082bfbf9af6ffc327e561ca075be9c4a29ed12fb0374e03 +size 3183 diff --git a/config.json b/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/metrics.json b/metrics.json new file mode 100755 index 0000000000000000000000000000000000000000..b7d9ca841d5ea4c778e94b0d1bc9b56d93dfe120 --- /dev/null +++ b/metrics.json @@ -0,0 +1,2494 @@ +{"num_parameters": 102068736, "trainable_parameters": 102068736, "step": 0} +{"train_info/time_between_train_steps": 4.119660377502441, "step": 0} +{"info/global_step": 1, "train_info/time_within_train_step": 25.273154735565186, "step": 1} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 17273.5732421875, "train_info/memory_reserved": 18442.0, "train_info/memory_max_reserved": 18442.0, "_timestamp": 1740922541, "_runtime": 48}, "step": 1} +{"logs": {"train/loss": 10.107, "train/learning_rate": 4.9999999999999996e-06, "train/epoch": 0.0, "_timestamp": 1740922541, "_runtime": 48}, "step": 1} +{"train_info/time_between_train_steps": 0.16227054595947266, "step": 1} +{"info/global_step": 2, "train_info/time_within_train_step": 24.899159908294678, "step": 2} +{"train_info/time_between_train_steps": 0.005263090133666992, "step": 2} +{"info/global_step": 3, "train_info/time_within_train_step": 24.76400637626648, "step": 3} +{"train_info/time_between_train_steps": 0.005812168121337891, "step": 3} +{"info/global_step": 4, "train_info/time_within_train_step": 24.9081392288208, "step": 4} +{"train_info/time_between_train_steps": 0.0054624080657958984, "step": 4} +{"info/global_step": 5, "train_info/time_within_train_step": 24.758665084838867, "step": 5} +{"train_info/time_between_train_steps": 0.005239725112915039, "step": 5} +{"info/global_step": 6, "train_info/time_within_train_step": 24.90164089202881, "step": 6} +{"train_info/time_between_train_steps": 0.00541996955871582, "step": 6} +{"info/global_step": 7, "train_info/time_within_train_step": 24.77679443359375, "step": 7} +{"train_info/time_between_train_steps": 0.005455970764160156, "step": 7} +{"info/global_step": 8, "train_info/time_within_train_step": 24.918128967285156, "step": 8} +{"train_info/time_between_train_steps": 0.005603313446044922, "step": 8} +{"info/global_step": 9, "train_info/time_within_train_step": 24.73108959197998, "step": 9} +{"train_info/time_between_train_steps": 0.005206584930419922, "step": 9} +{"info/global_step": 10, "train_info/time_within_train_step": 24.95818066596985, "step": 10} +{"train_info/time_between_train_steps": 0.005242586135864258, "step": 10} +{"info/global_step": 11, "train_info/time_within_train_step": 24.72966957092285, "step": 11} +{"train_info/time_between_train_steps": 0.005383014678955078, "step": 11} +{"info/global_step": 12, "train_info/time_within_train_step": 24.93461561203003, "step": 12} +{"train_info/time_between_train_steps": 0.005430936813354492, "step": 12} +{"info/global_step": 13, "train_info/time_within_train_step": 24.736960887908936, "step": 13} +{"train_info/time_between_train_steps": 0.005471467971801758, "step": 13} +{"info/global_step": 14, "train_info/time_within_train_step": 24.84140968322754, "step": 14} +{"train_info/time_between_train_steps": 0.005342721939086914, "step": 14} +{"info/global_step": 15, "train_info/time_within_train_step": 24.740407466888428, "step": 15} +{"train_info/time_between_train_steps": 0.005161285400390625, "step": 15} +{"info/global_step": 16, "train_info/time_within_train_step": 24.922091007232666, "step": 16} +{"train_info/time_between_train_steps": 0.005364179611206055, "step": 16} +{"info/global_step": 17, "train_info/time_within_train_step": 24.812851905822754, "step": 17} +{"train_info/time_between_train_steps": 0.023205995559692383, "step": 17} +{"info/global_step": 18, "train_info/time_within_train_step": 24.7176673412323, "step": 18} +{"train_info/time_between_train_steps": 0.00516057014465332, "step": 18} +{"info/global_step": 19, "train_info/time_within_train_step": 24.77604627609253, "step": 19} +{"train_info/time_between_train_steps": 0.005003690719604492, "step": 19} +{"info/global_step": 20, "train_info/time_within_train_step": 24.755600452423096, "step": 20} +{"train_info/time_between_train_steps": 0.0050051212310791016, "step": 20} +{"info/global_step": 21, "train_info/time_within_train_step": 24.73310375213623, "step": 21} +{"train_info/time_between_train_steps": 0.005129575729370117, "step": 21} +{"info/global_step": 22, "train_info/time_within_train_step": 24.714956521987915, "step": 22} +{"train_info/time_between_train_steps": 0.004967689514160156, "step": 22} +{"info/global_step": 23, "train_info/time_within_train_step": 24.72867178916931, "step": 23} +{"train_info/time_between_train_steps": 0.005055904388427734, "step": 23} +{"info/global_step": 24, "train_info/time_within_train_step": 24.790987730026245, "step": 24} +{"train_info/time_between_train_steps": 0.0052073001861572266, "step": 24} +{"info/global_step": 25, "train_info/time_within_train_step": 24.712308645248413, "step": 25} +{"train_info/time_between_train_steps": 0.005122661590576172, "step": 25} +{"info/global_step": 26, "train_info/time_within_train_step": 24.72137475013733, "step": 26} +{"train_info/time_between_train_steps": 0.005218505859375, "step": 26} +{"info/global_step": 27, "train_info/time_within_train_step": 24.723060846328735, "step": 27} +{"train_info/time_between_train_steps": 0.005139350891113281, "step": 27} +{"info/global_step": 28, "train_info/time_within_train_step": 24.717763900756836, "step": 28} +{"train_info/time_between_train_steps": 0.005037546157836914, "step": 28} +{"info/global_step": 29, "train_info/time_within_train_step": 24.773507118225098, "step": 29} +{"train_info/time_between_train_steps": 0.005243062973022461, "step": 29} +{"info/global_step": 30, "train_info/time_within_train_step": 24.725379467010498, "step": 30} +{"train_info/time_between_train_steps": 0.0051152706146240234, "step": 30} +{"info/global_step": 31, "train_info/time_within_train_step": 24.79283308982849, "step": 31} +{"train_info/time_between_train_steps": 0.005353212356567383, "step": 31} +{"info/global_step": 32, "train_info/time_within_train_step": 24.845596313476562, "step": 32} +{"train_info/time_between_train_steps": 0.005185127258300781, "step": 32} +{"info/global_step": 33, "train_info/time_within_train_step": 24.721897840499878, "step": 33} +{"train_info/time_between_train_steps": 0.0050847530364990234, "step": 33} +{"info/global_step": 34, "train_info/time_within_train_step": 24.7961323261261, "step": 34} +{"train_info/time_between_train_steps": 0.005456209182739258, "step": 34} +{"info/global_step": 35, "train_info/time_within_train_step": 24.740270853042603, "step": 35} +{"train_info/time_between_train_steps": 0.00601649284362793, "step": 35} +{"info/global_step": 36, "train_info/time_within_train_step": 24.820967435836792, "step": 36} +{"train_info/time_between_train_steps": 0.005698442459106445, "step": 36} +{"train_info/time_between_train_steps": 16.769776821136475, "step": 36} +{"info/global_step": 37, "train_info/time_within_train_step": 24.724176168441772, "step": 37} +{"train_info/time_between_train_steps": 0.005446910858154297, "step": 37} +{"info/global_step": 38, "train_info/time_within_train_step": 24.888243436813354, "step": 38} +{"train_info/time_between_train_steps": 0.005348682403564453, "step": 38} +{"info/global_step": 39, "train_info/time_within_train_step": 24.74074101448059, "step": 39} +{"train_info/time_between_train_steps": 0.005143404006958008, "step": 39} +{"info/global_step": 40, "train_info/time_within_train_step": 24.876956462860107, "step": 40} +{"train_info/time_between_train_steps": 0.005250215530395508, "step": 40} +{"info/global_step": 41, "train_info/time_within_train_step": 24.744599103927612, "step": 41} +{"train_info/time_between_train_steps": 0.005498647689819336, "step": 41} +{"info/global_step": 42, "train_info/time_within_train_step": 24.8730149269104, "step": 42} +{"train_info/time_between_train_steps": 0.005347013473510742, "step": 42} +{"info/global_step": 43, "train_info/time_within_train_step": 24.74907660484314, "step": 43} +{"train_info/time_between_train_steps": 0.0051729679107666016, "step": 43} +{"info/global_step": 44, "train_info/time_within_train_step": 24.91425371170044, "step": 44} +{"train_info/time_between_train_steps": 0.0052471160888671875, "step": 44} +{"info/global_step": 45, "train_info/time_within_train_step": 24.913200616836548, "step": 45} +{"train_info/time_between_train_steps": 0.005139350891113281, "step": 45} +{"info/global_step": 46, "train_info/time_within_train_step": 24.85678744316101, "step": 46} +{"train_info/time_between_train_steps": 0.005409717559814453, "step": 46} +{"info/global_step": 47, "train_info/time_within_train_step": 24.846288204193115, "step": 47} +{"train_info/time_between_train_steps": 0.010351181030273438, "step": 47} +{"info/global_step": 48, "train_info/time_within_train_step": 24.877975940704346, "step": 48} +{"train_info/time_between_train_steps": 0.005288839340209961, "step": 48} +{"info/global_step": 49, "train_info/time_within_train_step": 24.741082668304443, "step": 49} +{"train_info/time_between_train_steps": 0.005689859390258789, "step": 49} +{"info/global_step": 50, "train_info/time_within_train_step": 24.926923513412476, "step": 50} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740923775, "_runtime": 1282}, "step": 50} +{"logs": {"train/loss": 6.9276, "train/learning_rate": 0.00025, "train/epoch": 1.01, "_timestamp": 1740923775, "_runtime": 1282}, "step": 50} +{"train_info/time_between_train_steps": 0.02619314193725586, "step": 50} +{"info/global_step": 51, "train_info/time_within_train_step": 24.733765363693237, "step": 51} +{"train_info/time_between_train_steps": 0.0051915645599365234, "step": 51} +{"info/global_step": 52, "train_info/time_within_train_step": 24.846138954162598, "step": 52} +{"train_info/time_between_train_steps": 0.005495786666870117, "step": 52} +{"info/global_step": 53, "train_info/time_within_train_step": 24.75537919998169, "step": 53} +{"train_info/time_between_train_steps": 0.02640056610107422, "step": 53} +{"info/global_step": 54, "train_info/time_within_train_step": 24.724066257476807, "step": 54} +{"train_info/time_between_train_steps": 0.005005836486816406, "step": 54} +{"info/global_step": 55, "train_info/time_within_train_step": 24.776208639144897, "step": 55} +{"train_info/time_between_train_steps": 0.005117654800415039, "step": 55} +{"info/global_step": 56, "train_info/time_within_train_step": 24.73217535018921, "step": 56} +{"train_info/time_between_train_steps": 0.005234718322753906, "step": 56} +{"info/global_step": 57, "train_info/time_within_train_step": 24.72877788543701, "step": 57} +{"train_info/time_between_train_steps": 0.0051615238189697266, "step": 57} +{"info/global_step": 58, "train_info/time_within_train_step": 24.72805428504944, "step": 58} +{"train_info/time_between_train_steps": 0.005256175994873047, "step": 58} +{"info/global_step": 59, "train_info/time_within_train_step": 24.72071409225464, "step": 59} +{"train_info/time_between_train_steps": 0.005088090896606445, "step": 59} +{"info/global_step": 60, "train_info/time_within_train_step": 24.721163511276245, "step": 60} +{"train_info/time_between_train_steps": 0.010199785232543945, "step": 60} +{"info/global_step": 61, "train_info/time_within_train_step": 24.728107929229736, "step": 61} +{"train_info/time_between_train_steps": 0.010354757308959961, "step": 61} +{"info/global_step": 62, "train_info/time_within_train_step": 24.893946647644043, "step": 62} +{"train_info/time_between_train_steps": 0.005186557769775391, "step": 62} +{"info/global_step": 63, "train_info/time_within_train_step": 24.727864265441895, "step": 63} +{"train_info/time_between_train_steps": 0.010123968124389648, "step": 63} +{"info/global_step": 64, "train_info/time_within_train_step": 24.728338956832886, "step": 64} +{"train_info/time_between_train_steps": 0.010281801223754883, "step": 64} +{"info/global_step": 65, "train_info/time_within_train_step": 24.76189136505127, "step": 65} +{"train_info/time_between_train_steps": 0.010266304016113281, "step": 65} +{"info/global_step": 66, "train_info/time_within_train_step": 24.720656871795654, "step": 66} +{"train_info/time_between_train_steps": 0.010033369064331055, "step": 66} +{"info/global_step": 67, "train_info/time_within_train_step": 24.803853034973145, "step": 67} +{"train_info/time_between_train_steps": 0.005928993225097656, "step": 67} +{"info/global_step": 68, "train_info/time_within_train_step": 24.964896202087402, "step": 68} +{"train_info/time_between_train_steps": 0.005087375640869141, "step": 68} +{"info/global_step": 69, "train_info/time_within_train_step": 24.72969079017639, "step": 69} +{"train_info/time_between_train_steps": 0.005283355712890625, "step": 69} +{"info/global_step": 70, "train_info/time_within_train_step": 24.737666368484497, "step": 70} +{"train_info/time_between_train_steps": 0.00552678108215332, "step": 70} +{"info/global_step": 71, "train_info/time_within_train_step": 24.742443323135376, "step": 71} +{"train_info/time_between_train_steps": 0.006570100784301758, "step": 71} +{"info/global_step": 72, "train_info/time_within_train_step": 24.776593923568726, "step": 72} +{"train_info/time_between_train_steps": 0.0061910152435302734, "step": 72} +{"train_info/time_between_train_steps": 17.14392328262329, "step": 72} +{"info/global_step": 73, "train_info/time_within_train_step": 24.720908641815186, "step": 73} +{"train_info/time_between_train_steps": 0.009241342544555664, "step": 73} +{"info/global_step": 74, "train_info/time_within_train_step": 24.987294912338257, "step": 74} +{"train_info/time_between_train_steps": 0.005404949188232422, "step": 74} +{"info/global_step": 75, "train_info/time_within_train_step": 24.731191158294678, "step": 75} +{"train_info/time_between_train_steps": 0.005391120910644531, "step": 75} +{"info/global_step": 76, "train_info/time_within_train_step": 24.9228732585907, "step": 76} +{"train_info/time_between_train_steps": 0.007287502288818359, "step": 76} +{"info/global_step": 77, "train_info/time_within_train_step": 24.734930276870728, "step": 77} +{"train_info/time_between_train_steps": 0.005297660827636719, "step": 77} +{"info/global_step": 78, "train_info/time_within_train_step": 24.99625539779663, "step": 78} +{"train_info/time_between_train_steps": 0.005424976348876953, "step": 78} +{"info/global_step": 79, "train_info/time_within_train_step": 24.729117393493652, "step": 79} +{"train_info/time_between_train_steps": 0.005209684371948242, "step": 79} +{"info/global_step": 80, "train_info/time_within_train_step": 24.873370885849, "step": 80} +{"train_info/time_between_train_steps": 0.005306243896484375, "step": 80} +{"info/global_step": 81, "train_info/time_within_train_step": 24.76773691177368, "step": 81} +{"train_info/time_between_train_steps": 0.0053806304931640625, "step": 81} +{"info/global_step": 82, "train_info/time_within_train_step": 24.859867811203003, "step": 82} +{"train_info/time_between_train_steps": 0.010189056396484375, "step": 82} +{"info/global_step": 83, "train_info/time_within_train_step": 24.8145592212677, "step": 83} +{"train_info/time_between_train_steps": 0.010822772979736328, "step": 83} +{"info/global_step": 84, "train_info/time_within_train_step": 24.873509407043457, "step": 84} +{"train_info/time_between_train_steps": 0.005445718765258789, "step": 84} +{"info/global_step": 85, "train_info/time_within_train_step": 24.732080459594727, "step": 85} +{"train_info/time_between_train_steps": 0.005346775054931641, "step": 85} +{"info/global_step": 86, "train_info/time_within_train_step": 24.945415258407593, "step": 86} +{"train_info/time_between_train_steps": 0.00546717643737793, "step": 86} +{"info/global_step": 87, "train_info/time_within_train_step": 24.738391399383545, "step": 87} +{"train_info/time_between_train_steps": 0.00524449348449707, "step": 87} +{"info/global_step": 88, "train_info/time_within_train_step": 24.831838607788086, "step": 88} +{"train_info/time_between_train_steps": 0.010589361190795898, "step": 88} +{"info/global_step": 89, "train_info/time_within_train_step": 24.759670972824097, "step": 89} +{"train_info/time_between_train_steps": 0.02579951286315918, "step": 89} +{"info/global_step": 90, "train_info/time_within_train_step": 24.7477285861969, "step": 90} +{"train_info/time_between_train_steps": 0.010054349899291992, "step": 90} +{"info/global_step": 91, "train_info/time_within_train_step": 24.75909996032715, "step": 91} +{"train_info/time_between_train_steps": 0.005146980285644531, "step": 91} +{"info/global_step": 92, "train_info/time_within_train_step": 24.7253897190094, "step": 92} +{"train_info/time_between_train_steps": 0.0052030086517333984, "step": 92} +{"info/global_step": 93, "train_info/time_within_train_step": 24.841415405273438, "step": 93} +{"train_info/time_between_train_steps": 0.005061149597167969, "step": 93} +{"info/global_step": 94, "train_info/time_within_train_step": 24.730775833129883, "step": 94} +{"train_info/time_between_train_steps": 0.0050506591796875, "step": 94} +{"info/global_step": 95, "train_info/time_within_train_step": 24.753088235855103, "step": 95} +{"train_info/time_between_train_steps": 0.0051801204681396484, "step": 95} +{"info/global_step": 96, "train_info/time_within_train_step": 24.729127883911133, "step": 96} +{"train_info/time_between_train_steps": 0.00519561767578125, "step": 96} +{"info/global_step": 97, "train_info/time_within_train_step": 24.7333767414093, "step": 97} +{"train_info/time_between_train_steps": 0.005438804626464844, "step": 97} +{"info/global_step": 98, "train_info/time_within_train_step": 24.761513233184814, "step": 98} +{"train_info/time_between_train_steps": 0.0050563812255859375, "step": 98} +{"info/global_step": 99, "train_info/time_within_train_step": 24.72911047935486, "step": 99} +{"train_info/time_between_train_steps": 0.0051364898681640625, "step": 99} +{"info/global_step": 100, "train_info/time_within_train_step": 24.73305368423462, "step": 100} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740925033, "_runtime": 2540}, "step": 100} +{"logs": {"train/loss": 4.9315, "train/learning_rate": 0.0005, "train/epoch": 2.02, "_timestamp": 1740925033, "_runtime": 2540}, "step": 100} +{"train_info/time_between_train_steps": 133.4732301235199, "step": 100} +{"info/global_step": 101, "train_info/time_within_train_step": 24.882115364074707, "step": 101} +{"train_info/time_between_train_steps": 0.013573646545410156, "step": 101} +{"info/global_step": 102, "train_info/time_within_train_step": 25.093729972839355, "step": 102} +{"train_info/time_between_train_steps": 0.013552427291870117, "step": 102} +{"info/global_step": 103, "train_info/time_within_train_step": 24.73431634902954, "step": 103} +{"train_info/time_between_train_steps": 0.005156755447387695, "step": 103} +{"info/global_step": 104, "train_info/time_within_train_step": 24.719481229782104, "step": 104} +{"train_info/time_between_train_steps": 0.005179643630981445, "step": 104} +{"info/global_step": 105, "train_info/time_within_train_step": 24.80142593383789, "step": 105} +{"train_info/time_between_train_steps": 0.013840913772583008, "step": 105} +{"info/global_step": 106, "train_info/time_within_train_step": 24.729044437408447, "step": 106} +{"train_info/time_between_train_steps": 0.005442619323730469, "step": 106} +{"info/global_step": 107, "train_info/time_within_train_step": 24.805668354034424, "step": 107} +{"train_info/time_between_train_steps": 0.006098747253417969, "step": 107} +{"info/global_step": 108, "train_info/time_within_train_step": 24.762121200561523, "step": 108} +{"train_info/time_between_train_steps": 0.006226301193237305, "step": 108} +{"train_info/time_between_train_steps": 16.892711877822876, "step": 108} +{"info/global_step": 109, "train_info/time_within_train_step": 24.726821660995483, "step": 109} +{"train_info/time_between_train_steps": 0.005229949951171875, "step": 109} +{"info/global_step": 110, "train_info/time_within_train_step": 24.883602619171143, "step": 110} +{"train_info/time_between_train_steps": 0.00515437126159668, "step": 110} +{"info/global_step": 111, "train_info/time_within_train_step": 24.73740816116333, "step": 111} +{"train_info/time_between_train_steps": 0.0053539276123046875, "step": 111} +{"info/global_step": 112, "train_info/time_within_train_step": 24.853476524353027, "step": 112} +{"train_info/time_between_train_steps": 0.005315542221069336, "step": 112} +{"info/global_step": 113, "train_info/time_within_train_step": 24.732415199279785, "step": 113} +{"train_info/time_between_train_steps": 0.007449150085449219, "step": 113} +{"info/global_step": 114, "train_info/time_within_train_step": 24.931288719177246, "step": 114} +{"train_info/time_between_train_steps": 0.010734319686889648, "step": 114} +{"info/global_step": 115, "train_info/time_within_train_step": 24.732808828353882, "step": 115} +{"train_info/time_between_train_steps": 0.010184049606323242, "step": 115} +{"info/global_step": 116, "train_info/time_within_train_step": 24.86386466026306, "step": 116} +{"train_info/time_between_train_steps": 0.0053827762603759766, "step": 116} +{"info/global_step": 117, "train_info/time_within_train_step": 24.759897708892822, "step": 117} +{"train_info/time_between_train_steps": 0.005577564239501953, "step": 117} +{"info/global_step": 118, "train_info/time_within_train_step": 24.87812900543213, "step": 118} +{"train_info/time_between_train_steps": 0.00519871711730957, "step": 118} +{"info/global_step": 119, "train_info/time_within_train_step": 24.812983989715576, "step": 119} +{"train_info/time_between_train_steps": 0.0054700374603271484, "step": 119} +{"info/global_step": 120, "train_info/time_within_train_step": 24.913219451904297, "step": 120} +{"train_info/time_between_train_steps": 0.005382537841796875, "step": 120} +{"info/global_step": 121, "train_info/time_within_train_step": 24.818816661834717, "step": 121} +{"train_info/time_between_train_steps": 0.010629653930664062, "step": 121} +{"info/global_step": 122, "train_info/time_within_train_step": 24.90242600440979, "step": 122} +{"train_info/time_between_train_steps": 0.0054395198822021484, "step": 122} +{"info/global_step": 123, "train_info/time_within_train_step": 24.75810217857361, "step": 123} +{"train_info/time_between_train_steps": 0.005406618118286133, "step": 123} +{"info/global_step": 124, "train_info/time_within_train_step": 24.95044445991516, "step": 124} +{"train_info/time_between_train_steps": 0.0053691864013671875, "step": 124} +{"info/global_step": 125, "train_info/time_within_train_step": 24.73263382911682, "step": 125} +{"train_info/time_between_train_steps": 0.03280758857727051, "step": 125} +{"info/global_step": 126, "train_info/time_within_train_step": 24.757869720458984, "step": 126} +{"train_info/time_between_train_steps": 0.005228519439697266, "step": 126} +{"info/global_step": 127, "train_info/time_within_train_step": 24.71414303779602, "step": 127} +{"train_info/time_between_train_steps": 0.00507044792175293, "step": 127} +{"info/global_step": 128, "train_info/time_within_train_step": 24.71859574317932, "step": 128} +{"train_info/time_between_train_steps": 0.0050776004791259766, "step": 128} +{"info/global_step": 129, "train_info/time_within_train_step": 24.721726894378662, "step": 129} +{"train_info/time_between_train_steps": 0.00513148307800293, "step": 129} +{"info/global_step": 130, "train_info/time_within_train_step": 24.719080209732056, "step": 130} +{"train_info/time_between_train_steps": 0.004944324493408203, "step": 130} +{"info/global_step": 131, "train_info/time_within_train_step": 24.734745264053345, "step": 131} +{"train_info/time_between_train_steps": 0.005109548568725586, "step": 131} +{"info/global_step": 132, "train_info/time_within_train_step": 24.716788291931152, "step": 132} +{"train_info/time_between_train_steps": 0.005068063735961914, "step": 132} +{"info/global_step": 133, "train_info/time_within_train_step": 24.720041513442993, "step": 133} +{"train_info/time_between_train_steps": 0.005125999450683594, "step": 133} +{"info/global_step": 134, "train_info/time_within_train_step": 24.72980237007141, "step": 134} +{"train_info/time_between_train_steps": 0.005273580551147461, "step": 134} +{"info/global_step": 135, "train_info/time_within_train_step": 24.720351219177246, "step": 135} +{"train_info/time_between_train_steps": 0.0050237178802490234, "step": 135} +{"info/global_step": 136, "train_info/time_within_train_step": 24.800422430038452, "step": 136} +{"train_info/time_between_train_steps": 0.0050089359283447266, "step": 136} +{"info/global_step": 137, "train_info/time_within_train_step": 24.7293643951416, "step": 137} +{"train_info/time_between_train_steps": 0.005222320556640625, "step": 137} +{"info/global_step": 138, "train_info/time_within_train_step": 24.800289630889893, "step": 138} +{"train_info/time_between_train_steps": 0.004994630813598633, "step": 138} +{"info/global_step": 139, "train_info/time_within_train_step": 24.815828323364258, "step": 139} +{"train_info/time_between_train_steps": 0.00518035888671875, "step": 139} +{"info/global_step": 140, "train_info/time_within_train_step": 24.746534824371338, "step": 140} +{"train_info/time_between_train_steps": 0.005275726318359375, "step": 140} +{"info/global_step": 141, "train_info/time_within_train_step": 24.798784732818604, "step": 141} +{"train_info/time_between_train_steps": 0.005303621292114258, "step": 141} +{"info/global_step": 142, "train_info/time_within_train_step": 24.73755121231079, "step": 142} +{"train_info/time_between_train_steps": 0.005464315414428711, "step": 142} +{"info/global_step": 143, "train_info/time_within_train_step": 24.810331106185913, "step": 143} +{"train_info/time_between_train_steps": 0.010912418365478516, "step": 143} +{"info/global_step": 144, "train_info/time_within_train_step": 24.798304557800293, "step": 144} +{"train_info/time_between_train_steps": 0.014536619186401367, "step": 144} +{"train_info/time_between_train_steps": 17.0001060962677, "step": 144} +{"info/global_step": 145, "train_info/time_within_train_step": 24.798924922943115, "step": 145} +{"train_info/time_between_train_steps": 0.005002021789550781, "step": 145} +{"info/global_step": 146, "train_info/time_within_train_step": 24.832703113555908, "step": 146} +{"train_info/time_between_train_steps": 0.005168437957763672, "step": 146} +{"info/global_step": 147, "train_info/time_within_train_step": 24.809592485427856, "step": 147} +{"train_info/time_between_train_steps": 0.004993438720703125, "step": 147} +{"info/global_step": 148, "train_info/time_within_train_step": 24.841588497161865, "step": 148} +{"train_info/time_between_train_steps": 0.005094051361083984, "step": 148} +{"info/global_step": 149, "train_info/time_within_train_step": 24.73699951171875, "step": 149} +{"train_info/time_between_train_steps": 0.005188465118408203, "step": 149} +{"info/global_step": 150, "train_info/time_within_train_step": 24.923277378082275, "step": 150} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740926441, "_runtime": 3948}, "step": 150} +{"logs": {"train/loss": 4.3684, "train/learning_rate": 0.0005833333333333333, "train/epoch": 4.0, "_timestamp": 1740926441, "_runtime": 3948}, "step": 150} +{"train_info/time_between_train_steps": 0.026505708694458008, "step": 150} +{"info/global_step": 151, "train_info/time_within_train_step": 24.73082399368286, "step": 151} +{"train_info/time_between_train_steps": 0.00517582893371582, "step": 151} +{"info/global_step": 152, "train_info/time_within_train_step": 24.879566431045532, "step": 152} +{"train_info/time_between_train_steps": 0.005157947540283203, "step": 152} +{"info/global_step": 153, "train_info/time_within_train_step": 24.730825901031494, "step": 153} +{"train_info/time_between_train_steps": 0.0052263736724853516, "step": 153} +{"info/global_step": 154, "train_info/time_within_train_step": 24.848764896392822, "step": 154} +{"train_info/time_between_train_steps": 0.009995222091674805, "step": 154} +{"info/global_step": 155, "train_info/time_within_train_step": 24.834640979766846, "step": 155} +{"train_info/time_between_train_steps": 0.0056498050689697266, "step": 155} +{"info/global_step": 156, "train_info/time_within_train_step": 25.045519590377808, "step": 156} +{"train_info/time_between_train_steps": 0.01007699966430664, "step": 156} +{"info/global_step": 157, "train_info/time_within_train_step": 24.797995805740356, "step": 157} +{"train_info/time_between_train_steps": 0.010064125061035156, "step": 157} +{"info/global_step": 158, "train_info/time_within_train_step": 24.8933846950531, "step": 158} +{"train_info/time_between_train_steps": 0.0052525997161865234, "step": 158} +{"info/global_step": 159, "train_info/time_within_train_step": 24.819217443466187, "step": 159} +{"train_info/time_between_train_steps": 0.005067586898803711, "step": 159} +{"info/global_step": 160, "train_info/time_within_train_step": 24.829960346221924, "step": 160} +{"train_info/time_between_train_steps": 0.007098197937011719, "step": 160} +{"info/global_step": 161, "train_info/time_within_train_step": 24.82168698310852, "step": 161} +{"train_info/time_between_train_steps": 0.07840657234191895, "step": 161} +{"info/global_step": 162, "train_info/time_within_train_step": 24.856635808944702, "step": 162} +{"train_info/time_between_train_steps": 0.006185293197631836, "step": 162} +{"info/global_step": 163, "train_info/time_within_train_step": 24.7722110748291, "step": 163} +{"train_info/time_between_train_steps": 0.005104780197143555, "step": 163} +{"info/global_step": 164, "train_info/time_within_train_step": 24.731840133666992, "step": 164} +{"train_info/time_between_train_steps": 0.00516819953918457, "step": 164} +{"info/global_step": 165, "train_info/time_within_train_step": 24.72507882118225, "step": 165} +{"train_info/time_between_train_steps": 0.005038261413574219, "step": 165} +{"info/global_step": 166, "train_info/time_within_train_step": 24.74874997138977, "step": 166} +{"train_info/time_between_train_steps": 0.005453824996948242, "step": 166} +{"info/global_step": 167, "train_info/time_within_train_step": 24.785122632980347, "step": 167} +{"train_info/time_between_train_steps": 0.005211830139160156, "step": 167} +{"info/global_step": 168, "train_info/time_within_train_step": 24.749690771102905, "step": 168} +{"train_info/time_between_train_steps": 0.005339622497558594, "step": 168} +{"info/global_step": 169, "train_info/time_within_train_step": 24.773287534713745, "step": 169} +{"train_info/time_between_train_steps": 0.010312795639038086, "step": 169} +{"info/global_step": 170, "train_info/time_within_train_step": 24.814843893051147, "step": 170} +{"train_info/time_between_train_steps": 0.005171060562133789, "step": 170} +{"info/global_step": 171, "train_info/time_within_train_step": 24.80934739112854, "step": 171} +{"train_info/time_between_train_steps": 0.005190134048461914, "step": 171} +{"info/global_step": 172, "train_info/time_within_train_step": 24.73474431037903, "step": 172} +{"train_info/time_between_train_steps": 0.00541996955871582, "step": 172} +{"info/global_step": 173, "train_info/time_within_train_step": 24.73139524459839, "step": 173} +{"train_info/time_between_train_steps": 0.0051043033599853516, "step": 173} +{"info/global_step": 174, "train_info/time_within_train_step": 24.764876127243042, "step": 174} +{"train_info/time_between_train_steps": 0.005115509033203125, "step": 174} +{"info/global_step": 175, "train_info/time_within_train_step": 24.733168840408325, "step": 175} +{"train_info/time_between_train_steps": 0.005206108093261719, "step": 175} +{"info/global_step": 176, "train_info/time_within_train_step": 24.804470777511597, "step": 176} +{"train_info/time_between_train_steps": 0.0051708221435546875, "step": 176} +{"info/global_step": 177, "train_info/time_within_train_step": 24.737152099609375, "step": 177} +{"train_info/time_between_train_steps": 0.0054700374603271484, "step": 177} +{"info/global_step": 178, "train_info/time_within_train_step": 24.748235940933228, "step": 178} +{"train_info/time_between_train_steps": 0.005700349807739258, "step": 178} +{"info/global_step": 179, "train_info/time_within_train_step": 24.765604257583618, "step": 179} +{"train_info/time_between_train_steps": 0.006046295166015625, "step": 179} +{"info/global_step": 180, "train_info/time_within_train_step": 24.773468494415283, "step": 180} +{"train_info/time_between_train_steps": 0.00640869140625, "step": 180} +{"train_info/time_between_train_steps": 17.099931240081787, "step": 180} +{"info/global_step": 181, "train_info/time_within_train_step": 24.723568201065063, "step": 181} +{"train_info/time_between_train_steps": 0.0050089359283447266, "step": 181} +{"info/global_step": 182, "train_info/time_within_train_step": 24.872040271759033, "step": 182} +{"train_info/time_between_train_steps": 0.005255460739135742, "step": 182} +{"info/global_step": 183, "train_info/time_within_train_step": 24.740485429763794, "step": 183} +{"train_info/time_between_train_steps": 0.005321979522705078, "step": 183} +{"info/global_step": 184, "train_info/time_within_train_step": 24.84564208984375, "step": 184} +{"train_info/time_between_train_steps": 0.005305051803588867, "step": 184} +{"info/global_step": 185, "train_info/time_within_train_step": 24.8663649559021, "step": 185} +{"train_info/time_between_train_steps": 0.010261774063110352, "step": 185} +{"info/global_step": 186, "train_info/time_within_train_step": 24.8662531375885, "step": 186} +{"train_info/time_between_train_steps": 0.005337953567504883, "step": 186} +{"info/global_step": 187, "train_info/time_within_train_step": 24.73142647743225, "step": 187} +{"train_info/time_between_train_steps": 0.005232095718383789, "step": 187} +{"info/global_step": 188, "train_info/time_within_train_step": 24.902579069137573, "step": 188} +{"train_info/time_between_train_steps": 0.005342006683349609, "step": 188} +{"info/global_step": 189, "train_info/time_within_train_step": 24.75101613998413, "step": 189} +{"train_info/time_between_train_steps": 0.0053331851959228516, "step": 189} +{"info/global_step": 190, "train_info/time_within_train_step": 24.886656284332275, "step": 190} +{"train_info/time_between_train_steps": 0.005047321319580078, "step": 190} +{"info/global_step": 191, "train_info/time_within_train_step": 24.751821994781494, "step": 191} +{"train_info/time_between_train_steps": 0.005628108978271484, "step": 191} +{"info/global_step": 192, "train_info/time_within_train_step": 24.87754797935486, "step": 192} +{"train_info/time_between_train_steps": 0.005446672439575195, "step": 192} +{"info/global_step": 193, "train_info/time_within_train_step": 24.74321436882019, "step": 193} +{"train_info/time_between_train_steps": 0.005414724349975586, "step": 193} +{"info/global_step": 194, "train_info/time_within_train_step": 25.092822790145874, "step": 194} +{"train_info/time_between_train_steps": 0.0052356719970703125, "step": 194} +{"info/global_step": 195, "train_info/time_within_train_step": 24.81673288345337, "step": 195} +{"train_info/time_between_train_steps": 0.005236148834228516, "step": 195} +{"info/global_step": 196, "train_info/time_within_train_step": 24.822011947631836, "step": 196} +{"train_info/time_between_train_steps": 0.005288839340209961, "step": 196} +{"info/global_step": 197, "train_info/time_within_train_step": 24.82948064804077, "step": 197} +{"train_info/time_between_train_steps": 0.037397146224975586, "step": 197} +{"info/global_step": 198, "train_info/time_within_train_step": 24.71962308883667, "step": 198} +{"train_info/time_between_train_steps": 0.009263277053833008, "step": 198} +{"info/global_step": 199, "train_info/time_within_train_step": 24.81124997138977, "step": 199} +{"train_info/time_between_train_steps": 0.004952669143676758, "step": 199} +{"info/global_step": 200, "train_info/time_within_train_step": 24.810889720916748, "step": 200} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740927700, "_runtime": 5207}, "step": 200} +{"logs": {"train/loss": 4.0546, "train/learning_rate": 0.0005555555555555556, "train/epoch": 5.02, "_timestamp": 1740927700, "_runtime": 5207}, "step": 200} +{"train_info/time_between_train_steps": 116.5102527141571, "step": 200} +{"info/global_step": 201, "train_info/time_within_train_step": 24.813753843307495, "step": 201} +{"train_info/time_between_train_steps": 0.005078792572021484, "step": 201} +{"info/global_step": 202, "train_info/time_within_train_step": 24.729507446289062, "step": 202} +{"train_info/time_between_train_steps": 0.0049669742584228516, "step": 202} +{"info/global_step": 203, "train_info/time_within_train_step": 24.769134521484375, "step": 203} +{"train_info/time_between_train_steps": 0.010085582733154297, "step": 203} +{"info/global_step": 204, "train_info/time_within_train_step": 24.7316997051239, "step": 204} +{"train_info/time_between_train_steps": 0.010113239288330078, "step": 204} +{"info/global_step": 205, "train_info/time_within_train_step": 24.731664896011353, "step": 205} +{"train_info/time_between_train_steps": 0.009960174560546875, "step": 205} +{"info/global_step": 206, "train_info/time_within_train_step": 24.725895404815674, "step": 206} +{"train_info/time_between_train_steps": 0.010246753692626953, "step": 206} +{"info/global_step": 207, "train_info/time_within_train_step": 24.730036973953247, "step": 207} +{"train_info/time_between_train_steps": 0.009989738464355469, "step": 207} +{"info/global_step": 208, "train_info/time_within_train_step": 24.730824947357178, "step": 208} +{"train_info/time_between_train_steps": 0.005239725112915039, "step": 208} +{"info/global_step": 209, "train_info/time_within_train_step": 24.725091695785522, "step": 209} +{"train_info/time_between_train_steps": 0.009857416152954102, "step": 209} +{"info/global_step": 210, "train_info/time_within_train_step": 24.749786615371704, "step": 210} +{"train_info/time_between_train_steps": 0.0050373077392578125, "step": 210} +{"info/global_step": 211, "train_info/time_within_train_step": 24.72877264022827, "step": 211} +{"train_info/time_between_train_steps": 0.0053806304931640625, "step": 211} +{"info/global_step": 212, "train_info/time_within_train_step": 24.80886936187744, "step": 212} +{"train_info/time_between_train_steps": 0.0050771236419677734, "step": 212} +{"info/global_step": 213, "train_info/time_within_train_step": 24.730902910232544, "step": 213} +{"train_info/time_between_train_steps": 0.005183219909667969, "step": 213} +{"info/global_step": 214, "train_info/time_within_train_step": 24.83693242073059, "step": 214} +{"train_info/time_between_train_steps": 0.0055811405181884766, "step": 214} +{"info/global_step": 215, "train_info/time_within_train_step": 24.758980751037598, "step": 215} +{"train_info/time_between_train_steps": 0.006060123443603516, "step": 215} +{"info/global_step": 216, "train_info/time_within_train_step": 24.856537103652954, "step": 216} +{"train_info/time_between_train_steps": 0.006104707717895508, "step": 216} +{"train_info/time_between_train_steps": 16.9336416721344, "step": 216} +{"info/global_step": 217, "train_info/time_within_train_step": 24.71935510635376, "step": 217} +{"train_info/time_between_train_steps": 0.004846811294555664, "step": 217} +{"info/global_step": 218, "train_info/time_within_train_step": 24.89282727241516, "step": 218} +{"train_info/time_between_train_steps": 0.005189180374145508, "step": 218} +{"info/global_step": 219, "train_info/time_within_train_step": 24.769453525543213, "step": 219} +{"train_info/time_between_train_steps": 0.0055239200592041016, "step": 219} +{"info/global_step": 220, "train_info/time_within_train_step": 24.877575159072876, "step": 220} +{"train_info/time_between_train_steps": 0.005213022232055664, "step": 220} +{"info/global_step": 221, "train_info/time_within_train_step": 24.77844524383545, "step": 221} +{"train_info/time_between_train_steps": 0.0053331851959228516, "step": 221} +{"info/global_step": 222, "train_info/time_within_train_step": 24.934911012649536, "step": 222} +{"train_info/time_between_train_steps": 0.005452156066894531, "step": 222} +{"info/global_step": 223, "train_info/time_within_train_step": 24.73674774169922, "step": 223} +{"train_info/time_between_train_steps": 0.00519871711730957, "step": 223} +{"info/global_step": 224, "train_info/time_within_train_step": 24.928049564361572, "step": 224} +{"train_info/time_between_train_steps": 0.005375385284423828, "step": 224} +{"info/global_step": 225, "train_info/time_within_train_step": 24.75495409965515, "step": 225} +{"train_info/time_between_train_steps": 0.005770444869995117, "step": 225} +{"info/global_step": 226, "train_info/time_within_train_step": 24.95674180984497, "step": 226} +{"train_info/time_between_train_steps": 0.0053043365478515625, "step": 226} +{"info/global_step": 227, "train_info/time_within_train_step": 24.746970653533936, "step": 227} +{"train_info/time_between_train_steps": 0.0056591033935546875, "step": 227} +{"info/global_step": 228, "train_info/time_within_train_step": 24.907557249069214, "step": 228} +{"train_info/time_between_train_steps": 0.0053942203521728516, "step": 228} +{"info/global_step": 229, "train_info/time_within_train_step": 24.738362550735474, "step": 229} +{"train_info/time_between_train_steps": 0.0052642822265625, "step": 229} +{"info/global_step": 230, "train_info/time_within_train_step": 24.887505054473877, "step": 230} +{"train_info/time_between_train_steps": 0.014330148696899414, "step": 230} +{"info/global_step": 231, "train_info/time_within_train_step": 24.81975793838501, "step": 231} +{"train_info/time_between_train_steps": 0.005445241928100586, "step": 231} +{"info/global_step": 232, "train_info/time_within_train_step": 24.939480543136597, "step": 232} +{"train_info/time_between_train_steps": 0.005411386489868164, "step": 232} +{"info/global_step": 233, "train_info/time_within_train_step": 24.82803249359131, "step": 233} +{"train_info/time_between_train_steps": 0.03844499588012695, "step": 233} +{"info/global_step": 234, "train_info/time_within_train_step": 24.729262351989746, "step": 234} +{"train_info/time_between_train_steps": 0.005104780197143555, "step": 234} +{"info/global_step": 235, "train_info/time_within_train_step": 24.738863706588745, "step": 235} +{"train_info/time_between_train_steps": 0.004994869232177734, "step": 235} +{"info/global_step": 236, "train_info/time_within_train_step": 24.799175262451172, "step": 236} +{"train_info/time_between_train_steps": 0.009907007217407227, "step": 236} +{"info/global_step": 237, "train_info/time_within_train_step": 24.763516664505005, "step": 237} +{"train_info/time_between_train_steps": 0.005143880844116211, "step": 237} +{"info/global_step": 238, "train_info/time_within_train_step": 24.802821159362793, "step": 238} +{"train_info/time_between_train_steps": 0.005028724670410156, "step": 238} +{"info/global_step": 239, "train_info/time_within_train_step": 24.729965925216675, "step": 239} +{"train_info/time_between_train_steps": 0.005166769027709961, "step": 239} +{"info/global_step": 240, "train_info/time_within_train_step": 24.729742765426636, "step": 240} +{"train_info/time_between_train_steps": 0.00525665283203125, "step": 240} +{"info/global_step": 241, "train_info/time_within_train_step": 24.767504453659058, "step": 241} +{"train_info/time_between_train_steps": 0.0052144527435302734, "step": 241} +{"info/global_step": 242, "train_info/time_within_train_step": 24.741360902786255, "step": 242} +{"train_info/time_between_train_steps": 0.005282402038574219, "step": 242} +{"info/global_step": 243, "train_info/time_within_train_step": 24.842708349227905, "step": 243} +{"train_info/time_between_train_steps": 0.005072355270385742, "step": 243} +{"info/global_step": 244, "train_info/time_within_train_step": 24.72869324684143, "step": 244} +{"train_info/time_between_train_steps": 0.005182027816772461, "step": 244} +{"info/global_step": 245, "train_info/time_within_train_step": 24.791080236434937, "step": 245} +{"train_info/time_between_train_steps": 0.0051686763763427734, "step": 245} +{"info/global_step": 246, "train_info/time_within_train_step": 24.728395223617554, "step": 246} +{"train_info/time_between_train_steps": 0.0050318241119384766, "step": 246} +{"info/global_step": 247, "train_info/time_within_train_step": 24.823030948638916, "step": 247} +{"train_info/time_between_train_steps": 0.00517582893371582, "step": 247} +{"info/global_step": 248, "train_info/time_within_train_step": 24.81110453605652, "step": 248} +{"train_info/time_between_train_steps": 0.00519871711730957, "step": 248} +{"info/global_step": 249, "train_info/time_within_train_step": 24.736380338668823, "step": 249} +{"train_info/time_between_train_steps": 0.005385875701904297, "step": 249} +{"info/global_step": 250, "train_info/time_within_train_step": 24.8276948928833, "step": 250} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740929075, "_runtime": 6582}, "step": 250} +{"logs": {"train/loss": 3.9329, "train/learning_rate": 0.0005277777777777777, "train/epoch": 6.03, "_timestamp": 1740929075, "_runtime": 6582}, "step": 250} +{"train_info/time_between_train_steps": 0.02699422836303711, "step": 250} +{"info/global_step": 251, "train_info/time_within_train_step": 24.753977298736572, "step": 251} +{"train_info/time_between_train_steps": 0.005865812301635742, "step": 251} +{"info/global_step": 252, "train_info/time_within_train_step": 24.788028478622437, "step": 252} +{"train_info/time_between_train_steps": 0.00592350959777832, "step": 252} +{"train_info/time_between_train_steps": 16.878462314605713, "step": 252} +{"info/global_step": 253, "train_info/time_within_train_step": 24.716723203659058, "step": 253} +{"train_info/time_between_train_steps": 0.0049915313720703125, "step": 253} +{"info/global_step": 254, "train_info/time_within_train_step": 24.920268774032593, "step": 254} +{"train_info/time_between_train_steps": 0.004972934722900391, "step": 254} +{"info/global_step": 255, "train_info/time_within_train_step": 24.732539415359497, "step": 255} +{"train_info/time_between_train_steps": 0.007607698440551758, "step": 255} +{"info/global_step": 256, "train_info/time_within_train_step": 24.8757164478302, "step": 256} +{"train_info/time_between_train_steps": 0.005404233932495117, "step": 256} +{"info/global_step": 257, "train_info/time_within_train_step": 24.8071231842041, "step": 257} +{"train_info/time_between_train_steps": 0.005136966705322266, "step": 257} +{"info/global_step": 258, "train_info/time_within_train_step": 24.880013704299927, "step": 258} +{"train_info/time_between_train_steps": 0.005314826965332031, "step": 258} +{"info/global_step": 259, "train_info/time_within_train_step": 24.91344404220581, "step": 259} +{"train_info/time_between_train_steps": 0.005241870880126953, "step": 259} +{"info/global_step": 260, "train_info/time_within_train_step": 24.962342500686646, "step": 260} +{"train_info/time_between_train_steps": 0.014187335968017578, "step": 260} +{"info/global_step": 261, "train_info/time_within_train_step": 24.74067521095276, "step": 261} +{"train_info/time_between_train_steps": 0.00540471076965332, "step": 261} +{"info/global_step": 262, "train_info/time_within_train_step": 24.98554229736328, "step": 262} +{"train_info/time_between_train_steps": 0.009653568267822266, "step": 262} +{"info/global_step": 263, "train_info/time_within_train_step": 24.732510566711426, "step": 263} +{"train_info/time_between_train_steps": 0.005326986312866211, "step": 263} +{"info/global_step": 264, "train_info/time_within_train_step": 24.97148585319519, "step": 264} +{"train_info/time_between_train_steps": 0.0053026676177978516, "step": 264} +{"info/global_step": 265, "train_info/time_within_train_step": 24.74306082725525, "step": 265} +{"train_info/time_between_train_steps": 0.005445957183837891, "step": 265} +{"info/global_step": 266, "train_info/time_within_train_step": 24.867276191711426, "step": 266} +{"train_info/time_between_train_steps": 0.005270481109619141, "step": 266} +{"info/global_step": 267, "train_info/time_within_train_step": 24.73718237876892, "step": 267} +{"train_info/time_between_train_steps": 0.005268096923828125, "step": 267} +{"info/global_step": 268, "train_info/time_within_train_step": 24.853711366653442, "step": 268} +{"train_info/time_between_train_steps": 0.005338191986083984, "step": 268} +{"info/global_step": 269, "train_info/time_within_train_step": 24.734222650527954, "step": 269} +{"train_info/time_between_train_steps": 0.023396730422973633, "step": 269} +{"info/global_step": 270, "train_info/time_within_train_step": 24.7267963886261, "step": 270} +{"train_info/time_between_train_steps": 0.005250692367553711, "step": 270} +{"info/global_step": 271, "train_info/time_within_train_step": 24.739325523376465, "step": 271} +{"train_info/time_between_train_steps": 0.004991054534912109, "step": 271} +{"info/global_step": 272, "train_info/time_within_train_step": 24.7223379611969, "step": 272} +{"train_info/time_between_train_steps": 0.0050296783447265625, "step": 272} +{"info/global_step": 273, "train_info/time_within_train_step": 24.742064476013184, "step": 273} +{"train_info/time_between_train_steps": 0.005102396011352539, "step": 273} +{"info/global_step": 274, "train_info/time_within_train_step": 24.737054347991943, "step": 274} +{"train_info/time_between_train_steps": 0.005015850067138672, "step": 274} +{"info/global_step": 275, "train_info/time_within_train_step": 24.726995944976807, "step": 275} +{"train_info/time_between_train_steps": 0.005034685134887695, "step": 275} +{"info/global_step": 276, "train_info/time_within_train_step": 24.727190017700195, "step": 276} +{"train_info/time_between_train_steps": 0.005263090133666992, "step": 276} +{"info/global_step": 277, "train_info/time_within_train_step": 24.728248834609985, "step": 277} +{"train_info/time_between_train_steps": 0.005223274230957031, "step": 277} +{"info/global_step": 278, "train_info/time_within_train_step": 24.818652153015137, "step": 278} +{"train_info/time_between_train_steps": 0.005064725875854492, "step": 278} +{"info/global_step": 279, "train_info/time_within_train_step": 24.729640245437622, "step": 279} +{"train_info/time_between_train_steps": 0.0051839351654052734, "step": 279} +{"info/global_step": 280, "train_info/time_within_train_step": 24.731594800949097, "step": 280} +{"train_info/time_between_train_steps": 0.005083560943603516, "step": 280} +{"info/global_step": 281, "train_info/time_within_train_step": 25.144447565078735, "step": 281} +{"train_info/time_between_train_steps": 0.009288787841796875, "step": 281} +{"info/global_step": 282, "train_info/time_within_train_step": 24.798739433288574, "step": 282} +{"train_info/time_between_train_steps": 0.013722658157348633, "step": 282} +{"info/global_step": 283, "train_info/time_within_train_step": 24.843758821487427, "step": 283} +{"train_info/time_between_train_steps": 0.005075693130493164, "step": 283} +{"info/global_step": 284, "train_info/time_within_train_step": 24.734339952468872, "step": 284} +{"train_info/time_between_train_steps": 0.00519108772277832, "step": 284} +{"info/global_step": 285, "train_info/time_within_train_step": 24.733973026275635, "step": 285} +{"train_info/time_between_train_steps": 0.005187034606933594, "step": 285} +{"info/global_step": 286, "train_info/time_within_train_step": 24.80772852897644, "step": 286} +{"train_info/time_between_train_steps": 0.005602598190307617, "step": 286} +{"info/global_step": 287, "train_info/time_within_train_step": 24.755130767822266, "step": 287} +{"train_info/time_between_train_steps": 0.006181478500366211, "step": 287} +{"info/global_step": 288, "train_info/time_within_train_step": 24.858343362808228, "step": 288} +{"train_info/time_between_train_steps": 0.011245489120483398, "step": 288} +{"train_info/time_between_train_steps": 16.9804584980011, "step": 288} +{"info/global_step": 289, "train_info/time_within_train_step": 24.72721767425537, "step": 289} +{"train_info/time_between_train_steps": 0.004950523376464844, "step": 289} +{"info/global_step": 290, "train_info/time_within_train_step": 24.907243728637695, "step": 290} +{"train_info/time_between_train_steps": 0.005390167236328125, "step": 290} +{"info/global_step": 291, "train_info/time_within_train_step": 24.747677326202393, "step": 291} +{"train_info/time_between_train_steps": 0.005564451217651367, "step": 291} +{"info/global_step": 292, "train_info/time_within_train_step": 24.886417865753174, "step": 292} +{"train_info/time_between_train_steps": 0.005583763122558594, "step": 292} +{"info/global_step": 293, "train_info/time_within_train_step": 24.84259271621704, "step": 293} +{"train_info/time_between_train_steps": 0.0053844451904296875, "step": 293} +{"info/global_step": 294, "train_info/time_within_train_step": 24.916806936264038, "step": 294} +{"train_info/time_between_train_steps": 0.0053293704986572266, "step": 294} +{"info/global_step": 295, "train_info/time_within_train_step": 24.75308585166931, "step": 295} +{"train_info/time_between_train_steps": 0.005380392074584961, "step": 295} +{"info/global_step": 296, "train_info/time_within_train_step": 24.862555503845215, "step": 296} +{"train_info/time_between_train_steps": 0.0057260990142822266, "step": 296} +{"info/global_step": 297, "train_info/time_within_train_step": 24.914015769958496, "step": 297} +{"train_info/time_between_train_steps": 0.010633230209350586, "step": 297} +{"info/global_step": 298, "train_info/time_within_train_step": 24.87648344039917, "step": 298} +{"train_info/time_between_train_steps": 0.0051648616790771484, "step": 298} +{"info/global_step": 299, "train_info/time_within_train_step": 24.737029790878296, "step": 299} +{"train_info/time_between_train_steps": 0.01035928726196289, "step": 299} +{"info/global_step": 300, "train_info/time_within_train_step": 26.334414958953857, "step": 300} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740930352, "_runtime": 7859}, "step": 300} +{"logs": {"train/loss": 3.83, "train/learning_rate": 0.0005, "train/epoch": 8.01, "_timestamp": 1740930352, "_runtime": 7859}, "step": 300} +{"train_info/time_between_train_steps": 77.4589912891388, "step": 300} +{"info/global_step": 301, "train_info/time_within_train_step": 24.806314706802368, "step": 301} +{"train_info/time_between_train_steps": 0.005334377288818359, "step": 301} +{"info/global_step": 302, "train_info/time_within_train_step": 24.84765887260437, "step": 302} +{"train_info/time_between_train_steps": 0.0052449703216552734, "step": 302} +{"info/global_step": 303, "train_info/time_within_train_step": 24.752132892608643, "step": 303} +{"train_info/time_between_train_steps": 0.005578756332397461, "step": 303} +{"info/global_step": 304, "train_info/time_within_train_step": 24.859380960464478, "step": 304} +{"train_info/time_between_train_steps": 0.0052869319915771484, "step": 304} +{"info/global_step": 305, "train_info/time_within_train_step": 24.735511302947998, "step": 305} +{"train_info/time_between_train_steps": 0.027527570724487305, "step": 305} +{"info/global_step": 306, "train_info/time_within_train_step": 24.72178077697754, "step": 306} +{"train_info/time_between_train_steps": 0.005173444747924805, "step": 306} +{"info/global_step": 307, "train_info/time_within_train_step": 24.719128608703613, "step": 307} +{"train_info/time_between_train_steps": 0.00500941276550293, "step": 307} +{"info/global_step": 308, "train_info/time_within_train_step": 24.734455823898315, "step": 308} +{"train_info/time_between_train_steps": 0.0051517486572265625, "step": 308} +{"info/global_step": 309, "train_info/time_within_train_step": 24.847349882125854, "step": 309} +{"train_info/time_between_train_steps": 0.009898185729980469, "step": 309} +{"info/global_step": 310, "train_info/time_within_train_step": 24.73203444480896, "step": 310} +{"train_info/time_between_train_steps": 0.009825706481933594, "step": 310} +{"info/global_step": 311, "train_info/time_within_train_step": 24.887491464614868, "step": 311} +{"train_info/time_between_train_steps": 0.005274534225463867, "step": 311} +{"info/global_step": 312, "train_info/time_within_train_step": 24.72430181503296, "step": 312} +{"train_info/time_between_train_steps": 0.005156755447387695, "step": 312} +{"info/global_step": 313, "train_info/time_within_train_step": 24.736189603805542, "step": 313} +{"train_info/time_between_train_steps": 0.005152702331542969, "step": 313} +{"info/global_step": 314, "train_info/time_within_train_step": 24.760748863220215, "step": 314} +{"train_info/time_between_train_steps": 0.005181789398193359, "step": 314} +{"info/global_step": 315, "train_info/time_within_train_step": 24.735718250274658, "step": 315} +{"train_info/time_between_train_steps": 0.005104541778564453, "step": 315} +{"info/global_step": 316, "train_info/time_within_train_step": 24.801544189453125, "step": 316} +{"train_info/time_between_train_steps": 0.0051882266998291016, "step": 316} +{"info/global_step": 317, "train_info/time_within_train_step": 24.743972063064575, "step": 317} +{"train_info/time_between_train_steps": 0.005829811096191406, "step": 317} +{"info/global_step": 318, "train_info/time_within_train_step": 24.796972274780273, "step": 318} +{"train_info/time_between_train_steps": 0.005342245101928711, "step": 318} +{"info/global_step": 319, "train_info/time_within_train_step": 24.73738431930542, "step": 319} +{"train_info/time_between_train_steps": 0.005492448806762695, "step": 319} +{"info/global_step": 320, "train_info/time_within_train_step": 24.73893976211548, "step": 320} +{"train_info/time_between_train_steps": 0.005339622497558594, "step": 320} +{"info/global_step": 321, "train_info/time_within_train_step": 24.777540922164917, "step": 321} +{"train_info/time_between_train_steps": 0.005436897277832031, "step": 321} +{"info/global_step": 322, "train_info/time_within_train_step": 24.756640434265137, "step": 322} +{"train_info/time_between_train_steps": 0.005793094635009766, "step": 322} +{"info/global_step": 323, "train_info/time_within_train_step": 24.83676242828369, "step": 323} +{"train_info/time_between_train_steps": 0.0062580108642578125, "step": 323} +{"info/global_step": 324, "train_info/time_within_train_step": 24.863920211791992, "step": 324} +{"train_info/time_between_train_steps": 0.006032466888427734, "step": 324} +{"train_info/time_between_train_steps": 17.05787229537964, "step": 324} +{"info/global_step": 325, "train_info/time_within_train_step": 24.813079118728638, "step": 325} +{"train_info/time_between_train_steps": 0.005552530288696289, "step": 325} +{"info/global_step": 326, "train_info/time_within_train_step": 24.889230728149414, "step": 326} +{"train_info/time_between_train_steps": 0.005412578582763672, "step": 326} +{"info/global_step": 327, "train_info/time_within_train_step": 24.85023069381714, "step": 327} +{"train_info/time_between_train_steps": 0.00527644157409668, "step": 327} +{"info/global_step": 328, "train_info/time_within_train_step": 24.920120239257812, "step": 328} +{"train_info/time_between_train_steps": 0.005433320999145508, "step": 328} +{"info/global_step": 329, "train_info/time_within_train_step": 24.747002601623535, "step": 329} +{"train_info/time_between_train_steps": 0.005340099334716797, "step": 329} +{"info/global_step": 330, "train_info/time_within_train_step": 24.910324096679688, "step": 330} +{"train_info/time_between_train_steps": 0.005456209182739258, "step": 330} +{"info/global_step": 331, "train_info/time_within_train_step": 24.743218421936035, "step": 331} +{"train_info/time_between_train_steps": 0.0052738189697265625, "step": 331} +{"info/global_step": 332, "train_info/time_within_train_step": 24.92364478111267, "step": 332} +{"train_info/time_between_train_steps": 0.00538945198059082, "step": 332} +{"info/global_step": 333, "train_info/time_within_train_step": 24.744107246398926, "step": 333} +{"train_info/time_between_train_steps": 0.0054247379302978516, "step": 333} +{"info/global_step": 334, "train_info/time_within_train_step": 24.895418167114258, "step": 334} +{"train_info/time_between_train_steps": 0.005293607711791992, "step": 334} +{"info/global_step": 335, "train_info/time_within_train_step": 24.81177520751953, "step": 335} +{"train_info/time_between_train_steps": 0.005324125289916992, "step": 335} +{"info/global_step": 336, "train_info/time_within_train_step": 24.89644956588745, "step": 336} +{"train_info/time_between_train_steps": 0.005433797836303711, "step": 336} +{"info/global_step": 337, "train_info/time_within_train_step": 24.74066972732544, "step": 337} +{"train_info/time_between_train_steps": 0.0052242279052734375, "step": 337} +{"info/global_step": 338, "train_info/time_within_train_step": 24.874791145324707, "step": 338} +{"train_info/time_between_train_steps": 0.005401611328125, "step": 338} +{"info/global_step": 339, "train_info/time_within_train_step": 24.860044717788696, "step": 339} +{"train_info/time_between_train_steps": 0.0052831172943115234, "step": 339} +{"info/global_step": 340, "train_info/time_within_train_step": 24.859413623809814, "step": 340} +{"train_info/time_between_train_steps": 0.0053863525390625, "step": 340} +{"info/global_step": 341, "train_info/time_within_train_step": 24.735116481781006, "step": 341} +{"train_info/time_between_train_steps": 0.025614261627197266, "step": 341} +{"info/global_step": 342, "train_info/time_within_train_step": 24.75526213645935, "step": 342} +{"train_info/time_between_train_steps": 0.005194664001464844, "step": 342} +{"info/global_step": 343, "train_info/time_within_train_step": 24.722256422042847, "step": 343} +{"train_info/time_between_train_steps": 0.009650945663452148, "step": 343} +{"info/global_step": 344, "train_info/time_within_train_step": 24.80258059501648, "step": 344} +{"train_info/time_between_train_steps": 0.005098581314086914, "step": 344} +{"info/global_step": 345, "train_info/time_within_train_step": 24.73126244544983, "step": 345} +{"train_info/time_between_train_steps": 0.005131244659423828, "step": 345} +{"info/global_step": 346, "train_info/time_within_train_step": 24.739503383636475, "step": 346} +{"train_info/time_between_train_steps": 0.004994869232177734, "step": 346} +{"info/global_step": 347, "train_info/time_within_train_step": 24.75287365913391, "step": 347} +{"train_info/time_between_train_steps": 0.00562739372253418, "step": 347} +{"info/global_step": 348, "train_info/time_within_train_step": 24.734326124191284, "step": 348} +{"train_info/time_between_train_steps": 0.005227804183959961, "step": 348} +{"info/global_step": 349, "train_info/time_within_train_step": 24.80338454246521, "step": 349} +{"train_info/time_between_train_steps": 0.005112171173095703, "step": 349} +{"info/global_step": 350, "train_info/time_within_train_step": 24.729557514190674, "step": 350} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740931688, "_runtime": 9195}, "step": 350} +{"logs": {"train/loss": 3.6134, "train/learning_rate": 0.00047222222222222224, "train/epoch": 9.02, "_timestamp": 1740931688, "_runtime": 9195}, "step": 350} +{"train_info/time_between_train_steps": 0.02660369873046875, "step": 350} +{"info/global_step": 351, "train_info/time_within_train_step": 24.74362015724182, "step": 351} +{"train_info/time_between_train_steps": 0.005118608474731445, "step": 351} +{"info/global_step": 352, "train_info/time_within_train_step": 24.729747772216797, "step": 352} +{"train_info/time_between_train_steps": 0.005132436752319336, "step": 352} +{"info/global_step": 353, "train_info/time_within_train_step": 24.75915288925171, "step": 353} +{"train_info/time_between_train_steps": 0.005430698394775391, "step": 353} +{"info/global_step": 354, "train_info/time_within_train_step": 24.822343349456787, "step": 354} +{"train_info/time_between_train_steps": 0.0054264068603515625, "step": 354} +{"info/global_step": 355, "train_info/time_within_train_step": 24.841840028762817, "step": 355} +{"train_info/time_between_train_steps": 0.005342721939086914, "step": 355} +{"info/global_step": 356, "train_info/time_within_train_step": 24.77834939956665, "step": 356} +{"train_info/time_between_train_steps": 0.0055446624755859375, "step": 356} +{"info/global_step": 357, "train_info/time_within_train_step": 24.74046301841736, "step": 357} +{"train_info/time_between_train_steps": 0.005637645721435547, "step": 357} +{"info/global_step": 358, "train_info/time_within_train_step": 24.763729572296143, "step": 358} +{"train_info/time_between_train_steps": 0.005702018737792969, "step": 358} +{"info/global_step": 359, "train_info/time_within_train_step": 24.819066286087036, "step": 359} +{"train_info/time_between_train_steps": 0.006487607955932617, "step": 359} +{"info/global_step": 360, "train_info/time_within_train_step": 24.785385131835938, "step": 360} +{"train_info/time_between_train_steps": 0.006326436996459961, "step": 360} +{"train_info/time_between_train_steps": 16.70390510559082, "step": 360} +{"info/global_step": 361, "train_info/time_within_train_step": 24.748214960098267, "step": 361} +{"train_info/time_between_train_steps": 0.006617546081542969, "step": 361} +{"info/global_step": 362, "train_info/time_within_train_step": 24.92195749282837, "step": 362} +{"train_info/time_between_train_steps": 0.00549006462097168, "step": 362} +{"info/global_step": 363, "train_info/time_within_train_step": 24.81333827972412, "step": 363} +{"train_info/time_between_train_steps": 0.0052607059478759766, "step": 363} +{"info/global_step": 364, "train_info/time_within_train_step": 24.911192417144775, "step": 364} +{"train_info/time_between_train_steps": 0.005359649658203125, "step": 364} +{"info/global_step": 365, "train_info/time_within_train_step": 24.76346182823181, "step": 365} +{"train_info/time_between_train_steps": 0.005571126937866211, "step": 365} +{"info/global_step": 366, "train_info/time_within_train_step": 24.900421380996704, "step": 366} +{"train_info/time_between_train_steps": 0.00544428825378418, "step": 366} +{"info/global_step": 367, "train_info/time_within_train_step": 24.752030611038208, "step": 367} +{"train_info/time_between_train_steps": 0.005732297897338867, "step": 367} +{"info/global_step": 368, "train_info/time_within_train_step": 24.91304874420166, "step": 368} +{"train_info/time_between_train_steps": 0.00522303581237793, "step": 368} +{"info/global_step": 369, "train_info/time_within_train_step": 24.75371289253235, "step": 369} +{"train_info/time_between_train_steps": 0.005692481994628906, "step": 369} +{"info/global_step": 370, "train_info/time_within_train_step": 25.046132564544678, "step": 370} +{"train_info/time_between_train_steps": 0.005276918411254883, "step": 370} +{"info/global_step": 371, "train_info/time_within_train_step": 24.752314805984497, "step": 371} +{"train_info/time_between_train_steps": 0.005896091461181641, "step": 371} +{"info/global_step": 372, "train_info/time_within_train_step": 24.889564514160156, "step": 372} +{"train_info/time_between_train_steps": 0.005309104919433594, "step": 372} +{"info/global_step": 373, "train_info/time_within_train_step": 24.743955612182617, "step": 373} +{"train_info/time_between_train_steps": 0.010260820388793945, "step": 373} +{"info/global_step": 374, "train_info/time_within_train_step": 24.92668128013611, "step": 374} +{"train_info/time_between_train_steps": 0.005247592926025391, "step": 374} +{"info/global_step": 375, "train_info/time_within_train_step": 24.872409343719482, "step": 375} +{"train_info/time_between_train_steps": 0.01070857048034668, "step": 375} +{"info/global_step": 376, "train_info/time_within_train_step": 24.840129375457764, "step": 376} +{"train_info/time_between_train_steps": 0.005439281463623047, "step": 376} +{"info/global_step": 377, "train_info/time_within_train_step": 24.768635272979736, "step": 377} +{"train_info/time_between_train_steps": 0.02962327003479004, "step": 377} +{"info/global_step": 378, "train_info/time_within_train_step": 24.7188138961792, "step": 378} +{"train_info/time_between_train_steps": 0.004959821701049805, "step": 378} +{"info/global_step": 379, "train_info/time_within_train_step": 24.732945442199707, "step": 379} +{"train_info/time_between_train_steps": 0.0051097869873046875, "step": 379} +{"info/global_step": 380, "train_info/time_within_train_step": 24.77721881866455, "step": 380} +{"train_info/time_between_train_steps": 0.005392789840698242, "step": 380} +{"info/global_step": 381, "train_info/time_within_train_step": 24.726751804351807, "step": 381} +{"train_info/time_between_train_steps": 0.005032777786254883, "step": 381} +{"info/global_step": 382, "train_info/time_within_train_step": 24.801753520965576, "step": 382} +{"train_info/time_between_train_steps": 0.005068540573120117, "step": 382} +{"info/global_step": 383, "train_info/time_within_train_step": 24.73218297958374, "step": 383} +{"train_info/time_between_train_steps": 0.005261659622192383, "step": 383} +{"info/global_step": 384, "train_info/time_within_train_step": 24.736559867858887, "step": 384} +{"train_info/time_between_train_steps": 0.005189657211303711, "step": 384} +{"info/global_step": 385, "train_info/time_within_train_step": 24.806463479995728, "step": 385} +{"train_info/time_between_train_steps": 0.0052089691162109375, "step": 385} +{"info/global_step": 386, "train_info/time_within_train_step": 24.82623052597046, "step": 386} +{"train_info/time_between_train_steps": 0.005247831344604492, "step": 386} +{"info/global_step": 387, "train_info/time_within_train_step": 24.76849913597107, "step": 387} +{"train_info/time_between_train_steps": 0.005193233489990234, "step": 387} +{"info/global_step": 388, "train_info/time_within_train_step": 24.743777990341187, "step": 388} +{"train_info/time_between_train_steps": 0.0051746368408203125, "step": 388} +{"info/global_step": 389, "train_info/time_within_train_step": 24.75203227996826, "step": 389} +{"train_info/time_between_train_steps": 0.0053327083587646484, "step": 389} +{"info/global_step": 390, "train_info/time_within_train_step": 24.75908064842224, "step": 390} +{"train_info/time_between_train_steps": 0.005067586898803711, "step": 390} +{"info/global_step": 391, "train_info/time_within_train_step": 24.75637149810791, "step": 391} +{"train_info/time_between_train_steps": 0.005255222320556641, "step": 391} +{"info/global_step": 392, "train_info/time_within_train_step": 24.845572471618652, "step": 392} +{"train_info/time_between_train_steps": 0.005064725875854492, "step": 392} +{"info/global_step": 393, "train_info/time_within_train_step": 24.736787796020508, "step": 393} +{"train_info/time_between_train_steps": 0.005313873291015625, "step": 393} +{"info/global_step": 394, "train_info/time_within_train_step": 24.77017092704773, "step": 394} +{"train_info/time_between_train_steps": 0.007596254348754883, "step": 394} +{"info/global_step": 395, "train_info/time_within_train_step": 24.778963565826416, "step": 395} +{"train_info/time_between_train_steps": 0.0059087276458740234, "step": 395} +{"info/global_step": 396, "train_info/time_within_train_step": 24.789217710494995, "step": 396} +{"train_info/time_between_train_steps": 0.00604701042175293, "step": 396} +{"train_info/time_between_train_steps": 16.89971947669983, "step": 396} +{"info/global_step": 397, "train_info/time_within_train_step": 24.718884706497192, "step": 397} +{"train_info/time_between_train_steps": 0.004892587661743164, "step": 397} +{"info/global_step": 398, "train_info/time_within_train_step": 24.825684785842896, "step": 398} +{"train_info/time_between_train_steps": 0.005044221878051758, "step": 398} +{"info/global_step": 399, "train_info/time_within_train_step": 24.775736808776855, "step": 399} +{"train_info/time_between_train_steps": 0.0058209896087646484, "step": 399} +{"info/global_step": 400, "train_info/time_within_train_step": 25.331448554992676, "step": 400} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740932964, "_runtime": 10471}, "step": 400} +{"logs": {"train/loss": 3.4876, "train/learning_rate": 0.00044444444444444436, "train/epoch": 11.0, "_timestamp": 1740932964, "_runtime": 10471}, "step": 400} +{"train_info/time_between_train_steps": 76.81318211555481, "step": 400} +{"info/global_step": 401, "train_info/time_within_train_step": 24.826761960983276, "step": 401} +{"train_info/time_between_train_steps": 0.00517582893371582, "step": 401} +{"info/global_step": 402, "train_info/time_within_train_step": 24.89307475090027, "step": 402} +{"train_info/time_between_train_steps": 0.005380392074584961, "step": 402} +{"info/global_step": 403, "train_info/time_within_train_step": 24.73485517501831, "step": 403} +{"train_info/time_between_train_steps": 0.0053136348724365234, "step": 403} +{"info/global_step": 404, "train_info/time_within_train_step": 24.862119674682617, "step": 404} +{"train_info/time_between_train_steps": 0.005083799362182617, "step": 404} +{"info/global_step": 405, "train_info/time_within_train_step": 24.832298517227173, "step": 405} +{"train_info/time_between_train_steps": 0.005467891693115234, "step": 405} +{"info/global_step": 406, "train_info/time_within_train_step": 24.850717067718506, "step": 406} +{"train_info/time_between_train_steps": 0.005061626434326172, "step": 406} +{"info/global_step": 407, "train_info/time_within_train_step": 24.736642837524414, "step": 407} +{"train_info/time_between_train_steps": 0.005222320556640625, "step": 407} +{"info/global_step": 408, "train_info/time_within_train_step": 24.983750581741333, "step": 408} +{"train_info/time_between_train_steps": 0.00539088249206543, "step": 408} +{"info/global_step": 409, "train_info/time_within_train_step": 24.748112201690674, "step": 409} +{"train_info/time_between_train_steps": 0.006181955337524414, "step": 409} +{"info/global_step": 410, "train_info/time_within_train_step": 24.851125240325928, "step": 410} +{"train_info/time_between_train_steps": 0.004998445510864258, "step": 410} +{"info/global_step": 411, "train_info/time_within_train_step": 24.74228572845459, "step": 411} +{"train_info/time_between_train_steps": 0.005206584930419922, "step": 411} +{"info/global_step": 412, "train_info/time_within_train_step": 25.070828914642334, "step": 412} +{"train_info/time_between_train_steps": 0.0052490234375, "step": 412} +{"info/global_step": 413, "train_info/time_within_train_step": 24.72233533859253, "step": 413} +{"train_info/time_between_train_steps": 0.019162893295288086, "step": 413} +{"info/global_step": 414, "train_info/time_within_train_step": 24.708271741867065, "step": 414} +{"train_info/time_between_train_steps": 0.004736900329589844, "step": 414} +{"info/global_step": 415, "train_info/time_within_train_step": 24.777758836746216, "step": 415} +{"train_info/time_between_train_steps": 0.004982709884643555, "step": 415} +{"info/global_step": 416, "train_info/time_within_train_step": 24.84083342552185, "step": 416} +{"train_info/time_between_train_steps": 0.005144596099853516, "step": 416} +{"info/global_step": 417, "train_info/time_within_train_step": 24.721797943115234, "step": 417} +{"train_info/time_between_train_steps": 0.005014896392822266, "step": 417} +{"info/global_step": 418, "train_info/time_within_train_step": 24.72531270980835, "step": 418} +{"train_info/time_between_train_steps": 0.004987239837646484, "step": 418} +{"info/global_step": 419, "train_info/time_within_train_step": 24.734859228134155, "step": 419} +{"train_info/time_between_train_steps": 0.005064249038696289, "step": 419} +{"info/global_step": 420, "train_info/time_within_train_step": 24.820607900619507, "step": 420} +{"train_info/time_between_train_steps": 0.005242586135864258, "step": 420} +{"info/global_step": 421, "train_info/time_within_train_step": 24.72376275062561, "step": 421} +{"train_info/time_between_train_steps": 0.005097866058349609, "step": 421} +{"info/global_step": 422, "train_info/time_within_train_step": 24.802992820739746, "step": 422} +{"train_info/time_between_train_steps": 0.009615659713745117, "step": 422} +{"info/global_step": 423, "train_info/time_within_train_step": 24.72078585624695, "step": 423} +{"train_info/time_between_train_steps": 0.009650945663452148, "step": 423} +{"info/global_step": 424, "train_info/time_within_train_step": 24.75128197669983, "step": 424} +{"train_info/time_between_train_steps": 0.009810924530029297, "step": 424} +{"info/global_step": 425, "train_info/time_within_train_step": 24.785635471343994, "step": 425} +{"train_info/time_between_train_steps": 0.005213260650634766, "step": 425} +{"info/global_step": 426, "train_info/time_within_train_step": 24.746288537979126, "step": 426} +{"train_info/time_between_train_steps": 0.005046844482421875, "step": 426} +{"info/global_step": 427, "train_info/time_within_train_step": 24.79993200302124, "step": 427} +{"train_info/time_between_train_steps": 0.0098724365234375, "step": 427} +{"info/global_step": 428, "train_info/time_within_train_step": 24.745992422103882, "step": 428} +{"train_info/time_between_train_steps": 0.005311012268066406, "step": 428} +{"info/global_step": 429, "train_info/time_within_train_step": 24.78555178642273, "step": 429} +{"train_info/time_between_train_steps": 0.0051920413970947266, "step": 429} +{"info/global_step": 430, "train_info/time_within_train_step": 24.746751070022583, "step": 430} +{"train_info/time_between_train_steps": 0.005337238311767578, "step": 430} +{"info/global_step": 431, "train_info/time_within_train_step": 24.78028392791748, "step": 431} +{"train_info/time_between_train_steps": 0.006041288375854492, "step": 431} +{"info/global_step": 432, "train_info/time_within_train_step": 24.963714599609375, "step": 432} +{"train_info/time_between_train_steps": 0.0058441162109375, "step": 432} +{"train_info/time_between_train_steps": 17.07774829864502, "step": 432} +{"info/global_step": 433, "train_info/time_within_train_step": 24.740321397781372, "step": 433} +{"train_info/time_between_train_steps": 0.005276203155517578, "step": 433} +{"info/global_step": 434, "train_info/time_within_train_step": 24.890523195266724, "step": 434} +{"train_info/time_between_train_steps": 0.005308628082275391, "step": 434} +{"info/global_step": 435, "train_info/time_within_train_step": 24.744062900543213, "step": 435} +{"train_info/time_between_train_steps": 0.0052454471588134766, "step": 435} +{"info/global_step": 436, "train_info/time_within_train_step": 24.891456842422485, "step": 436} +{"train_info/time_between_train_steps": 0.014351606369018555, "step": 436} +{"info/global_step": 437, "train_info/time_within_train_step": 24.816036462783813, "step": 437} +{"train_info/time_between_train_steps": 0.00525212287902832, "step": 437} +{"info/global_step": 438, "train_info/time_within_train_step": 24.874692916870117, "step": 438} +{"train_info/time_between_train_steps": 0.005326271057128906, "step": 438} +{"info/global_step": 439, "train_info/time_within_train_step": 24.73322558403015, "step": 439} +{"train_info/time_between_train_steps": 0.005181074142456055, "step": 439} +{"info/global_step": 440, "train_info/time_within_train_step": 24.909196138381958, "step": 440} +{"train_info/time_between_train_steps": 0.005414724349975586, "step": 440} +{"info/global_step": 441, "train_info/time_within_train_step": 24.74953556060791, "step": 441} +{"train_info/time_between_train_steps": 0.005330085754394531, "step": 441} +{"info/global_step": 442, "train_info/time_within_train_step": 24.85993003845215, "step": 442} +{"train_info/time_between_train_steps": 0.0050373077392578125, "step": 442} +{"info/global_step": 443, "train_info/time_within_train_step": 24.87210988998413, "step": 443} +{"train_info/time_between_train_steps": 0.005506277084350586, "step": 443} +{"info/global_step": 444, "train_info/time_within_train_step": 24.867829084396362, "step": 444} +{"train_info/time_between_train_steps": 0.005308389663696289, "step": 444} +{"info/global_step": 445, "train_info/time_within_train_step": 24.737187385559082, "step": 445} +{"train_info/time_between_train_steps": 0.005300998687744141, "step": 445} +{"info/global_step": 446, "train_info/time_within_train_step": 24.964707374572754, "step": 446} +{"train_info/time_between_train_steps": 0.0053250789642333984, "step": 446} +{"info/global_step": 447, "train_info/time_within_train_step": 24.852619647979736, "step": 447} +{"train_info/time_between_train_steps": 0.009459733963012695, "step": 447} +{"info/global_step": 448, "train_info/time_within_train_step": 24.819602489471436, "step": 448} +{"train_info/time_between_train_steps": 0.005356311798095703, "step": 448} +{"info/global_step": 449, "train_info/time_within_train_step": 24.74036955833435, "step": 449} +{"train_info/time_between_train_steps": 0.03724408149719238, "step": 449} +{"info/global_step": 450, "train_info/time_within_train_step": 24.734694480895996, "step": 450} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740934300, "_runtime": 11807}, "step": 450} +{"logs": {"train/loss": 3.3228, "train/learning_rate": 0.00041666666666666664, "train/epoch": 12.02, "_timestamp": 1740934300, "_runtime": 11807}, "step": 450} +{"train_info/time_between_train_steps": 0.026086091995239258, "step": 450} +{"info/global_step": 451, "train_info/time_within_train_step": 24.722829818725586, "step": 451} +{"train_info/time_between_train_steps": 0.0048885345458984375, "step": 451} +{"info/global_step": 452, "train_info/time_within_train_step": 24.719696044921875, "step": 452} +{"train_info/time_between_train_steps": 0.005017518997192383, "step": 452} +{"info/global_step": 453, "train_info/time_within_train_step": 24.727006912231445, "step": 453} +{"train_info/time_between_train_steps": 0.005115032196044922, "step": 453} +{"info/global_step": 454, "train_info/time_within_train_step": 24.72318196296692, "step": 454} +{"train_info/time_between_train_steps": 0.00975942611694336, "step": 454} +{"info/global_step": 455, "train_info/time_within_train_step": 24.722861766815186, "step": 455} +{"train_info/time_between_train_steps": 0.004986286163330078, "step": 455} +{"info/global_step": 456, "train_info/time_within_train_step": 24.724376440048218, "step": 456} +{"train_info/time_between_train_steps": 0.005158424377441406, "step": 456} +{"info/global_step": 457, "train_info/time_within_train_step": 24.728564739227295, "step": 457} +{"train_info/time_between_train_steps": 0.005182504653930664, "step": 457} +{"info/global_step": 458, "train_info/time_within_train_step": 24.730761528015137, "step": 458} +{"train_info/time_between_train_steps": 0.013582229614257812, "step": 458} +{"info/global_step": 459, "train_info/time_within_train_step": 24.996273517608643, "step": 459} +{"train_info/time_between_train_steps": 0.005757570266723633, "step": 459} +{"info/global_step": 460, "train_info/time_within_train_step": 24.726360321044922, "step": 460} +{"train_info/time_between_train_steps": 0.005042314529418945, "step": 460} +{"info/global_step": 461, "train_info/time_within_train_step": 24.72841191291809, "step": 461} +{"train_info/time_between_train_steps": 0.005257844924926758, "step": 461} +{"info/global_step": 462, "train_info/time_within_train_step": 24.726324558258057, "step": 462} +{"train_info/time_between_train_steps": 0.004974842071533203, "step": 462} +{"info/global_step": 463, "train_info/time_within_train_step": 24.818670988082886, "step": 463} +{"train_info/time_between_train_steps": 0.00497126579284668, "step": 463} +{"info/global_step": 464, "train_info/time_within_train_step": 24.727489471435547, "step": 464} +{"train_info/time_between_train_steps": 0.005103111267089844, "step": 464} +{"info/global_step": 465, "train_info/time_within_train_step": 24.737344980239868, "step": 465} +{"train_info/time_between_train_steps": 0.005300045013427734, "step": 465} +{"info/global_step": 466, "train_info/time_within_train_step": 24.762330770492554, "step": 466} +{"train_info/time_between_train_steps": 0.005414485931396484, "step": 466} +{"info/global_step": 467, "train_info/time_within_train_step": 24.74667239189148, "step": 467} +{"train_info/time_between_train_steps": 0.006577253341674805, "step": 467} +{"info/global_step": 468, "train_info/time_within_train_step": 24.767557382583618, "step": 468} +{"train_info/time_between_train_steps": 0.005872964859008789, "step": 468} +{"train_info/time_between_train_steps": 16.677207231521606, "step": 468} +{"info/global_step": 469, "train_info/time_within_train_step": 24.803948402404785, "step": 469} +{"train_info/time_between_train_steps": 0.005022764205932617, "step": 469} +{"info/global_step": 470, "train_info/time_within_train_step": 24.867295026779175, "step": 470} +{"train_info/time_between_train_steps": 0.005033969879150391, "step": 470} +{"info/global_step": 471, "train_info/time_within_train_step": 24.750385761260986, "step": 471} +{"train_info/time_between_train_steps": 0.005127668380737305, "step": 471} +{"info/global_step": 472, "train_info/time_within_train_step": 24.93746590614319, "step": 472} +{"train_info/time_between_train_steps": 0.0054204463958740234, "step": 472} +{"info/global_step": 473, "train_info/time_within_train_step": 24.748306035995483, "step": 473} +{"train_info/time_between_train_steps": 0.005203962326049805, "step": 473} +{"info/global_step": 474, "train_info/time_within_train_step": 24.925850868225098, "step": 474} +{"train_info/time_between_train_steps": 0.005079746246337891, "step": 474} +{"info/global_step": 475, "train_info/time_within_train_step": 24.750608682632446, "step": 475} +{"train_info/time_between_train_steps": 0.005316495895385742, "step": 475} +{"info/global_step": 476, "train_info/time_within_train_step": 25.09683322906494, "step": 476} +{"train_info/time_between_train_steps": 0.00530242919921875, "step": 476} +{"info/global_step": 477, "train_info/time_within_train_step": 24.746479749679565, "step": 477} +{"train_info/time_between_train_steps": 0.005099773406982422, "step": 477} +{"info/global_step": 478, "train_info/time_within_train_step": 24.97159457206726, "step": 478} +{"train_info/time_between_train_steps": 0.00508880615234375, "step": 478} +{"info/global_step": 479, "train_info/time_within_train_step": 24.74760603904724, "step": 479} +{"train_info/time_between_train_steps": 0.005551815032958984, "step": 479} +{"info/global_step": 480, "train_info/time_within_train_step": 24.90778350830078, "step": 480} +{"train_info/time_between_train_steps": 0.005378007888793945, "step": 480} +{"info/global_step": 481, "train_info/time_within_train_step": 24.796776056289673, "step": 481} +{"train_info/time_between_train_steps": 0.005512714385986328, "step": 481} +{"info/global_step": 482, "train_info/time_within_train_step": 24.854191541671753, "step": 482} +{"train_info/time_between_train_steps": 0.005064964294433594, "step": 482} +{"info/global_step": 483, "train_info/time_within_train_step": 24.750453233718872, "step": 483} +{"train_info/time_between_train_steps": 0.005523204803466797, "step": 483} +{"info/global_step": 484, "train_info/time_within_train_step": 24.874800443649292, "step": 484} +{"train_info/time_between_train_steps": 0.005319356918334961, "step": 484} +{"info/global_step": 485, "train_info/time_within_train_step": 24.749821186065674, "step": 485} +{"train_info/time_between_train_steps": 0.024091482162475586, "step": 485} +{"info/global_step": 486, "train_info/time_within_train_step": 24.73492407798767, "step": 486} +{"train_info/time_between_train_steps": 0.005177021026611328, "step": 486} +{"info/global_step": 487, "train_info/time_within_train_step": 24.73156499862671, "step": 487} +{"train_info/time_between_train_steps": 0.005067110061645508, "step": 487} +{"info/global_step": 488, "train_info/time_within_train_step": 24.73649263381958, "step": 488} +{"train_info/time_between_train_steps": 0.0050106048583984375, "step": 488} +{"info/global_step": 489, "train_info/time_within_train_step": 24.728426456451416, "step": 489} +{"train_info/time_between_train_steps": 0.005023479461669922, "step": 489} +{"info/global_step": 490, "train_info/time_within_train_step": 24.736115217208862, "step": 490} +{"train_info/time_between_train_steps": 0.00520777702331543, "step": 490} +{"info/global_step": 491, "train_info/time_within_train_step": 24.802594900131226, "step": 491} +{"train_info/time_between_train_steps": 0.005124092102050781, "step": 491} +{"info/global_step": 492, "train_info/time_within_train_step": 24.73031449317932, "step": 492} +{"train_info/time_between_train_steps": 0.005076169967651367, "step": 492} +{"info/global_step": 493, "train_info/time_within_train_step": 24.83853244781494, "step": 493} +{"train_info/time_between_train_steps": 0.005272626876831055, "step": 493} +{"info/global_step": 494, "train_info/time_within_train_step": 24.733875036239624, "step": 494} +{"train_info/time_between_train_steps": 0.00717616081237793, "step": 494} +{"info/global_step": 495, "train_info/time_within_train_step": 24.729239463806152, "step": 495} +{"train_info/time_between_train_steps": 0.005280256271362305, "step": 495} +{"info/global_step": 496, "train_info/time_within_train_step": 24.78907871246338, "step": 496} +{"train_info/time_between_train_steps": 0.005021572113037109, "step": 496} +{"info/global_step": 497, "train_info/time_within_train_step": 24.73161268234253, "step": 497} +{"train_info/time_between_train_steps": 0.005076885223388672, "step": 497} +{"info/global_step": 498, "train_info/time_within_train_step": 26.43409013748169, "step": 498} +{"train_info/time_between_train_steps": 0.00550079345703125, "step": 498} +{"info/global_step": 499, "train_info/time_within_train_step": 24.760005474090576, "step": 499} +{"train_info/time_between_train_steps": 0.005096435546875, "step": 499} +{"info/global_step": 500, "train_info/time_within_train_step": 24.72420072555542, "step": 500} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740935560, "_runtime": 13067}, "step": 500} +{"logs": {"train/loss": 3.2223, "train/learning_rate": 0.00038888888888888887, "train/epoch": 13.03, "_timestamp": 1740935560, "_runtime": 13067}, "step": 500} +{"train_info/time_between_train_steps": 68.96880793571472, "step": 500} +{"info/global_step": 501, "train_info/time_within_train_step": 25.007120370864868, "step": 501} +{"train_info/time_between_train_steps": 0.010151147842407227, "step": 501} +{"info/global_step": 502, "train_info/time_within_train_step": 24.75459384918213, "step": 502} +{"train_info/time_between_train_steps": 0.005455732345581055, "step": 502} +{"info/global_step": 503, "train_info/time_within_train_step": 24.81337809562683, "step": 503} +{"train_info/time_between_train_steps": 0.005633354187011719, "step": 503} +{"info/global_step": 504, "train_info/time_within_train_step": 24.78805708885193, "step": 504} +{"train_info/time_between_train_steps": 0.0060617923736572266, "step": 504} +{"train_info/time_between_train_steps": 16.87895369529724, "step": 504} +{"info/global_step": 505, "train_info/time_within_train_step": 24.720951795578003, "step": 505} +{"train_info/time_between_train_steps": 0.0050585269927978516, "step": 505} +{"info/global_step": 506, "train_info/time_within_train_step": 24.869980335235596, "step": 506} +{"train_info/time_between_train_steps": 0.005068063735961914, "step": 506} +{"info/global_step": 507, "train_info/time_within_train_step": 24.74945592880249, "step": 507} +{"train_info/time_between_train_steps": 0.010198831558227539, "step": 507} +{"info/global_step": 508, "train_info/time_within_train_step": 24.957356214523315, "step": 508} +{"train_info/time_between_train_steps": 0.0053637027740478516, "step": 508} +{"info/global_step": 509, "train_info/time_within_train_step": 24.810423135757446, "step": 509} +{"train_info/time_between_train_steps": 0.010007858276367188, "step": 509} +{"info/global_step": 510, "train_info/time_within_train_step": 24.853644609451294, "step": 510} +{"train_info/time_between_train_steps": 0.007728099822998047, "step": 510} +{"info/global_step": 511, "train_info/time_within_train_step": 24.7387752532959, "step": 511} +{"train_info/time_between_train_steps": 0.010348796844482422, "step": 511} +{"info/global_step": 512, "train_info/time_within_train_step": 24.8867506980896, "step": 512} +{"train_info/time_between_train_steps": 0.005188703536987305, "step": 512} +{"info/global_step": 513, "train_info/time_within_train_step": 24.738613605499268, "step": 513} +{"train_info/time_between_train_steps": 0.005156517028808594, "step": 513} +{"info/global_step": 514, "train_info/time_within_train_step": 24.8778076171875, "step": 514} +{"train_info/time_between_train_steps": 0.005337715148925781, "step": 514} +{"info/global_step": 515, "train_info/time_within_train_step": 24.748167514801025, "step": 515} +{"train_info/time_between_train_steps": 0.005418300628662109, "step": 515} +{"info/global_step": 516, "train_info/time_within_train_step": 25.039752960205078, "step": 516} +{"train_info/time_between_train_steps": 0.0051784515380859375, "step": 516} +{"info/global_step": 517, "train_info/time_within_train_step": 24.779347896575928, "step": 517} +{"train_info/time_between_train_steps": 0.005670785903930664, "step": 517} +{"info/global_step": 518, "train_info/time_within_train_step": 24.886354446411133, "step": 518} +{"train_info/time_between_train_steps": 0.0052258968353271484, "step": 518} +{"info/global_step": 519, "train_info/time_within_train_step": 24.731343030929565, "step": 519} +{"train_info/time_between_train_steps": 0.005227088928222656, "step": 519} +{"info/global_step": 520, "train_info/time_within_train_step": 24.82636857032776, "step": 520} +{"train_info/time_between_train_steps": 0.005508899688720703, "step": 520} +{"info/global_step": 521, "train_info/time_within_train_step": 24.759292125701904, "step": 521} +{"train_info/time_between_train_steps": 0.022135257720947266, "step": 521} +{"info/global_step": 522, "train_info/time_within_train_step": 24.724204540252686, "step": 522} +{"train_info/time_between_train_steps": 0.013526201248168945, "step": 522} +{"info/global_step": 523, "train_info/time_within_train_step": 25.069180250167847, "step": 523} +{"train_info/time_between_train_steps": 0.01393890380859375, "step": 523} +{"info/global_step": 524, "train_info/time_within_train_step": 25.19942617416382, "step": 524} +{"train_info/time_between_train_steps": 0.013825416564941406, "step": 524} +{"info/global_step": 525, "train_info/time_within_train_step": 24.92778968811035, "step": 525} +{"train_info/time_between_train_steps": 0.009188175201416016, "step": 525} +{"info/global_step": 526, "train_info/time_within_train_step": 24.736388206481934, "step": 526} +{"train_info/time_between_train_steps": 0.004992961883544922, "step": 526} +{"info/global_step": 527, "train_info/time_within_train_step": 24.727646350860596, "step": 527} +{"train_info/time_between_train_steps": 0.005151033401489258, "step": 527} +{"info/global_step": 528, "train_info/time_within_train_step": 24.72180724143982, "step": 528} +{"train_info/time_between_train_steps": 0.0052089691162109375, "step": 528} +{"info/global_step": 529, "train_info/time_within_train_step": 24.723212242126465, "step": 529} +{"train_info/time_between_train_steps": 0.005286455154418945, "step": 529} +{"info/global_step": 530, "train_info/time_within_train_step": 24.72964906692505, "step": 530} +{"train_info/time_between_train_steps": 0.005196571350097656, "step": 530} +{"info/global_step": 531, "train_info/time_within_train_step": 24.767659902572632, "step": 531} +{"train_info/time_between_train_steps": 0.005034208297729492, "step": 531} +{"info/global_step": 532, "train_info/time_within_train_step": 24.73479199409485, "step": 532} +{"train_info/time_between_train_steps": 0.00513458251953125, "step": 532} +{"info/global_step": 533, "train_info/time_within_train_step": 24.80782961845398, "step": 533} +{"train_info/time_between_train_steps": 0.005175590515136719, "step": 533} +{"info/global_step": 534, "train_info/time_within_train_step": 24.723798036575317, "step": 534} +{"train_info/time_between_train_steps": 0.005029439926147461, "step": 534} +{"info/global_step": 535, "train_info/time_within_train_step": 24.735053539276123, "step": 535} +{"train_info/time_between_train_steps": 0.005141019821166992, "step": 535} +{"info/global_step": 536, "train_info/time_within_train_step": 24.81102752685547, "step": 536} +{"train_info/time_between_train_steps": 0.005276918411254883, "step": 536} +{"info/global_step": 537, "train_info/time_within_train_step": 24.760096549987793, "step": 537} +{"train_info/time_between_train_steps": 0.005114555358886719, "step": 537} +{"info/global_step": 538, "train_info/time_within_train_step": 24.806875944137573, "step": 538} +{"train_info/time_between_train_steps": 0.005437374114990234, "step": 538} +{"info/global_step": 539, "train_info/time_within_train_step": 24.749807596206665, "step": 539} +{"train_info/time_between_train_steps": 0.005788564682006836, "step": 539} +{"info/global_step": 540, "train_info/time_within_train_step": 24.85267925262451, "step": 540} +{"train_info/time_between_train_steps": 0.006162405014038086, "step": 540} +{"train_info/time_between_train_steps": 17.006163597106934, "step": 540} +{"info/global_step": 541, "train_info/time_within_train_step": 24.736087799072266, "step": 541} +{"train_info/time_between_train_steps": 0.00530695915222168, "step": 541} +{"info/global_step": 542, "train_info/time_within_train_step": 24.926260471343994, "step": 542} +{"train_info/time_between_train_steps": 0.005255699157714844, "step": 542} +{"info/global_step": 543, "train_info/time_within_train_step": 24.726115942001343, "step": 543} +{"train_info/time_between_train_steps": 0.0050165653228759766, "step": 543} +{"info/global_step": 544, "train_info/time_within_train_step": 24.862373113632202, "step": 544} +{"train_info/time_between_train_steps": 0.0052394866943359375, "step": 544} +{"info/global_step": 545, "train_info/time_within_train_step": 24.798430919647217, "step": 545} +{"train_info/time_between_train_steps": 0.005204677581787109, "step": 545} +{"info/global_step": 546, "train_info/time_within_train_step": 24.85863733291626, "step": 546} +{"train_info/time_between_train_steps": 0.005276679992675781, "step": 546} +{"info/global_step": 547, "train_info/time_within_train_step": 24.81388759613037, "step": 547} +{"train_info/time_between_train_steps": 0.005247831344604492, "step": 547} +{"info/global_step": 548, "train_info/time_within_train_step": 24.87496042251587, "step": 548} +{"train_info/time_between_train_steps": 0.005299806594848633, "step": 548} +{"info/global_step": 549, "train_info/time_within_train_step": 24.746588468551636, "step": 549} +{"train_info/time_between_train_steps": 0.005301237106323242, "step": 549} +{"info/global_step": 550, "train_info/time_within_train_step": 24.849151849746704, "step": 550} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740936905, "_runtime": 14412}, "step": 550} +{"logs": {"train/loss": 3.1724, "train/learning_rate": 0.0003611111111111111, "train/epoch": 15.01, "_timestamp": 1740936905, "_runtime": 14412}, "step": 550} +{"train_info/time_between_train_steps": 0.031647682189941406, "step": 550} +{"info/global_step": 551, "train_info/time_within_train_step": 24.74155306816101, "step": 551} +{"train_info/time_between_train_steps": 0.005282878875732422, "step": 551} +{"info/global_step": 552, "train_info/time_within_train_step": 24.8587806224823, "step": 552} +{"train_info/time_between_train_steps": 0.005437135696411133, "step": 552} +{"info/global_step": 553, "train_info/time_within_train_step": 24.96762776374817, "step": 553} +{"train_info/time_between_train_steps": 0.005064964294433594, "step": 553} +{"info/global_step": 554, "train_info/time_within_train_step": 24.885009765625, "step": 554} +{"train_info/time_between_train_steps": 0.005101203918457031, "step": 554} +{"info/global_step": 555, "train_info/time_within_train_step": 24.83517074584961, "step": 555} +{"train_info/time_between_train_steps": 0.0049250125885009766, "step": 555} +{"info/global_step": 556, "train_info/time_within_train_step": 24.803467273712158, "step": 556} +{"train_info/time_between_train_steps": 0.005211591720581055, "step": 556} +{"info/global_step": 557, "train_info/time_within_train_step": 24.72118306159973, "step": 557} +{"train_info/time_between_train_steps": 0.022020816802978516, "step": 557} +{"info/global_step": 558, "train_info/time_within_train_step": 24.715293169021606, "step": 558} +{"train_info/time_between_train_steps": 0.004952907562255859, "step": 558} +{"info/global_step": 559, "train_info/time_within_train_step": 24.72656488418579, "step": 559} +{"train_info/time_between_train_steps": 0.005041360855102539, "step": 559} +{"info/global_step": 560, "train_info/time_within_train_step": 24.727290391921997, "step": 560} +{"train_info/time_between_train_steps": 0.004984140396118164, "step": 560} +{"info/global_step": 561, "train_info/time_within_train_step": 24.720921516418457, "step": 561} +{"train_info/time_between_train_steps": 0.005019426345825195, "step": 561} +{"info/global_step": 562, "train_info/time_within_train_step": 24.78933572769165, "step": 562} +{"train_info/time_between_train_steps": 0.005084514617919922, "step": 562} +{"info/global_step": 563, "train_info/time_within_train_step": 24.73223042488098, "step": 563} +{"train_info/time_between_train_steps": 0.005189418792724609, "step": 563} +{"info/global_step": 564, "train_info/time_within_train_step": 24.81226134300232, "step": 564} +{"train_info/time_between_train_steps": 0.00516200065612793, "step": 564} +{"info/global_step": 565, "train_info/time_within_train_step": 24.724648475646973, "step": 565} +{"train_info/time_between_train_steps": 0.0050852298736572266, "step": 565} +{"info/global_step": 566, "train_info/time_within_train_step": 24.737491846084595, "step": 566} +{"train_info/time_between_train_steps": 0.005052328109741211, "step": 566} +{"info/global_step": 567, "train_info/time_within_train_step": 24.796103477478027, "step": 567} +{"train_info/time_between_train_steps": 0.00518035888671875, "step": 567} +{"info/global_step": 568, "train_info/time_within_train_step": 24.73525905609131, "step": 568} +{"train_info/time_between_train_steps": 0.005025625228881836, "step": 568} +{"info/global_step": 569, "train_info/time_within_train_step": 24.788970470428467, "step": 569} +{"train_info/time_between_train_steps": 0.0050623416900634766, "step": 569} +{"info/global_step": 570, "train_info/time_within_train_step": 24.815572261810303, "step": 570} +{"train_info/time_between_train_steps": 0.005570650100708008, "step": 570} +{"info/global_step": 571, "train_info/time_within_train_step": 24.874919414520264, "step": 571} +{"train_info/time_between_train_steps": 0.005155324935913086, "step": 571} +{"info/global_step": 572, "train_info/time_within_train_step": 24.725632905960083, "step": 572} +{"train_info/time_between_train_steps": 0.0050585269927978516, "step": 572} +{"info/global_step": 573, "train_info/time_within_train_step": 24.77717113494873, "step": 573} +{"train_info/time_between_train_steps": 0.010311603546142578, "step": 573} +{"info/global_step": 574, "train_info/time_within_train_step": 24.740370273590088, "step": 574} +{"train_info/time_between_train_steps": 0.005450725555419922, "step": 574} +{"info/global_step": 575, "train_info/time_within_train_step": 24.75084161758423, "step": 575} +{"train_info/time_between_train_steps": 0.01109766960144043, "step": 575} +{"info/global_step": 576, "train_info/time_within_train_step": 24.862184286117554, "step": 576} +{"train_info/time_between_train_steps": 0.011320114135742188, "step": 576} +{"train_info/time_between_train_steps": 17.083806037902832, "step": 576} +{"info/global_step": 577, "train_info/time_within_train_step": 24.72799825668335, "step": 577} +{"train_info/time_between_train_steps": 0.004853248596191406, "step": 577} +{"info/global_step": 578, "train_info/time_within_train_step": 24.895591497421265, "step": 578} +{"train_info/time_between_train_steps": 0.004920482635498047, "step": 578} +{"info/global_step": 579, "train_info/time_within_train_step": 24.731446504592896, "step": 579} +{"train_info/time_between_train_steps": 0.0052721500396728516, "step": 579} +{"info/global_step": 580, "train_info/time_within_train_step": 24.885701656341553, "step": 580} +{"train_info/time_between_train_steps": 0.005364418029785156, "step": 580} +{"info/global_step": 581, "train_info/time_within_train_step": 24.729042768478394, "step": 581} +{"train_info/time_between_train_steps": 0.009942054748535156, "step": 581} +{"info/global_step": 582, "train_info/time_within_train_step": 24.845436334609985, "step": 582} +{"train_info/time_between_train_steps": 0.005342245101928711, "step": 582} +{"info/global_step": 583, "train_info/time_within_train_step": 24.736042022705078, "step": 583} +{"train_info/time_between_train_steps": 0.010282039642333984, "step": 583} +{"info/global_step": 584, "train_info/time_within_train_step": 24.858436584472656, "step": 584} +{"train_info/time_between_train_steps": 0.005132436752319336, "step": 584} +{"info/global_step": 585, "train_info/time_within_train_step": 24.849905967712402, "step": 585} +{"train_info/time_between_train_steps": 0.005239963531494141, "step": 585} +{"info/global_step": 586, "train_info/time_within_train_step": 24.904391765594482, "step": 586} +{"train_info/time_between_train_steps": 0.005347728729248047, "step": 586} +{"info/global_step": 587, "train_info/time_within_train_step": 24.74404811859131, "step": 587} +{"train_info/time_between_train_steps": 0.010250329971313477, "step": 587} +{"info/global_step": 588, "train_info/time_within_train_step": 24.851814031600952, "step": 588} +{"train_info/time_between_train_steps": 0.010134458541870117, "step": 588} +{"info/global_step": 589, "train_info/time_within_train_step": 24.756343603134155, "step": 589} +{"train_info/time_between_train_steps": 0.005461215972900391, "step": 589} +{"info/global_step": 590, "train_info/time_within_train_step": 24.905765533447266, "step": 590} +{"train_info/time_between_train_steps": 0.005249977111816406, "step": 590} +{"info/global_step": 591, "train_info/time_within_train_step": 24.93760919570923, "step": 591} +{"train_info/time_between_train_steps": 0.009962081909179688, "step": 591} +{"info/global_step": 592, "train_info/time_within_train_step": 24.822629690170288, "step": 592} +{"train_info/time_between_train_steps": 0.010261058807373047, "step": 592} +{"info/global_step": 593, "train_info/time_within_train_step": 24.73758292198181, "step": 593} +{"train_info/time_between_train_steps": 0.027427196502685547, "step": 593} +{"info/global_step": 594, "train_info/time_within_train_step": 24.739976406097412, "step": 594} +{"train_info/time_between_train_steps": 0.005027294158935547, "step": 594} +{"info/global_step": 595, "train_info/time_within_train_step": 24.72195863723755, "step": 595} +{"train_info/time_between_train_steps": 0.010063409805297852, "step": 595} +{"info/global_step": 596, "train_info/time_within_train_step": 24.721233367919922, "step": 596} +{"train_info/time_between_train_steps": 0.009927511215209961, "step": 596} +{"info/global_step": 597, "train_info/time_within_train_step": 26.933168411254883, "step": 597} +{"train_info/time_between_train_steps": 0.005367755889892578, "step": 597} +{"info/global_step": 598, "train_info/time_within_train_step": 24.7553071975708, "step": 598} +{"train_info/time_between_train_steps": 0.005185365676879883, "step": 598} +{"info/global_step": 599, "train_info/time_within_train_step": 24.742791652679443, "step": 599} +{"train_info/time_between_train_steps": 0.009752273559570312, "step": 599} +{"info/global_step": 600, "train_info/time_within_train_step": 24.95790934562683, "step": 600} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740938166, "_runtime": 15673}, "step": 600} +{"logs": {"train/loss": 3.0668, "train/learning_rate": 0.0003333333333333333, "train/epoch": 16.02, "_timestamp": 1740938166, "_runtime": 15673}, "step": 600} +{"train_info/time_between_train_steps": 53.943294048309326, "step": 600} +{"info/global_step": 601, "train_info/time_within_train_step": 24.825185298919678, "step": 601} +{"train_info/time_between_train_steps": 0.005177736282348633, "step": 601} +{"info/global_step": 602, "train_info/time_within_train_step": 24.766774892807007, "step": 602} +{"train_info/time_between_train_steps": 0.0050983428955078125, "step": 602} +{"info/global_step": 603, "train_info/time_within_train_step": 24.720672130584717, "step": 603} +{"train_info/time_between_train_steps": 0.005158662796020508, "step": 603} +{"info/global_step": 604, "train_info/time_within_train_step": 24.732523918151855, "step": 604} +{"train_info/time_between_train_steps": 0.005183696746826172, "step": 604} +{"info/global_step": 605, "train_info/time_within_train_step": 24.770317792892456, "step": 605} +{"train_info/time_between_train_steps": 0.005017280578613281, "step": 605} +{"info/global_step": 606, "train_info/time_within_train_step": 24.725789785385132, "step": 606} +{"train_info/time_between_train_steps": 0.00503993034362793, "step": 606} +{"info/global_step": 607, "train_info/time_within_train_step": 24.729480028152466, "step": 607} +{"train_info/time_between_train_steps": 0.005136966705322266, "step": 607} +{"info/global_step": 608, "train_info/time_within_train_step": 24.73749303817749, "step": 608} +{"train_info/time_between_train_steps": 0.0052337646484375, "step": 608} +{"info/global_step": 609, "train_info/time_within_train_step": 24.741074562072754, "step": 609} +{"train_info/time_between_train_steps": 0.009293079376220703, "step": 609} +{"info/global_step": 610, "train_info/time_within_train_step": 24.862913131713867, "step": 610} +{"train_info/time_between_train_steps": 0.0141448974609375, "step": 610} +{"info/global_step": 611, "train_info/time_within_train_step": 24.7557270526886, "step": 611} +{"train_info/time_between_train_steps": 0.005888462066650391, "step": 611} +{"info/global_step": 612, "train_info/time_within_train_step": 24.821255445480347, "step": 612} +{"train_info/time_between_train_steps": 0.0060961246490478516, "step": 612} +{"train_info/time_between_train_steps": 17.146178245544434, "step": 612} +{"info/global_step": 613, "train_info/time_within_train_step": 24.71886897087097, "step": 613} +{"train_info/time_between_train_steps": 0.004851579666137695, "step": 613} +{"info/global_step": 614, "train_info/time_within_train_step": 24.858389139175415, "step": 614} +{"train_info/time_between_train_steps": 0.0049593448638916016, "step": 614} +{"info/global_step": 615, "train_info/time_within_train_step": 24.71799612045288, "step": 615} +{"train_info/time_between_train_steps": 0.0049097537994384766, "step": 615} +{"info/global_step": 616, "train_info/time_within_train_step": 24.97925639152527, "step": 616} +{"train_info/time_between_train_steps": 0.005247831344604492, "step": 616} +{"info/global_step": 617, "train_info/time_within_train_step": 24.730900287628174, "step": 617} +{"train_info/time_between_train_steps": 0.0052738189697265625, "step": 617} +{"info/global_step": 618, "train_info/time_within_train_step": 24.854484796524048, "step": 618} +{"train_info/time_between_train_steps": 0.005094051361083984, "step": 618} +{"info/global_step": 619, "train_info/time_within_train_step": 24.765458822250366, "step": 619} +{"train_info/time_between_train_steps": 0.005110263824462891, "step": 619} +{"info/global_step": 620, "train_info/time_within_train_step": 24.873207330703735, "step": 620} +{"train_info/time_between_train_steps": 0.005151033401489258, "step": 620} +{"info/global_step": 621, "train_info/time_within_train_step": 24.783437728881836, "step": 621} +{"train_info/time_between_train_steps": 0.005501985549926758, "step": 621} +{"info/global_step": 622, "train_info/time_within_train_step": 24.877445697784424, "step": 622} +{"train_info/time_between_train_steps": 0.005273580551147461, "step": 622} +{"info/global_step": 623, "train_info/time_within_train_step": 24.73075222969055, "step": 623} +{"train_info/time_between_train_steps": 0.005230903625488281, "step": 623} +{"info/global_step": 624, "train_info/time_within_train_step": 24.848021030426025, "step": 624} +{"train_info/time_between_train_steps": 0.005190610885620117, "step": 624} +{"info/global_step": 625, "train_info/time_within_train_step": 24.737135648727417, "step": 625} +{"train_info/time_between_train_steps": 0.005462169647216797, "step": 625} +{"info/global_step": 626, "train_info/time_within_train_step": 24.953022480010986, "step": 626} +{"train_info/time_between_train_steps": 0.005179882049560547, "step": 626} +{"info/global_step": 627, "train_info/time_within_train_step": 24.727657318115234, "step": 627} +{"train_info/time_between_train_steps": 0.005054473876953125, "step": 627} +{"info/global_step": 628, "train_info/time_within_train_step": 24.81790256500244, "step": 628} +{"train_info/time_between_train_steps": 0.005541801452636719, "step": 628} +{"info/global_step": 629, "train_info/time_within_train_step": 24.736637830734253, "step": 629} +{"train_info/time_between_train_steps": 0.034850120544433594, "step": 629} +{"info/global_step": 630, "train_info/time_within_train_step": 24.724758625030518, "step": 630} +{"train_info/time_between_train_steps": 0.013625383377075195, "step": 630} +{"info/global_step": 631, "train_info/time_within_train_step": 24.957374811172485, "step": 631} +{"train_info/time_between_train_steps": 0.004919290542602539, "step": 631} +{"info/global_step": 632, "train_info/time_within_train_step": 24.839756965637207, "step": 632} +{"train_info/time_between_train_steps": 0.005040168762207031, "step": 632} +{"info/global_step": 633, "train_info/time_within_train_step": 24.796120405197144, "step": 633} +{"train_info/time_between_train_steps": 0.005197048187255859, "step": 633} +{"info/global_step": 634, "train_info/time_within_train_step": 24.737342596054077, "step": 634} +{"train_info/time_between_train_steps": 0.009673595428466797, "step": 634} +{"info/global_step": 635, "train_info/time_within_train_step": 24.727523803710938, "step": 635} +{"train_info/time_between_train_steps": 0.009692668914794922, "step": 635} +{"info/global_step": 636, "train_info/time_within_train_step": 24.74760890007019, "step": 636} +{"train_info/time_between_train_steps": 0.005335569381713867, "step": 636} +{"info/global_step": 637, "train_info/time_within_train_step": 24.727483987808228, "step": 637} +{"train_info/time_between_train_steps": 0.00611567497253418, "step": 637} +{"info/global_step": 638, "train_info/time_within_train_step": 24.788630723953247, "step": 638} +{"train_info/time_between_train_steps": 0.0059871673583984375, "step": 638} +{"info/global_step": 639, "train_info/time_within_train_step": 24.741323709487915, "step": 639} +{"train_info/time_between_train_steps": 0.0051479339599609375, "step": 639} +{"info/global_step": 640, "train_info/time_within_train_step": 24.79787302017212, "step": 640} +{"train_info/time_between_train_steps": 0.005178213119506836, "step": 640} +{"info/global_step": 641, "train_info/time_within_train_step": 24.720142364501953, "step": 641} +{"train_info/time_between_train_steps": 0.00982809066772461, "step": 641} +{"info/global_step": 642, "train_info/time_within_train_step": 24.742126941680908, "step": 642} +{"train_info/time_between_train_steps": 0.0049896240234375, "step": 642} +{"info/global_step": 643, "train_info/time_within_train_step": 24.79582929611206, "step": 643} +{"train_info/time_between_train_steps": 0.005075216293334961, "step": 643} +{"info/global_step": 644, "train_info/time_within_train_step": 24.748512268066406, "step": 644} +{"train_info/time_between_train_steps": 0.005439043045043945, "step": 644} +{"info/global_step": 645, "train_info/time_within_train_step": 24.810582160949707, "step": 645} +{"train_info/time_between_train_steps": 0.005064249038696289, "step": 645} +{"info/global_step": 646, "train_info/time_within_train_step": 24.729743003845215, "step": 646} +{"train_info/time_between_train_steps": 0.009938240051269531, "step": 646} +{"info/global_step": 647, "train_info/time_within_train_step": 24.853415727615356, "step": 647} +{"train_info/time_between_train_steps": 0.006043672561645508, "step": 647} +{"info/global_step": 648, "train_info/time_within_train_step": 24.810677528381348, "step": 648} +{"train_info/time_between_train_steps": 0.005850791931152344, "step": 648} +{"train_info/time_between_train_steps": 16.830501794815063, "step": 648} +{"info/global_step": 649, "train_info/time_within_train_step": 24.752386808395386, "step": 649} +{"train_info/time_between_train_steps": 0.010231256484985352, "step": 649} +{"info/global_step": 650, "train_info/time_within_train_step": 24.881980419158936, "step": 650} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740939495, "_runtime": 17002}, "step": 650} +{"logs": {"train/loss": 3.0338, "train/learning_rate": 0.00030555555555555555, "train/epoch": 18.0, "_timestamp": 1740939495, "_runtime": 17002}, "step": 650} +{"train_info/time_between_train_steps": 0.026485204696655273, "step": 650} +{"info/global_step": 651, "train_info/time_within_train_step": 24.761197328567505, "step": 651} +{"train_info/time_between_train_steps": 0.010125875473022461, "step": 651} +{"info/global_step": 652, "train_info/time_within_train_step": 24.86106538772583, "step": 652} +{"train_info/time_between_train_steps": 0.010151386260986328, "step": 652} +{"info/global_step": 653, "train_info/time_within_train_step": 24.745200395584106, "step": 653} +{"train_info/time_between_train_steps": 0.0053539276123046875, "step": 653} +{"info/global_step": 654, "train_info/time_within_train_step": 24.87554693222046, "step": 654} +{"train_info/time_between_train_steps": 0.005301237106323242, "step": 654} +{"info/global_step": 655, "train_info/time_within_train_step": 24.72724199295044, "step": 655} +{"train_info/time_between_train_steps": 0.0050923824310302734, "step": 655} +{"info/global_step": 656, "train_info/time_within_train_step": 24.916043758392334, "step": 656} +{"train_info/time_between_train_steps": 0.005156755447387695, "step": 656} +{"info/global_step": 657, "train_info/time_within_train_step": 24.787420749664307, "step": 657} +{"train_info/time_between_train_steps": 0.011069297790527344, "step": 657} +{"info/global_step": 658, "train_info/time_within_train_step": 24.86736536026001, "step": 658} +{"train_info/time_between_train_steps": 0.0051975250244140625, "step": 658} +{"info/global_step": 659, "train_info/time_within_train_step": 24.760985851287842, "step": 659} +{"train_info/time_between_train_steps": 0.00536036491394043, "step": 659} +{"info/global_step": 660, "train_info/time_within_train_step": 24.854884386062622, "step": 660} +{"train_info/time_between_train_steps": 0.005281925201416016, "step": 660} +{"info/global_step": 661, "train_info/time_within_train_step": 24.742483377456665, "step": 661} +{"train_info/time_between_train_steps": 0.005522012710571289, "step": 661} +{"info/global_step": 662, "train_info/time_within_train_step": 24.871367692947388, "step": 662} +{"train_info/time_between_train_steps": 0.0051422119140625, "step": 662} +{"info/global_step": 663, "train_info/time_within_train_step": 24.822421550750732, "step": 663} +{"train_info/time_between_train_steps": 0.0052814483642578125, "step": 663} +{"info/global_step": 664, "train_info/time_within_train_step": 24.839616060256958, "step": 664} +{"train_info/time_between_train_steps": 0.00525355339050293, "step": 664} +{"info/global_step": 665, "train_info/time_within_train_step": 24.73707604408264, "step": 665} +{"train_info/time_between_train_steps": 0.01929783821105957, "step": 665} +{"info/global_step": 666, "train_info/time_within_train_step": 24.785510778427124, "step": 666} +{"train_info/time_between_train_steps": 0.0052337646484375, "step": 666} +{"info/global_step": 667, "train_info/time_within_train_step": 24.71696639060974, "step": 667} +{"train_info/time_between_train_steps": 0.004906415939331055, "step": 667} +{"info/global_step": 668, "train_info/time_within_train_step": 24.724844455718994, "step": 668} +{"train_info/time_between_train_steps": 0.004962444305419922, "step": 668} +{"info/global_step": 669, "train_info/time_within_train_step": 24.779653549194336, "step": 669} +{"train_info/time_between_train_steps": 0.004982709884643555, "step": 669} +{"info/global_step": 670, "train_info/time_within_train_step": 24.727424144744873, "step": 670} +{"train_info/time_between_train_steps": 0.0050258636474609375, "step": 670} +{"info/global_step": 671, "train_info/time_within_train_step": 24.782188415527344, "step": 671} +{"train_info/time_between_train_steps": 0.005147695541381836, "step": 671} +{"info/global_step": 672, "train_info/time_within_train_step": 24.719937562942505, "step": 672} +{"train_info/time_between_train_steps": 0.005110979080200195, "step": 672} +{"info/global_step": 673, "train_info/time_within_train_step": 24.7425754070282, "step": 673} +{"train_info/time_between_train_steps": 0.005059003829956055, "step": 673} +{"info/global_step": 674, "train_info/time_within_train_step": 24.813189029693604, "step": 674} +{"train_info/time_between_train_steps": 0.0050275325775146484, "step": 674} +{"info/global_step": 675, "train_info/time_within_train_step": 24.74696660041809, "step": 675} +{"train_info/time_between_train_steps": 0.0051305294036865234, "step": 675} +{"info/global_step": 676, "train_info/time_within_train_step": 24.804067850112915, "step": 676} +{"train_info/time_between_train_steps": 0.005138874053955078, "step": 676} +{"info/global_step": 677, "train_info/time_within_train_step": 24.7394757270813, "step": 677} +{"train_info/time_between_train_steps": 0.005059480667114258, "step": 677} +{"info/global_step": 678, "train_info/time_within_train_step": 24.828415632247925, "step": 678} +{"train_info/time_between_train_steps": 0.00512242317199707, "step": 678} +{"info/global_step": 679, "train_info/time_within_train_step": 24.772902488708496, "step": 679} +{"train_info/time_between_train_steps": 0.005597829818725586, "step": 679} +{"info/global_step": 680, "train_info/time_within_train_step": 24.758158445358276, "step": 680} +{"train_info/time_between_train_steps": 0.00889277458190918, "step": 680} +{"info/global_step": 681, "train_info/time_within_train_step": 24.822669744491577, "step": 681} +{"train_info/time_between_train_steps": 0.0052411556243896484, "step": 681} +{"info/global_step": 682, "train_info/time_within_train_step": 24.73421835899353, "step": 682} +{"train_info/time_between_train_steps": 0.0056188106536865234, "step": 682} +{"info/global_step": 683, "train_info/time_within_train_step": 24.824275493621826, "step": 683} +{"train_info/time_between_train_steps": 0.0056188106536865234, "step": 683} +{"info/global_step": 684, "train_info/time_within_train_step": 24.764208793640137, "step": 684} +{"train_info/time_between_train_steps": 0.005984783172607422, "step": 684} +{"train_info/time_between_train_steps": 17.330841541290283, "step": 684} +{"info/global_step": 685, "train_info/time_within_train_step": 24.757073163986206, "step": 685} +{"train_info/time_between_train_steps": 0.004973173141479492, "step": 685} +{"info/global_step": 686, "train_info/time_within_train_step": 24.86412215232849, "step": 686} +{"train_info/time_between_train_steps": 0.0051462650299072266, "step": 686} +{"info/global_step": 687, "train_info/time_within_train_step": 24.778199911117554, "step": 687} +{"train_info/time_between_train_steps": 0.0052793025970458984, "step": 687} +{"info/global_step": 688, "train_info/time_within_train_step": 24.892030000686646, "step": 688} +{"train_info/time_between_train_steps": 0.0053446292877197266, "step": 688} +{"info/global_step": 689, "train_info/time_within_train_step": 24.73710870742798, "step": 689} +{"train_info/time_between_train_steps": 0.005158662796020508, "step": 689} +{"info/global_step": 690, "train_info/time_within_train_step": 24.911781311035156, "step": 690} +{"train_info/time_between_train_steps": 0.0050623416900634766, "step": 690} +{"info/global_step": 691, "train_info/time_within_train_step": 24.733175039291382, "step": 691} +{"train_info/time_between_train_steps": 0.005236148834228516, "step": 691} +{"info/global_step": 692, "train_info/time_within_train_step": 24.920443534851074, "step": 692} +{"train_info/time_between_train_steps": 0.005216360092163086, "step": 692} +{"info/global_step": 693, "train_info/time_within_train_step": 24.932525873184204, "step": 693} +{"train_info/time_between_train_steps": 0.005182504653930664, "step": 693} +{"info/global_step": 694, "train_info/time_within_train_step": 24.862853050231934, "step": 694} +{"train_info/time_between_train_steps": 0.005082607269287109, "step": 694} +{"info/global_step": 695, "train_info/time_within_train_step": 24.80867338180542, "step": 695} +{"train_info/time_between_train_steps": 0.009854555130004883, "step": 695} +{"info/global_step": 696, "train_info/time_within_train_step": 25.249539852142334, "step": 696} +{"train_info/time_between_train_steps": 0.005833625793457031, "step": 696} +{"info/global_step": 697, "train_info/time_within_train_step": 26.262026071548462, "step": 697} +{"train_info/time_between_train_steps": 0.00555729866027832, "step": 697} +{"info/global_step": 698, "train_info/time_within_train_step": 24.87285280227661, "step": 698} +{"train_info/time_between_train_steps": 0.005216360092163086, "step": 698} +{"info/global_step": 699, "train_info/time_within_train_step": 24.78266978263855, "step": 699} +{"train_info/time_between_train_steps": 0.005055665969848633, "step": 699} +{"info/global_step": 700, "train_info/time_within_train_step": 24.821552515029907, "step": 700} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740940757, "_runtime": 18264}, "step": 700} +{"logs": {"train/loss": 2.9462, "train/learning_rate": 0.0002777777777777778, "train/epoch": 19.01, "_timestamp": 1740940757, "_runtime": 18264}, "step": 700} +{"train_info/time_between_train_steps": 62.247464656829834, "step": 700} +{"info/global_step": 701, "train_info/time_within_train_step": 24.944709062576294, "step": 701} +{"train_info/time_between_train_steps": 0.03172922134399414, "step": 701} +{"info/global_step": 702, "train_info/time_within_train_step": 24.76601004600525, "step": 702} +{"train_info/time_between_train_steps": 0.005090475082397461, "step": 702} +{"info/global_step": 703, "train_info/time_within_train_step": 24.724320650100708, "step": 703} +{"train_info/time_between_train_steps": 0.00493311882019043, "step": 703} +{"info/global_step": 704, "train_info/time_within_train_step": 24.732539653778076, "step": 704} +{"train_info/time_between_train_steps": 0.0049130916595458984, "step": 704} +{"info/global_step": 705, "train_info/time_within_train_step": 24.728544235229492, "step": 705} +{"train_info/time_between_train_steps": 0.004926919937133789, "step": 705} +{"info/global_step": 706, "train_info/time_within_train_step": 24.7242374420166, "step": 706} +{"train_info/time_between_train_steps": 0.004984378814697266, "step": 706} +{"info/global_step": 707, "train_info/time_within_train_step": 24.72723412513733, "step": 707} +{"train_info/time_between_train_steps": 0.0051631927490234375, "step": 707} +{"info/global_step": 708, "train_info/time_within_train_step": 24.736929416656494, "step": 708} +{"train_info/time_between_train_steps": 0.005068778991699219, "step": 708} +{"info/global_step": 709, "train_info/time_within_train_step": 24.814722537994385, "step": 709} +{"train_info/time_between_train_steps": 0.004986286163330078, "step": 709} +{"info/global_step": 710, "train_info/time_within_train_step": 24.727452516555786, "step": 710} +{"train_info/time_between_train_steps": 0.00500798225402832, "step": 710} +{"info/global_step": 711, "train_info/time_within_train_step": 24.727279663085938, "step": 711} +{"train_info/time_between_train_steps": 0.005208253860473633, "step": 711} +{"info/global_step": 712, "train_info/time_within_train_step": 24.72803235054016, "step": 712} +{"train_info/time_between_train_steps": 0.005056858062744141, "step": 712} +{"info/global_step": 713, "train_info/time_within_train_step": 24.728704929351807, "step": 713} +{"train_info/time_between_train_steps": 0.009455680847167969, "step": 713} +{"info/global_step": 714, "train_info/time_within_train_step": 24.730613946914673, "step": 714} +{"train_info/time_between_train_steps": 0.005414485931396484, "step": 714} +{"info/global_step": 715, "train_info/time_within_train_step": 24.738581657409668, "step": 715} +{"train_info/time_between_train_steps": 0.005248546600341797, "step": 715} +{"info/global_step": 716, "train_info/time_within_train_step": 24.7983295917511, "step": 716} +{"train_info/time_between_train_steps": 0.0050890445709228516, "step": 716} +{"info/global_step": 717, "train_info/time_within_train_step": 24.73218870162964, "step": 717} +{"train_info/time_between_train_steps": 0.005239963531494141, "step": 717} +{"info/global_step": 718, "train_info/time_within_train_step": 24.749069452285767, "step": 718} +{"train_info/time_between_train_steps": 0.005472660064697266, "step": 718} +{"info/global_step": 719, "train_info/time_within_train_step": 24.813745975494385, "step": 719} +{"train_info/time_between_train_steps": 0.005506038665771484, "step": 719} +{"info/global_step": 720, "train_info/time_within_train_step": 24.757867574691772, "step": 720} +{"train_info/time_between_train_steps": 0.005931854248046875, "step": 720} +{"train_info/time_between_train_steps": 16.662338972091675, "step": 720} +{"info/global_step": 721, "train_info/time_within_train_step": 24.96833634376526, "step": 721} +{"train_info/time_between_train_steps": 0.014212608337402344, "step": 721} +{"info/global_step": 722, "train_info/time_within_train_step": 25.27589988708496, "step": 722} +{"train_info/time_between_train_steps": 0.005218029022216797, "step": 722} +{"info/global_step": 723, "train_info/time_within_train_step": 24.749303102493286, "step": 723} +{"train_info/time_between_train_steps": 0.005205869674682617, "step": 723} +{"info/global_step": 724, "train_info/time_within_train_step": 24.946330547332764, "step": 724} +{"train_info/time_between_train_steps": 0.005129814147949219, "step": 724} +{"info/global_step": 725, "train_info/time_within_train_step": 24.740745782852173, "step": 725} +{"train_info/time_between_train_steps": 0.005392313003540039, "step": 725} +{"info/global_step": 726, "train_info/time_within_train_step": 24.898520469665527, "step": 726} +{"train_info/time_between_train_steps": 0.00537872314453125, "step": 726} +{"info/global_step": 727, "train_info/time_within_train_step": 24.7390718460083, "step": 727} +{"train_info/time_between_train_steps": 0.0051763057708740234, "step": 727} +{"info/global_step": 728, "train_info/time_within_train_step": 24.852141857147217, "step": 728} +{"train_info/time_between_train_steps": 0.005126476287841797, "step": 728} +{"info/global_step": 729, "train_info/time_within_train_step": 24.754323720932007, "step": 729} +{"train_info/time_between_train_steps": 0.005437612533569336, "step": 729} +{"info/global_step": 730, "train_info/time_within_train_step": 24.95819592475891, "step": 730} +{"train_info/time_between_train_steps": 0.005301713943481445, "step": 730} +{"info/global_step": 731, "train_info/time_within_train_step": 24.73341655731201, "step": 731} +{"train_info/time_between_train_steps": 0.005190134048461914, "step": 731} +{"info/global_step": 732, "train_info/time_within_train_step": 24.847167253494263, "step": 732} +{"train_info/time_between_train_steps": 0.005330085754394531, "step": 732} +{"info/global_step": 733, "train_info/time_within_train_step": 24.737390279769897, "step": 733} +{"train_info/time_between_train_steps": 0.00534510612487793, "step": 733} +{"info/global_step": 734, "train_info/time_within_train_step": 24.900729417800903, "step": 734} +{"train_info/time_between_train_steps": 0.005219221115112305, "step": 734} +{"info/global_step": 735, "train_info/time_within_train_step": 24.7979736328125, "step": 735} +{"train_info/time_between_train_steps": 0.005370378494262695, "step": 735} +{"info/global_step": 736, "train_info/time_within_train_step": 24.839669227600098, "step": 736} +{"train_info/time_between_train_steps": 0.00530552864074707, "step": 736} +{"info/global_step": 737, "train_info/time_within_train_step": 24.740396738052368, "step": 737} +{"train_info/time_between_train_steps": 0.022294998168945312, "step": 737} +{"info/global_step": 738, "train_info/time_within_train_step": 24.72524094581604, "step": 738} +{"train_info/time_between_train_steps": 0.005075693130493164, "step": 738} +{"info/global_step": 739, "train_info/time_within_train_step": 24.744115352630615, "step": 739} +{"train_info/time_between_train_steps": 0.005194425582885742, "step": 739} +{"info/global_step": 740, "train_info/time_within_train_step": 24.815584659576416, "step": 740} +{"train_info/time_between_train_steps": 0.005167961120605469, "step": 740} +{"info/global_step": 741, "train_info/time_within_train_step": 24.74290370941162, "step": 741} +{"train_info/time_between_train_steps": 0.010239124298095703, "step": 741} +{"info/global_step": 742, "train_info/time_within_train_step": 24.76502776145935, "step": 742} +{"train_info/time_between_train_steps": 0.0096588134765625, "step": 742} +{"info/global_step": 743, "train_info/time_within_train_step": 24.730873346328735, "step": 743} +{"train_info/time_between_train_steps": 0.009907007217407227, "step": 743} +{"info/global_step": 744, "train_info/time_within_train_step": 24.746002197265625, "step": 744} +{"train_info/time_between_train_steps": 0.0051767826080322266, "step": 744} +{"info/global_step": 745, "train_info/time_within_train_step": 24.80105686187744, "step": 745} +{"train_info/time_between_train_steps": 0.009756326675415039, "step": 745} +{"info/global_step": 746, "train_info/time_within_train_step": 24.74150586128235, "step": 746} +{"train_info/time_between_train_steps": 0.00510406494140625, "step": 746} +{"info/global_step": 747, "train_info/time_within_train_step": 24.839800357818604, "step": 747} +{"train_info/time_between_train_steps": 0.0052051544189453125, "step": 747} +{"info/global_step": 748, "train_info/time_within_train_step": 24.74815821647644, "step": 748} +{"train_info/time_between_train_steps": 0.009979724884033203, "step": 748} +{"info/global_step": 749, "train_info/time_within_train_step": 24.755234479904175, "step": 749} +{"train_info/time_between_train_steps": 0.005112171173095703, "step": 749} +{"info/global_step": 750, "train_info/time_within_train_step": 24.774919033050537, "step": 750} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740942077, "_runtime": 19584}, "step": 750} +{"logs": {"train/loss": 2.8965, "train/learning_rate": 0.00025, "train/epoch": 20.02, "_timestamp": 1740942077, "_runtime": 19584}, "step": 750} +{"train_info/time_between_train_steps": 0.05366206169128418, "step": 750} +{"info/global_step": 751, "train_info/time_within_train_step": 24.75253176689148, "step": 751} +{"train_info/time_between_train_steps": 0.005207538604736328, "step": 751} +{"info/global_step": 752, "train_info/time_within_train_step": 24.804935932159424, "step": 752} +{"train_info/time_between_train_steps": 0.005065202713012695, "step": 752} +{"info/global_step": 753, "train_info/time_within_train_step": 24.76252818107605, "step": 753} +{"train_info/time_between_train_steps": 0.005376100540161133, "step": 753} +{"info/global_step": 754, "train_info/time_within_train_step": 24.80316138267517, "step": 754} +{"train_info/time_between_train_steps": 0.005289316177368164, "step": 754} +{"info/global_step": 755, "train_info/time_within_train_step": 24.833272695541382, "step": 755} +{"train_info/time_between_train_steps": 0.010566473007202148, "step": 755} +{"info/global_step": 756, "train_info/time_within_train_step": 24.763903617858887, "step": 756} +{"train_info/time_between_train_steps": 0.00594329833984375, "step": 756} +{"train_info/time_between_train_steps": 16.84533429145813, "step": 756} +{"info/global_step": 757, "train_info/time_within_train_step": 24.72954773902893, "step": 757} +{"train_info/time_between_train_steps": 0.005096912384033203, "step": 757} +{"info/global_step": 758, "train_info/time_within_train_step": 24.858292818069458, "step": 758} +{"train_info/time_between_train_steps": 0.00504755973815918, "step": 758} +{"info/global_step": 759, "train_info/time_within_train_step": 24.723742961883545, "step": 759} +{"train_info/time_between_train_steps": 0.005043506622314453, "step": 759} +{"info/global_step": 760, "train_info/time_within_train_step": 24.857040643692017, "step": 760} +{"train_info/time_between_train_steps": 0.010012626647949219, "step": 760} +{"info/global_step": 761, "train_info/time_within_train_step": 24.803312301635742, "step": 761} +{"train_info/time_between_train_steps": 0.009719610214233398, "step": 761} +{"info/global_step": 762, "train_info/time_within_train_step": 24.84162712097168, "step": 762} +{"train_info/time_between_train_steps": 0.009518623352050781, "step": 762} +{"info/global_step": 763, "train_info/time_within_train_step": 24.80108618736267, "step": 763} +{"train_info/time_between_train_steps": 0.009866476058959961, "step": 763} +{"info/global_step": 764, "train_info/time_within_train_step": 24.89450454711914, "step": 764} +{"train_info/time_between_train_steps": 0.0099334716796875, "step": 764} +{"info/global_step": 765, "train_info/time_within_train_step": 24.75266146659851, "step": 765} +{"train_info/time_between_train_steps": 0.010384559631347656, "step": 765} +{"info/global_step": 766, "train_info/time_within_train_step": 24.92173957824707, "step": 766} +{"train_info/time_between_train_steps": 0.009719133377075195, "step": 766} +{"info/global_step": 767, "train_info/time_within_train_step": 24.74669051170349, "step": 767} +{"train_info/time_between_train_steps": 0.00997781753540039, "step": 767} +{"info/global_step": 768, "train_info/time_within_train_step": 24.864579916000366, "step": 768} +{"train_info/time_between_train_steps": 0.0052335262298583984, "step": 768} +{"info/global_step": 769, "train_info/time_within_train_step": 24.735082387924194, "step": 769} +{"train_info/time_between_train_steps": 0.0052378177642822266, "step": 769} +{"info/global_step": 770, "train_info/time_within_train_step": 24.96723699569702, "step": 770} +{"train_info/time_between_train_steps": 0.005150318145751953, "step": 770} +{"info/global_step": 771, "train_info/time_within_train_step": 24.745615243911743, "step": 771} +{"train_info/time_between_train_steps": 0.005135297775268555, "step": 771} +{"info/global_step": 772, "train_info/time_within_train_step": 24.81569480895996, "step": 772} +{"train_info/time_between_train_steps": 0.005258321762084961, "step": 772} +{"info/global_step": 773, "train_info/time_within_train_step": 24.735496759414673, "step": 773} +{"train_info/time_between_train_steps": 0.022410869598388672, "step": 773} +{"info/global_step": 774, "train_info/time_within_train_step": 24.720401525497437, "step": 774} +{"train_info/time_between_train_steps": 0.005063295364379883, "step": 774} +{"info/global_step": 775, "train_info/time_within_train_step": 24.73737072944641, "step": 775} +{"train_info/time_between_train_steps": 0.005139827728271484, "step": 775} +{"info/global_step": 776, "train_info/time_within_train_step": 24.7236750125885, "step": 776} +{"train_info/time_between_train_steps": 0.004972696304321289, "step": 776} +{"info/global_step": 777, "train_info/time_within_train_step": 24.72750449180603, "step": 777} +{"train_info/time_between_train_steps": 0.0049440860748291016, "step": 777} +{"info/global_step": 778, "train_info/time_within_train_step": 24.719197750091553, "step": 778} +{"train_info/time_between_train_steps": 0.005052804946899414, "step": 778} +{"info/global_step": 779, "train_info/time_within_train_step": 24.723901748657227, "step": 779} +{"train_info/time_between_train_steps": 0.005034685134887695, "step": 779} +{"info/global_step": 780, "train_info/time_within_train_step": 24.798575401306152, "step": 780} +{"train_info/time_between_train_steps": 0.0052416324615478516, "step": 780} +{"info/global_step": 781, "train_info/time_within_train_step": 24.72193145751953, "step": 781} +{"train_info/time_between_train_steps": 0.005079507827758789, "step": 781} +{"info/global_step": 782, "train_info/time_within_train_step": 24.724289894104004, "step": 782} +{"train_info/time_between_train_steps": 0.005018711090087891, "step": 782} +{"info/global_step": 783, "train_info/time_within_train_step": 24.782485246658325, "step": 783} +{"train_info/time_between_train_steps": 0.0050508975982666016, "step": 783} +{"info/global_step": 784, "train_info/time_within_train_step": 24.733108282089233, "step": 784} +{"train_info/time_between_train_steps": 0.005074501037597656, "step": 784} +{"info/global_step": 785, "train_info/time_within_train_step": 24.79958200454712, "step": 785} +{"train_info/time_between_train_steps": 0.005945682525634766, "step": 785} +{"info/global_step": 786, "train_info/time_within_train_step": 24.816203355789185, "step": 786} +{"train_info/time_between_train_steps": 0.005032777786254883, "step": 786} +{"info/global_step": 787, "train_info/time_within_train_step": 24.738206386566162, "step": 787} +{"train_info/time_between_train_steps": 0.005069732666015625, "step": 787} +{"info/global_step": 788, "train_info/time_within_train_step": 24.79548454284668, "step": 788} +{"train_info/time_between_train_steps": 0.0049970149993896484, "step": 788} +{"info/global_step": 789, "train_info/time_within_train_step": 24.74788761138916, "step": 789} +{"train_info/time_between_train_steps": 0.005224466323852539, "step": 789} +{"info/global_step": 790, "train_info/time_within_train_step": 24.80906391143799, "step": 790} +{"train_info/time_between_train_steps": 0.010315895080566406, "step": 790} +{"info/global_step": 791, "train_info/time_within_train_step": 24.822168111801147, "step": 791} +{"train_info/time_between_train_steps": 0.005459308624267578, "step": 791} +{"info/global_step": 792, "train_info/time_within_train_step": 24.81213402748108, "step": 792} +{"train_info/time_between_train_steps": 0.0058367252349853516, "step": 792} +{"train_info/time_between_train_steps": 16.890124320983887, "step": 792} +{"info/global_step": 793, "train_info/time_within_train_step": 24.72067165374756, "step": 793} +{"train_info/time_between_train_steps": 0.0048100948333740234, "step": 793} +{"info/global_step": 794, "train_info/time_within_train_step": 24.901129007339478, "step": 794} +{"train_info/time_between_train_steps": 0.004923820495605469, "step": 794} +{"info/global_step": 795, "train_info/time_within_train_step": 25.058910846710205, "step": 795} +{"train_info/time_between_train_steps": 0.005678892135620117, "step": 795} +{"info/global_step": 796, "train_info/time_within_train_step": 25.431345224380493, "step": 796} +{"train_info/time_between_train_steps": 0.005446910858154297, "step": 796} +{"info/global_step": 797, "train_info/time_within_train_step": 24.777690410614014, "step": 797} +{"train_info/time_between_train_steps": 0.005558013916015625, "step": 797} +{"info/global_step": 798, "train_info/time_within_train_step": 24.849391222000122, "step": 798} +{"train_info/time_between_train_steps": 0.010002851486206055, "step": 798} +{"info/global_step": 799, "train_info/time_within_train_step": 24.804245710372925, "step": 799} +{"train_info/time_between_train_steps": 0.005331993103027344, "step": 799} +{"info/global_step": 800, "train_info/time_within_train_step": 24.88936424255371, "step": 800} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740943354, "_runtime": 20861}, "step": 800} +{"logs": {"train/loss": 2.8837, "train/learning_rate": 0.00022222222222222218, "train/epoch": 22.01, "_timestamp": 1740943354, "_runtime": 20861}, "step": 800} +{"train_info/time_between_train_steps": 73.13836598396301, "step": 800} +{"info/global_step": 801, "train_info/time_within_train_step": 25.303372859954834, "step": 801} +{"train_info/time_between_train_steps": 0.005194664001464844, "step": 801} +{"info/global_step": 802, "train_info/time_within_train_step": 24.89709162712097, "step": 802} +{"train_info/time_between_train_steps": 0.005258321762084961, "step": 802} +{"info/global_step": 803, "train_info/time_within_train_step": 24.73775815963745, "step": 803} +{"train_info/time_between_train_steps": 0.005400419235229492, "step": 803} +{"info/global_step": 804, "train_info/time_within_train_step": 24.84834384918213, "step": 804} +{"train_info/time_between_train_steps": 0.005289554595947266, "step": 804} +{"info/global_step": 805, "train_info/time_within_train_step": 24.730615615844727, "step": 805} +{"train_info/time_between_train_steps": 0.005220174789428711, "step": 805} +{"info/global_step": 806, "train_info/time_within_train_step": 24.891767263412476, "step": 806} +{"train_info/time_between_train_steps": 0.005224466323852539, "step": 806} +{"info/global_step": 807, "train_info/time_within_train_step": 24.736618995666504, "step": 807} +{"train_info/time_between_train_steps": 0.005251884460449219, "step": 807} +{"info/global_step": 808, "train_info/time_within_train_step": 24.816319942474365, "step": 808} +{"train_info/time_between_train_steps": 0.005220174789428711, "step": 808} +{"info/global_step": 809, "train_info/time_within_train_step": 24.733312368392944, "step": 809} +{"train_info/time_between_train_steps": 0.025946617126464844, "step": 809} +{"info/global_step": 810, "train_info/time_within_train_step": 24.73227071762085, "step": 810} +{"train_info/time_between_train_steps": 0.009824991226196289, "step": 810} +{"info/global_step": 811, "train_info/time_within_train_step": 24.725953817367554, "step": 811} +{"train_info/time_between_train_steps": 0.0052607059478759766, "step": 811} +{"info/global_step": 812, "train_info/time_within_train_step": 24.7254581451416, "step": 812} +{"train_info/time_between_train_steps": 0.009861230850219727, "step": 812} +{"info/global_step": 813, "train_info/time_within_train_step": 24.723206520080566, "step": 813} +{"train_info/time_between_train_steps": 0.005022287368774414, "step": 813} +{"info/global_step": 814, "train_info/time_within_train_step": 24.724952697753906, "step": 814} +{"train_info/time_between_train_steps": 0.004952430725097656, "step": 814} +{"info/global_step": 815, "train_info/time_within_train_step": 24.753435373306274, "step": 815} +{"train_info/time_between_train_steps": 0.009968042373657227, "step": 815} +{"info/global_step": 816, "train_info/time_within_train_step": 24.726907968521118, "step": 816} +{"train_info/time_between_train_steps": 0.010109901428222656, "step": 816} +{"info/global_step": 817, "train_info/time_within_train_step": 24.814761638641357, "step": 817} +{"train_info/time_between_train_steps": 0.005075931549072266, "step": 817} +{"info/global_step": 818, "train_info/time_within_train_step": 24.73028302192688, "step": 818} +{"train_info/time_between_train_steps": 0.005047798156738281, "step": 818} +{"info/global_step": 819, "train_info/time_within_train_step": 24.74017906188965, "step": 819} +{"train_info/time_between_train_steps": 0.005100727081298828, "step": 819} +{"info/global_step": 820, "train_info/time_within_train_step": 24.814683437347412, "step": 820} +{"train_info/time_between_train_steps": 0.005084514617919922, "step": 820} +{"info/global_step": 821, "train_info/time_within_train_step": 24.740139484405518, "step": 821} +{"train_info/time_between_train_steps": 0.0052340030670166016, "step": 821} +{"info/global_step": 822, "train_info/time_within_train_step": 24.749627113342285, "step": 822} +{"train_info/time_between_train_steps": 0.005127429962158203, "step": 822} +{"info/global_step": 823, "train_info/time_within_train_step": 24.79357099533081, "step": 823} +{"train_info/time_between_train_steps": 0.005035400390625, "step": 823} +{"info/global_step": 824, "train_info/time_within_train_step": 24.747036457061768, "step": 824} +{"train_info/time_between_train_steps": 0.009990453720092773, "step": 824} +{"info/global_step": 825, "train_info/time_within_train_step": 24.8368878364563, "step": 825} +{"train_info/time_between_train_steps": 0.005514383316040039, "step": 825} +{"info/global_step": 826, "train_info/time_within_train_step": 24.734448671340942, "step": 826} +{"train_info/time_between_train_steps": 0.005251169204711914, "step": 826} +{"info/global_step": 827, "train_info/time_within_train_step": 24.798367261886597, "step": 827} +{"train_info/time_between_train_steps": 0.00547480583190918, "step": 827} +{"info/global_step": 828, "train_info/time_within_train_step": 24.75233793258667, "step": 828} +{"train_info/time_between_train_steps": 0.0057315826416015625, "step": 828} +{"train_info/time_between_train_steps": 16.708829641342163, "step": 828} +{"info/global_step": 829, "train_info/time_within_train_step": 25.070322036743164, "step": 829} +{"train_info/time_between_train_steps": 0.004903078079223633, "step": 829} +{"info/global_step": 830, "train_info/time_within_train_step": 24.83016014099121, "step": 830} +{"train_info/time_between_train_steps": 0.0049686431884765625, "step": 830} +{"info/global_step": 831, "train_info/time_within_train_step": 24.73073387145996, "step": 831} +{"train_info/time_between_train_steps": 0.004987001419067383, "step": 831} +{"info/global_step": 832, "train_info/time_within_train_step": 24.921006441116333, "step": 832} +{"train_info/time_between_train_steps": 0.004986763000488281, "step": 832} +{"info/global_step": 833, "train_info/time_within_train_step": 24.722442865371704, "step": 833} +{"train_info/time_between_train_steps": 0.005274534225463867, "step": 833} +{"info/global_step": 834, "train_info/time_within_train_step": 24.90383744239807, "step": 834} +{"train_info/time_between_train_steps": 0.016105175018310547, "step": 834} +{"info/global_step": 835, "train_info/time_within_train_step": 24.73271679878235, "step": 835} +{"train_info/time_between_train_steps": 0.010064840316772461, "step": 835} +{"info/global_step": 836, "train_info/time_within_train_step": 24.991507530212402, "step": 836} +{"train_info/time_between_train_steps": 0.005291461944580078, "step": 836} +{"info/global_step": 837, "train_info/time_within_train_step": 24.774540424346924, "step": 837} +{"train_info/time_between_train_steps": 0.010590791702270508, "step": 837} +{"info/global_step": 838, "train_info/time_within_train_step": 24.875624656677246, "step": 838} +{"train_info/time_between_train_steps": 0.005100250244140625, "step": 838} +{"info/global_step": 839, "train_info/time_within_train_step": 24.746325969696045, "step": 839} +{"train_info/time_between_train_steps": 0.005314826965332031, "step": 839} +{"info/global_step": 840, "train_info/time_within_train_step": 24.899530172348022, "step": 840} +{"train_info/time_between_train_steps": 0.0053730010986328125, "step": 840} +{"info/global_step": 841, "train_info/time_within_train_step": 24.776708364486694, "step": 841} +{"train_info/time_between_train_steps": 0.005365848541259766, "step": 841} +{"info/global_step": 842, "train_info/time_within_train_step": 24.844507694244385, "step": 842} +{"train_info/time_between_train_steps": 0.005135536193847656, "step": 842} +{"info/global_step": 843, "train_info/time_within_train_step": 24.74246072769165, "step": 843} +{"train_info/time_between_train_steps": 0.005413532257080078, "step": 843} +{"info/global_step": 844, "train_info/time_within_train_step": 24.862993478775024, "step": 844} +{"train_info/time_between_train_steps": 0.005304574966430664, "step": 844} +{"info/global_step": 845, "train_info/time_within_train_step": 24.74372959136963, "step": 845} +{"train_info/time_between_train_steps": 0.018962621688842773, "step": 845} +{"info/global_step": 846, "train_info/time_within_train_step": 24.75003147125244, "step": 846} +{"train_info/time_between_train_steps": 0.00482487678527832, "step": 846} +{"info/global_step": 847, "train_info/time_within_train_step": 24.84656071662903, "step": 847} +{"train_info/time_between_train_steps": 0.0049898624420166016, "step": 847} +{"info/global_step": 848, "train_info/time_within_train_step": 24.719672441482544, "step": 848} +{"train_info/time_between_train_steps": 0.009910106658935547, "step": 848} +{"info/global_step": 849, "train_info/time_within_train_step": 24.75291109085083, "step": 849} +{"train_info/time_between_train_steps": 0.005022764205932617, "step": 849} +{"info/global_step": 850, "train_info/time_within_train_step": 24.735848665237427, "step": 850} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740944686, "_runtime": 22193}, "step": 850} +{"logs": {"train/loss": 2.8106, "train/learning_rate": 0.00019444444444444443, "train/epoch": 23.02, "_timestamp": 1740944686, "_runtime": 22193}, "step": 850} +{"train_info/time_between_train_steps": 0.048512935638427734, "step": 850} +{"info/global_step": 851, "train_info/time_within_train_step": 24.75815224647522, "step": 851} +{"train_info/time_between_train_steps": 0.005202531814575195, "step": 851} +{"info/global_step": 852, "train_info/time_within_train_step": 24.732919454574585, "step": 852} +{"train_info/time_between_train_steps": 0.005158662796020508, "step": 852} +{"info/global_step": 853, "train_info/time_within_train_step": 24.799421548843384, "step": 853} +{"train_info/time_between_train_steps": 0.005128383636474609, "step": 853} +{"info/global_step": 854, "train_info/time_within_train_step": 24.72573494911194, "step": 854} +{"train_info/time_between_train_steps": 0.00991678237915039, "step": 854} +{"info/global_step": 855, "train_info/time_within_train_step": 24.733285903930664, "step": 855} +{"train_info/time_between_train_steps": 0.005162954330444336, "step": 855} +{"info/global_step": 856, "train_info/time_within_train_step": 24.78001070022583, "step": 856} +{"train_info/time_between_train_steps": 0.005081653594970703, "step": 856} +{"info/global_step": 857, "train_info/time_within_train_step": 24.73698329925537, "step": 857} +{"train_info/time_between_train_steps": 0.005100727081298828, "step": 857} +{"info/global_step": 858, "train_info/time_within_train_step": 24.80953335762024, "step": 858} +{"train_info/time_between_train_steps": 0.00506138801574707, "step": 858} +{"info/global_step": 859, "train_info/time_within_train_step": 24.728479146957397, "step": 859} +{"train_info/time_between_train_steps": 0.005169391632080078, "step": 859} +{"info/global_step": 860, "train_info/time_within_train_step": 24.729862213134766, "step": 860} +{"train_info/time_between_train_steps": 0.0049970149993896484, "step": 860} +{"info/global_step": 861, "train_info/time_within_train_step": 24.79707980155945, "step": 861} +{"train_info/time_between_train_steps": 0.0050618648529052734, "step": 861} +{"info/global_step": 862, "train_info/time_within_train_step": 24.731895208358765, "step": 862} +{"train_info/time_between_train_steps": 0.0052602291107177734, "step": 862} +{"info/global_step": 863, "train_info/time_within_train_step": 24.90120506286621, "step": 863} +{"train_info/time_between_train_steps": 0.00581669807434082, "step": 863} +{"info/global_step": 864, "train_info/time_within_train_step": 24.754596710205078, "step": 864} +{"train_info/time_between_train_steps": 0.0056552886962890625, "step": 864} +{"train_info/time_between_train_steps": 16.58609628677368, "step": 864} +{"info/global_step": 865, "train_info/time_within_train_step": 24.7710919380188, "step": 865} +{"train_info/time_between_train_steps": 0.005181074142456055, "step": 865} +{"info/global_step": 866, "train_info/time_within_train_step": 24.896857023239136, "step": 866} +{"train_info/time_between_train_steps": 0.005182743072509766, "step": 866} +{"info/global_step": 867, "train_info/time_within_train_step": 24.801791429519653, "step": 867} +{"train_info/time_between_train_steps": 0.005191802978515625, "step": 867} +{"info/global_step": 868, "train_info/time_within_train_step": 24.844171285629272, "step": 868} +{"train_info/time_between_train_steps": 0.005190849304199219, "step": 868} +{"info/global_step": 869, "train_info/time_within_train_step": 24.72906517982483, "step": 869} +{"train_info/time_between_train_steps": 0.005251407623291016, "step": 869} +{"info/global_step": 870, "train_info/time_within_train_step": 24.9337637424469, "step": 870} +{"train_info/time_between_train_steps": 0.005474567413330078, "step": 870} +{"info/global_step": 871, "train_info/time_within_train_step": 24.76639723777771, "step": 871} +{"train_info/time_between_train_steps": 0.005413532257080078, "step": 871} +{"info/global_step": 872, "train_info/time_within_train_step": 24.897751331329346, "step": 872} +{"train_info/time_between_train_steps": 0.005129575729370117, "step": 872} +{"info/global_step": 873, "train_info/time_within_train_step": 24.738792896270752, "step": 873} +{"train_info/time_between_train_steps": 0.0051496028900146484, "step": 873} +{"info/global_step": 874, "train_info/time_within_train_step": 24.886149644851685, "step": 874} +{"train_info/time_between_train_steps": 0.005253791809082031, "step": 874} +{"info/global_step": 875, "train_info/time_within_train_step": 24.741423845291138, "step": 875} +{"train_info/time_between_train_steps": 0.00543975830078125, "step": 875} +{"info/global_step": 876, "train_info/time_within_train_step": 24.85316562652588, "step": 876} +{"train_info/time_between_train_steps": 0.00525355339050293, "step": 876} +{"info/global_step": 877, "train_info/time_within_train_step": 24.736005544662476, "step": 877} +{"train_info/time_between_train_steps": 0.005156278610229492, "step": 877} +{"info/global_step": 878, "train_info/time_within_train_step": 24.94442343711853, "step": 878} +{"train_info/time_between_train_steps": 0.0050237178802490234, "step": 878} +{"info/global_step": 879, "train_info/time_within_train_step": 24.7369441986084, "step": 879} +{"train_info/time_between_train_steps": 0.005295276641845703, "step": 879} +{"info/global_step": 880, "train_info/time_within_train_step": 24.859166383743286, "step": 880} +{"train_info/time_between_train_steps": 0.010424613952636719, "step": 880} +{"info/global_step": 881, "train_info/time_within_train_step": 24.7343327999115, "step": 881} +{"train_info/time_between_train_steps": 0.02445363998413086, "step": 881} +{"info/global_step": 882, "train_info/time_within_train_step": 24.73401379585266, "step": 882} +{"train_info/time_between_train_steps": 0.009469270706176758, "step": 882} +{"info/global_step": 883, "train_info/time_within_train_step": 24.733276844024658, "step": 883} +{"train_info/time_between_train_steps": 0.009600400924682617, "step": 883} +{"info/global_step": 884, "train_info/time_within_train_step": 24.793264150619507, "step": 884} +{"train_info/time_between_train_steps": 0.005036115646362305, "step": 884} +{"info/global_step": 885, "train_info/time_within_train_step": 24.747137784957886, "step": 885} +{"train_info/time_between_train_steps": 0.010047197341918945, "step": 885} +{"info/global_step": 886, "train_info/time_within_train_step": 24.729435682296753, "step": 886} +{"train_info/time_between_train_steps": 0.009541034698486328, "step": 886} +{"info/global_step": 887, "train_info/time_within_train_step": 24.77012801170349, "step": 887} +{"train_info/time_between_train_steps": 0.009791851043701172, "step": 887} +{"info/global_step": 888, "train_info/time_within_train_step": 24.740750074386597, "step": 888} +{"train_info/time_between_train_steps": 0.01003885269165039, "step": 888} +{"info/global_step": 889, "train_info/time_within_train_step": 24.79337501525879, "step": 889} +{"train_info/time_between_train_steps": 0.01004934310913086, "step": 889} +{"info/global_step": 890, "train_info/time_within_train_step": 24.73801898956299, "step": 890} +{"train_info/time_between_train_steps": 0.005150556564331055, "step": 890} +{"info/global_step": 891, "train_info/time_within_train_step": 24.766539573669434, "step": 891} +{"train_info/time_between_train_steps": 0.0051877498626708984, "step": 891} +{"info/global_step": 892, "train_info/time_within_train_step": 24.734975337982178, "step": 892} +{"train_info/time_between_train_steps": 0.009144306182861328, "step": 892} +{"info/global_step": 893, "train_info/time_within_train_step": 24.74428415298462, "step": 893} +{"train_info/time_between_train_steps": 0.0050258636474609375, "step": 893} +{"info/global_step": 894, "train_info/time_within_train_step": 26.234050989151, "step": 894} +{"train_info/time_between_train_steps": 0.00528407096862793, "step": 894} +{"info/global_step": 895, "train_info/time_within_train_step": 25.02953314781189, "step": 895} +{"train_info/time_between_train_steps": 0.005191802978515625, "step": 895} +{"info/global_step": 896, "train_info/time_within_train_step": 24.76716113090515, "step": 896} +{"train_info/time_between_train_steps": 0.004972219467163086, "step": 896} +{"info/global_step": 897, "train_info/time_within_train_step": 24.725308895111084, "step": 897} +{"train_info/time_between_train_steps": 0.005145072937011719, "step": 897} +{"info/global_step": 898, "train_info/time_within_train_step": 24.749016761779785, "step": 898} +{"train_info/time_between_train_steps": 0.005892753601074219, "step": 898} +{"info/global_step": 899, "train_info/time_within_train_step": 24.77386784553528, "step": 899} +{"train_info/time_between_train_steps": 0.010350704193115234, "step": 899} +{"info/global_step": 900, "train_info/time_within_train_step": 24.770761489868164, "step": 900} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740945945, "_runtime": 23452}, "step": 900} +{"logs": {"train/loss": 2.7726, "train/learning_rate": 0.00016666666666666666, "train/epoch": 24.03, "_timestamp": 1740945945, "_runtime": 23452}, "step": 900} +{"train_info/time_between_train_steps": 69.04333400726318, "step": 900} +{"train_info/time_between_train_steps": 87.29278612136841, "step": 900} +{"info/global_step": 901, "train_info/time_within_train_step": 24.738870859146118, "step": 901} +{"train_info/time_between_train_steps": 0.009840726852416992, "step": 901} +{"info/global_step": 902, "train_info/time_within_train_step": 24.837350845336914, "step": 902} +{"train_info/time_between_train_steps": 0.00489354133605957, "step": 902} +{"info/global_step": 903, "train_info/time_within_train_step": 24.720728397369385, "step": 903} +{"train_info/time_between_train_steps": 0.004897117614746094, "step": 903} +{"info/global_step": 904, "train_info/time_within_train_step": 24.896418571472168, "step": 904} +{"train_info/time_between_train_steps": 0.005212068557739258, "step": 904} +{"info/global_step": 905, "train_info/time_within_train_step": 24.846043586730957, "step": 905} +{"train_info/time_between_train_steps": 0.0053234100341796875, "step": 905} +{"info/global_step": 906, "train_info/time_within_train_step": 24.839415311813354, "step": 906} +{"train_info/time_between_train_steps": 0.005202531814575195, "step": 906} +{"info/global_step": 907, "train_info/time_within_train_step": 24.802236795425415, "step": 907} +{"train_info/time_between_train_steps": 0.005106449127197266, "step": 907} +{"info/global_step": 908, "train_info/time_within_train_step": 24.84829592704773, "step": 908} +{"train_info/time_between_train_steps": 0.0053327083587646484, "step": 908} +{"info/global_step": 909, "train_info/time_within_train_step": 24.822173357009888, "step": 909} +{"train_info/time_between_train_steps": 0.010411262512207031, "step": 909} +{"info/global_step": 910, "train_info/time_within_train_step": 24.867243766784668, "step": 910} +{"train_info/time_between_train_steps": 0.005149126052856445, "step": 910} +{"info/global_step": 911, "train_info/time_within_train_step": 24.72804880142212, "step": 911} +{"train_info/time_between_train_steps": 0.005239009857177734, "step": 911} +{"info/global_step": 912, "train_info/time_within_train_step": 24.847641706466675, "step": 912} +{"train_info/time_between_train_steps": 0.0052471160888671875, "step": 912} +{"info/global_step": 913, "train_info/time_within_train_step": 24.731788158416748, "step": 913} +{"train_info/time_between_train_steps": 0.0053179264068603516, "step": 913} +{"info/global_step": 914, "train_info/time_within_train_step": 25.055099964141846, "step": 914} +{"train_info/time_between_train_steps": 0.00513005256652832, "step": 914} +{"info/global_step": 915, "train_info/time_within_train_step": 24.73395538330078, "step": 915} +{"train_info/time_between_train_steps": 0.0052411556243896484, "step": 915} +{"info/global_step": 916, "train_info/time_within_train_step": 24.818752765655518, "step": 916} +{"train_info/time_between_train_steps": 0.005338907241821289, "step": 916} +{"info/global_step": 917, "train_info/time_within_train_step": 24.795511484146118, "step": 917} +{"train_info/time_between_train_steps": 0.034506797790527344, "step": 917} +{"info/global_step": 918, "train_info/time_within_train_step": 24.72104287147522, "step": 918} +{"train_info/time_between_train_steps": 0.005178689956665039, "step": 918} +{"info/global_step": 919, "train_info/time_within_train_step": 24.838518857955933, "step": 919} +{"train_info/time_between_train_steps": 0.005065202713012695, "step": 919} +{"info/global_step": 920, "train_info/time_within_train_step": 24.715959072113037, "step": 920} +{"train_info/time_between_train_steps": 0.009808778762817383, "step": 920} +{"info/global_step": 921, "train_info/time_within_train_step": 24.72584342956543, "step": 921} +{"train_info/time_between_train_steps": 0.009991645812988281, "step": 921} +{"info/global_step": 922, "train_info/time_within_train_step": 24.781486749649048, "step": 922} +{"train_info/time_between_train_steps": 0.0049190521240234375, "step": 922} +{"info/global_step": 923, "train_info/time_within_train_step": 24.742987394332886, "step": 923} +{"train_info/time_between_train_steps": 0.005079507827758789, "step": 923} +{"info/global_step": 924, "train_info/time_within_train_step": 24.890914916992188, "step": 924} +{"train_info/time_between_train_steps": 0.005057811737060547, "step": 924} +{"info/global_step": 925, "train_info/time_within_train_step": 24.737207412719727, "step": 925} +{"train_info/time_between_train_steps": 0.005135536193847656, "step": 925} +{"info/global_step": 926, "train_info/time_within_train_step": 24.725714445114136, "step": 926} +{"train_info/time_between_train_steps": 0.005079030990600586, "step": 926} +{"info/global_step": 927, "train_info/time_within_train_step": 24.724748373031616, "step": 927} +{"train_info/time_between_train_steps": 0.00717473030090332, "step": 927} +{"info/global_step": 928, "train_info/time_within_train_step": 24.74877119064331, "step": 928} +{"train_info/time_between_train_steps": 0.00502467155456543, "step": 928} +{"info/global_step": 929, "train_info/time_within_train_step": 24.800381422042847, "step": 929} +{"train_info/time_between_train_steps": 0.005184650421142578, "step": 929} +{"info/global_step": 930, "train_info/time_within_train_step": 24.739046335220337, "step": 930} +{"train_info/time_between_train_steps": 0.005092620849609375, "step": 930} +{"info/global_step": 931, "train_info/time_within_train_step": 24.763466119766235, "step": 931} +{"train_info/time_between_train_steps": 0.009841203689575195, "step": 931} +{"info/global_step": 932, "train_info/time_within_train_step": 24.722625970840454, "step": 932} +{"train_info/time_between_train_steps": 0.005097627639770508, "step": 932} +{"info/global_step": 933, "train_info/time_within_train_step": 24.75141429901123, "step": 933} +{"train_info/time_between_train_steps": 0.006185054779052734, "step": 933} +{"info/global_step": 934, "train_info/time_within_train_step": 24.80317258834839, "step": 934} +{"train_info/time_between_train_steps": 0.0054607391357421875, "step": 934} +{"info/global_step": 935, "train_info/time_within_train_step": 24.747299671173096, "step": 935} +{"train_info/time_between_train_steps": 0.005408048629760742, "step": 935} +{"info/global_step": 936, "train_info/time_within_train_step": 24.827457189559937, "step": 936} +{"train_info/time_between_train_steps": 0.005592823028564453, "step": 936} +{"train_info/time_between_train_steps": 17.005953073501587, "step": 936} +{"info/global_step": 937, "train_info/time_within_train_step": 24.733644008636475, "step": 937} +{"train_info/time_between_train_steps": 0.004965543746948242, "step": 937} +{"info/global_step": 938, "train_info/time_within_train_step": 24.889525413513184, "step": 938} +{"train_info/time_between_train_steps": 0.004881381988525391, "step": 938} +{"info/global_step": 939, "train_info/time_within_train_step": 24.803688049316406, "step": 939} +{"train_info/time_between_train_steps": 0.007166862487792969, "step": 939} +{"info/global_step": 940, "train_info/time_within_train_step": 24.861055850982666, "step": 940} +{"train_info/time_between_train_steps": 0.006934404373168945, "step": 940} +{"info/global_step": 941, "train_info/time_within_train_step": 24.73171615600586, "step": 941} +{"train_info/time_between_train_steps": 0.0051767826080322266, "step": 941} +{"info/global_step": 942, "train_info/time_within_train_step": 24.86107325553894, "step": 942} +{"train_info/time_between_train_steps": 0.005182743072509766, "step": 942} +{"info/global_step": 943, "train_info/time_within_train_step": 24.74136447906494, "step": 943} +{"train_info/time_between_train_steps": 0.0052073001861572266, "step": 943} +{"info/global_step": 944, "train_info/time_within_train_step": 24.88098120689392, "step": 944} +{"train_info/time_between_train_steps": 0.005192756652832031, "step": 944} +{"info/global_step": 945, "train_info/time_within_train_step": 24.771416902542114, "step": 945} +{"train_info/time_between_train_steps": 0.007611751556396484, "step": 945} +{"info/global_step": 946, "train_info/time_within_train_step": 24.931239366531372, "step": 946} +{"train_info/time_between_train_steps": 0.00698089599609375, "step": 946} +{"info/global_step": 947, "train_info/time_within_train_step": 24.78632950782776, "step": 947} +{"train_info/time_between_train_steps": 0.0060617923736572266, "step": 947} +{"info/global_step": 948, "train_info/time_within_train_step": 24.947481870651245, "step": 948} +{"train_info/time_between_train_steps": 0.005712747573852539, "step": 948} +{"info/global_step": 949, "train_info/time_within_train_step": 24.762252807617188, "step": 949} +{"train_info/time_between_train_steps": 0.005703926086425781, "step": 949} +{"info/global_step": 950, "train_info/time_within_train_step": 24.9210524559021, "step": 950} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740947291, "_runtime": 24798}, "step": 950} +{"logs": {"train/loss": 2.7668, "train/learning_rate": 0.0001388888888888889, "train/epoch": 26.01, "_timestamp": 1740947291, "_runtime": 24798}, "step": 950} +{"train_info/time_between_train_steps": 0.026584386825561523, "step": 950} +{"info/global_step": 951, "train_info/time_within_train_step": 25.027016639709473, "step": 951} +{"train_info/time_between_train_steps": 0.005583763122558594, "step": 951} +{"info/global_step": 952, "train_info/time_within_train_step": 24.900261402130127, "step": 952} +{"train_info/time_between_train_steps": 0.005551815032958984, "step": 952} +{"info/global_step": 953, "train_info/time_within_train_step": 24.79064965248108, "step": 953} +{"train_info/time_between_train_steps": 0.03637242317199707, "step": 953} +{"info/global_step": 954, "train_info/time_within_train_step": 24.732772827148438, "step": 954} +{"train_info/time_between_train_steps": 0.0052149295806884766, "step": 954} +{"info/global_step": 955, "train_info/time_within_train_step": 24.884397506713867, "step": 955} +{"train_info/time_between_train_steps": 0.005143404006958008, "step": 955} +{"info/global_step": 956, "train_info/time_within_train_step": 24.722623586654663, "step": 956} +{"train_info/time_between_train_steps": 0.005228281021118164, "step": 956} +{"info/global_step": 957, "train_info/time_within_train_step": 24.751250982284546, "step": 957} +{"train_info/time_between_train_steps": 0.0050623416900634766, "step": 957} +{"info/global_step": 958, "train_info/time_within_train_step": 24.730473279953003, "step": 958} +{"train_info/time_between_train_steps": 0.005433082580566406, "step": 958} +{"info/global_step": 959, "train_info/time_within_train_step": 24.726187467575073, "step": 959} +{"train_info/time_between_train_steps": 0.005150318145751953, "step": 959} +{"info/global_step": 960, "train_info/time_within_train_step": 24.777963161468506, "step": 960} +{"train_info/time_between_train_steps": 0.0073773860931396484, "step": 960} +{"info/global_step": 961, "train_info/time_within_train_step": 24.774635314941406, "step": 961} +{"train_info/time_between_train_steps": 0.005567789077758789, "step": 961} +{"info/global_step": 962, "train_info/time_within_train_step": 24.78640365600586, "step": 962} +{"train_info/time_between_train_steps": 0.005171298980712891, "step": 962} +{"info/global_step": 963, "train_info/time_within_train_step": 24.74200749397278, "step": 963} +{"train_info/time_between_train_steps": 0.0053250789642333984, "step": 963} +{"info/global_step": 964, "train_info/time_within_train_step": 24.7366623878479, "step": 964} +{"train_info/time_between_train_steps": 0.005658626556396484, "step": 964} +{"info/global_step": 965, "train_info/time_within_train_step": 24.81716299057007, "step": 965} +{"train_info/time_between_train_steps": 0.00516510009765625, "step": 965} +{"info/global_step": 966, "train_info/time_within_train_step": 24.730947017669678, "step": 966} +{"train_info/time_between_train_steps": 0.005086421966552734, "step": 966} +{"info/global_step": 967, "train_info/time_within_train_step": 24.792820692062378, "step": 967} +{"train_info/time_between_train_steps": 0.005148887634277344, "step": 967} +{"info/global_step": 968, "train_info/time_within_train_step": 24.742976665496826, "step": 968} +{"train_info/time_between_train_steps": 0.0051708221435546875, "step": 968} +{"info/global_step": 969, "train_info/time_within_train_step": 24.77263879776001, "step": 969} +{"train_info/time_between_train_steps": 0.005247831344604492, "step": 969} +{"info/global_step": 970, "train_info/time_within_train_step": 24.767253398895264, "step": 970} +{"train_info/time_between_train_steps": 0.0053310394287109375, "step": 970} +{"info/global_step": 971, "train_info/time_within_train_step": 24.842509269714355, "step": 971} +{"train_info/time_between_train_steps": 0.005804300308227539, "step": 971} +{"info/global_step": 972, "train_info/time_within_train_step": 24.78480625152588, "step": 972} +{"train_info/time_between_train_steps": 0.00684356689453125, "step": 972} +{"train_info/time_between_train_steps": 16.658419370651245, "step": 972} +{"info/global_step": 973, "train_info/time_within_train_step": 24.736592769622803, "step": 973} +{"train_info/time_between_train_steps": 0.004993915557861328, "step": 973} +{"info/global_step": 974, "train_info/time_within_train_step": 24.936976432800293, "step": 974} +{"train_info/time_between_train_steps": 0.005048990249633789, "step": 974} +{"info/global_step": 975, "train_info/time_within_train_step": 24.74755835533142, "step": 975} +{"train_info/time_between_train_steps": 0.005330085754394531, "step": 975} +{"info/global_step": 976, "train_info/time_within_train_step": 24.951093196868896, "step": 976} +{"train_info/time_between_train_steps": 0.005285501480102539, "step": 976} +{"info/global_step": 977, "train_info/time_within_train_step": 24.74364948272705, "step": 977} +{"train_info/time_between_train_steps": 0.005165576934814453, "step": 977} +{"info/global_step": 978, "train_info/time_within_train_step": 24.884814977645874, "step": 978} +{"train_info/time_between_train_steps": 0.005117654800415039, "step": 978} +{"info/global_step": 979, "train_info/time_within_train_step": 24.7508647441864, "step": 979} +{"train_info/time_between_train_steps": 0.00544428825378418, "step": 979} +{"info/global_step": 980, "train_info/time_within_train_step": 24.903379917144775, "step": 980} +{"train_info/time_between_train_steps": 0.005352973937988281, "step": 980} +{"info/global_step": 981, "train_info/time_within_train_step": 24.752954244613647, "step": 981} +{"train_info/time_between_train_steps": 0.005171298980712891, "step": 981} +{"info/global_step": 982, "train_info/time_within_train_step": 24.85251522064209, "step": 982} +{"train_info/time_between_train_steps": 0.007578611373901367, "step": 982} +{"info/global_step": 983, "train_info/time_within_train_step": 24.807466745376587, "step": 983} +{"train_info/time_between_train_steps": 0.007264614105224609, "step": 983} +{"info/global_step": 984, "train_info/time_within_train_step": 24.900026082992554, "step": 984} +{"train_info/time_between_train_steps": 0.0077021121978759766, "step": 984} +{"info/global_step": 985, "train_info/time_within_train_step": 24.763143062591553, "step": 985} +{"train_info/time_between_train_steps": 0.0071201324462890625, "step": 985} +{"info/global_step": 986, "train_info/time_within_train_step": 24.942052364349365, "step": 986} +{"train_info/time_between_train_steps": 0.0066509246826171875, "step": 986} +{"info/global_step": 987, "train_info/time_within_train_step": 24.73986506462097, "step": 987} +{"train_info/time_between_train_steps": 0.0051724910736083984, "step": 987} +{"info/global_step": 988, "train_info/time_within_train_step": 24.86194086074829, "step": 988} +{"train_info/time_between_train_steps": 0.0055005550384521484, "step": 988} +{"info/global_step": 989, "train_info/time_within_train_step": 24.753242254257202, "step": 989} +{"train_info/time_between_train_steps": 0.023909807205200195, "step": 989} +{"info/global_step": 990, "train_info/time_within_train_step": 24.720273733139038, "step": 990} +{"train_info/time_between_train_steps": 0.0051116943359375, "step": 990} +{"info/global_step": 991, "train_info/time_within_train_step": 24.720234155654907, "step": 991} +{"train_info/time_between_train_steps": 0.004969596862792969, "step": 991} +{"info/global_step": 992, "train_info/time_within_train_step": 24.729439973831177, "step": 992} +{"train_info/time_between_train_steps": 0.005132198333740234, "step": 992} +{"info/global_step": 993, "train_info/time_within_train_step": 27.397562980651855, "step": 993} +{"train_info/time_between_train_steps": 0.005257844924926758, "step": 993} +{"info/global_step": 994, "train_info/time_within_train_step": 25.357579469680786, "step": 994} +{"train_info/time_between_train_steps": 0.005422830581665039, "step": 994} +{"info/global_step": 995, "train_info/time_within_train_step": 24.81083083152771, "step": 995} +{"train_info/time_between_train_steps": 0.005152702331542969, "step": 995} +{"info/global_step": 996, "train_info/time_within_train_step": 24.739696741104126, "step": 996} +{"train_info/time_between_train_steps": 0.005374431610107422, "step": 996} +{"info/global_step": 997, "train_info/time_within_train_step": 24.729434728622437, "step": 997} +{"train_info/time_between_train_steps": 0.005099296569824219, "step": 997} +{"info/global_step": 998, "train_info/time_within_train_step": 24.726064205169678, "step": 998} +{"train_info/time_between_train_steps": 0.005129337310791016, "step": 998} +{"info/global_step": 999, "train_info/time_within_train_step": 24.744513988494873, "step": 999} +{"train_info/time_between_train_steps": 0.005158424377441406, "step": 999} +{"info/global_step": 1000, "train_info/time_within_train_step": 24.726770639419556, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740948554, "_runtime": 26061}, "step": 1000} +{"logs": {"train/loss": 2.7047, "train/learning_rate": 0.00011111111111111109, "train/epoch": 27.02, "_timestamp": 1740948554, "_runtime": 26061}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740948557, "_runtime": 26064}, "step": 1000} +{"logs": {"eval/loss": 3.4166903495788574, "eval/runtime": 2.4384, "eval/samples_per_second": 54.135, "eval/steps_per_second": 3.691, "train/epoch": 27.02, "_timestamp": 1740948557, "_runtime": 26064}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740948557, "_runtime": 26064}, "step": 1000} +{"logs": {"eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 3.4166903495788574, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 30.468408184611675, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 2.4384, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 54.135, "train/epoch": 27.02, "_timestamp": 1740948557, "_runtime": 26064}, "step": 1000} +{"train_info/time_between_train_steps": 101.14780879020691, "step": 1000} +{"info/global_step": 1001, "train_info/time_within_train_step": 27.255329847335815, "step": 1001} +{"train_info/time_between_train_steps": 0.004730939865112305, "step": 1001} +{"info/global_step": 1002, "train_info/time_within_train_step": 24.706881284713745, "step": 1002} +{"train_info/time_between_train_steps": 0.004758358001708984, "step": 1002} +{"info/global_step": 1003, "train_info/time_within_train_step": 24.82241439819336, "step": 1003} +{"train_info/time_between_train_steps": 0.013077259063720703, "step": 1003} +{"info/global_step": 1004, "train_info/time_within_train_step": 24.73152804374695, "step": 1004} +{"train_info/time_between_train_steps": 0.005003690719604492, "step": 1004} +{"info/global_step": 1005, "train_info/time_within_train_step": 24.740082263946533, "step": 1005} +{"train_info/time_between_train_steps": 0.0052683353424072266, "step": 1005} +{"info/global_step": 1006, "train_info/time_within_train_step": 24.79824447631836, "step": 1006} +{"train_info/time_between_train_steps": 0.0056383609771728516, "step": 1006} +{"info/global_step": 1007, "train_info/time_within_train_step": 24.740225315093994, "step": 1007} +{"train_info/time_between_train_steps": 0.005405902862548828, "step": 1007} +{"info/global_step": 1008, "train_info/time_within_train_step": 24.811511039733887, "step": 1008} +{"train_info/time_between_train_steps": 0.008718252182006836, "step": 1008} +{"train_info/time_between_train_steps": 16.721538543701172, "step": 1008} +{"info/global_step": 1009, "train_info/time_within_train_step": 24.742095232009888, "step": 1009} +{"train_info/time_between_train_steps": 0.005286216735839844, "step": 1009} +{"info/global_step": 1010, "train_info/time_within_train_step": 24.94376802444458, "step": 1010} +{"train_info/time_between_train_steps": 0.00522923469543457, "step": 1010} +{"info/global_step": 1011, "train_info/time_within_train_step": 24.725017070770264, "step": 1011} +{"train_info/time_between_train_steps": 0.005194425582885742, "step": 1011} +{"info/global_step": 1012, "train_info/time_within_train_step": 24.94355082511902, "step": 1012} +{"train_info/time_between_train_steps": 0.0060732364654541016, "step": 1012} +{"info/global_step": 1013, "train_info/time_within_train_step": 24.76581382751465, "step": 1013} +{"train_info/time_between_train_steps": 0.005650758743286133, "step": 1013} +{"info/global_step": 1014, "train_info/time_within_train_step": 24.94909644126892, "step": 1014} +{"train_info/time_between_train_steps": 0.005475044250488281, "step": 1014} +{"info/global_step": 1015, "train_info/time_within_train_step": 24.92017388343811, "step": 1015} +{"train_info/time_between_train_steps": 0.005632877349853516, "step": 1015} +{"info/global_step": 1016, "train_info/time_within_train_step": 25.02725648880005, "step": 1016} +{"train_info/time_between_train_steps": 0.005246877670288086, "step": 1016} +{"info/global_step": 1017, "train_info/time_within_train_step": 24.817837953567505, "step": 1017} +{"train_info/time_between_train_steps": 0.005667448043823242, "step": 1017} +{"info/global_step": 1018, "train_info/time_within_train_step": 24.90045189857483, "step": 1018} +{"train_info/time_between_train_steps": 0.0052204132080078125, "step": 1018} +{"info/global_step": 1019, "train_info/time_within_train_step": 24.80199694633484, "step": 1019} +{"train_info/time_between_train_steps": 0.005700588226318359, "step": 1019} +{"info/global_step": 1020, "train_info/time_within_train_step": 24.881876707077026, "step": 1020} +{"train_info/time_between_train_steps": 0.0054285526275634766, "step": 1020} +{"info/global_step": 1021, "train_info/time_within_train_step": 24.755723476409912, "step": 1021} +{"train_info/time_between_train_steps": 0.005525827407836914, "step": 1021} +{"info/global_step": 1022, "train_info/time_within_train_step": 24.934183597564697, "step": 1022} +{"train_info/time_between_train_steps": 0.0052378177642822266, "step": 1022} +{"info/global_step": 1023, "train_info/time_within_train_step": 24.738633155822754, "step": 1023} +{"train_info/time_between_train_steps": 0.005411386489868164, "step": 1023} +{"info/global_step": 1024, "train_info/time_within_train_step": 24.901155471801758, "step": 1024} +{"train_info/time_between_train_steps": 0.0055925846099853516, "step": 1024} +{"info/global_step": 1025, "train_info/time_within_train_step": 24.745294094085693, "step": 1025} +{"train_info/time_between_train_steps": 0.03446197509765625, "step": 1025} +{"info/global_step": 1026, "train_info/time_within_train_step": 24.719680070877075, "step": 1026} +{"train_info/time_between_train_steps": 0.005094766616821289, "step": 1026} +{"info/global_step": 1027, "train_info/time_within_train_step": 24.8281090259552, "step": 1027} +{"train_info/time_between_train_steps": 0.004914999008178711, "step": 1027} +{"info/global_step": 1028, "train_info/time_within_train_step": 24.732105255126953, "step": 1028} +{"train_info/time_between_train_steps": 0.0051670074462890625, "step": 1028} +{"info/global_step": 1029, "train_info/time_within_train_step": 24.78933835029602, "step": 1029} +{"train_info/time_between_train_steps": 0.004989147186279297, "step": 1029} +{"info/global_step": 1030, "train_info/time_within_train_step": 24.737333059310913, "step": 1030} +{"train_info/time_between_train_steps": 0.005146503448486328, "step": 1030} +{"info/global_step": 1031, "train_info/time_within_train_step": 24.750688791275024, "step": 1031} +{"train_info/time_between_train_steps": 0.004988431930541992, "step": 1031} +{"info/global_step": 1032, "train_info/time_within_train_step": 24.83981490135193, "step": 1032} +{"train_info/time_between_train_steps": 0.005133390426635742, "step": 1032} +{"info/global_step": 1033, "train_info/time_within_train_step": 24.73210883140564, "step": 1033} +{"train_info/time_between_train_steps": 0.010002374649047852, "step": 1033} +{"info/global_step": 1034, "train_info/time_within_train_step": 24.73606777191162, "step": 1034} +{"train_info/time_between_train_steps": 0.0050508975982666016, "step": 1034} +{"info/global_step": 1035, "train_info/time_within_train_step": 24.731885194778442, "step": 1035} +{"train_info/time_between_train_steps": 0.005132913589477539, "step": 1035} +{"info/global_step": 1036, "train_info/time_within_train_step": 24.803346633911133, "step": 1036} +{"train_info/time_between_train_steps": 0.005136251449584961, "step": 1036} +{"info/global_step": 1037, "train_info/time_within_train_step": 24.73169183731079, "step": 1037} +{"train_info/time_between_train_steps": 0.005028486251831055, "step": 1037} +{"info/global_step": 1038, "train_info/time_within_train_step": 24.727409601211548, "step": 1038} +{"train_info/time_between_train_steps": 0.005088329315185547, "step": 1038} +{"info/global_step": 1039, "train_info/time_within_train_step": 24.806374549865723, "step": 1039} +{"train_info/time_between_train_steps": 0.005164384841918945, "step": 1039} +{"info/global_step": 1040, "train_info/time_within_train_step": 24.73658514022827, "step": 1040} +{"train_info/time_between_train_steps": 0.005080461502075195, "step": 1040} +{"info/global_step": 1041, "train_info/time_within_train_step": 24.79204750061035, "step": 1041} +{"train_info/time_between_train_steps": 0.005132913589477539, "step": 1041} +{"info/global_step": 1042, "train_info/time_within_train_step": 24.743162631988525, "step": 1042} +{"train_info/time_between_train_steps": 0.0055811405181884766, "step": 1042} +{"info/global_step": 1043, "train_info/time_within_train_step": 24.744930744171143, "step": 1043} +{"train_info/time_between_train_steps": 0.0053594112396240234, "step": 1043} +{"info/global_step": 1044, "train_info/time_within_train_step": 24.817677974700928, "step": 1044} +{"train_info/time_between_train_steps": 0.005497932434082031, "step": 1044} +{"train_info/time_between_train_steps": 16.83585214614868, "step": 1044} +{"info/global_step": 1045, "train_info/time_within_train_step": 24.72727394104004, "step": 1045} +{"train_info/time_between_train_steps": 0.005238771438598633, "step": 1045} +{"info/global_step": 1046, "train_info/time_within_train_step": 24.864168882369995, "step": 1046} +{"train_info/time_between_train_steps": 0.005153656005859375, "step": 1046} +{"info/global_step": 1047, "train_info/time_within_train_step": 24.825154304504395, "step": 1047} +{"train_info/time_between_train_steps": 0.007601022720336914, "step": 1047} +{"info/global_step": 1048, "train_info/time_within_train_step": 24.844273328781128, "step": 1048} +{"train_info/time_between_train_steps": 0.00510716438293457, "step": 1048} +{"info/global_step": 1049, "train_info/time_within_train_step": 24.738789558410645, "step": 1049} +{"train_info/time_between_train_steps": 0.005163908004760742, "step": 1049} +{"info/global_step": 1050, "train_info/time_within_train_step": 24.868868350982666, "step": 1050} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740949933, "_runtime": 27440}, "step": 1050} +{"logs": {"train/loss": 2.7036, "train/learning_rate": 8.333333333333333e-05, "train/epoch": 29.0, "_timestamp": 1740949933, "_runtime": 27440}, "step": 1050} +{"train_info/time_between_train_steps": 0.02605295181274414, "step": 1050} +{"info/global_step": 1051, "train_info/time_within_train_step": 24.755370378494263, "step": 1051} +{"train_info/time_between_train_steps": 0.005435466766357422, "step": 1051} +{"info/global_step": 1052, "train_info/time_within_train_step": 24.951040983200073, "step": 1052} +{"train_info/time_between_train_steps": 0.005147457122802734, "step": 1052} +{"info/global_step": 1053, "train_info/time_within_train_step": 24.800978660583496, "step": 1053} +{"train_info/time_between_train_steps": 0.00499272346496582, "step": 1053} +{"info/global_step": 1054, "train_info/time_within_train_step": 24.85166883468628, "step": 1054} +{"train_info/time_between_train_steps": 0.0050373077392578125, "step": 1054} +{"info/global_step": 1055, "train_info/time_within_train_step": 24.790396690368652, "step": 1055} +{"train_info/time_between_train_steps": 0.0054051876068115234, "step": 1055} +{"info/global_step": 1056, "train_info/time_within_train_step": 24.883987426757812, "step": 1056} +{"train_info/time_between_train_steps": 0.005410194396972656, "step": 1056} +{"info/global_step": 1057, "train_info/time_within_train_step": 24.735309600830078, "step": 1057} +{"train_info/time_between_train_steps": 0.008376836776733398, "step": 1057} +{"info/global_step": 1058, "train_info/time_within_train_step": 24.85235023498535, "step": 1058} +{"train_info/time_between_train_steps": 0.007363557815551758, "step": 1058} +{"info/global_step": 1059, "train_info/time_within_train_step": 24.73483371734619, "step": 1059} +{"train_info/time_between_train_steps": 0.005232572555541992, "step": 1059} +{"info/global_step": 1060, "train_info/time_within_train_step": 25.075554609298706, "step": 1060} +{"train_info/time_between_train_steps": 0.005297660827636719, "step": 1060} +{"info/global_step": 1061, "train_info/time_within_train_step": 24.752887725830078, "step": 1061} +{"train_info/time_between_train_steps": 0.022466659545898438, "step": 1061} +{"info/global_step": 1062, "train_info/time_within_train_step": 24.716303825378418, "step": 1062} +{"train_info/time_between_train_steps": 0.004878520965576172, "step": 1062} +{"info/global_step": 1063, "train_info/time_within_train_step": 24.80596423149109, "step": 1063} +{"train_info/time_between_train_steps": 0.00490570068359375, "step": 1063} +{"info/global_step": 1064, "train_info/time_within_train_step": 24.718267679214478, "step": 1064} +{"train_info/time_between_train_steps": 0.005071163177490234, "step": 1064} +{"info/global_step": 1065, "train_info/time_within_train_step": 24.727079153060913, "step": 1065} +{"train_info/time_between_train_steps": 0.005254507064819336, "step": 1065} +{"info/global_step": 1066, "train_info/time_within_train_step": 24.72272515296936, "step": 1066} +{"train_info/time_between_train_steps": 0.005028247833251953, "step": 1066} +{"info/global_step": 1067, "train_info/time_within_train_step": 24.78213381767273, "step": 1067} +{"train_info/time_between_train_steps": 0.005095720291137695, "step": 1067} +{"info/global_step": 1068, "train_info/time_within_train_step": 24.751553535461426, "step": 1068} +{"train_info/time_between_train_steps": 0.005304098129272461, "step": 1068} +{"info/global_step": 1069, "train_info/time_within_train_step": 24.74977207183838, "step": 1069} +{"train_info/time_between_train_steps": 0.01009678840637207, "step": 1069} +{"info/global_step": 1070, "train_info/time_within_train_step": 24.7832612991333, "step": 1070} +{"train_info/time_between_train_steps": 0.009869575500488281, "step": 1070} +{"info/global_step": 1071, "train_info/time_within_train_step": 24.74656915664673, "step": 1071} +{"train_info/time_between_train_steps": 0.0053558349609375, "step": 1071} +{"info/global_step": 1072, "train_info/time_within_train_step": 24.822992086410522, "step": 1072} +{"train_info/time_between_train_steps": 0.0051174163818359375, "step": 1072} +{"info/global_step": 1073, "train_info/time_within_train_step": 24.7442147731781, "step": 1073} +{"train_info/time_between_train_steps": 0.005152463912963867, "step": 1073} +{"info/global_step": 1074, "train_info/time_within_train_step": 24.79876732826233, "step": 1074} +{"train_info/time_between_train_steps": 0.005239725112915039, "step": 1074} +{"info/global_step": 1075, "train_info/time_within_train_step": 24.73299241065979, "step": 1075} +{"train_info/time_between_train_steps": 0.00516057014465332, "step": 1075} +{"info/global_step": 1076, "train_info/time_within_train_step": 24.73585557937622, "step": 1076} +{"train_info/time_between_train_steps": 0.005219221115112305, "step": 1076} +{"info/global_step": 1077, "train_info/time_within_train_step": 24.818617343902588, "step": 1077} +{"train_info/time_between_train_steps": 0.008814573287963867, "step": 1077} +{"info/global_step": 1078, "train_info/time_within_train_step": 24.973636388778687, "step": 1078} +{"train_info/time_between_train_steps": 0.007761955261230469, "step": 1078} +{"info/global_step": 1079, "train_info/time_within_train_step": 24.955870151519775, "step": 1079} +{"train_info/time_between_train_steps": 0.0065670013427734375, "step": 1079} +{"info/global_step": 1080, "train_info/time_within_train_step": 24.937692165374756, "step": 1080} +{"train_info/time_between_train_steps": 0.007382869720458984, "step": 1080} +{"train_info/time_between_train_steps": 17.000741958618164, "step": 1080} +{"info/global_step": 1081, "train_info/time_within_train_step": 24.794273138046265, "step": 1081} +{"train_info/time_between_train_steps": 0.005524396896362305, "step": 1081} +{"info/global_step": 1082, "train_info/time_within_train_step": 24.900456428527832, "step": 1082} +{"train_info/time_between_train_steps": 0.0055501461029052734, "step": 1082} +{"info/global_step": 1083, "train_info/time_within_train_step": 24.80485963821411, "step": 1083} +{"train_info/time_between_train_steps": 0.005203723907470703, "step": 1083} +{"info/global_step": 1084, "train_info/time_within_train_step": 24.899847745895386, "step": 1084} +{"train_info/time_between_train_steps": 0.005346775054931641, "step": 1084} +{"info/global_step": 1085, "train_info/time_within_train_step": 24.76861023902893, "step": 1085} +{"train_info/time_between_train_steps": 0.0060787200927734375, "step": 1085} +{"info/global_step": 1086, "train_info/time_within_train_step": 25.094114065170288, "step": 1086} +{"train_info/time_between_train_steps": 0.009837865829467773, "step": 1086} +{"info/global_step": 1087, "train_info/time_within_train_step": 24.860446453094482, "step": 1087} +{"train_info/time_between_train_steps": 0.005932807922363281, "step": 1087} +{"info/global_step": 1088, "train_info/time_within_train_step": 25.09354567527771, "step": 1088} +{"train_info/time_between_train_steps": 0.00545191764831543, "step": 1088} +{"info/global_step": 1089, "train_info/time_within_train_step": 24.85569977760315, "step": 1089} +{"train_info/time_between_train_steps": 0.006889820098876953, "step": 1089} +{"info/global_step": 1090, "train_info/time_within_train_step": 25.271493434906006, "step": 1090} +{"train_info/time_between_train_steps": 0.0057528018951416016, "step": 1090} +{"info/global_step": 1091, "train_info/time_within_train_step": 24.853020191192627, "step": 1091} +{"train_info/time_between_train_steps": 0.006365299224853516, "step": 1091} +{"info/global_step": 1092, "train_info/time_within_train_step": 25.27458357810974, "step": 1092} +{"train_info/time_between_train_steps": 0.007339954376220703, "step": 1092} +{"info/global_step": 1093, "train_info/time_within_train_step": 25.072360515594482, "step": 1093} +{"train_info/time_between_train_steps": 0.007135868072509766, "step": 1093} +{"info/global_step": 1094, "train_info/time_within_train_step": 25.09893536567688, "step": 1094} +{"train_info/time_between_train_steps": 0.006151914596557617, "step": 1094} +{"info/global_step": 1095, "train_info/time_within_train_step": 24.830108880996704, "step": 1095} +{"train_info/time_between_train_steps": 0.006175041198730469, "step": 1095} +{"info/global_step": 1096, "train_info/time_within_train_step": 24.942568063735962, "step": 1096} +{"train_info/time_between_train_steps": 0.0060727596282958984, "step": 1096} +{"info/global_step": 1097, "train_info/time_within_train_step": 24.788765907287598, "step": 1097} +{"train_info/time_between_train_steps": 0.04588675498962402, "step": 1097} +{"info/global_step": 1098, "train_info/time_within_train_step": 24.75350785255432, "step": 1098} +{"train_info/time_between_train_steps": 0.009998559951782227, "step": 1098} +{"info/global_step": 1099, "train_info/time_within_train_step": 24.743610382080078, "step": 1099} +{"train_info/time_between_train_steps": 0.005445241928100586, "step": 1099} +{"info/global_step": 1100, "train_info/time_within_train_step": 24.746984243392944, "step": 1100} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740951195, "_runtime": 28702}, "step": 1100} +{"logs": {"train/loss": 2.6476, "train/learning_rate": 5.5555555555555545e-05, "train/epoch": 30.02, "_timestamp": 1740951195, "_runtime": 28702}, "step": 1100} +{"train_info/time_between_train_steps": 52.84689402580261, "step": 1100} +{"info/global_step": 1101, "train_info/time_within_train_step": 25.405890703201294, "step": 1101} +{"train_info/time_between_train_steps": 0.0051975250244140625, "step": 1101} +{"info/global_step": 1102, "train_info/time_within_train_step": 24.72581386566162, "step": 1102} +{"train_info/time_between_train_steps": 0.0051686763763427734, "step": 1102} +{"info/global_step": 1103, "train_info/time_within_train_step": 24.75553035736084, "step": 1103} +{"train_info/time_between_train_steps": 0.005241870880126953, "step": 1103} +{"info/global_step": 1104, "train_info/time_within_train_step": 24.72702169418335, "step": 1104} +{"train_info/time_between_train_steps": 0.005461931228637695, "step": 1104} +{"info/global_step": 1105, "train_info/time_within_train_step": 24.825862169265747, "step": 1105} +{"train_info/time_between_train_steps": 0.006578207015991211, "step": 1105} +{"info/global_step": 1106, "train_info/time_within_train_step": 24.8732693195343, "step": 1106} +{"train_info/time_between_train_steps": 0.006607532501220703, "step": 1106} +{"info/global_step": 1107, "train_info/time_within_train_step": 24.863041639328003, "step": 1107} +{"train_info/time_between_train_steps": 0.005549430847167969, "step": 1107} +{"info/global_step": 1108, "train_info/time_within_train_step": 24.81750726699829, "step": 1108} +{"train_info/time_between_train_steps": 0.005718231201171875, "step": 1108} +{"info/global_step": 1109, "train_info/time_within_train_step": 24.917223691940308, "step": 1109} +{"train_info/time_between_train_steps": 0.005604982376098633, "step": 1109} +{"info/global_step": 1110, "train_info/time_within_train_step": 24.802526712417603, "step": 1110} +{"train_info/time_between_train_steps": 0.005530834197998047, "step": 1110} +{"info/global_step": 1111, "train_info/time_within_train_step": 24.757434606552124, "step": 1111} +{"train_info/time_between_train_steps": 0.0053195953369140625, "step": 1111} +{"info/global_step": 1112, "train_info/time_within_train_step": 24.758091926574707, "step": 1112} +{"train_info/time_between_train_steps": 0.007519245147705078, "step": 1112} +{"info/global_step": 1113, "train_info/time_within_train_step": 24.779457807540894, "step": 1113} +{"train_info/time_between_train_steps": 0.0057065486907958984, "step": 1113} +{"info/global_step": 1114, "train_info/time_within_train_step": 24.78531503677368, "step": 1114} +{"train_info/time_between_train_steps": 0.005497455596923828, "step": 1114} +{"info/global_step": 1115, "train_info/time_within_train_step": 24.833678007125854, "step": 1115} +{"train_info/time_between_train_steps": 0.006385087966918945, "step": 1115} +{"info/global_step": 1116, "train_info/time_within_train_step": 24.78192114830017, "step": 1116} +{"train_info/time_between_train_steps": 0.005908012390136719, "step": 1116} +{"train_info/time_between_train_steps": 16.97223711013794, "step": 1116} +{"info/global_step": 1117, "train_info/time_within_train_step": 24.796616077423096, "step": 1117} +{"train_info/time_between_train_steps": 0.01081395149230957, "step": 1117} +{"info/global_step": 1118, "train_info/time_within_train_step": 24.87309455871582, "step": 1118} +{"train_info/time_between_train_steps": 0.006247997283935547, "step": 1118} +{"info/global_step": 1119, "train_info/time_within_train_step": 24.749893188476562, "step": 1119} +{"train_info/time_between_train_steps": 0.005233287811279297, "step": 1119} +{"info/global_step": 1120, "train_info/time_within_train_step": 24.945504188537598, "step": 1120} +{"train_info/time_between_train_steps": 0.0053403377532958984, "step": 1120} +{"info/global_step": 1121, "train_info/time_within_train_step": 24.75492286682129, "step": 1121} +{"train_info/time_between_train_steps": 0.005370140075683594, "step": 1121} +{"info/global_step": 1122, "train_info/time_within_train_step": 24.852870225906372, "step": 1122} +{"train_info/time_between_train_steps": 0.005181789398193359, "step": 1122} +{"info/global_step": 1123, "train_info/time_within_train_step": 24.743460655212402, "step": 1123} +{"train_info/time_between_train_steps": 0.005416393280029297, "step": 1123} +{"info/global_step": 1124, "train_info/time_within_train_step": 25.065202713012695, "step": 1124} +{"train_info/time_between_train_steps": 0.005343198776245117, "step": 1124} +{"info/global_step": 1125, "train_info/time_within_train_step": 24.76151418685913, "step": 1125} +{"train_info/time_between_train_steps": 0.0060329437255859375, "step": 1125} +{"info/global_step": 1126, "train_info/time_within_train_step": 25.121487379074097, "step": 1126} +{"train_info/time_between_train_steps": 0.005578279495239258, "step": 1126} +{"info/global_step": 1127, "train_info/time_within_train_step": 24.840150117874146, "step": 1127} +{"train_info/time_between_train_steps": 0.006046295166015625, "step": 1127} +{"info/global_step": 1128, "train_info/time_within_train_step": 24.954318046569824, "step": 1128} +{"train_info/time_between_train_steps": 0.005514383316040039, "step": 1128} +{"info/global_step": 1129, "train_info/time_within_train_step": 24.829795122146606, "step": 1129} +{"train_info/time_between_train_steps": 0.005853414535522461, "step": 1129} +{"info/global_step": 1130, "train_info/time_within_train_step": 24.908286809921265, "step": 1130} +{"train_info/time_between_train_steps": 0.005457401275634766, "step": 1130} +{"info/global_step": 1131, "train_info/time_within_train_step": 24.74409055709839, "step": 1131} +{"train_info/time_between_train_steps": 0.005963802337646484, "step": 1131} +{"info/global_step": 1132, "train_info/time_within_train_step": 24.838025093078613, "step": 1132} +{"train_info/time_between_train_steps": 0.0054454803466796875, "step": 1132} +{"info/global_step": 1133, "train_info/time_within_train_step": 24.761815547943115, "step": 1133} +{"train_info/time_between_train_steps": 0.02438831329345703, "step": 1133} +{"info/global_step": 1134, "train_info/time_within_train_step": 24.914780616760254, "step": 1134} +{"train_info/time_between_train_steps": 0.013461589813232422, "step": 1134} +{"info/global_step": 1135, "train_info/time_within_train_step": 25.038390159606934, "step": 1135} +{"train_info/time_between_train_steps": 0.005053997039794922, "step": 1135} +{"info/global_step": 1136, "train_info/time_within_train_step": 24.78746008872986, "step": 1136} +{"train_info/time_between_train_steps": 0.005054950714111328, "step": 1136} +{"info/global_step": 1137, "train_info/time_within_train_step": 24.736932516098022, "step": 1137} +{"train_info/time_between_train_steps": 0.005248069763183594, "step": 1137} +{"info/global_step": 1138, "train_info/time_within_train_step": 24.766254425048828, "step": 1138} +{"train_info/time_between_train_steps": 0.005022525787353516, "step": 1138} +{"info/global_step": 1139, "train_info/time_within_train_step": 24.738269090652466, "step": 1139} +{"train_info/time_between_train_steps": 0.008238077163696289, "step": 1139} +{"info/global_step": 1140, "train_info/time_within_train_step": 24.83242130279541, "step": 1140} +{"train_info/time_between_train_steps": 0.005217552185058594, "step": 1140} +{"info/global_step": 1141, "train_info/time_within_train_step": 24.791878938674927, "step": 1141} +{"train_info/time_between_train_steps": 0.005243062973022461, "step": 1141} +{"info/global_step": 1142, "train_info/time_within_train_step": 24.740020990371704, "step": 1142} +{"train_info/time_between_train_steps": 0.005344867706298828, "step": 1142} +{"info/global_step": 1143, "train_info/time_within_train_step": 24.801006317138672, "step": 1143} +{"train_info/time_between_train_steps": 0.00522303581237793, "step": 1143} +{"info/global_step": 1144, "train_info/time_within_train_step": 24.740166187286377, "step": 1144} +{"train_info/time_between_train_steps": 0.005054950714111328, "step": 1144} +{"info/global_step": 1145, "train_info/time_within_train_step": 24.740043878555298, "step": 1145} +{"train_info/time_between_train_steps": 0.005138397216796875, "step": 1145} +{"info/global_step": 1146, "train_info/time_within_train_step": 24.798989295959473, "step": 1146} +{"train_info/time_between_train_steps": 0.005132198333740234, "step": 1146} +{"info/global_step": 1147, "train_info/time_within_train_step": 24.753013849258423, "step": 1147} +{"train_info/time_between_train_steps": 0.0050776004791259766, "step": 1147} +{"info/global_step": 1148, "train_info/time_within_train_step": 24.800615787506104, "step": 1148} +{"train_info/time_between_train_steps": 0.005125761032104492, "step": 1148} +{"info/global_step": 1149, "train_info/time_within_train_step": 24.736917734146118, "step": 1149} +{"train_info/time_between_train_steps": 0.005199432373046875, "step": 1149} +{"info/global_step": 1150, "train_info/time_within_train_step": 24.757874250411987, "step": 1150} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740952508, "_runtime": 30015}, "step": 1150} +{"logs": {"train/loss": 2.6246, "train/learning_rate": 2.7777777777777772e-05, "train/epoch": 31.03, "_timestamp": 1740952508, "_runtime": 30015}, "step": 1150} +{"train_info/time_between_train_steps": 0.02653956413269043, "step": 1150} +{"info/global_step": 1151, "train_info/time_within_train_step": 24.766806840896606, "step": 1151} +{"train_info/time_between_train_steps": 0.005722761154174805, "step": 1151} +{"info/global_step": 1152, "train_info/time_within_train_step": 24.77488398551941, "step": 1152} +{"train_info/time_between_train_steps": 0.005895853042602539, "step": 1152} +{"train_info/time_between_train_steps": 16.966629028320312, "step": 1152} +{"info/global_step": 1153, "train_info/time_within_train_step": 24.72073197364807, "step": 1153} +{"train_info/time_between_train_steps": 0.00484466552734375, "step": 1153} +{"info/global_step": 1154, "train_info/time_within_train_step": 24.84246039390564, "step": 1154} +{"train_info/time_between_train_steps": 0.004845380783081055, "step": 1154} +{"info/global_step": 1155, "train_info/time_within_train_step": 24.826946258544922, "step": 1155} +{"train_info/time_between_train_steps": 0.0047817230224609375, "step": 1155} +{"info/global_step": 1156, "train_info/time_within_train_step": 24.841227769851685, "step": 1156} +{"train_info/time_between_train_steps": 0.0048389434814453125, "step": 1156} +{"info/global_step": 1157, "train_info/time_within_train_step": 24.785266637802124, "step": 1157} +{"train_info/time_between_train_steps": 0.005056858062744141, "step": 1157} +{"info/global_step": 1158, "train_info/time_within_train_step": 24.874056339263916, "step": 1158} +{"train_info/time_between_train_steps": 0.005271196365356445, "step": 1158} +{"info/global_step": 1159, "train_info/time_within_train_step": 24.74973177909851, "step": 1159} +{"train_info/time_between_train_steps": 0.005217313766479492, "step": 1159} +{"info/global_step": 1160, "train_info/time_within_train_step": 24.914061307907104, "step": 1160} +{"train_info/time_between_train_steps": 0.0052661895751953125, "step": 1160} +{"info/global_step": 1161, "train_info/time_within_train_step": 24.745501279830933, "step": 1161} +{"train_info/time_between_train_steps": 0.005145072937011719, "step": 1161} +{"info/global_step": 1162, "train_info/time_within_train_step": 24.895543575286865, "step": 1162} +{"train_info/time_between_train_steps": 0.004941701889038086, "step": 1162} +{"info/global_step": 1163, "train_info/time_within_train_step": 24.730041027069092, "step": 1163} +{"train_info/time_between_train_steps": 0.005263566970825195, "step": 1163} +{"info/global_step": 1164, "train_info/time_within_train_step": 24.84403371810913, "step": 1164} +{"train_info/time_between_train_steps": 0.0051991939544677734, "step": 1164} +{"info/global_step": 1165, "train_info/time_within_train_step": 24.744879961013794, "step": 1165} +{"train_info/time_between_train_steps": 0.005423545837402344, "step": 1165} +{"info/global_step": 1166, "train_info/time_within_train_step": 24.861557722091675, "step": 1166} +{"train_info/time_between_train_steps": 0.005218505859375, "step": 1166} +{"info/global_step": 1167, "train_info/time_within_train_step": 24.748358249664307, "step": 1167} +{"train_info/time_between_train_steps": 0.005061149597167969, "step": 1167} +{"info/global_step": 1168, "train_info/time_within_train_step": 24.839836835861206, "step": 1168} +{"train_info/time_between_train_steps": 0.0052547454833984375, "step": 1168} +{"info/global_step": 1169, "train_info/time_within_train_step": 24.748117446899414, "step": 1169} +{"train_info/time_between_train_steps": 0.0363306999206543, "step": 1169} +{"info/global_step": 1170, "train_info/time_within_train_step": 24.806326627731323, "step": 1170} +{"train_info/time_between_train_steps": 0.005061626434326172, "step": 1170} +{"info/global_step": 1171, "train_info/time_within_train_step": 24.717085599899292, "step": 1171} +{"train_info/time_between_train_steps": 0.0048580169677734375, "step": 1171} +{"info/global_step": 1172, "train_info/time_within_train_step": 24.721147298812866, "step": 1172} +{"train_info/time_between_train_steps": 0.004914999008178711, "step": 1172} +{"info/global_step": 1173, "train_info/time_within_train_step": 24.726713180541992, "step": 1173} +{"train_info/time_between_train_steps": 0.009475231170654297, "step": 1173} +{"info/global_step": 1174, "train_info/time_within_train_step": 24.725632190704346, "step": 1174} +{"train_info/time_between_train_steps": 0.004954338073730469, "step": 1174} +{"info/global_step": 1175, "train_info/time_within_train_step": 24.725041151046753, "step": 1175} +{"train_info/time_between_train_steps": 0.005049943923950195, "step": 1175} +{"info/global_step": 1176, "train_info/time_within_train_step": 24.753174543380737, "step": 1176} +{"train_info/time_between_train_steps": 0.005162239074707031, "step": 1176} +{"info/global_step": 1177, "train_info/time_within_train_step": 24.765809297561646, "step": 1177} +{"train_info/time_between_train_steps": 0.0052127838134765625, "step": 1177} +{"info/global_step": 1178, "train_info/time_within_train_step": 24.74299931526184, "step": 1178} +{"train_info/time_between_train_steps": 0.005103111267089844, "step": 1178} +{"info/global_step": 1179, "train_info/time_within_train_step": 24.81825566291809, "step": 1179} +{"train_info/time_between_train_steps": 0.005231380462646484, "step": 1179} +{"info/global_step": 1180, "train_info/time_within_train_step": 24.737439393997192, "step": 1180} +{"train_info/time_between_train_steps": 0.005004405975341797, "step": 1180} +{"info/global_step": 1181, "train_info/time_within_train_step": 24.771029710769653, "step": 1181} +{"train_info/time_between_train_steps": 0.0053441524505615234, "step": 1181} +{"info/global_step": 1182, "train_info/time_within_train_step": 24.73256802558899, "step": 1182} +{"train_info/time_between_train_steps": 0.0050885677337646484, "step": 1182} +{"info/global_step": 1183, "train_info/time_within_train_step": 24.726672649383545, "step": 1183} +{"train_info/time_between_train_steps": 0.005028247833251953, "step": 1183} +{"info/global_step": 1184, "train_info/time_within_train_step": 24.79438018798828, "step": 1184} +{"train_info/time_between_train_steps": 0.005005359649658203, "step": 1184} +{"info/global_step": 1185, "train_info/time_within_train_step": 24.7370822429657, "step": 1185} +{"train_info/time_between_train_steps": 0.00518488883972168, "step": 1185} +{"info/global_step": 1186, "train_info/time_within_train_step": 24.882949113845825, "step": 1186} +{"train_info/time_between_train_steps": 0.00528407096862793, "step": 1186} +{"info/global_step": 1187, "train_info/time_within_train_step": 24.757466554641724, "step": 1187} +{"train_info/time_between_train_steps": 0.005646467208862305, "step": 1187} +{"info/global_step": 1188, "train_info/time_within_train_step": 24.773149728775024, "step": 1188} +{"train_info/time_between_train_steps": 0.006031036376953125, "step": 1188} +{"train_info/time_between_train_steps": 17.118208169937134, "step": 1188} +{"info/global_step": 1189, "train_info/time_within_train_step": 24.732020378112793, "step": 1189} +{"train_info/time_between_train_steps": 0.004899740219116211, "step": 1189} +{"info/global_step": 1190, "train_info/time_within_train_step": 24.890434980392456, "step": 1190} +{"train_info/time_between_train_steps": 0.005026578903198242, "step": 1190} +{"info/global_step": 1191, "train_info/time_within_train_step": 25.12442708015442, "step": 1191} +{"train_info/time_between_train_steps": 0.005240201950073242, "step": 1191} +{"info/global_step": 1192, "train_info/time_within_train_step": 24.920934200286865, "step": 1192} +{"train_info/time_between_train_steps": 0.005524396896362305, "step": 1192} +{"info/global_step": 1193, "train_info/time_within_train_step": 26.15887999534607, "step": 1193} +{"train_info/time_between_train_steps": 0.006882905960083008, "step": 1193} +{"info/global_step": 1194, "train_info/time_within_train_step": 24.85907769203186, "step": 1194} +{"train_info/time_between_train_steps": 0.005239009857177734, "step": 1194} +{"info/global_step": 1195, "train_info/time_within_train_step": 24.77915668487549, "step": 1195} +{"train_info/time_between_train_steps": 0.005158901214599609, "step": 1195} +{"info/global_step": 1196, "train_info/time_within_train_step": 24.865943431854248, "step": 1196} +{"train_info/time_between_train_steps": 0.005118131637573242, "step": 1196} +{"info/global_step": 1197, "train_info/time_within_train_step": 24.857186317443848, "step": 1197} +{"train_info/time_between_train_steps": 0.005201816558837891, "step": 1197} +{"info/global_step": 1198, "train_info/time_within_train_step": 24.9023118019104, "step": 1198} +{"train_info/time_between_train_steps": 0.005183696746826172, "step": 1198} +{"info/global_step": 1199, "train_info/time_within_train_step": 24.778114557266235, "step": 1199} +{"train_info/time_between_train_steps": 0.010465145111083984, "step": 1199} +{"info/global_step": 1200, "train_info/time_within_train_step": 24.88678503036499, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740953787, "_runtime": 31294}, "step": 1200} +{"logs": {"train/loss": 2.6339, "train/learning_rate": 0.0, "train/epoch": 33.01, "_timestamp": 1740953787, "_runtime": 31294}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740953915, "_runtime": 31422}, "step": 1200} +{"logs": {"train/train_runtime": 31425.8461, "train/train_samples_per_second": 19.551, "train/train_steps_per_second": 0.038, "train/total_flos": 3.25835344576512e+17, "train/train_loss": 3.3924330488840737, "train/epoch": 33.01, "_timestamp": 1740953915, "_runtime": 31422}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1581.5537109375, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740953953, "_runtime": 31460}, "step": 1200} +{"logs": {"eval/loss": 3.4463047981262207, "eval/runtime": 5.6694, "eval/samples_per_second": 23.283, "eval/steps_per_second": 1.587, "train/epoch": 33.01, "_timestamp": 1740953953, "_runtime": 31460}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1581.5537109375, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740953955, "_runtime": 31462}, "step": 1200} +{"logs": {"eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 3.4463047981262207, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 31.384206796729448, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 5.6694, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 23.283, "train/epoch": 33.01, "_timestamp": 1740953955, "_runtime": 31462}, "step": 1200} diff --git a/perturb_det_adj_np_num_zh_ZH_randinit_seed53.log b/perturb_det_adj_np_num_zh_ZH_randinit_seed53.log new file mode 100755 index 0000000000000000000000000000000000000000..f902330b1c2f3a367ce9351971a21b97cd739598 --- /dev/null +++ b/perturb_det_adj_np_num_zh_ZH_randinit_seed53.log @@ -0,0 +1,121 @@ +|=>> 03/02 [14:32:53] - mistral - INFO :: Starting Run: perturb_det_adj_np_num_zh_ZH_randinit_seed53... +|=>> 03/02 [14:32:53] - mistral - INFO :: Setting Random Seed to 53! +|=>> 03/02 [14:32:54] - mistral - INFO :: Building Tokenize and Initializing `gpt2-small` via AutoModel/AutoConfig... +|=>> 03/02 [14:32:54] - mistral - INFO :: Using Configs For Model From: /scratch/xiulyang/multilingual-LM/mistral/conf/models/gpt2-small-ZH.json ... +|=>> 03/02 [14:32:54] - mistral.models.auto - INFO :: Building Hugging Face GPT2Config from provided configs: {'activation_function': 'gelu_new', 'architectures': ['GPT2LMHeadModel'], 'attn_pdrop': 0.1, 'embd_pdrop': 0.1, 'initializer_range': 0.02, 'layer_norm_epsilon': 1e-05, 'model_type': 'gpt2', 'n_ctx': 1024, 'n_embd': 768, 'n_head': 12, 'n_inner': None, 'n_layer': 12, 'n_positions': 1024, 'reorder_and_upcast_attn': True, 'resid_pdrop': 0.1, 'scale_attn_by_inverse_layer_idx': True, 'scale_attn_weights': True, 'summary_activation': None, 'summary_first_dropout': 0.2, 'summary_proj_to_labels': True, 'summary_type': 'cls_index', 'summary_use_proj': True, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 1024}}, 'torch_dtype': 'float32', 'transformers_version': '4.35.2', 'use_cache': False, 'vocab_size': 21128} ... +|=>> 03/02 [14:32:54] - mistral.models.auto - INFO :: Fetching Hugging Face [Fast] AutoTokenizer for Model: `gpt2`... +|=>> 03/02 [14:32:54] - mistral.models.auto - INFO :: Using a Pretokenized Dataset +|=>> 03/02 [14:32:54] - mistral.models.auto - INFO :: Initializing Custom GPT-2 Model from Configuration: `gpt2`... +|=>> 03/02 [14:32:57] - mistral - INFO :: Setting Training Arguments from Quinfig... +|=>> 03/02 [14:32:57] - mistral.args.training - INFO :: Setting Gradient Accumulation Steps = `64` [BSZ: 512 World Size: 1 Device BSZ: 8] +|=>> 03/02 [14:32:57] - mistral - INFO :: Downloading and Preprocessing Dataset `/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py`... +|=>> 03/02 [14:32:58] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Generating examples from = /scratch/xiulyang/multilingual-LM/data/multilingual/multilingual_data_perturbed/perturb_det_adj_np_num_zh/train +|=>> 03/02 [14:33:00] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Total sentences: 1059468 +|=>> 03/02 [14:33:00] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Loading pre-tokenized data +|=>> 03/02 [14:33:07] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 03/02 [14:33:07] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 03/02 [14:33:08] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 03/02 [14:33:16] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Generating examples from = /scratch/xiulyang/multilingual-LM/data/multilingual/multilingual_data_perturbed/perturb_det_adj_np_num_zh/dev +|=>> 03/02 [14:33:17] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Total sentences: 5553 +|=>> 03/02 [14:33:17] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Loading pre-tokenized data +|=>> 03/02 [14:33:17] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 03/02 [14:33:17] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 03/02 [14:33:17] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 03/02 [14:33:18] - mistral.corpora.auto - INFO :: Building Tokenized Indexed Dataset for {dataset_id}/{dataset_name}... +|=>> 03/02 [14:33:18] - mistral.corpora.auto - INFO :: Building Indexed Dataset for train +|=>> 03/02 [14:34:00] - mistral.corpora.auto - INFO :: Building Indexed Dataset for validation +|=>> 03/02 [14:34:01] - mistral - INFO :: Initializing Model Trainer... +|=>> 03/02 [14:34:01] - mistral - INFO :: Training Arguments: TrainingArguments( +_n_gpu=1, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +bf16=False, +bf16_full_eval=False, +data_seed=53, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=IntervalStrategy.STEPS, +fp16=True, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +gradient_accumulation_steps=64, +gradient_checkpointing=False, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_model_id=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0006, +length_column_name=length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=logs, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=50, +logging_strategy=IntervalStrategy.STEPS, +lr_scheduler_type=SchedulerType.LINEAR, +max_grad_norm=1.0, +max_steps=1200, +metric_for_best_model=None, +mp_parameters=, +no_cuda=False, +num_train_epochs=3.0, +optim=OptimizerNames.ADAMW_HF, +output_dir=//scratch/xiulyang/multilingual_models/perturb_det_adj_np_num_zh_ZH_randinit/babylm_perturb_det_adj_np_num_zh_ZH_randinit_seed53/runs/perturb_det_adj_np_num_zh_ZH_randinit_seed53, +overwrite_output_dir=False, +past_index=-1, +per_device_eval_batch_size=16, +per_device_train_batch_size=8, +prediction_loss_only=True, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +remove_unused_columns=True, +report_to=[], +resume_from_checkpoint=None, +run_name=perturb_det_adj_np_num_zh_ZH_randinit_seed53, +save_on_each_node=False, +save_steps=1000, +save_strategy=IntervalStrategy.STEPS, +save_total_limit=None, +seed=53, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=120, +weight_decay=0.1, +xpu_backend=None, +) +|=>> 03/02 [14:34:09] - mistral.core.callbacks - INFO :: Setting W&B Project: xiulin-yang-compling +|=>> 03/02 [14:34:46] - mistral - INFO :: Training... +|=>> 03/02 [14:34:49] - mistral.core.callbacks - INFO :: Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" +|=>> 03/02 [23:19:08] - mistral - INFO :: ...and that's all folks! +|=>> 03/02 [23:19:08] - mistral - INFO :: Running final evaluation... diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..5cf8877bb352a529f8f9cd6ef07f9c62b0badbe7 --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:663dffe78009dcb4795164dcc0048e6af772e40af06c3bbc3269d589096ef61b +size 420912233 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/training_args.bin b/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..38f8469260f0d284612437800574b1fc07760030 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1caf1e1a70f50539b082bfbf9af6ffc327e561ca075be9c4a29ed12fb0374e03 +size 3183