diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/checkpoint-0/config.json b/checkpoint-0/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-0/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-0/pytorch_model.bin b/checkpoint-0/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..061288474e4dc90b01d25ca0d912dbf2bc2cbece --- /dev/null +++ b/checkpoint-0/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4cb7ddeb977797468ae0b2e8a977bb66b33b53cf9ce0a85051cfaa03f3f32eb +size 420912233 diff --git a/checkpoint-0/special_tokens_map.json b/checkpoint-0/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-0/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-0/tokenizer_config.json b/checkpoint-0/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-0/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-0/training_args.bin b/checkpoint-0/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..6c86fbe0cf655f84c9ffa9639b21631e3e33c06f --- /dev/null +++ b/checkpoint-0/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3956944c191fac4dee6f5ef2e1b97a38ffc9f86d0d1b30990539a8111ac333 +size 3183 diff --git a/checkpoint-100/config.json b/checkpoint-100/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..d427fd23606552d6f7ff9b3d896c013b226f5924 --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:084ab55bd04b4fc93f611e2ea4901c4f617d125de6a4e6978029c82b83815fe4 +size 816635249 diff --git a/checkpoint-100/pytorch_model.bin b/checkpoint-100/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..7559f70447c2351bfbb138ddc7943e05c07af74a --- /dev/null +++ b/checkpoint-100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a35f3e00bf7b6856fe19cdd71f8cbb26b494481ad1fa3a5abadab69969af0d4a +size 420912233 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..6b10932529092c0ab8c48a8a483f98b090ffd020 --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf2b9119c5ee54f7d5de06e880d4157349bc34f034dd2f1f90bb759f212bdc47 +size 14567 diff --git a/checkpoint-100/scaler.pt b/checkpoint-100/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..45cc4a33e17645cb0ed4a911b11c77cb2e7ce7f3 --- /dev/null +++ b/checkpoint-100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a3423b2fe42f204bc8fe2c666ff379f9fd753a0f13613064a5e71e86b519e8 +size 559 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..595eec9094e91b3eb24c2de88c461df2c22026ab --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e9b9d31d11c624d89b0c04ad496adf4b5addd3e703848d2583972c703e8da6 +size 623 diff --git a/checkpoint-100/special_tokens_map.json b/checkpoint-100/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-100/tokenizer_config.json b/checkpoint-100/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..e7995347e1041abf565b209e94856242993dc2ac --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,34 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0233333333333334, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1063, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9358, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9198, + "step": 100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.7043725312e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..6c86fbe0cf655f84c9ffa9639b21631e3e33c06f --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3956944c191fac4dee6f5ef2e1b97a38ffc9f86d0d1b30990539a8111ac333 +size 3183 diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..18a65525a6072312a8a0308290d0f6e285bb7bee --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a04a5497d117aaa505db6ba9bc2ceda1e757ef25e4e84db39e074fdaee988aae +size 816635441 diff --git a/checkpoint-1000/pytorch_model.bin b/checkpoint-1000/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..368908a2eb9662ad0c6ee2360fc10f66123b1e93 --- /dev/null +++ b/checkpoint-1000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2b15f93a069d52da64e49c4751fc8ad9122e076cf5c8753ceffdd779db9673d +size 420912233 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..aa513bd336b6431c4d078e641b4b63714e2f6dc0 --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:073bb4d4e2cdffa2f0d4373aa6e4d7efc3b064cb8395beb8f55fbc1a2d560b58 +size 14567 diff --git a/checkpoint-1000/scaler.pt b/checkpoint-1000/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..b04695d3a30e4bab2b78883d9c849c25c37ef7d7 --- /dev/null +++ b/checkpoint-1000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f810fc7b695697c440d8985f6042b4ba23a9e1027604c265718b518ca29f1b2b +size 559 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..2a616899591a36b66ac0bd1ceeb324087c181a2a --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0691206f4bd9ca409d6e7104087a4e0eb05df8f8f555a400f6ecc532edba52d8 +size 623 diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..7ab26f9c8fa0346b92da78c42a5457a28ff3db70 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,158 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 27.023333333333333, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1063, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9358, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9198, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3577, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0525, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9393, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8341, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.623, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.4921, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3277, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2253, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1759, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0702, + "step": 600 + }, + { + "epoch": 18.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.0373, + "step": 650 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002777777777777778, + "loss": 2.9519, + "step": 700 + }, + { + "epoch": 20.02, + "learning_rate": 0.00025, + "loss": 2.9004, + "step": 750 + }, + { + "epoch": 22.01, + "learning_rate": 0.00022222222222222218, + "loss": 2.8864, + "step": 800 + }, + { + "epoch": 23.02, + "learning_rate": 0.00019444444444444443, + "loss": 2.8145, + "step": 850 + }, + { + "epoch": 24.03, + "learning_rate": 0.00016666666666666666, + "loss": 2.7771, + "step": 900 + }, + { + "epoch": 26.01, + "learning_rate": 0.0001388888888888889, + "loss": 2.7731, + "step": 950 + }, + { + "epoch": 27.02, + "learning_rate": 0.00011111111111111109, + "loss": 2.709, + "step": 1000 + }, + { + "epoch": 27.02, + "eval_loss": 3.4275035858154297, + "eval_runtime": 2.4722, + "eval_samples_per_second": 53.393, + "eval_steps_per_second": 3.64, + "step": 1000 + }, + { + "epoch": 27.02, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 3.4275035858154297, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 30.7996579936446, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 2.4722, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 53.393, + "step": 1000 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.714432274432e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..6c86fbe0cf655f84c9ffa9639b21631e3e33c06f --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3956944c191fac4dee6f5ef2e1b97a38ffc9f86d0d1b30990539a8111ac333 +size 3183 diff --git a/checkpoint-1100/config.json b/checkpoint-1100/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-1100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-1100/optimizer.pt b/checkpoint-1100/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..df873a8c4c63cee9629d98a6058c050a911fa71e --- /dev/null +++ b/checkpoint-1100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9f00b91f6276d85003be38089a1ae01d0229267982861be92e4f33b93f0f875 +size 816635441 diff --git a/checkpoint-1100/pytorch_model.bin b/checkpoint-1100/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..7f273bf56fba1fa1b8a5b8382380f8d654ee3d63 --- /dev/null +++ b/checkpoint-1100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ecd6a8ed7124fd6724e7d9e3ebfc826314674f0f8b6fe057081692cdf0fd391 +size 420912233 diff --git a/checkpoint-1100/rng_state.pth b/checkpoint-1100/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..e16d7007141f0154f9eb9f851e965f64a90e7720 --- /dev/null +++ b/checkpoint-1100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c7d341a5b762c20e215d6aed5ce9bd5b1a68f4a7b69498b4e1dae71559aa493 +size 14567 diff --git a/checkpoint-1100/scaler.pt b/checkpoint-1100/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..37bf049fbd5fd721203bf0238edc8ff67dbd8f94 --- /dev/null +++ b/checkpoint-1100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb16c30b686aa43e110b0d33f9d46bf3127b7124542ca8dc34831233d4675a0 +size 559 diff --git a/checkpoint-1100/scheduler.pt b/checkpoint-1100/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..c158169ec0bb09620952d544fd4b4edea0cc9cf4 --- /dev/null +++ b/checkpoint-1100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1b7713e4bb40428f29080b7d08d4a52f779ac863737861e4724292b2cf6c59 +size 623 diff --git a/checkpoint-1100/special_tokens_map.json b/checkpoint-1100/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1100/tokenizer_config.json b/checkpoint-1100/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1100/trainer_state.json b/checkpoint-1100/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..06ade6b3bc9fccc0adceb87a2c7cd28a445905d4 --- /dev/null +++ b/checkpoint-1100/trainer_state.json @@ -0,0 +1,170 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 30.016666666666666, + "global_step": 1100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1063, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9358, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9198, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3577, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0525, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9393, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8341, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.623, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.4921, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3277, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2253, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1759, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0702, + "step": 600 + }, + { + "epoch": 18.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.0373, + "step": 650 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002777777777777778, + "loss": 2.9519, + "step": 700 + }, + { + "epoch": 20.02, + "learning_rate": 0.00025, + "loss": 2.9004, + "step": 750 + }, + { + "epoch": 22.01, + "learning_rate": 0.00022222222222222218, + "loss": 2.8864, + "step": 800 + }, + { + "epoch": 23.02, + "learning_rate": 0.00019444444444444443, + "loss": 2.8145, + "step": 850 + }, + { + "epoch": 24.03, + "learning_rate": 0.00016666666666666666, + "loss": 2.7771, + "step": 900 + }, + { + "epoch": 26.01, + "learning_rate": 0.0001388888888888889, + "loss": 2.7731, + "step": 950 + }, + { + "epoch": 27.02, + "learning_rate": 0.00011111111111111109, + "loss": 2.709, + "step": 1000 + }, + { + "epoch": 27.02, + "eval_loss": 3.4275035858154297, + "eval_runtime": 2.4722, + "eval_samples_per_second": 53.393, + "eval_steps_per_second": 3.64, + "step": 1000 + }, + { + "epoch": 27.02, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 3.4275035858154297, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 30.7996579936446, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 2.4722, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 53.393, + "step": 1000 + }, + { + "epoch": 29.0, + "learning_rate": 8.333333333333333e-05, + "loss": 2.7083, + "step": 1050 + }, + { + "epoch": 30.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 2.6528, + "step": 1100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.986306633728e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1100/training_args.bin b/checkpoint-1100/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..6c86fbe0cf655f84c9ffa9639b21631e3e33c06f --- /dev/null +++ b/checkpoint-1100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3956944c191fac4dee6f5ef2e1b97a38ffc9f86d0d1b30990539a8111ac333 +size 3183 diff --git a/checkpoint-1200/config.json b/checkpoint-1200/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-1200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-1200/optimizer.pt b/checkpoint-1200/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..987acc823588b6db1b205d0b4b9ff11ba5a89efa --- /dev/null +++ b/checkpoint-1200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd8de7891ae651379da7b6f68469794c426cec764c41ffd18a576e99b6ad126a +size 816635441 diff --git a/checkpoint-1200/pytorch_model.bin b/checkpoint-1200/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..e216e9d8c88d8042920198c6f7f9cd95f87ee885 --- /dev/null +++ b/checkpoint-1200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0908c891e567424ff7bbc9444303ea848ea1170463cec3e63b427ff1560a1f48 +size 420912233 diff --git a/checkpoint-1200/rng_state.pth b/checkpoint-1200/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..df60b93a569c9d98e6160293e23835ada439d9a8 --- /dev/null +++ b/checkpoint-1200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4370a90bd99585f0d7c30a5291ac27b6d766574cccf1ec17689cbd0f2533b864 +size 14567 diff --git a/checkpoint-1200/scaler.pt b/checkpoint-1200/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..8953dddccbefc4703c09dcda27d83c15add2bade --- /dev/null +++ b/checkpoint-1200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c7277eaca0850ae3e9b6790b3d002d820169cce0671185e672c28c8ae8e056 +size 559 diff --git a/checkpoint-1200/scheduler.pt b/checkpoint-1200/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..310d39c17fc616a9c83286ed00f0f4cefba9f5df --- /dev/null +++ b/checkpoint-1200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:935a8fb09a6e9698d9894853b05e181b3f56098deaaecddde08e55f06bf000c4 +size 623 diff --git a/checkpoint-1200/special_tokens_map.json b/checkpoint-1200/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1200/tokenizer_config.json b/checkpoint-1200/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1200/trainer_state.json b/checkpoint-1200/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..80cfcefdb6857d8fdcb6b4b9b723290135a40ade --- /dev/null +++ b/checkpoint-1200/trainer_state.json @@ -0,0 +1,182 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 33.01, + "global_step": 1200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1063, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9358, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9198, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3577, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0525, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9393, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8341, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.623, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.4921, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3277, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2253, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1759, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0702, + "step": 600 + }, + { + "epoch": 18.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.0373, + "step": 650 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002777777777777778, + "loss": 2.9519, + "step": 700 + }, + { + "epoch": 20.02, + "learning_rate": 0.00025, + "loss": 2.9004, + "step": 750 + }, + { + "epoch": 22.01, + "learning_rate": 0.00022222222222222218, + "loss": 2.8864, + "step": 800 + }, + { + "epoch": 23.02, + "learning_rate": 0.00019444444444444443, + "loss": 2.8145, + "step": 850 + }, + { + "epoch": 24.03, + "learning_rate": 0.00016666666666666666, + "loss": 2.7771, + "step": 900 + }, + { + "epoch": 26.01, + "learning_rate": 0.0001388888888888889, + "loss": 2.7731, + "step": 950 + }, + { + "epoch": 27.02, + "learning_rate": 0.00011111111111111109, + "loss": 2.709, + "step": 1000 + }, + { + "epoch": 27.02, + "eval_loss": 3.4275035858154297, + "eval_runtime": 2.4722, + "eval_samples_per_second": 53.393, + "eval_steps_per_second": 3.64, + "step": 1000 + }, + { + "epoch": 27.02, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 3.4275035858154297, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 30.7996579936446, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 2.4722, + "eval_/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 53.393, + "step": 1000 + }, + { + "epoch": 29.0, + "learning_rate": 8.333333333333333e-05, + "loss": 2.7083, + "step": 1050 + }, + { + "epoch": 30.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 2.6528, + "step": 1100 + }, + { + "epoch": 31.03, + "learning_rate": 2.7777777777777772e-05, + "loss": 2.629, + "step": 1150 + }, + { + "epoch": 33.01, + "learning_rate": 0.0, + "loss": 2.639, + "step": 1200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 3.258180993024e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1200/training_args.bin b/checkpoint-1200/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..6c86fbe0cf655f84c9ffa9639b21631e3e33c06f --- /dev/null +++ b/checkpoint-1200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3956944c191fac4dee6f5ef2e1b97a38ffc9f86d0d1b30990539a8111ac333 +size 3183 diff --git a/checkpoint-200/config.json b/checkpoint-200/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..350c4ede8c38f3f751468c75e9e0d7baf823ff28 --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c1d04fc87f13adddd7da753e674c2e3ad4520a6dbcb1c6e41995da34c2792ca +size 816635249 diff --git a/checkpoint-200/pytorch_model.bin b/checkpoint-200/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..ed2dd054c028bf4086bd584cdcb3e5e652b8c50e --- /dev/null +++ b/checkpoint-200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f7a7bcb6d0a3dea804d8f07256e9a1304a0fec2b4ca0c31fdb8b3e8dc4ea3e0 +size 420912233 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..5fab064ba7b6b0980dc57af81212a96da7177597 --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d39f44e4b5be7446f136f301364ee8d940bbcc5f80d28b465050f2a5f6747422 +size 14567 diff --git a/checkpoint-200/scaler.pt b/checkpoint-200/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..365b52ebf376498237a843f6d7332e5a49b14902 --- /dev/null +++ b/checkpoint-200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb6982c29cd162f49aeb531674acf574eccd46a8f556bec596040d7c3b95200a +size 559 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..36cb8da739c80a63971a62f06f781c40ac0fceb2 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a992625ada6d884e508ff9392d16738b4a4163147f8fcbf9f46be82ecae9888 +size 623 diff --git a/checkpoint-200/special_tokens_map.json b/checkpoint-200/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-200/tokenizer_config.json b/checkpoint-200/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..967061c6d80eebbfd9977058ce32e3df8d636603 --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,46 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.016666666666667, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1063, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9358, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9198, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3577, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0525, + "step": 200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 5.42311612416e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..6c86fbe0cf655f84c9ffa9639b21631e3e33c06f --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3956944c191fac4dee6f5ef2e1b97a38ffc9f86d0d1b30990539a8111ac333 +size 3183 diff --git a/checkpoint-300/config.json b/checkpoint-300/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-300/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..2e06cb5529a19fba687d9d0bc8ea74e6370339f0 --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b4c0d12b615afadf0e95c67a949c6e7b1b25186a4d391cf321cde7421108b58 +size 816635441 diff --git a/checkpoint-300/pytorch_model.bin b/checkpoint-300/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..3188ec68dac297e2e43223513ecaa3bd77cf5a4c --- /dev/null +++ b/checkpoint-300/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87268eba04d7936b7d3b0474146136ea2cffed2c0f998a4ca9fd6478f929c863 +size 420912233 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..36d0df9507794720a1f9f8218c2463074f150979 --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb66dd636d32d1c848729861320e9d7cfda9b714638ffce83069c0d82bdb433 +size 14567 diff --git a/checkpoint-300/scaler.pt b/checkpoint-300/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..01066cf4761ea9d2f7962f5181762f7b08690b79 --- /dev/null +++ b/checkpoint-300/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0967b9f865f16344c55f5ccc3cf7d6e8e97ca61dda304e931ca6bad130f48dd1 +size 559 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..681987f21e79cd4685aac32a5e6b74341e25d936 --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1db899b266916f792a0898ceb27a87eaf76647f10c29cc0c13ce22f12a12efd +size 623 diff --git a/checkpoint-300/special_tokens_map.json b/checkpoint-300/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-300/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-300/tokenizer_config.json b/checkpoint-300/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-300/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..df2ad3a0d471be37dc454350d3dcd8904daba5bc --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,58 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.01, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1063, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9358, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9198, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3577, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0525, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9393, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8341, + "step": 300 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 8.14185971712e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..6c86fbe0cf655f84c9ffa9639b21631e3e33c06f --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3956944c191fac4dee6f5ef2e1b97a38ffc9f86d0d1b30990539a8111ac333 +size 3183 diff --git a/checkpoint-400/config.json b/checkpoint-400/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-400/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..b7789c2849e2fe8ff9fd1fd22fdb0b89659d382a --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be70cda2a954e2d288fe3d71ed002640b136cecdd6189823283018bfaa9a2e6d +size 816635441 diff --git a/checkpoint-400/pytorch_model.bin b/checkpoint-400/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..499082707610902ae9646f789a45c7e8f95eda10 --- /dev/null +++ b/checkpoint-400/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eb028f477ad045c3fd34ede8452f338fda4b10655595eaa21f9f8da9acdf125 +size 420912233 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..ef9191f6a2e5032407c14b894a5fc42d2b77fb87 --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7b8c50348617b4052b33ab5683146978c9b57301ae0a6979c8b3eaa99dd34dc +size 14567 diff --git a/checkpoint-400/scaler.pt b/checkpoint-400/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..9c7aef4199e98d81152810b661dbaffc01963383 --- /dev/null +++ b/checkpoint-400/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476e510c8ea7edbd2b51d1e76a4e037820a5639381c0d8b5d32dafa492795a1e +size 559 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..967673cfc91836d239beb5a7ede06992232ba309 --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db087d678047b5c346bbb8511936612c1fdf223c6fd70321e97369bc31ed76a8 +size 623 diff --git a/checkpoint-400/special_tokens_map.json b/checkpoint-400/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-400/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-400/tokenizer_config.json b/checkpoint-400/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-400/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..e544efa8a8cce546d3e53c135c43b1e38cc3b825 --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,70 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 11.003333333333334, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1063, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9358, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9198, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3577, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0525, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9393, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8341, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.623, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.4921, + "step": 400 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.086060331008e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..6c86fbe0cf655f84c9ffa9639b21631e3e33c06f --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3956944c191fac4dee6f5ef2e1b97a38ffc9f86d0d1b30990539a8111ac333 +size 3183 diff --git a/checkpoint-500/config.json b/checkpoint-500/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-500/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..6f0b3a89324d8d23073e8a64a362ce3f1ca0c5fb --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d139f31951fb6b98bacb97322211592608bfb29430835df85af752c47b12799 +size 816635441 diff --git a/checkpoint-500/pytorch_model.bin b/checkpoint-500/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..83c502bc3131880614f13e7a6179506ec7bdeec5 --- /dev/null +++ b/checkpoint-500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9a7ac893e29d8d89c50fccdfeac539d2654bbf9cb0a5472c7c1cdaff68fd674 +size 420912233 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..1c77e07a811b143d8871250c55e7af9bfa04a922 --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5440a25d23dfa5fe430b947517f2d57e2f9bb2f1a279ec9033f8c67675028269 +size 14567 diff --git a/checkpoint-500/scaler.pt b/checkpoint-500/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..e8b96c9c2837f2c95b1d07d4fc3f245f9ad1ef62 --- /dev/null +++ b/checkpoint-500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa4c7be44c959599b8b43bb9bc3371e9e4e5bbc5758b3ab5afcccfda3e72e67 +size 559 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..77b958181a5b47b022b96b38a6207f274a1b6604 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026fae4a90d56c24de94b10dfa7a75b6ba4e43bd5c1a3fdb2d77356b81cd6f8a +size 623 diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..50d1ec1b028162c8cdf7b25d69fee361426b9d6a --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,82 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 13.026666666666667, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1063, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9358, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9198, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3577, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0525, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9393, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8341, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.623, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.4921, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3277, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2253, + "step": 500 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.356497584128e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..6c86fbe0cf655f84c9ffa9639b21631e3e33c06f --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3956944c191fac4dee6f5ef2e1b97a38ffc9f86d0d1b30990539a8111ac333 +size 3183 diff --git a/checkpoint-600/config.json b/checkpoint-600/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-600/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..5bb4945744f7ac750b45290cd41e2d832881b547 --- /dev/null +++ b/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9be76143309990ead23e9cad341ef9504a46fe0460055e1c3cfdae935aa3978d +size 816635441 diff --git a/checkpoint-600/pytorch_model.bin b/checkpoint-600/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..1028823ad852a106b1244891792b6e17ef7a39e7 --- /dev/null +++ b/checkpoint-600/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e77f32e5440a4644bf39d06eb29750e1006b5367ede74ff593e86f1740e2daae +size 420912233 diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..356bcaa5439d6ba62194dd04a250d59f36bddb81 --- /dev/null +++ b/checkpoint-600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f55bacd890cabab6c9f6ff871748ac580d6a77816029b10b8ea55c0b3c91fa59 +size 14567 diff --git a/checkpoint-600/scaler.pt b/checkpoint-600/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..f95b3e36da01561ec333a83ee8419ad225633e06 --- /dev/null +++ b/checkpoint-600/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8415b86bbce347c0df306b84a695add049c2a3b2d0b6f4dda3bf036d341150 +size 559 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..62e5359badd619e7dad2c51d98fa8043d9948f0b --- /dev/null +++ b/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700940432b1c2117248896e2ce5a58d93c051d92ea97707f74d76bf1ef24deee +size 623 diff --git a/checkpoint-600/special_tokens_map.json b/checkpoint-600/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-600/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-600/tokenizer_config.json b/checkpoint-600/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-600/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..7e46a609f8ad448c0409ff6f276e9bccda686cb5 --- /dev/null +++ b/checkpoint-600/trainer_state.json @@ -0,0 +1,94 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 16.02, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1063, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9358, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9198, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3577, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0525, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9393, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8341, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.623, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.4921, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3277, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2253, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1759, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0702, + "step": 600 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.628371943424e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..6c86fbe0cf655f84c9ffa9639b21631e3e33c06f --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3956944c191fac4dee6f5ef2e1b97a38ffc9f86d0d1b30990539a8111ac333 +size 3183 diff --git a/checkpoint-700/config.json b/checkpoint-700/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-700/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..c700ba605d12c69b2ce5c189bd7bd05bf753b8c5 --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f68acef7065a7cb153b37db468e938671c15194a608756a27c88d052738d66e +size 816635441 diff --git a/checkpoint-700/pytorch_model.bin b/checkpoint-700/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..ea53480450f51f2717fa294940d99a08f78db85f --- /dev/null +++ b/checkpoint-700/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3f95eb164c708c6c94286ae5b5a4abb8b7a030ea69ab9b4ab43e419f177efcc +size 420912233 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..24b58ea460269c0e9954c98f3019e78fbfbbe5b4 --- /dev/null +++ b/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf3be1a9e18f8b59bf2f73b8e9f7cbba5e0d7b034c9e54fdb69d1c56c5d634fe +size 14567 diff --git a/checkpoint-700/scaler.pt b/checkpoint-700/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..12f2b8ec834e54a2bd7cfdd0e07b0c6e125b6490 --- /dev/null +++ b/checkpoint-700/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb213daf5cce18a5f92167ca14da9df084d907f2b9796efc4666630f312b58c +size 559 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..42408b6253265af34ae78746144fbba9316e0d7e --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d4910fd408e002ebeff50d62bfb043dcae5ef658777d0c3ee4a3bbb515ec15 +size 623 diff --git a/checkpoint-700/special_tokens_map.json b/checkpoint-700/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-700/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-700/tokenizer_config.json b/checkpoint-700/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-700/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..36289df631412fd6511f4ec8b6bd0f3b3f301dce --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,106 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 19.013333333333332, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1063, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9358, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9198, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3577, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0525, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9393, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8341, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.623, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.4921, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3277, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2253, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1759, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0702, + "step": 600 + }, + { + "epoch": 18.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.0373, + "step": 650 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002777777777777778, + "loss": 2.9519, + "step": 700 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.90024630272e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..6c86fbe0cf655f84c9ffa9639b21631e3e33c06f --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3956944c191fac4dee6f5ef2e1b97a38ffc9f86d0d1b30990539a8111ac333 +size 3183 diff --git a/checkpoint-800/config.json b/checkpoint-800/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-800/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..f823e9e6771b5cd7e0dbd147873b99ea6fc4a208 --- /dev/null +++ b/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aec6ab1408edeafbc8c715fc3d37f2a1a801d2a44cdfc45285d275912a60275d +size 816635441 diff --git a/checkpoint-800/pytorch_model.bin b/checkpoint-800/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..0ab45f19c4db69c175251f2512bbf6310dbfa94b --- /dev/null +++ b/checkpoint-800/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:646b69ec226d43b3b51a3213d417c171ee652f3e95d959813c4b3e81890df9e9 +size 420912233 diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..b801998afc34cdd9613882e9fd6ab7105376fb05 --- /dev/null +++ b/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b43e6e698783bd5e27b893cbfb7b8e84bbce3fa373b299e4a47c40020e88e59 +size 14567 diff --git a/checkpoint-800/scaler.pt b/checkpoint-800/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..b3c73372264156b02df8dada2192ee3c96dd5fc4 --- /dev/null +++ b/checkpoint-800/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2074cdcefbaa0a39f736d6b0f7bf018c350d49e85648bc8accc4f756ad816e +size 559 diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..75713dbce3771306ca00343ecc497c4f19a01d03 --- /dev/null +++ b/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27fb255c84833fb6ab5d93679cb236a569de9a1c4f805f72a2f60a2bc7c7499 +size 623 diff --git a/checkpoint-800/special_tokens_map.json b/checkpoint-800/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-800/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-800/tokenizer_config.json b/checkpoint-800/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-800/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..44f3c8f0927637e4baae10a129fc9b798ab151d5 --- /dev/null +++ b/checkpoint-800/trainer_state.json @@ -0,0 +1,118 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 22.006666666666668, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1063, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9358, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9198, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3577, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0525, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9393, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8341, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.623, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.4921, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3277, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2253, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1759, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0702, + "step": 600 + }, + { + "epoch": 18.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.0373, + "step": 650 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002777777777777778, + "loss": 2.9519, + "step": 700 + }, + { + "epoch": 20.02, + "learning_rate": 0.00025, + "loss": 2.9004, + "step": 750 + }, + { + "epoch": 22.01, + "learning_rate": 0.00022222222222222218, + "loss": 2.8864, + "step": 800 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.172120662016e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..6c86fbe0cf655f84c9ffa9639b21631e3e33c06f --- /dev/null +++ b/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3956944c191fac4dee6f5ef2e1b97a38ffc9f86d0d1b30990539a8111ac333 +size 3183 diff --git a/checkpoint-900/config.json b/checkpoint-900/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/checkpoint-900/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..fab8b08f609b44acfdde8ae27741a815b6e76f1c --- /dev/null +++ b/checkpoint-900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39b77ef5153147903ad701efdabb78f46fbecf9b89eef9c19f3163a79e03b1ad +size 816635441 diff --git a/checkpoint-900/pytorch_model.bin b/checkpoint-900/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..d3814c00af28f910e89c9eda7820f82a82d70142 --- /dev/null +++ b/checkpoint-900/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5274f07a2b1fdd075de4c435d2f57f542d9564f0e00124d22aa17a614ddd2919 +size 420912233 diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..84d9a6657c8c26f096449719e5f9b55f91b45eac --- /dev/null +++ b/checkpoint-900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:727e801da90823cd259c984553cc664cde7659472595c11696561fdc783bc8a5 +size 14567 diff --git a/checkpoint-900/scaler.pt b/checkpoint-900/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..be54cb13c777bc6feccb478ff218e7e21fad482a --- /dev/null +++ b/checkpoint-900/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8695f57df923e22b943b0b0f2b9cc7007008e80b53ccee275b3a35963fe67e9 +size 559 diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..539d7c83ea252818dc9cbffac08cf340bb05a454 --- /dev/null +++ b/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cb09e5db72772a15094286e93cbb61d745d9b63863703cf53da0bcb9827821 +size 623 diff --git a/checkpoint-900/special_tokens_map.json b/checkpoint-900/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-900/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-900/tokenizer_config.json b/checkpoint-900/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-900/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..e961114f3f1f3df2460d02ad9f6f23f1b9139620 --- /dev/null +++ b/checkpoint-900/trainer_state.json @@ -0,0 +1,130 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 24.03, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.1063, + "step": 1 + }, + { + "epoch": 1.01, + "learning_rate": 0.00025, + "loss": 6.9358, + "step": 50 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005, + "loss": 4.9198, + "step": 100 + }, + { + "epoch": 4.0, + "learning_rate": 0.0005833333333333333, + "loss": 4.3577, + "step": 150 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.0525, + "step": 200 + }, + { + "epoch": 6.03, + "learning_rate": 0.0005277777777777777, + "loss": 3.9393, + "step": 250 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005, + "loss": 3.8341, + "step": 300 + }, + { + "epoch": 9.02, + "learning_rate": 0.00047222222222222224, + "loss": 3.623, + "step": 350 + }, + { + "epoch": 11.0, + "learning_rate": 0.00044444444444444436, + "loss": 3.4921, + "step": 400 + }, + { + "epoch": 12.02, + "learning_rate": 0.00041666666666666664, + "loss": 3.3277, + "step": 450 + }, + { + "epoch": 13.03, + "learning_rate": 0.00038888888888888887, + "loss": 3.2253, + "step": 500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.1759, + "step": 550 + }, + { + "epoch": 16.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.0702, + "step": 600 + }, + { + "epoch": 18.0, + "learning_rate": 0.00030555555555555555, + "loss": 3.0373, + "step": 650 + }, + { + "epoch": 19.01, + "learning_rate": 0.0002777777777777778, + "loss": 2.9519, + "step": 700 + }, + { + "epoch": 20.02, + "learning_rate": 0.00025, + "loss": 2.9004, + "step": 750 + }, + { + "epoch": 22.01, + "learning_rate": 0.00022222222222222218, + "loss": 2.8864, + "step": 800 + }, + { + "epoch": 23.02, + "learning_rate": 0.00019444444444444443, + "loss": 2.8145, + "step": 850 + }, + { + "epoch": 24.03, + "learning_rate": 0.00016666666666666666, + "loss": 2.7771, + "step": 900 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.442557915136e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..6c86fbe0cf655f84c9ffa9639b21631e3e33c06f --- /dev/null +++ b/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3956944c191fac4dee6f5ef2e1b97a38ffc9f86d0d1b30990539a8111ac333 +size 3183 diff --git a/config.json b/config.json new file mode 100755 index 0000000000000000000000000000000000000000..e1d694f3918f5722bb92a2ab720c419384499c81 --- /dev/null +++ b/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 21128 +} diff --git a/metrics.json b/metrics.json new file mode 100755 index 0000000000000000000000000000000000000000..49a24b8119588c4d2e6bdceb17722f8e3a52a48e --- /dev/null +++ b/metrics.json @@ -0,0 +1,2494 @@ +{"num_parameters": 102068736, "trainable_parameters": 102068736, "step": 0} +{"train_info/time_between_train_steps": 4.1657960414886475, "step": 0} +{"info/global_step": 1, "train_info/time_within_train_step": 25.287343740463257, "step": 1} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 17273.5732421875, "train_info/memory_reserved": 18442.0, "train_info/memory_max_reserved": 18442.0, "_timestamp": 1740922541, "_runtime": 47}, "step": 1} +{"logs": {"train/loss": 10.1063, "train/learning_rate": 4.9999999999999996e-06, "train/epoch": 0.0, "_timestamp": 1740922541, "_runtime": 47}, "step": 1} +{"train_info/time_between_train_steps": 0.14838433265686035, "step": 1} +{"info/global_step": 2, "train_info/time_within_train_step": 24.922191381454468, "step": 2} +{"train_info/time_between_train_steps": 0.0053822994232177734, "step": 2} +{"info/global_step": 3, "train_info/time_within_train_step": 24.748976230621338, "step": 3} +{"train_info/time_between_train_steps": 0.005502223968505859, "step": 3} +{"info/global_step": 4, "train_info/time_within_train_step": 24.87479257583618, "step": 4} +{"train_info/time_between_train_steps": 0.0053691864013671875, "step": 4} +{"info/global_step": 5, "train_info/time_within_train_step": 24.75726294517517, "step": 5} +{"train_info/time_between_train_steps": 0.005273342132568359, "step": 5} +{"info/global_step": 6, "train_info/time_within_train_step": 24.900510549545288, "step": 6} +{"train_info/time_between_train_steps": 0.005503654479980469, "step": 6} +{"info/global_step": 7, "train_info/time_within_train_step": 24.803297519683838, "step": 7} +{"train_info/time_between_train_steps": 0.005320310592651367, "step": 7} +{"info/global_step": 8, "train_info/time_within_train_step": 24.908316135406494, "step": 8} +{"train_info/time_between_train_steps": 0.005373477935791016, "step": 8} +{"info/global_step": 9, "train_info/time_within_train_step": 24.731450080871582, "step": 9} +{"train_info/time_between_train_steps": 0.005110740661621094, "step": 9} +{"info/global_step": 10, "train_info/time_within_train_step": 24.94478964805603, "step": 10} +{"train_info/time_between_train_steps": 0.005301237106323242, "step": 10} +{"info/global_step": 11, "train_info/time_within_train_step": 24.739585876464844, "step": 11} +{"train_info/time_between_train_steps": 0.0058329105377197266, "step": 11} +{"info/global_step": 12, "train_info/time_within_train_step": 24.934651136398315, "step": 12} +{"train_info/time_between_train_steps": 0.005358457565307617, "step": 12} +{"info/global_step": 13, "train_info/time_within_train_step": 24.7337908744812, "step": 13} +{"train_info/time_between_train_steps": 0.005526065826416016, "step": 13} +{"info/global_step": 14, "train_info/time_within_train_step": 24.863269805908203, "step": 14} +{"train_info/time_between_train_steps": 0.005430698394775391, "step": 14} +{"info/global_step": 15, "train_info/time_within_train_step": 24.742252349853516, "step": 15} +{"train_info/time_between_train_steps": 0.005288124084472656, "step": 15} +{"info/global_step": 16, "train_info/time_within_train_step": 24.952394485473633, "step": 16} +{"train_info/time_between_train_steps": 0.01464080810546875, "step": 16} +{"info/global_step": 17, "train_info/time_within_train_step": 24.867182970046997, "step": 17} +{"train_info/time_between_train_steps": 0.04088306427001953, "step": 17} +{"info/global_step": 18, "train_info/time_within_train_step": 24.896894693374634, "step": 18} +{"train_info/time_between_train_steps": 0.008951663970947266, "step": 18} +{"info/global_step": 19, "train_info/time_within_train_step": 24.775160551071167, "step": 19} +{"train_info/time_between_train_steps": 0.009603261947631836, "step": 19} +{"info/global_step": 20, "train_info/time_within_train_step": 24.72873854637146, "step": 20} +{"train_info/time_between_train_steps": 0.005076885223388672, "step": 20} +{"info/global_step": 21, "train_info/time_within_train_step": 24.712044954299927, "step": 21} +{"train_info/time_between_train_steps": 0.005034685134887695, "step": 21} +{"info/global_step": 22, "train_info/time_within_train_step": 24.78763747215271, "step": 22} +{"train_info/time_between_train_steps": 0.0051043033599853516, "step": 22} +{"info/global_step": 23, "train_info/time_within_train_step": 24.717707872390747, "step": 23} +{"train_info/time_between_train_steps": 0.005099773406982422, "step": 23} +{"info/global_step": 24, "train_info/time_within_train_step": 24.716447114944458, "step": 24} +{"train_info/time_between_train_steps": 0.005135774612426758, "step": 24} +{"info/global_step": 25, "train_info/time_within_train_step": 24.722175121307373, "step": 25} +{"train_info/time_between_train_steps": 0.005285501480102539, "step": 25} +{"info/global_step": 26, "train_info/time_within_train_step": 24.71403431892395, "step": 26} +{"train_info/time_between_train_steps": 0.005109071731567383, "step": 26} +{"info/global_step": 27, "train_info/time_within_train_step": 24.768866539001465, "step": 27} +{"train_info/time_between_train_steps": 0.005265235900878906, "step": 27} +{"info/global_step": 28, "train_info/time_within_train_step": 24.717039108276367, "step": 28} +{"train_info/time_between_train_steps": 0.005492687225341797, "step": 28} +{"info/global_step": 29, "train_info/time_within_train_step": 24.741255283355713, "step": 29} +{"train_info/time_between_train_steps": 0.00507664680480957, "step": 29} +{"info/global_step": 30, "train_info/time_within_train_step": 24.7221896648407, "step": 30} +{"train_info/time_between_train_steps": 0.0052623748779296875, "step": 30} +{"info/global_step": 31, "train_info/time_within_train_step": 24.726938009262085, "step": 31} +{"train_info/time_between_train_steps": 0.0052318572998046875, "step": 31} +{"info/global_step": 32, "train_info/time_within_train_step": 24.815430879592896, "step": 32} +{"train_info/time_between_train_steps": 0.0051326751708984375, "step": 32} +{"info/global_step": 33, "train_info/time_within_train_step": 24.730138063430786, "step": 33} +{"train_info/time_between_train_steps": 0.005385875701904297, "step": 33} +{"info/global_step": 34, "train_info/time_within_train_step": 24.728075742721558, "step": 34} +{"train_info/time_between_train_steps": 0.005331993103027344, "step": 34} +{"info/global_step": 35, "train_info/time_within_train_step": 24.73164415359497, "step": 35} +{"train_info/time_between_train_steps": 0.0055561065673828125, "step": 35} +{"info/global_step": 36, "train_info/time_within_train_step": 24.758257389068604, "step": 36} +{"train_info/time_between_train_steps": 0.0057947635650634766, "step": 36} +{"train_info/time_between_train_steps": 16.68425941467285, "step": 36} +{"info/global_step": 37, "train_info/time_within_train_step": 24.71497106552124, "step": 37} +{"train_info/time_between_train_steps": 0.0050427913665771484, "step": 37} +{"info/global_step": 38, "train_info/time_within_train_step": 24.95904517173767, "step": 38} +{"train_info/time_between_train_steps": 0.005480051040649414, "step": 38} +{"info/global_step": 39, "train_info/time_within_train_step": 24.729001760482788, "step": 39} +{"train_info/time_between_train_steps": 0.005347013473510742, "step": 39} +{"info/global_step": 40, "train_info/time_within_train_step": 24.865204572677612, "step": 40} +{"train_info/time_between_train_steps": 0.005239248275756836, "step": 40} +{"info/global_step": 41, "train_info/time_within_train_step": 24.73995041847229, "step": 41} +{"train_info/time_between_train_steps": 0.005621910095214844, "step": 41} +{"info/global_step": 42, "train_info/time_within_train_step": 24.867701292037964, "step": 42} +{"train_info/time_between_train_steps": 0.00531768798828125, "step": 42} +{"info/global_step": 43, "train_info/time_within_train_step": 24.787391185760498, "step": 43} +{"train_info/time_between_train_steps": 0.005418539047241211, "step": 43} +{"info/global_step": 44, "train_info/time_within_train_step": 24.899635553359985, "step": 44} +{"train_info/time_between_train_steps": 0.005371809005737305, "step": 44} +{"info/global_step": 45, "train_info/time_within_train_step": 24.726889848709106, "step": 45} +{"train_info/time_between_train_steps": 0.00513911247253418, "step": 45} +{"info/global_step": 46, "train_info/time_within_train_step": 24.85265874862671, "step": 46} +{"train_info/time_between_train_steps": 0.005376338958740234, "step": 46} +{"info/global_step": 47, "train_info/time_within_train_step": 24.83633327484131, "step": 47} +{"train_info/time_between_train_steps": 0.005615711212158203, "step": 47} +{"info/global_step": 48, "train_info/time_within_train_step": 24.881088256835938, "step": 48} +{"train_info/time_between_train_steps": 0.0052754878997802734, "step": 48} +{"info/global_step": 49, "train_info/time_within_train_step": 24.728785753250122, "step": 49} +{"train_info/time_between_train_steps": 0.005530595779418945, "step": 49} +{"info/global_step": 50, "train_info/time_within_train_step": 24.91842031478882, "step": 50} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740923774, "_runtime": 1280}, "step": 50} +{"logs": {"train/loss": 6.9358, "train/learning_rate": 0.00025, "train/epoch": 1.01, "_timestamp": 1740923774, "_runtime": 1280}, "step": 50} +{"train_info/time_between_train_steps": 0.02666616439819336, "step": 50} +{"info/global_step": 51, "train_info/time_within_train_step": 24.72990083694458, "step": 51} +{"train_info/time_between_train_steps": 0.005118846893310547, "step": 51} +{"info/global_step": 52, "train_info/time_within_train_step": 24.856524229049683, "step": 52} +{"train_info/time_between_train_steps": 0.005411624908447266, "step": 52} +{"info/global_step": 53, "train_info/time_within_train_step": 24.752891778945923, "step": 53} +{"train_info/time_between_train_steps": 0.021193742752075195, "step": 53} +{"info/global_step": 54, "train_info/time_within_train_step": 24.71568989753723, "step": 54} +{"train_info/time_between_train_steps": 0.005002260208129883, "step": 54} +{"info/global_step": 55, "train_info/time_within_train_step": 24.720648765563965, "step": 55} +{"train_info/time_between_train_steps": 0.0052530765533447266, "step": 55} +{"info/global_step": 56, "train_info/time_within_train_step": 24.71456813812256, "step": 56} +{"train_info/time_between_train_steps": 0.005031108856201172, "step": 56} +{"info/global_step": 57, "train_info/time_within_train_step": 24.749221324920654, "step": 57} +{"train_info/time_between_train_steps": 0.005133152008056641, "step": 57} +{"info/global_step": 58, "train_info/time_within_train_step": 24.73649024963379, "step": 58} +{"train_info/time_between_train_steps": 0.0051822662353515625, "step": 58} +{"info/global_step": 59, "train_info/time_within_train_step": 24.719069242477417, "step": 59} +{"train_info/time_between_train_steps": 0.005102634429931641, "step": 59} +{"info/global_step": 60, "train_info/time_within_train_step": 24.806750297546387, "step": 60} +{"train_info/time_between_train_steps": 0.009707927703857422, "step": 60} +{"info/global_step": 61, "train_info/time_within_train_step": 24.76085066795349, "step": 61} +{"train_info/time_between_train_steps": 0.00537872314453125, "step": 61} +{"info/global_step": 62, "train_info/time_within_train_step": 24.88860559463501, "step": 62} +{"train_info/time_between_train_steps": 0.005230903625488281, "step": 62} +{"info/global_step": 63, "train_info/time_within_train_step": 24.73747205734253, "step": 63} +{"train_info/time_between_train_steps": 0.0052754878997802734, "step": 63} +{"info/global_step": 64, "train_info/time_within_train_step": 24.728018045425415, "step": 64} +{"train_info/time_between_train_steps": 0.005156755447387695, "step": 64} +{"info/global_step": 65, "train_info/time_within_train_step": 24.7953782081604, "step": 65} +{"train_info/time_between_train_steps": 0.005142688751220703, "step": 65} +{"info/global_step": 66, "train_info/time_within_train_step": 24.72670817375183, "step": 66} +{"train_info/time_between_train_steps": 0.005503654479980469, "step": 66} +{"info/global_step": 67, "train_info/time_within_train_step": 24.806827545166016, "step": 67} +{"train_info/time_between_train_steps": 0.00514984130859375, "step": 67} +{"info/global_step": 68, "train_info/time_within_train_step": 24.725524425506592, "step": 68} +{"train_info/time_between_train_steps": 0.005229949951171875, "step": 68} +{"info/global_step": 69, "train_info/time_within_train_step": 24.801425457000732, "step": 69} +{"train_info/time_between_train_steps": 0.005231618881225586, "step": 69} +{"info/global_step": 70, "train_info/time_within_train_step": 24.737253189086914, "step": 70} +{"train_info/time_between_train_steps": 0.00548243522644043, "step": 70} +{"info/global_step": 71, "train_info/time_within_train_step": 24.739895343780518, "step": 71} +{"train_info/time_between_train_steps": 0.005925655364990234, "step": 71} +{"info/global_step": 72, "train_info/time_within_train_step": 24.851027250289917, "step": 72} +{"train_info/time_between_train_steps": 0.005682468414306641, "step": 72} +{"train_info/time_between_train_steps": 17.088045597076416, "step": 72} +{"info/global_step": 73, "train_info/time_within_train_step": 24.722378492355347, "step": 73} +{"train_info/time_between_train_steps": 0.0052754878997802734, "step": 73} +{"info/global_step": 74, "train_info/time_within_train_step": 24.946467638015747, "step": 74} +{"train_info/time_between_train_steps": 0.0052013397216796875, "step": 74} +{"info/global_step": 75, "train_info/time_within_train_step": 24.72444248199463, "step": 75} +{"train_info/time_between_train_steps": 0.0052340030670166016, "step": 75} +{"info/global_step": 76, "train_info/time_within_train_step": 24.842073678970337, "step": 76} +{"train_info/time_between_train_steps": 0.005218505859375, "step": 76} +{"info/global_step": 77, "train_info/time_within_train_step": 24.73630142211914, "step": 77} +{"train_info/time_between_train_steps": 0.005346536636352539, "step": 77} +{"info/global_step": 78, "train_info/time_within_train_step": 24.93117904663086, "step": 78} +{"train_info/time_between_train_steps": 0.005198478698730469, "step": 78} +{"info/global_step": 79, "train_info/time_within_train_step": 24.726624250411987, "step": 79} +{"train_info/time_between_train_steps": 0.0053179264068603516, "step": 79} +{"info/global_step": 80, "train_info/time_within_train_step": 24.895269632339478, "step": 80} +{"train_info/time_between_train_steps": 0.0052869319915771484, "step": 80} +{"info/global_step": 81, "train_info/time_within_train_step": 24.79330825805664, "step": 81} +{"train_info/time_between_train_steps": 0.005362510681152344, "step": 81} +{"info/global_step": 82, "train_info/time_within_train_step": 24.873669862747192, "step": 82} +{"train_info/time_between_train_steps": 0.005334615707397461, "step": 82} +{"info/global_step": 83, "train_info/time_within_train_step": 24.798791885375977, "step": 83} +{"train_info/time_between_train_steps": 0.005517482757568359, "step": 83} +{"info/global_step": 84, "train_info/time_within_train_step": 24.85710859298706, "step": 84} +{"train_info/time_between_train_steps": 0.010546684265136719, "step": 84} +{"info/global_step": 85, "train_info/time_within_train_step": 24.73718523979187, "step": 85} +{"train_info/time_between_train_steps": 0.005586385726928711, "step": 85} +{"info/global_step": 86, "train_info/time_within_train_step": 24.937032222747803, "step": 86} +{"train_info/time_between_train_steps": 0.005287647247314453, "step": 86} +{"info/global_step": 87, "train_info/time_within_train_step": 24.731735944747925, "step": 87} +{"train_info/time_between_train_steps": 0.005155086517333984, "step": 87} +{"info/global_step": 88, "train_info/time_within_train_step": 24.880157947540283, "step": 88} +{"train_info/time_between_train_steps": 0.005450248718261719, "step": 88} +{"info/global_step": 89, "train_info/time_within_train_step": 24.742772817611694, "step": 89} +{"train_info/time_between_train_steps": 0.022106409072875977, "step": 89} +{"info/global_step": 90, "train_info/time_within_train_step": 24.744190454483032, "step": 90} +{"train_info/time_between_train_steps": 0.004901409149169922, "step": 90} +{"info/global_step": 91, "train_info/time_within_train_step": 24.768115520477295, "step": 91} +{"train_info/time_between_train_steps": 0.005202054977416992, "step": 91} +{"info/global_step": 92, "train_info/time_within_train_step": 24.719834566116333, "step": 92} +{"train_info/time_between_train_steps": 0.005280256271362305, "step": 92} +{"info/global_step": 93, "train_info/time_within_train_step": 24.84382152557373, "step": 93} +{"train_info/time_between_train_steps": 0.009911775588989258, "step": 93} +{"info/global_step": 94, "train_info/time_within_train_step": 24.733083963394165, "step": 94} +{"train_info/time_between_train_steps": 0.005126237869262695, "step": 94} +{"info/global_step": 95, "train_info/time_within_train_step": 24.74656844139099, "step": 95} +{"train_info/time_between_train_steps": 0.005048513412475586, "step": 95} +{"info/global_step": 96, "train_info/time_within_train_step": 24.723212957382202, "step": 96} +{"train_info/time_between_train_steps": 0.005069732666015625, "step": 96} +{"info/global_step": 97, "train_info/time_within_train_step": 24.720601558685303, "step": 97} +{"train_info/time_between_train_steps": 0.005232095718383789, "step": 97} +{"info/global_step": 98, "train_info/time_within_train_step": 24.72449564933777, "step": 98} +{"train_info/time_between_train_steps": 0.005012989044189453, "step": 98} +{"info/global_step": 99, "train_info/time_within_train_step": 24.73159694671631, "step": 99} +{"train_info/time_between_train_steps": 0.0052950382232666016, "step": 99} +{"info/global_step": 100, "train_info/time_within_train_step": 24.78039264678955, "step": 100} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740925032, "_runtime": 2538}, "step": 100} +{"logs": {"train/loss": 4.9198, "train/learning_rate": 0.0005, "train/epoch": 2.02, "_timestamp": 1740925032, "_runtime": 2538}, "step": 100} +{"train_info/time_between_train_steps": 127.45765376091003, "step": 100} +{"info/global_step": 101, "train_info/time_within_train_step": 24.71717071533203, "step": 101} +{"train_info/time_between_train_steps": 0.005146026611328125, "step": 101} +{"info/global_step": 102, "train_info/time_within_train_step": 24.714482069015503, "step": 102} +{"train_info/time_between_train_steps": 0.0052869319915771484, "step": 102} +{"info/global_step": 103, "train_info/time_within_train_step": 24.72704839706421, "step": 103} +{"train_info/time_between_train_steps": 0.0050241947174072266, "step": 103} +{"info/global_step": 104, "train_info/time_within_train_step": 24.725099563598633, "step": 104} +{"train_info/time_between_train_steps": 0.005028963088989258, "step": 104} +{"info/global_step": 105, "train_info/time_within_train_step": 24.808534622192383, "step": 105} +{"train_info/time_between_train_steps": 0.00564885139465332, "step": 105} +{"info/global_step": 106, "train_info/time_within_train_step": 24.72686219215393, "step": 106} +{"train_info/time_between_train_steps": 0.00530552864074707, "step": 106} +{"info/global_step": 107, "train_info/time_within_train_step": 24.74824047088623, "step": 107} +{"train_info/time_between_train_steps": 0.005543231964111328, "step": 107} +{"info/global_step": 108, "train_info/time_within_train_step": 24.819010734558105, "step": 108} +{"train_info/time_between_train_steps": 0.00589299201965332, "step": 108} +{"train_info/time_between_train_steps": 16.71949577331543, "step": 108} +{"info/global_step": 109, "train_info/time_within_train_step": 24.818339586257935, "step": 109} +{"train_info/time_between_train_steps": 0.0050029754638671875, "step": 109} +{"info/global_step": 110, "train_info/time_within_train_step": 24.876384019851685, "step": 110} +{"train_info/time_between_train_steps": 0.005208492279052734, "step": 110} +{"info/global_step": 111, "train_info/time_within_train_step": 24.744961261749268, "step": 111} +{"train_info/time_between_train_steps": 0.005276203155517578, "step": 111} +{"info/global_step": 112, "train_info/time_within_train_step": 24.916687488555908, "step": 112} +{"train_info/time_between_train_steps": 0.005262851715087891, "step": 112} +{"info/global_step": 113, "train_info/time_within_train_step": 24.7281813621521, "step": 113} +{"train_info/time_between_train_steps": 0.005469322204589844, "step": 113} +{"info/global_step": 114, "train_info/time_within_train_step": 24.979636907577515, "step": 114} +{"train_info/time_between_train_steps": 0.005421638488769531, "step": 114} +{"info/global_step": 115, "train_info/time_within_train_step": 24.72739815711975, "step": 115} +{"train_info/time_between_train_steps": 0.0051805973052978516, "step": 115} +{"info/global_step": 116, "train_info/time_within_train_step": 24.89070200920105, "step": 116} +{"train_info/time_between_train_steps": 0.005222797393798828, "step": 116} +{"info/global_step": 117, "train_info/time_within_train_step": 24.814133882522583, "step": 117} +{"train_info/time_between_train_steps": 0.0053005218505859375, "step": 117} +{"info/global_step": 118, "train_info/time_within_train_step": 24.855940341949463, "step": 118} +{"train_info/time_between_train_steps": 0.005070686340332031, "step": 118} +{"info/global_step": 119, "train_info/time_within_train_step": 24.742557764053345, "step": 119} +{"train_info/time_between_train_steps": 0.005759716033935547, "step": 119} +{"info/global_step": 120, "train_info/time_within_train_step": 24.894208431243896, "step": 120} +{"train_info/time_between_train_steps": 0.0052950382232666016, "step": 120} +{"info/global_step": 121, "train_info/time_within_train_step": 24.769359588623047, "step": 121} +{"train_info/time_between_train_steps": 0.005427360534667969, "step": 121} +{"info/global_step": 122, "train_info/time_within_train_step": 24.959946155548096, "step": 122} +{"train_info/time_between_train_steps": 0.005464076995849609, "step": 122} +{"info/global_step": 123, "train_info/time_within_train_step": 24.74343252182007, "step": 123} +{"train_info/time_between_train_steps": 0.005144834518432617, "step": 123} +{"info/global_step": 124, "train_info/time_within_train_step": 24.910682439804077, "step": 124} +{"train_info/time_between_train_steps": 0.00529170036315918, "step": 124} +{"info/global_step": 125, "train_info/time_within_train_step": 24.74248957633972, "step": 125} +{"train_info/time_between_train_steps": 0.03538680076599121, "step": 125} +{"info/global_step": 126, "train_info/time_within_train_step": 24.77701497077942, "step": 126} +{"train_info/time_between_train_steps": 0.0049669742584228516, "step": 126} +{"info/global_step": 127, "train_info/time_within_train_step": 24.712509632110596, "step": 127} +{"train_info/time_between_train_steps": 0.004987239837646484, "step": 127} +{"info/global_step": 128, "train_info/time_within_train_step": 24.719475269317627, "step": 128} +{"train_info/time_between_train_steps": 0.005079507827758789, "step": 128} +{"info/global_step": 129, "train_info/time_within_train_step": 24.774999380111694, "step": 129} +{"train_info/time_between_train_steps": 0.005080223083496094, "step": 129} +{"info/global_step": 130, "train_info/time_within_train_step": 24.71399474143982, "step": 130} +{"train_info/time_between_train_steps": 0.004999637603759766, "step": 130} +{"info/global_step": 131, "train_info/time_within_train_step": 24.726723194122314, "step": 131} +{"train_info/time_between_train_steps": 0.0051686763763427734, "step": 131} +{"info/global_step": 132, "train_info/time_within_train_step": 24.714315176010132, "step": 132} +{"train_info/time_between_train_steps": 0.009937286376953125, "step": 132} +{"info/global_step": 133, "train_info/time_within_train_step": 24.718059539794922, "step": 133} +{"train_info/time_between_train_steps": 0.00522303581237793, "step": 133} +{"info/global_step": 134, "train_info/time_within_train_step": 24.71904754638672, "step": 134} +{"train_info/time_between_train_steps": 0.0051326751708984375, "step": 134} +{"info/global_step": 135, "train_info/time_within_train_step": 24.715622901916504, "step": 135} +{"train_info/time_between_train_steps": 0.014545679092407227, "step": 135} +{"info/global_step": 136, "train_info/time_within_train_step": 24.991665840148926, "step": 136} +{"train_info/time_between_train_steps": 0.005224943161010742, "step": 136} +{"info/global_step": 137, "train_info/time_within_train_step": 24.718636989593506, "step": 137} +{"train_info/time_between_train_steps": 0.005027055740356445, "step": 137} +{"info/global_step": 138, "train_info/time_within_train_step": 24.71435809135437, "step": 138} +{"train_info/time_between_train_steps": 0.005089282989501953, "step": 138} +{"info/global_step": 139, "train_info/time_within_train_step": 24.811058044433594, "step": 139} +{"train_info/time_between_train_steps": 0.005275249481201172, "step": 139} +{"info/global_step": 140, "train_info/time_within_train_step": 24.71902847290039, "step": 140} +{"train_info/time_between_train_steps": 0.005043983459472656, "step": 140} +{"info/global_step": 141, "train_info/time_within_train_step": 24.716971158981323, "step": 141} +{"train_info/time_between_train_steps": 0.0051403045654296875, "step": 141} +{"info/global_step": 142, "train_info/time_within_train_step": 24.731484413146973, "step": 142} +{"train_info/time_between_train_steps": 0.005352497100830078, "step": 142} +{"info/global_step": 143, "train_info/time_within_train_step": 24.815258979797363, "step": 143} +{"train_info/time_between_train_steps": 0.005383014678955078, "step": 143} +{"info/global_step": 144, "train_info/time_within_train_step": 24.752180814743042, "step": 144} +{"train_info/time_between_train_steps": 0.005667448043823242, "step": 144} +{"train_info/time_between_train_steps": 16.78961753845215, "step": 144} +{"info/global_step": 145, "train_info/time_within_train_step": 24.729586124420166, "step": 145} +{"train_info/time_between_train_steps": 0.0047931671142578125, "step": 145} +{"info/global_step": 146, "train_info/time_within_train_step": 24.81519389152527, "step": 146} +{"train_info/time_between_train_steps": 0.004917621612548828, "step": 146} +{"info/global_step": 147, "train_info/time_within_train_step": 24.747002124786377, "step": 147} +{"train_info/time_between_train_steps": 0.005071401596069336, "step": 147} +{"info/global_step": 148, "train_info/time_within_train_step": 24.850207567214966, "step": 148} +{"train_info/time_between_train_steps": 0.0052874088287353516, "step": 148} +{"info/global_step": 149, "train_info/time_within_train_step": 24.73457169532776, "step": 149} +{"train_info/time_between_train_steps": 0.005288839340209961, "step": 149} +{"info/global_step": 150, "train_info/time_within_train_step": 24.939294815063477, "step": 150} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740926434, "_runtime": 3940}, "step": 150} +{"logs": {"train/loss": 4.3577, "train/learning_rate": 0.0005833333333333333, "train/epoch": 4.0, "_timestamp": 1740926434, "_runtime": 3940}, "step": 150} +{"train_info/time_between_train_steps": 0.026453018188476562, "step": 150} +{"info/global_step": 151, "train_info/time_within_train_step": 24.736572980880737, "step": 151} +{"train_info/time_between_train_steps": 0.010372400283813477, "step": 151} +{"info/global_step": 152, "train_info/time_within_train_step": 24.847819566726685, "step": 152} +{"train_info/time_between_train_steps": 0.010164499282836914, "step": 152} +{"info/global_step": 153, "train_info/time_within_train_step": 24.73583459854126, "step": 153} +{"train_info/time_between_train_steps": 0.01060795783996582, "step": 153} +{"info/global_step": 154, "train_info/time_within_train_step": 24.870026111602783, "step": 154} +{"train_info/time_between_train_steps": 0.010264396667480469, "step": 154} +{"info/global_step": 155, "train_info/time_within_train_step": 24.892439126968384, "step": 155} +{"train_info/time_between_train_steps": 0.010507822036743164, "step": 155} +{"info/global_step": 156, "train_info/time_within_train_step": 25.066341161727905, "step": 156} +{"train_info/time_between_train_steps": 0.0053670406341552734, "step": 156} +{"info/global_step": 157, "train_info/time_within_train_step": 24.729142904281616, "step": 157} +{"train_info/time_between_train_steps": 0.0052509307861328125, "step": 157} +{"info/global_step": 158, "train_info/time_within_train_step": 24.845447063446045, "step": 158} +{"train_info/time_between_train_steps": 0.005110979080200195, "step": 158} +{"info/global_step": 159, "train_info/time_within_train_step": 24.752171754837036, "step": 159} +{"train_info/time_between_train_steps": 0.005385637283325195, "step": 159} +{"info/global_step": 160, "train_info/time_within_train_step": 24.900035619735718, "step": 160} +{"train_info/time_between_train_steps": 0.005338430404663086, "step": 160} +{"info/global_step": 161, "train_info/time_within_train_step": 24.739219188690186, "step": 161} +{"train_info/time_between_train_steps": 0.03619837760925293, "step": 161} +{"info/global_step": 162, "train_info/time_within_train_step": 24.78922748565674, "step": 162} +{"train_info/time_between_train_steps": 0.0051212310791015625, "step": 162} +{"info/global_step": 163, "train_info/time_within_train_step": 24.71254801750183, "step": 163} +{"train_info/time_between_train_steps": 0.004976749420166016, "step": 163} +{"info/global_step": 164, "train_info/time_within_train_step": 24.71457266807556, "step": 164} +{"train_info/time_between_train_steps": 0.004972934722900391, "step": 164} +{"info/global_step": 165, "train_info/time_within_train_step": 24.721399784088135, "step": 165} +{"train_info/time_between_train_steps": 0.00514531135559082, "step": 165} +{"info/global_step": 166, "train_info/time_within_train_step": 24.718395709991455, "step": 166} +{"train_info/time_between_train_steps": 0.005020618438720703, "step": 166} +{"info/global_step": 167, "train_info/time_within_train_step": 24.71610689163208, "step": 167} +{"train_info/time_between_train_steps": 0.005145549774169922, "step": 167} +{"info/global_step": 168, "train_info/time_within_train_step": 24.724245071411133, "step": 168} +{"train_info/time_between_train_steps": 0.005110502243041992, "step": 168} +{"info/global_step": 169, "train_info/time_within_train_step": 24.799229621887207, "step": 169} +{"train_info/time_between_train_steps": 0.0050928592681884766, "step": 169} +{"info/global_step": 170, "train_info/time_within_train_step": 24.819390058517456, "step": 170} +{"train_info/time_between_train_steps": 0.005151271820068359, "step": 170} +{"info/global_step": 171, "train_info/time_within_train_step": 24.733110427856445, "step": 171} +{"train_info/time_between_train_steps": 0.005035400390625, "step": 171} +{"info/global_step": 172, "train_info/time_within_train_step": 24.77125573158264, "step": 172} +{"train_info/time_between_train_steps": 0.005001544952392578, "step": 172} +{"info/global_step": 173, "train_info/time_within_train_step": 24.7247211933136, "step": 173} +{"train_info/time_between_train_steps": 0.00513911247253418, "step": 173} +{"info/global_step": 174, "train_info/time_within_train_step": 24.80775547027588, "step": 174} +{"train_info/time_between_train_steps": 0.005038738250732422, "step": 174} +{"info/global_step": 175, "train_info/time_within_train_step": 24.719724655151367, "step": 175} +{"train_info/time_between_train_steps": 0.0051233768463134766, "step": 175} +{"info/global_step": 176, "train_info/time_within_train_step": 24.744898557662964, "step": 176} +{"train_info/time_between_train_steps": 0.0053861141204833984, "step": 176} +{"info/global_step": 177, "train_info/time_within_train_step": 24.768174171447754, "step": 177} +{"train_info/time_between_train_steps": 0.005170106887817383, "step": 177} +{"info/global_step": 178, "train_info/time_within_train_step": 24.735227823257446, "step": 178} +{"train_info/time_between_train_steps": 0.005691051483154297, "step": 178} +{"info/global_step": 179, "train_info/time_within_train_step": 24.827893018722534, "step": 179} +{"train_info/time_between_train_steps": 0.005894184112548828, "step": 179} +{"info/global_step": 180, "train_info/time_within_train_step": 24.760639667510986, "step": 180} +{"train_info/time_between_train_steps": 0.005735635757446289, "step": 180} +{"train_info/time_between_train_steps": 16.959373474121094, "step": 180} +{"info/global_step": 181, "train_info/time_within_train_step": 24.802241563796997, "step": 181} +{"train_info/time_between_train_steps": 0.004847049713134766, "step": 181} +{"info/global_step": 182, "train_info/time_within_train_step": 24.812416315078735, "step": 182} +{"train_info/time_between_train_steps": 0.004914045333862305, "step": 182} +{"info/global_step": 183, "train_info/time_within_train_step": 24.790790796279907, "step": 183} +{"train_info/time_between_train_steps": 0.005154132843017578, "step": 183} +{"info/global_step": 184, "train_info/time_within_train_step": 24.864832878112793, "step": 184} +{"train_info/time_between_train_steps": 0.005325794219970703, "step": 184} +{"info/global_step": 185, "train_info/time_within_train_step": 24.828245401382446, "step": 185} +{"train_info/time_between_train_steps": 0.00516819953918457, "step": 185} +{"info/global_step": 186, "train_info/time_within_train_step": 24.86606001853943, "step": 186} +{"train_info/time_between_train_steps": 0.0051174163818359375, "step": 186} +{"info/global_step": 187, "train_info/time_within_train_step": 24.734874963760376, "step": 187} +{"train_info/time_between_train_steps": 0.005418300628662109, "step": 187} +{"info/global_step": 188, "train_info/time_within_train_step": 24.878268241882324, "step": 188} +{"train_info/time_between_train_steps": 0.005109071731567383, "step": 188} +{"info/global_step": 189, "train_info/time_within_train_step": 24.72942018508911, "step": 189} +{"train_info/time_between_train_steps": 0.005220651626586914, "step": 189} +{"info/global_step": 190, "train_info/time_within_train_step": 24.91965627670288, "step": 190} +{"train_info/time_between_train_steps": 0.005257368087768555, "step": 190} +{"info/global_step": 191, "train_info/time_within_train_step": 24.730852127075195, "step": 191} +{"train_info/time_between_train_steps": 0.010323524475097656, "step": 191} +{"info/global_step": 192, "train_info/time_within_train_step": 24.87787914276123, "step": 192} +{"train_info/time_between_train_steps": 0.010576725006103516, "step": 192} +{"info/global_step": 193, "train_info/time_within_train_step": 24.82096290588379, "step": 193} +{"train_info/time_between_train_steps": 0.005602836608886719, "step": 193} +{"info/global_step": 194, "train_info/time_within_train_step": 25.053857564926147, "step": 194} +{"train_info/time_between_train_steps": 0.00508880615234375, "step": 194} +{"info/global_step": 195, "train_info/time_within_train_step": 24.73471474647522, "step": 195} +{"train_info/time_between_train_steps": 0.010390996932983398, "step": 195} +{"info/global_step": 196, "train_info/time_within_train_step": 24.854599237442017, "step": 196} +{"train_info/time_between_train_steps": 0.010468721389770508, "step": 196} +{"info/global_step": 197, "train_info/time_within_train_step": 24.746519327163696, "step": 197} +{"train_info/time_between_train_steps": 0.026280641555786133, "step": 197} +{"info/global_step": 198, "train_info/time_within_train_step": 24.77324414253235, "step": 198} +{"train_info/time_between_train_steps": 0.005097627639770508, "step": 198} +{"info/global_step": 199, "train_info/time_within_train_step": 24.73156428337097, "step": 199} +{"train_info/time_between_train_steps": 0.005080699920654297, "step": 199} +{"info/global_step": 200, "train_info/time_within_train_step": 24.790687084197998, "step": 200} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740927692, "_runtime": 5198}, "step": 200} +{"logs": {"train/loss": 4.0525, "train/learning_rate": 0.0005555555555555556, "train/epoch": 5.02, "_timestamp": 1740927692, "_runtime": 5198}, "step": 200} +{"train_info/time_between_train_steps": 102.34069657325745, "step": 200} +{"info/global_step": 201, "train_info/time_within_train_step": 27.1277174949646, "step": 201} +{"train_info/time_between_train_steps": 0.005107879638671875, "step": 201} +{"info/global_step": 202, "train_info/time_within_train_step": 24.72110629081726, "step": 202} +{"train_info/time_between_train_steps": 0.005034923553466797, "step": 202} +{"info/global_step": 203, "train_info/time_within_train_step": 24.73507523536682, "step": 203} +{"train_info/time_between_train_steps": 0.005190372467041016, "step": 203} +{"info/global_step": 204, "train_info/time_within_train_step": 24.726104021072388, "step": 204} +{"train_info/time_between_train_steps": 0.005499839782714844, "step": 204} +{"info/global_step": 205, "train_info/time_within_train_step": 24.723045110702515, "step": 205} +{"train_info/time_between_train_steps": 0.0050983428955078125, "step": 205} +{"info/global_step": 206, "train_info/time_within_train_step": 24.748970985412598, "step": 206} +{"train_info/time_between_train_steps": 0.005318641662597656, "step": 206} +{"info/global_step": 207, "train_info/time_within_train_step": 24.73081922531128, "step": 207} +{"train_info/time_between_train_steps": 0.005182743072509766, "step": 207} +{"info/global_step": 208, "train_info/time_within_train_step": 24.7996346950531, "step": 208} +{"train_info/time_between_train_steps": 0.0050618648529052734, "step": 208} +{"info/global_step": 209, "train_info/time_within_train_step": 24.72443175315857, "step": 209} +{"train_info/time_between_train_steps": 0.00520014762878418, "step": 209} +{"info/global_step": 210, "train_info/time_within_train_step": 24.731858491897583, "step": 210} +{"train_info/time_between_train_steps": 0.005059003829956055, "step": 210} +{"info/global_step": 211, "train_info/time_within_train_step": 24.797956943511963, "step": 211} +{"train_info/time_between_train_steps": 0.005200624465942383, "step": 211} +{"info/global_step": 212, "train_info/time_within_train_step": 24.736427068710327, "step": 212} +{"train_info/time_between_train_steps": 0.005197048187255859, "step": 212} +{"info/global_step": 213, "train_info/time_within_train_step": 24.817715644836426, "step": 213} +{"train_info/time_between_train_steps": 0.005075931549072266, "step": 213} +{"info/global_step": 214, "train_info/time_within_train_step": 24.742056369781494, "step": 214} +{"train_info/time_between_train_steps": 0.0055119991302490234, "step": 214} +{"info/global_step": 215, "train_info/time_within_train_step": 24.8316593170166, "step": 215} +{"train_info/time_between_train_steps": 0.005975246429443359, "step": 215} +{"info/global_step": 216, "train_info/time_within_train_step": 24.7652325630188, "step": 216} +{"train_info/time_between_train_steps": 0.0058841705322265625, "step": 216} +{"train_info/time_between_train_steps": 16.85298800468445, "step": 216} +{"info/global_step": 217, "train_info/time_within_train_step": 24.870301723480225, "step": 217} +{"train_info/time_between_train_steps": 0.005045652389526367, "step": 217} +{"info/global_step": 218, "train_info/time_within_train_step": 24.85322666168213, "step": 218} +{"train_info/time_between_train_steps": 0.005146026611328125, "step": 218} +{"info/global_step": 219, "train_info/time_within_train_step": 24.737618684768677, "step": 219} +{"train_info/time_between_train_steps": 0.00500941276550293, "step": 219} +{"info/global_step": 220, "train_info/time_within_train_step": 24.93130874633789, "step": 220} +{"train_info/time_between_train_steps": 0.005086183547973633, "step": 220} +{"info/global_step": 221, "train_info/time_within_train_step": 24.739675045013428, "step": 221} +{"train_info/time_between_train_steps": 0.005500316619873047, "step": 221} +{"info/global_step": 222, "train_info/time_within_train_step": 24.976125717163086, "step": 222} +{"train_info/time_between_train_steps": 0.005334377288818359, "step": 222} +{"info/global_step": 223, "train_info/time_within_train_step": 24.740834712982178, "step": 223} +{"train_info/time_between_train_steps": 0.005415916442871094, "step": 223} +{"info/global_step": 224, "train_info/time_within_train_step": 24.918046236038208, "step": 224} +{"train_info/time_between_train_steps": 0.005286455154418945, "step": 224} +{"info/global_step": 225, "train_info/time_within_train_step": 24.736266136169434, "step": 225} +{"train_info/time_between_train_steps": 0.005249500274658203, "step": 225} +{"info/global_step": 226, "train_info/time_within_train_step": 24.856426239013672, "step": 226} +{"train_info/time_between_train_steps": 0.005066871643066406, "step": 226} +{"info/global_step": 227, "train_info/time_within_train_step": 24.7877357006073, "step": 227} +{"train_info/time_between_train_steps": 0.005651712417602539, "step": 227} +{"info/global_step": 228, "train_info/time_within_train_step": 24.88340139389038, "step": 228} +{"train_info/time_between_train_steps": 0.005229473114013672, "step": 228} +{"info/global_step": 229, "train_info/time_within_train_step": 24.819744110107422, "step": 229} +{"train_info/time_between_train_steps": 0.005410671234130859, "step": 229} +{"info/global_step": 230, "train_info/time_within_train_step": 24.90399193763733, "step": 230} +{"train_info/time_between_train_steps": 0.005293846130371094, "step": 230} +{"info/global_step": 231, "train_info/time_within_train_step": 24.730159759521484, "step": 231} +{"train_info/time_between_train_steps": 0.005029439926147461, "step": 231} +{"info/global_step": 232, "train_info/time_within_train_step": 24.92303156852722, "step": 232} +{"train_info/time_between_train_steps": 0.005479097366333008, "step": 232} +{"info/global_step": 233, "train_info/time_within_train_step": 24.750574350357056, "step": 233} +{"train_info/time_between_train_steps": 0.0267181396484375, "step": 233} +{"info/global_step": 234, "train_info/time_within_train_step": 24.723927974700928, "step": 234} +{"train_info/time_between_train_steps": 0.004998207092285156, "step": 234} +{"info/global_step": 235, "train_info/time_within_train_step": 24.720896244049072, "step": 235} +{"train_info/time_between_train_steps": 0.005057334899902344, "step": 235} +{"info/global_step": 236, "train_info/time_within_train_step": 24.723381519317627, "step": 236} +{"train_info/time_between_train_steps": 0.005166769027709961, "step": 236} +{"info/global_step": 237, "train_info/time_within_train_step": 24.728676319122314, "step": 237} +{"train_info/time_between_train_steps": 0.005027055740356445, "step": 237} +{"info/global_step": 238, "train_info/time_within_train_step": 24.73635196685791, "step": 238} +{"train_info/time_between_train_steps": 0.005518913269042969, "step": 238} +{"info/global_step": 239, "train_info/time_within_train_step": 24.79780864715576, "step": 239} +{"train_info/time_between_train_steps": 0.0052509307861328125, "step": 239} +{"info/global_step": 240, "train_info/time_within_train_step": 24.72124981880188, "step": 240} +{"train_info/time_between_train_steps": 0.005132436752319336, "step": 240} +{"info/global_step": 241, "train_info/time_within_train_step": 24.80945372581482, "step": 241} +{"train_info/time_between_train_steps": 0.005223989486694336, "step": 241} +{"info/global_step": 242, "train_info/time_within_train_step": 24.722115516662598, "step": 242} +{"train_info/time_between_train_steps": 0.005063295364379883, "step": 242} +{"info/global_step": 243, "train_info/time_within_train_step": 24.730977296829224, "step": 243} +{"train_info/time_between_train_steps": 0.010015249252319336, "step": 243} +{"info/global_step": 244, "train_info/time_within_train_step": 24.807080507278442, "step": 244} +{"train_info/time_between_train_steps": 0.005169391632080078, "step": 244} +{"info/global_step": 245, "train_info/time_within_train_step": 24.733622312545776, "step": 245} +{"train_info/time_between_train_steps": 0.009973287582397461, "step": 245} +{"info/global_step": 246, "train_info/time_within_train_step": 24.800659656524658, "step": 246} +{"train_info/time_between_train_steps": 0.00532984733581543, "step": 246} +{"info/global_step": 247, "train_info/time_within_train_step": 24.82283115386963, "step": 247} +{"train_info/time_between_train_steps": 0.00552058219909668, "step": 247} +{"info/global_step": 248, "train_info/time_within_train_step": 24.757455587387085, "step": 248} +{"train_info/time_between_train_steps": 0.014440774917602539, "step": 248} +{"info/global_step": 249, "train_info/time_within_train_step": 25.168107509613037, "step": 249} +{"train_info/time_between_train_steps": 0.009363889694213867, "step": 249} +{"info/global_step": 250, "train_info/time_within_train_step": 25.133811235427856, "step": 250} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740929055, "_runtime": 6561}, "step": 250} +{"logs": {"train/loss": 3.9393, "train/learning_rate": 0.0005277777777777777, "train/epoch": 6.03, "_timestamp": 1740929055, "_runtime": 6561}, "step": 250} +{"train_info/time_between_train_steps": 0.038186073303222656, "step": 250} +{"info/global_step": 251, "train_info/time_within_train_step": 25.140042304992676, "step": 251} +{"train_info/time_between_train_steps": 0.010172605514526367, "step": 251} +{"info/global_step": 252, "train_info/time_within_train_step": 24.791760444641113, "step": 252} +{"train_info/time_between_train_steps": 0.006204366683959961, "step": 252} +{"train_info/time_between_train_steps": 16.728431940078735, "step": 252} +{"info/global_step": 253, "train_info/time_within_train_step": 24.88121509552002, "step": 253} +{"train_info/time_between_train_steps": 0.009430170059204102, "step": 253} +{"info/global_step": 254, "train_info/time_within_train_step": 24.845699787139893, "step": 254} +{"train_info/time_between_train_steps": 0.0049626827239990234, "step": 254} +{"info/global_step": 255, "train_info/time_within_train_step": 24.79240393638611, "step": 255} +{"train_info/time_between_train_steps": 0.0054018497467041016, "step": 255} +{"info/global_step": 256, "train_info/time_within_train_step": 24.897040367126465, "step": 256} +{"train_info/time_between_train_steps": 0.0053713321685791016, "step": 256} +{"info/global_step": 257, "train_info/time_within_train_step": 24.738258361816406, "step": 257} +{"train_info/time_between_train_steps": 0.005297183990478516, "step": 257} +{"info/global_step": 258, "train_info/time_within_train_step": 24.94675374031067, "step": 258} +{"train_info/time_between_train_steps": 0.005279541015625, "step": 258} +{"info/global_step": 259, "train_info/time_within_train_step": 24.82959008216858, "step": 259} +{"train_info/time_between_train_steps": 0.005240678787231445, "step": 259} +{"info/global_step": 260, "train_info/time_within_train_step": 24.91658329963684, "step": 260} +{"train_info/time_between_train_steps": 0.005113124847412109, "step": 260} +{"info/global_step": 261, "train_info/time_within_train_step": 24.793166875839233, "step": 261} +{"train_info/time_between_train_steps": 0.00547337532043457, "step": 261} +{"info/global_step": 262, "train_info/time_within_train_step": 25.01379895210266, "step": 262} +{"train_info/time_between_train_steps": 0.005219936370849609, "step": 262} +{"info/global_step": 263, "train_info/time_within_train_step": 24.76833963394165, "step": 263} +{"train_info/time_between_train_steps": 0.010402679443359375, "step": 263} +{"info/global_step": 264, "train_info/time_within_train_step": 24.90929889678955, "step": 264} +{"train_info/time_between_train_steps": 0.0053751468658447266, "step": 264} +{"info/global_step": 265, "train_info/time_within_train_step": 24.73444890975952, "step": 265} +{"train_info/time_between_train_steps": 0.0052814483642578125, "step": 265} +{"info/global_step": 266, "train_info/time_within_train_step": 24.84964394569397, "step": 266} +{"train_info/time_between_train_steps": 0.005189418792724609, "step": 266} +{"info/global_step": 267, "train_info/time_within_train_step": 24.7484610080719, "step": 267} +{"train_info/time_between_train_steps": 0.010169029235839844, "step": 267} +{"info/global_step": 268, "train_info/time_within_train_step": 24.84161138534546, "step": 268} +{"train_info/time_between_train_steps": 0.005262136459350586, "step": 268} +{"info/global_step": 269, "train_info/time_within_train_step": 24.7444167137146, "step": 269} +{"train_info/time_between_train_steps": 0.03598499298095703, "step": 269} +{"info/global_step": 270, "train_info/time_within_train_step": 24.739058256149292, "step": 270} +{"train_info/time_between_train_steps": 0.005121469497680664, "step": 270} +{"info/global_step": 271, "train_info/time_within_train_step": 24.73173761367798, "step": 271} +{"train_info/time_between_train_steps": 0.005349874496459961, "step": 271} +{"info/global_step": 272, "train_info/time_within_train_step": 24.753180265426636, "step": 272} +{"train_info/time_between_train_steps": 0.0050165653228759766, "step": 272} +{"info/global_step": 273, "train_info/time_within_train_step": 24.7237765789032, "step": 273} +{"train_info/time_between_train_steps": 0.0051767826080322266, "step": 273} +{"info/global_step": 274, "train_info/time_within_train_step": 24.72403907775879, "step": 274} +{"train_info/time_between_train_steps": 0.009924650192260742, "step": 274} +{"info/global_step": 275, "train_info/time_within_train_step": 24.785072803497314, "step": 275} +{"train_info/time_between_train_steps": 0.005252838134765625, "step": 275} +{"info/global_step": 276, "train_info/time_within_train_step": 24.723716497421265, "step": 276} +{"train_info/time_between_train_steps": 0.005056858062744141, "step": 276} +{"info/global_step": 277, "train_info/time_within_train_step": 24.735167026519775, "step": 277} +{"train_info/time_between_train_steps": 0.005158424377441406, "step": 277} +{"info/global_step": 278, "train_info/time_within_train_step": 24.82388687133789, "step": 278} +{"train_info/time_between_train_steps": 0.005160093307495117, "step": 278} +{"info/global_step": 279, "train_info/time_within_train_step": 24.726300954818726, "step": 279} +{"train_info/time_between_train_steps": 0.0050961971282958984, "step": 279} +{"info/global_step": 280, "train_info/time_within_train_step": 24.73499870300293, "step": 280} +{"train_info/time_between_train_steps": 0.005120992660522461, "step": 280} +{"info/global_step": 281, "train_info/time_within_train_step": 24.74143385887146, "step": 281} +{"train_info/time_between_train_steps": 0.005211353302001953, "step": 281} +{"info/global_step": 282, "train_info/time_within_train_step": 24.802804708480835, "step": 282} +{"train_info/time_between_train_steps": 0.0057027339935302734, "step": 282} +{"info/global_step": 283, "train_info/time_within_train_step": 24.746825695037842, "step": 283} +{"train_info/time_between_train_steps": 0.005269050598144531, "step": 283} +{"info/global_step": 284, "train_info/time_within_train_step": 24.80814242362976, "step": 284} +{"train_info/time_between_train_steps": 0.0052089691162109375, "step": 284} +{"info/global_step": 285, "train_info/time_within_train_step": 24.740100383758545, "step": 285} +{"train_info/time_between_train_steps": 0.005481243133544922, "step": 285} +{"info/global_step": 286, "train_info/time_within_train_step": 24.756144046783447, "step": 286} +{"train_info/time_between_train_steps": 0.005462169647216797, "step": 286} +{"info/global_step": 287, "train_info/time_within_train_step": 24.817054986953735, "step": 287} +{"train_info/time_between_train_steps": 0.005580902099609375, "step": 287} +{"info/global_step": 288, "train_info/time_within_train_step": 24.77066159248352, "step": 288} +{"train_info/time_between_train_steps": 0.005928754806518555, "step": 288} +{"train_info/time_between_train_steps": 16.896342754364014, "step": 288} +{"info/global_step": 289, "train_info/time_within_train_step": 24.735608100891113, "step": 289} +{"train_info/time_between_train_steps": 0.004861593246459961, "step": 289} +{"info/global_step": 290, "train_info/time_within_train_step": 24.825337171554565, "step": 290} +{"train_info/time_between_train_steps": 0.004916667938232422, "step": 290} +{"info/global_step": 291, "train_info/time_within_train_step": 24.777209997177124, "step": 291} +{"train_info/time_between_train_steps": 0.0052678585052490234, "step": 291} +{"info/global_step": 292, "train_info/time_within_train_step": 24.89603304862976, "step": 292} +{"train_info/time_between_train_steps": 0.005411863327026367, "step": 292} +{"info/global_step": 293, "train_info/time_within_train_step": 25.00989580154419, "step": 293} +{"train_info/time_between_train_steps": 0.005169868469238281, "step": 293} +{"info/global_step": 294, "train_info/time_within_train_step": 24.89289927482605, "step": 294} +{"train_info/time_between_train_steps": 0.005559444427490234, "step": 294} +{"info/global_step": 295, "train_info/time_within_train_step": 24.755390167236328, "step": 295} +{"train_info/time_between_train_steps": 0.005240917205810547, "step": 295} +{"info/global_step": 296, "train_info/time_within_train_step": 24.91875672340393, "step": 296} +{"train_info/time_between_train_steps": 0.005133152008056641, "step": 296} +{"info/global_step": 297, "train_info/time_within_train_step": 24.837831258773804, "step": 297} +{"train_info/time_between_train_steps": 0.005370140075683594, "step": 297} +{"info/global_step": 298, "train_info/time_within_train_step": 24.963534116744995, "step": 298} +{"train_info/time_between_train_steps": 0.0052204132080078125, "step": 298} +{"info/global_step": 299, "train_info/time_within_train_step": 24.727718591690063, "step": 299} +{"train_info/time_between_train_steps": 0.0052547454833984375, "step": 299} +{"info/global_step": 300, "train_info/time_within_train_step": 24.883044719696045, "step": 300} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740930331, "_runtime": 7837}, "step": 300} +{"logs": {"train/loss": 3.8341, "train/learning_rate": 0.0005, "train/epoch": 8.01, "_timestamp": 1740930331, "_runtime": 7837}, "step": 300} +{"train_info/time_between_train_steps": 56.46038818359375, "step": 300} +{"info/global_step": 301, "train_info/time_within_train_step": 26.147300720214844, "step": 301} +{"train_info/time_between_train_steps": 0.005731821060180664, "step": 301} +{"info/global_step": 302, "train_info/time_within_train_step": 24.97404193878174, "step": 302} +{"train_info/time_between_train_steps": 0.005220174789428711, "step": 302} +{"info/global_step": 303, "train_info/time_within_train_step": 24.811593770980835, "step": 303} +{"train_info/time_between_train_steps": 0.005462169647216797, "step": 303} +{"info/global_step": 304, "train_info/time_within_train_step": 24.835230112075806, "step": 304} +{"train_info/time_between_train_steps": 0.005326271057128906, "step": 304} +{"info/global_step": 305, "train_info/time_within_train_step": 24.781128406524658, "step": 305} +{"train_info/time_between_train_steps": 0.018871545791625977, "step": 305} +{"info/global_step": 306, "train_info/time_within_train_step": 24.722468614578247, "step": 306} +{"train_info/time_between_train_steps": 0.005204677581787109, "step": 306} +{"info/global_step": 307, "train_info/time_within_train_step": 24.737425088882446, "step": 307} +{"train_info/time_between_train_steps": 0.004956483840942383, "step": 307} +{"info/global_step": 308, "train_info/time_within_train_step": 24.79779076576233, "step": 308} +{"train_info/time_between_train_steps": 0.0050542354583740234, "step": 308} +{"info/global_step": 309, "train_info/time_within_train_step": 24.815036058425903, "step": 309} +{"train_info/time_between_train_steps": 0.005209684371948242, "step": 309} +{"info/global_step": 310, "train_info/time_within_train_step": 24.802499771118164, "step": 310} +{"train_info/time_between_train_steps": 0.010460138320922852, "step": 310} +{"info/global_step": 311, "train_info/time_within_train_step": 24.7206871509552, "step": 311} +{"train_info/time_between_train_steps": 0.010535717010498047, "step": 311} +{"info/global_step": 312, "train_info/time_within_train_step": 24.745713233947754, "step": 312} +{"train_info/time_between_train_steps": 0.005211353302001953, "step": 312} +{"info/global_step": 313, "train_info/time_within_train_step": 24.868510723114014, "step": 313} +{"train_info/time_between_train_steps": 0.005088329315185547, "step": 313} +{"info/global_step": 314, "train_info/time_within_train_step": 24.731578588485718, "step": 314} +{"train_info/time_between_train_steps": 0.005083322525024414, "step": 314} +{"info/global_step": 315, "train_info/time_within_train_step": 24.793038606643677, "step": 315} +{"train_info/time_between_train_steps": 0.005112171173095703, "step": 315} +{"info/global_step": 316, "train_info/time_within_train_step": 24.73309636116028, "step": 316} +{"train_info/time_between_train_steps": 0.005027055740356445, "step": 316} +{"info/global_step": 317, "train_info/time_within_train_step": 24.73591923713684, "step": 317} +{"train_info/time_between_train_steps": 0.005169868469238281, "step": 317} +{"info/global_step": 318, "train_info/time_within_train_step": 24.75739288330078, "step": 318} +{"train_info/time_between_train_steps": 0.005083560943603516, "step": 318} +{"info/global_step": 319, "train_info/time_within_train_step": 24.737125635147095, "step": 319} +{"train_info/time_between_train_steps": 0.005080461502075195, "step": 319} +{"info/global_step": 320, "train_info/time_within_train_step": 24.809813261032104, "step": 320} +{"train_info/time_between_train_steps": 0.0052874088287353516, "step": 320} +{"info/global_step": 321, "train_info/time_within_train_step": 24.724560260772705, "step": 321} +{"train_info/time_between_train_steps": 0.005213022232055664, "step": 321} +{"info/global_step": 322, "train_info/time_within_train_step": 24.824874877929688, "step": 322} +{"train_info/time_between_train_steps": 0.005658864974975586, "step": 322} +{"info/global_step": 323, "train_info/time_within_train_step": 24.74804711341858, "step": 323} +{"train_info/time_between_train_steps": 0.0056307315826416016, "step": 323} +{"info/global_step": 324, "train_info/time_within_train_step": 24.86906671524048, "step": 324} +{"train_info/time_between_train_steps": 0.006204843521118164, "step": 324} +{"train_info/time_between_train_steps": 17.00867223739624, "step": 324} +{"info/global_step": 325, "train_info/time_within_train_step": 24.82580018043518, "step": 325} +{"train_info/time_between_train_steps": 0.009577035903930664, "step": 325} +{"info/global_step": 326, "train_info/time_within_train_step": 24.899844646453857, "step": 326} +{"train_info/time_between_train_steps": 0.0048487186431884766, "step": 326} +{"info/global_step": 327, "train_info/time_within_train_step": 24.722715616226196, "step": 327} +{"train_info/time_between_train_steps": 0.00494694709777832, "step": 327} +{"info/global_step": 328, "train_info/time_within_train_step": 24.87808322906494, "step": 328} +{"train_info/time_between_train_steps": 0.005280971527099609, "step": 328} +{"info/global_step": 329, "train_info/time_within_train_step": 24.794576168060303, "step": 329} +{"train_info/time_between_train_steps": 0.005364656448364258, "step": 329} +{"info/global_step": 330, "train_info/time_within_train_step": 24.848591327667236, "step": 330} +{"train_info/time_between_train_steps": 0.005281686782836914, "step": 330} +{"info/global_step": 331, "train_info/time_within_train_step": 24.82166337966919, "step": 331} +{"train_info/time_between_train_steps": 0.00535273551940918, "step": 331} +{"info/global_step": 332, "train_info/time_within_train_step": 24.892122268676758, "step": 332} +{"train_info/time_between_train_steps": 0.005230426788330078, "step": 332} +{"info/global_step": 333, "train_info/time_within_train_step": 24.73811650276184, "step": 333} +{"train_info/time_between_train_steps": 0.005209684371948242, "step": 333} +{"info/global_step": 334, "train_info/time_within_train_step": 24.970476150512695, "step": 334} +{"train_info/time_between_train_steps": 0.0057544708251953125, "step": 334} +{"info/global_step": 335, "train_info/time_within_train_step": 24.73477339744568, "step": 335} +{"train_info/time_between_train_steps": 0.005242586135864258, "step": 335} +{"info/global_step": 336, "train_info/time_within_train_step": 24.859564542770386, "step": 336} +{"train_info/time_between_train_steps": 0.005817413330078125, "step": 336} +{"info/global_step": 337, "train_info/time_within_train_step": 24.756229877471924, "step": 337} +{"train_info/time_between_train_steps": 0.005587100982666016, "step": 337} +{"info/global_step": 338, "train_info/time_within_train_step": 24.86189317703247, "step": 338} +{"train_info/time_between_train_steps": 0.005200386047363281, "step": 338} +{"info/global_step": 339, "train_info/time_within_train_step": 24.920764207839966, "step": 339} +{"train_info/time_between_train_steps": 0.0073032379150390625, "step": 339} +{"info/global_step": 340, "train_info/time_within_train_step": 24.89452576637268, "step": 340} +{"train_info/time_between_train_steps": 0.005429744720458984, "step": 340} +{"info/global_step": 341, "train_info/time_within_train_step": 24.825685739517212, "step": 341} +{"train_info/time_between_train_steps": 0.025113582611083984, "step": 341} +{"info/global_step": 342, "train_info/time_within_train_step": 24.73707389831543, "step": 342} +{"train_info/time_between_train_steps": 0.0050966739654541016, "step": 342} +{"info/global_step": 343, "train_info/time_within_train_step": 24.75628399848938, "step": 343} +{"train_info/time_between_train_steps": 0.005154132843017578, "step": 343} +{"info/global_step": 344, "train_info/time_within_train_step": 24.80107855796814, "step": 344} +{"train_info/time_between_train_steps": 0.004986763000488281, "step": 344} +{"info/global_step": 345, "train_info/time_within_train_step": 24.743385076522827, "step": 345} +{"train_info/time_between_train_steps": 0.005173683166503906, "step": 345} +{"info/global_step": 346, "train_info/time_within_train_step": 24.797298908233643, "step": 346} +{"train_info/time_between_train_steps": 0.004972934722900391, "step": 346} +{"info/global_step": 347, "train_info/time_within_train_step": 24.73353862762451, "step": 347} +{"train_info/time_between_train_steps": 0.005124330520629883, "step": 347} +{"info/global_step": 348, "train_info/time_within_train_step": 24.80847144126892, "step": 348} +{"train_info/time_between_train_steps": 0.0052068233489990234, "step": 348} +{"info/global_step": 349, "train_info/time_within_train_step": 24.724456548690796, "step": 349} +{"train_info/time_between_train_steps": 0.005097389221191406, "step": 349} +{"info/global_step": 350, "train_info/time_within_train_step": 24.73743486404419, "step": 350} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740931648, "_runtime": 9154}, "step": 350} +{"logs": {"train/loss": 3.623, "train/learning_rate": 0.00047222222222222224, "train/epoch": 9.02, "_timestamp": 1740931648, "_runtime": 9154}, "step": 350} +{"train_info/time_between_train_steps": 0.026351213455200195, "step": 350} +{"info/global_step": 351, "train_info/time_within_train_step": 24.73479652404785, "step": 351} +{"train_info/time_between_train_steps": 0.005482673645019531, "step": 351} +{"info/global_step": 352, "train_info/time_within_train_step": 24.734480619430542, "step": 352} +{"train_info/time_between_train_steps": 0.005041360855102539, "step": 352} +{"info/global_step": 353, "train_info/time_within_train_step": 24.81874680519104, "step": 353} +{"train_info/time_between_train_steps": 0.005212545394897461, "step": 353} +{"info/global_step": 354, "train_info/time_within_train_step": 24.735382556915283, "step": 354} +{"train_info/time_between_train_steps": 0.014503002166748047, "step": 354} +{"info/global_step": 355, "train_info/time_within_train_step": 25.135013341903687, "step": 355} +{"train_info/time_between_train_steps": 0.014436006546020508, "step": 355} +{"info/global_step": 356, "train_info/time_within_train_step": 25.178359746932983, "step": 356} +{"train_info/time_between_train_steps": 0.005355358123779297, "step": 356} +{"info/global_step": 357, "train_info/time_within_train_step": 24.783535957336426, "step": 357} +{"train_info/time_between_train_steps": 0.005307435989379883, "step": 357} +{"info/global_step": 358, "train_info/time_within_train_step": 24.81478524208069, "step": 358} +{"train_info/time_between_train_steps": 0.00528264045715332, "step": 358} +{"info/global_step": 359, "train_info/time_within_train_step": 24.746484994888306, "step": 359} +{"train_info/time_between_train_steps": 0.005724191665649414, "step": 359} +{"info/global_step": 360, "train_info/time_within_train_step": 24.85522222518921, "step": 360} +{"train_info/time_between_train_steps": 0.0058786869049072266, "step": 360} +{"train_info/time_between_train_steps": 16.587454080581665, "step": 360} +{"info/global_step": 361, "train_info/time_within_train_step": 24.730239868164062, "step": 361} +{"train_info/time_between_train_steps": 0.005351066589355469, "step": 361} +{"info/global_step": 362, "train_info/time_within_train_step": 24.96659517288208, "step": 362} +{"train_info/time_between_train_steps": 0.005259513854980469, "step": 362} +{"info/global_step": 363, "train_info/time_within_train_step": 24.727178812026978, "step": 363} +{"train_info/time_between_train_steps": 0.0050296783447265625, "step": 363} +{"info/global_step": 364, "train_info/time_within_train_step": 24.88714838027954, "step": 364} +{"train_info/time_between_train_steps": 0.005299806594848633, "step": 364} +{"info/global_step": 365, "train_info/time_within_train_step": 24.806894302368164, "step": 365} +{"train_info/time_between_train_steps": 0.005416154861450195, "step": 365} +{"info/global_step": 366, "train_info/time_within_train_step": 24.85028839111328, "step": 366} +{"train_info/time_between_train_steps": 0.005186796188354492, "step": 366} +{"info/global_step": 367, "train_info/time_within_train_step": 24.809995651245117, "step": 367} +{"train_info/time_between_train_steps": 0.005263328552246094, "step": 367} +{"info/global_step": 368, "train_info/time_within_train_step": 24.890339851379395, "step": 368} +{"train_info/time_between_train_steps": 0.005268096923828125, "step": 368} +{"info/global_step": 369, "train_info/time_within_train_step": 24.797441005706787, "step": 369} +{"train_info/time_between_train_steps": 0.005176544189453125, "step": 369} +{"info/global_step": 370, "train_info/time_within_train_step": 25.0200674533844, "step": 370} +{"train_info/time_between_train_steps": 0.0054280757904052734, "step": 370} +{"info/global_step": 371, "train_info/time_within_train_step": 24.742383003234863, "step": 371} +{"train_info/time_between_train_steps": 0.005370616912841797, "step": 371} +{"info/global_step": 372, "train_info/time_within_train_step": 24.855720043182373, "step": 372} +{"train_info/time_between_train_steps": 0.0053255558013916016, "step": 372} +{"info/global_step": 373, "train_info/time_within_train_step": 24.751095056533813, "step": 373} +{"train_info/time_between_train_steps": 0.005652427673339844, "step": 373} +{"info/global_step": 374, "train_info/time_within_train_step": 24.972365140914917, "step": 374} +{"train_info/time_between_train_steps": 0.01014256477355957, "step": 374} +{"info/global_step": 375, "train_info/time_within_train_step": 24.751189708709717, "step": 375} +{"train_info/time_between_train_steps": 0.010612010955810547, "step": 375} +{"info/global_step": 376, "train_info/time_within_train_step": 24.91073441505432, "step": 376} +{"train_info/time_between_train_steps": 0.0065004825592041016, "step": 376} +{"info/global_step": 377, "train_info/time_within_train_step": 24.880767822265625, "step": 377} +{"train_info/time_between_train_steps": 0.02702474594116211, "step": 377} +{"info/global_step": 378, "train_info/time_within_train_step": 24.731865882873535, "step": 378} +{"train_info/time_between_train_steps": 0.005282163619995117, "step": 378} +{"info/global_step": 379, "train_info/time_within_train_step": 24.810805082321167, "step": 379} +{"train_info/time_between_train_steps": 0.005162239074707031, "step": 379} +{"info/global_step": 380, "train_info/time_within_train_step": 24.735485792160034, "step": 380} +{"train_info/time_between_train_steps": 0.005029201507568359, "step": 380} +{"info/global_step": 381, "train_info/time_within_train_step": 24.74913501739502, "step": 381} +{"train_info/time_between_train_steps": 0.005109071731567383, "step": 381} +{"info/global_step": 382, "train_info/time_within_train_step": 24.796094179153442, "step": 382} +{"train_info/time_between_train_steps": 0.005209445953369141, "step": 382} +{"info/global_step": 383, "train_info/time_within_train_step": 24.72769021987915, "step": 383} +{"train_info/time_between_train_steps": 0.0051250457763671875, "step": 383} +{"info/global_step": 384, "train_info/time_within_train_step": 24.802579164505005, "step": 384} +{"train_info/time_between_train_steps": 0.0052869319915771484, "step": 384} +{"info/global_step": 385, "train_info/time_within_train_step": 24.722601413726807, "step": 385} +{"train_info/time_between_train_steps": 0.0050525665283203125, "step": 385} +{"info/global_step": 386, "train_info/time_within_train_step": 24.861934900283813, "step": 386} +{"train_info/time_between_train_steps": 0.005020856857299805, "step": 386} +{"info/global_step": 387, "train_info/time_within_train_step": 24.729610919952393, "step": 387} +{"train_info/time_between_train_steps": 0.005282163619995117, "step": 387} +{"info/global_step": 388, "train_info/time_within_train_step": 24.742225885391235, "step": 388} +{"train_info/time_between_train_steps": 0.0051081180572509766, "step": 388} +{"info/global_step": 389, "train_info/time_within_train_step": 24.809773921966553, "step": 389} +{"train_info/time_between_train_steps": 0.005056142807006836, "step": 389} +{"info/global_step": 390, "train_info/time_within_train_step": 24.744343280792236, "step": 390} +{"train_info/time_between_train_steps": 0.005296468734741211, "step": 390} +{"info/global_step": 391, "train_info/time_within_train_step": 24.815504789352417, "step": 391} +{"train_info/time_between_train_steps": 0.005057334899902344, "step": 391} +{"info/global_step": 392, "train_info/time_within_train_step": 24.726878881454468, "step": 392} +{"train_info/time_between_train_steps": 0.005156517028808594, "step": 392} +{"info/global_step": 393, "train_info/time_within_train_step": 24.75304341316223, "step": 393} +{"train_info/time_between_train_steps": 0.005628347396850586, "step": 393} +{"info/global_step": 394, "train_info/time_within_train_step": 24.79934000968933, "step": 394} +{"train_info/time_between_train_steps": 0.0053293704986572266, "step": 394} +{"info/global_step": 395, "train_info/time_within_train_step": 24.766616821289062, "step": 395} +{"train_info/time_between_train_steps": 0.00563812255859375, "step": 395} +{"info/global_step": 396, "train_info/time_within_train_step": 24.84606695175171, "step": 396} +{"train_info/time_between_train_steps": 0.005694150924682617, "step": 396} +{"train_info/time_between_train_steps": 16.834796905517578, "step": 396} +{"info/global_step": 397, "train_info/time_within_train_step": 24.739379167556763, "step": 397} +{"train_info/time_between_train_steps": 0.005323648452758789, "step": 397} +{"info/global_step": 398, "train_info/time_within_train_step": 24.88626503944397, "step": 398} +{"train_info/time_between_train_steps": 0.005257844924926758, "step": 398} +{"info/global_step": 399, "train_info/time_within_train_step": 24.731362104415894, "step": 399} +{"train_info/time_between_train_steps": 0.0051229000091552734, "step": 399} +{"info/global_step": 400, "train_info/time_within_train_step": 24.852471590042114, "step": 400} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740932924, "_runtime": 10430}, "step": 400} +{"logs": {"train/loss": 3.4921, "train/learning_rate": 0.00044444444444444436, "train/epoch": 11.0, "_timestamp": 1740932924, "_runtime": 10430}, "step": 400} +{"train_info/time_between_train_steps": 37.63803672790527, "step": 400} +{"info/global_step": 401, "train_info/time_within_train_step": 26.945250749588013, "step": 401} +{"train_info/time_between_train_steps": 0.005704164505004883, "step": 401} +{"info/global_step": 402, "train_info/time_within_train_step": 24.89948296546936, "step": 402} +{"train_info/time_between_train_steps": 0.005767345428466797, "step": 402} +{"info/global_step": 403, "train_info/time_within_train_step": 26.859920740127563, "step": 403} +{"train_info/time_between_train_steps": 0.00555872917175293, "step": 403} +{"info/global_step": 404, "train_info/time_within_train_step": 24.87012028694153, "step": 404} +{"train_info/time_between_train_steps": 0.005181312561035156, "step": 404} +{"info/global_step": 405, "train_info/time_within_train_step": 24.73083472251892, "step": 405} +{"train_info/time_between_train_steps": 0.005179643630981445, "step": 405} +{"info/global_step": 406, "train_info/time_within_train_step": 24.97066569328308, "step": 406} +{"train_info/time_between_train_steps": 0.005171775817871094, "step": 406} +{"info/global_step": 407, "train_info/time_within_train_step": 24.75162172317505, "step": 407} +{"train_info/time_between_train_steps": 0.010406732559204102, "step": 407} +{"info/global_step": 408, "train_info/time_within_train_step": 24.873782634735107, "step": 408} +{"train_info/time_between_train_steps": 0.005335807800292969, "step": 408} +{"info/global_step": 409, "train_info/time_within_train_step": 24.739912271499634, "step": 409} +{"train_info/time_between_train_steps": 0.005601167678833008, "step": 409} +{"info/global_step": 410, "train_info/time_within_train_step": 24.89665412902832, "step": 410} +{"train_info/time_between_train_steps": 0.0052335262298583984, "step": 410} +{"info/global_step": 411, "train_info/time_within_train_step": 24.740046501159668, "step": 411} +{"train_info/time_between_train_steps": 0.0051784515380859375, "step": 411} +{"info/global_step": 412, "train_info/time_within_train_step": 25.102231979370117, "step": 412} +{"train_info/time_between_train_steps": 0.005320072174072266, "step": 412} +{"info/global_step": 413, "train_info/time_within_train_step": 24.814663410186768, "step": 413} +{"train_info/time_between_train_steps": 0.020537853240966797, "step": 413} +{"info/global_step": 414, "train_info/time_within_train_step": 24.713643312454224, "step": 414} +{"train_info/time_between_train_steps": 0.004947662353515625, "step": 414} +{"info/global_step": 415, "train_info/time_within_train_step": 24.804877042770386, "step": 415} +{"train_info/time_between_train_steps": 0.005179882049560547, "step": 415} +{"info/global_step": 416, "train_info/time_within_train_step": 24.811930894851685, "step": 416} +{"train_info/time_between_train_steps": 0.0101318359375, "step": 416} +{"info/global_step": 417, "train_info/time_within_train_step": 24.724965572357178, "step": 417} +{"train_info/time_between_train_steps": 0.005095005035400391, "step": 417} +{"info/global_step": 418, "train_info/time_within_train_step": 24.74247980117798, "step": 418} +{"train_info/time_between_train_steps": 0.005114316940307617, "step": 418} +{"info/global_step": 419, "train_info/time_within_train_step": 24.729809761047363, "step": 419} +{"train_info/time_between_train_steps": 0.005091428756713867, "step": 419} +{"info/global_step": 420, "train_info/time_within_train_step": 24.80586576461792, "step": 420} +{"train_info/time_between_train_steps": 0.005088090896606445, "step": 420} +{"info/global_step": 421, "train_info/time_within_train_step": 24.731606006622314, "step": 421} +{"train_info/time_between_train_steps": 0.005293130874633789, "step": 421} +{"info/global_step": 422, "train_info/time_within_train_step": 24.74069333076477, "step": 422} +{"train_info/time_between_train_steps": 0.005482912063598633, "step": 422} +{"info/global_step": 423, "train_info/time_within_train_step": 24.80952548980713, "step": 423} +{"train_info/time_between_train_steps": 0.0052073001861572266, "step": 423} +{"info/global_step": 424, "train_info/time_within_train_step": 24.736974954605103, "step": 424} +{"train_info/time_between_train_steps": 0.005151510238647461, "step": 424} +{"info/global_step": 425, "train_info/time_within_train_step": 24.803300380706787, "step": 425} +{"train_info/time_between_train_steps": 0.005043983459472656, "step": 425} +{"info/global_step": 426, "train_info/time_within_train_step": 24.727061986923218, "step": 426} +{"train_info/time_between_train_steps": 0.00526118278503418, "step": 426} +{"info/global_step": 427, "train_info/time_within_train_step": 24.755445957183838, "step": 427} +{"train_info/time_between_train_steps": 0.005058765411376953, "step": 427} +{"info/global_step": 428, "train_info/time_within_train_step": 24.79217028617859, "step": 428} +{"train_info/time_between_train_steps": 0.005175113677978516, "step": 428} +{"info/global_step": 429, "train_info/time_within_train_step": 24.757221937179565, "step": 429} +{"train_info/time_between_train_steps": 0.005432844161987305, "step": 429} +{"info/global_step": 430, "train_info/time_within_train_step": 24.78714656829834, "step": 430} +{"train_info/time_between_train_steps": 0.005361080169677734, "step": 430} +{"info/global_step": 431, "train_info/time_within_train_step": 24.752819061279297, "step": 431} +{"train_info/time_between_train_steps": 0.005812406539916992, "step": 431} +{"info/global_step": 432, "train_info/time_within_train_step": 25.16528081893921, "step": 432} +{"train_info/time_between_train_steps": 0.005866527557373047, "step": 432} +{"train_info/time_between_train_steps": 16.951308488845825, "step": 432} +{"info/global_step": 433, "train_info/time_within_train_step": 24.721514463424683, "step": 433} +{"train_info/time_between_train_steps": 0.004964351654052734, "step": 433} +{"info/global_step": 434, "train_info/time_within_train_step": 24.883320093154907, "step": 434} +{"train_info/time_between_train_steps": 0.005257129669189453, "step": 434} +{"info/global_step": 435, "train_info/time_within_train_step": 24.745346069335938, "step": 435} +{"train_info/time_between_train_steps": 0.0052564144134521484, "step": 435} +{"info/global_step": 436, "train_info/time_within_train_step": 24.90133237838745, "step": 436} +{"train_info/time_between_train_steps": 0.0052337646484375, "step": 436} +{"info/global_step": 437, "train_info/time_within_train_step": 24.82246732711792, "step": 437} +{"train_info/time_between_train_steps": 0.005223274230957031, "step": 437} +{"info/global_step": 438, "train_info/time_within_train_step": 24.878637552261353, "step": 438} +{"train_info/time_between_train_steps": 0.005334138870239258, "step": 438} +{"info/global_step": 439, "train_info/time_within_train_step": 24.81961679458618, "step": 439} +{"train_info/time_between_train_steps": 0.005179882049560547, "step": 439} +{"info/global_step": 440, "train_info/time_within_train_step": 24.91898775100708, "step": 440} +{"train_info/time_between_train_steps": 0.0053026676177978516, "step": 440} +{"info/global_step": 441, "train_info/time_within_train_step": 24.818694353103638, "step": 441} +{"train_info/time_between_train_steps": 0.005106687545776367, "step": 441} +{"info/global_step": 442, "train_info/time_within_train_step": 24.85245132446289, "step": 442} +{"train_info/time_between_train_steps": 0.007272958755493164, "step": 442} +{"info/global_step": 443, "train_info/time_within_train_step": 24.746449947357178, "step": 443} +{"train_info/time_between_train_steps": 0.005616426467895508, "step": 443} +{"info/global_step": 444, "train_info/time_within_train_step": 24.86448884010315, "step": 444} +{"train_info/time_between_train_steps": 0.005262136459350586, "step": 444} +{"info/global_step": 445, "train_info/time_within_train_step": 24.73224925994873, "step": 445} +{"train_info/time_between_train_steps": 0.005419254302978516, "step": 445} +{"info/global_step": 446, "train_info/time_within_train_step": 24.992104053497314, "step": 446} +{"train_info/time_between_train_steps": 0.005208253860473633, "step": 446} +{"info/global_step": 447, "train_info/time_within_train_step": 24.81624484062195, "step": 447} +{"train_info/time_between_train_steps": 0.005053520202636719, "step": 447} +{"info/global_step": 448, "train_info/time_within_train_step": 24.847280025482178, "step": 448} +{"train_info/time_between_train_steps": 0.005410432815551758, "step": 448} +{"info/global_step": 449, "train_info/time_within_train_step": 24.78226089477539, "step": 449} +{"train_info/time_between_train_steps": 0.022909164428710938, "step": 449} +{"info/global_step": 450, "train_info/time_within_train_step": 24.724358558654785, "step": 450} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740934227, "_runtime": 11733}, "step": 450} +{"logs": {"train/loss": 3.3277, "train/learning_rate": 0.00041666666666666664, "train/epoch": 12.02, "_timestamp": 1740934227, "_runtime": 11733}, "step": 450} +{"train_info/time_between_train_steps": 0.026296615600585938, "step": 450} +{"info/global_step": 451, "train_info/time_within_train_step": 24.79570484161377, "step": 451} +{"train_info/time_between_train_steps": 0.005065441131591797, "step": 451} +{"info/global_step": 452, "train_info/time_within_train_step": 24.72874689102173, "step": 452} +{"train_info/time_between_train_steps": 0.0050699710845947266, "step": 452} +{"info/global_step": 453, "train_info/time_within_train_step": 24.76698064804077, "step": 453} +{"train_info/time_between_train_steps": 0.005017757415771484, "step": 453} +{"info/global_step": 454, "train_info/time_within_train_step": 24.722646713256836, "step": 454} +{"train_info/time_between_train_steps": 0.0050547122955322266, "step": 454} +{"info/global_step": 455, "train_info/time_within_train_step": 24.734500885009766, "step": 455} +{"train_info/time_between_train_steps": 0.0053021907806396484, "step": 455} +{"info/global_step": 456, "train_info/time_within_train_step": 24.79694366455078, "step": 456} +{"train_info/time_between_train_steps": 0.005101442337036133, "step": 456} +{"info/global_step": 457, "train_info/time_within_train_step": 24.736043453216553, "step": 457} +{"train_info/time_between_train_steps": 0.005322933197021484, "step": 457} +{"info/global_step": 458, "train_info/time_within_train_step": 24.804399251937866, "step": 458} +{"train_info/time_between_train_steps": 0.00503230094909668, "step": 458} +{"info/global_step": 459, "train_info/time_within_train_step": 24.73420476913452, "step": 459} +{"train_info/time_between_train_steps": 0.005110979080200195, "step": 459} +{"info/global_step": 460, "train_info/time_within_train_step": 24.74098253250122, "step": 460} +{"train_info/time_between_train_steps": 0.005178689956665039, "step": 460} +{"info/global_step": 461, "train_info/time_within_train_step": 24.795833110809326, "step": 461} +{"train_info/time_between_train_steps": 0.004946231842041016, "step": 461} +{"info/global_step": 462, "train_info/time_within_train_step": 24.735759496688843, "step": 462} +{"train_info/time_between_train_steps": 0.005247354507446289, "step": 462} +{"info/global_step": 463, "train_info/time_within_train_step": 24.839438676834106, "step": 463} +{"train_info/time_between_train_steps": 0.005502223968505859, "step": 463} +{"info/global_step": 464, "train_info/time_within_train_step": 24.728047609329224, "step": 464} +{"train_info/time_between_train_steps": 0.005022287368774414, "step": 464} +{"info/global_step": 465, "train_info/time_within_train_step": 24.749754428863525, "step": 465} +{"train_info/time_between_train_steps": 0.005279541015625, "step": 465} +{"info/global_step": 466, "train_info/time_within_train_step": 24.804122924804688, "step": 466} +{"train_info/time_between_train_steps": 0.00542759895324707, "step": 466} +{"info/global_step": 467, "train_info/time_within_train_step": 24.754740476608276, "step": 467} +{"train_info/time_between_train_steps": 0.005530118942260742, "step": 467} +{"info/global_step": 468, "train_info/time_within_train_step": 24.842944383621216, "step": 468} +{"train_info/time_between_train_steps": 0.0060956478118896484, "step": 468} +{"train_info/time_between_train_steps": 16.612773895263672, "step": 468} +{"info/global_step": 469, "train_info/time_within_train_step": 24.730278730392456, "step": 469} +{"train_info/time_between_train_steps": 0.004971504211425781, "step": 469} +{"info/global_step": 470, "train_info/time_within_train_step": 24.954940795898438, "step": 470} +{"train_info/time_between_train_steps": 0.0052716732025146484, "step": 470} +{"info/global_step": 471, "train_info/time_within_train_step": 24.747838020324707, "step": 471} +{"train_info/time_between_train_steps": 0.007727861404418945, "step": 471} +{"info/global_step": 472, "train_info/time_within_train_step": 24.86815571784973, "step": 472} +{"train_info/time_between_train_steps": 0.005320072174072266, "step": 472} +{"info/global_step": 473, "train_info/time_within_train_step": 24.757800579071045, "step": 473} +{"train_info/time_between_train_steps": 0.005635738372802734, "step": 473} +{"info/global_step": 474, "train_info/time_within_train_step": 24.904314517974854, "step": 474} +{"train_info/time_between_train_steps": 0.01039266586303711, "step": 474} +{"info/global_step": 475, "train_info/time_within_train_step": 24.74137854576111, "step": 475} +{"train_info/time_between_train_steps": 0.010290384292602539, "step": 475} +{"info/global_step": 476, "train_info/time_within_train_step": 24.930947065353394, "step": 476} +{"train_info/time_between_train_steps": 0.016150712966918945, "step": 476} +{"info/global_step": 477, "train_info/time_within_train_step": 24.74385118484497, "step": 477} +{"train_info/time_between_train_steps": 0.010383367538452148, "step": 477} +{"info/global_step": 478, "train_info/time_within_train_step": 24.95102858543396, "step": 478} +{"train_info/time_between_train_steps": 0.005010843276977539, "step": 478} +{"info/global_step": 479, "train_info/time_within_train_step": 24.85144329071045, "step": 479} +{"train_info/time_between_train_steps": 0.010761022567749023, "step": 479} +{"info/global_step": 480, "train_info/time_within_train_step": 24.896334648132324, "step": 480} +{"train_info/time_between_train_steps": 0.010291099548339844, "step": 480} +{"info/global_step": 481, "train_info/time_within_train_step": 24.748008489608765, "step": 481} +{"train_info/time_between_train_steps": 0.00543665885925293, "step": 481} +{"info/global_step": 482, "train_info/time_within_train_step": 24.996342182159424, "step": 482} +{"train_info/time_between_train_steps": 0.005334377288818359, "step": 482} +{"info/global_step": 483, "train_info/time_within_train_step": 24.73723292350769, "step": 483} +{"train_info/time_between_train_steps": 0.005112409591674805, "step": 483} +{"info/global_step": 484, "train_info/time_within_train_step": 24.8426456451416, "step": 484} +{"train_info/time_between_train_steps": 0.00541996955871582, "step": 484} +{"info/global_step": 485, "train_info/time_within_train_step": 24.75457787513733, "step": 485} +{"train_info/time_between_train_steps": 0.03052067756652832, "step": 485} +{"info/global_step": 486, "train_info/time_within_train_step": 24.72637939453125, "step": 486} +{"train_info/time_between_train_steps": 0.005030632019042969, "step": 486} +{"info/global_step": 487, "train_info/time_within_train_step": 24.80349040031433, "step": 487} +{"train_info/time_between_train_steps": 0.005098104476928711, "step": 487} +{"info/global_step": 488, "train_info/time_within_train_step": 24.73416829109192, "step": 488} +{"train_info/time_between_train_steps": 0.005116701126098633, "step": 488} +{"info/global_step": 489, "train_info/time_within_train_step": 24.806518077850342, "step": 489} +{"train_info/time_between_train_steps": 0.005059719085693359, "step": 489} +{"info/global_step": 490, "train_info/time_within_train_step": 24.731812238693237, "step": 490} +{"train_info/time_between_train_steps": 0.005155801773071289, "step": 490} +{"info/global_step": 491, "train_info/time_within_train_step": 24.759082078933716, "step": 491} +{"train_info/time_between_train_steps": 0.005138397216796875, "step": 491} +{"info/global_step": 492, "train_info/time_within_train_step": 24.78634023666382, "step": 492} +{"train_info/time_between_train_steps": 0.005155324935913086, "step": 492} +{"info/global_step": 493, "train_info/time_within_train_step": 24.838552236557007, "step": 493} +{"train_info/time_between_train_steps": 0.005358219146728516, "step": 493} +{"info/global_step": 494, "train_info/time_within_train_step": 24.756309270858765, "step": 494} +{"train_info/time_between_train_steps": 0.005115985870361328, "step": 494} +{"info/global_step": 495, "train_info/time_within_train_step": 24.727153778076172, "step": 495} +{"train_info/time_between_train_steps": 0.005097150802612305, "step": 495} +{"info/global_step": 496, "train_info/time_within_train_step": 24.81155228614807, "step": 496} +{"train_info/time_between_train_steps": 0.005320549011230469, "step": 496} +{"info/global_step": 497, "train_info/time_within_train_step": 24.73166298866272, "step": 497} +{"train_info/time_between_train_steps": 0.005060434341430664, "step": 497} +{"info/global_step": 498, "train_info/time_within_train_step": 24.746105194091797, "step": 498} +{"train_info/time_between_train_steps": 0.005072832107543945, "step": 498} +{"info/global_step": 499, "train_info/time_within_train_step": 24.808163166046143, "step": 499} +{"train_info/time_between_train_steps": 0.0052797794342041016, "step": 499} +{"info/global_step": 500, "train_info/time_within_train_step": 24.729576110839844, "step": 500} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740935484, "_runtime": 12990}, "step": 500} +{"logs": {"train/loss": 3.2253, "train/learning_rate": 0.00038888888888888887, "train/epoch": 13.03, "_timestamp": 1740935484, "_runtime": 12990}, "step": 500} +{"train_info/time_between_train_steps": 40.619683265686035, "step": 500} +{"info/global_step": 501, "train_info/time_within_train_step": 24.734675884246826, "step": 501} +{"train_info/time_between_train_steps": 0.00531458854675293, "step": 501} +{"info/global_step": 502, "train_info/time_within_train_step": 26.545287132263184, "step": 502} +{"train_info/time_between_train_steps": 0.00579833984375, "step": 502} +{"info/global_step": 503, "train_info/time_within_train_step": 25.043986558914185, "step": 503} +{"train_info/time_between_train_steps": 0.005656003952026367, "step": 503} +{"info/global_step": 504, "train_info/time_within_train_step": 27.76943016052246, "step": 504} +{"train_info/time_between_train_steps": 0.006412982940673828, "step": 504} +{"train_info/time_between_train_steps": 17.126632928848267, "step": 504} +{"info/global_step": 505, "train_info/time_within_train_step": 24.723557233810425, "step": 505} +{"train_info/time_between_train_steps": 0.004929542541503906, "step": 505} +{"info/global_step": 506, "train_info/time_within_train_step": 24.889078855514526, "step": 506} +{"train_info/time_between_train_steps": 0.010356664657592773, "step": 506} +{"info/global_step": 507, "train_info/time_within_train_step": 24.745255708694458, "step": 507} +{"train_info/time_between_train_steps": 0.005290985107421875, "step": 507} +{"info/global_step": 508, "train_info/time_within_train_step": 24.990702390670776, "step": 508} +{"train_info/time_between_train_steps": 0.0052716732025146484, "step": 508} +{"info/global_step": 509, "train_info/time_within_train_step": 24.727917194366455, "step": 509} +{"train_info/time_between_train_steps": 0.005662202835083008, "step": 509} +{"info/global_step": 510, "train_info/time_within_train_step": 24.923966646194458, "step": 510} +{"train_info/time_between_train_steps": 0.005282402038574219, "step": 510} +{"info/global_step": 511, "train_info/time_within_train_step": 24.810344696044922, "step": 511} +{"train_info/time_between_train_steps": 0.005233287811279297, "step": 511} +{"info/global_step": 512, "train_info/time_within_train_step": 24.87553596496582, "step": 512} +{"train_info/time_between_train_steps": 0.00525212287902832, "step": 512} +{"info/global_step": 513, "train_info/time_within_train_step": 24.751330137252808, "step": 513} +{"train_info/time_between_train_steps": 0.00530552864074707, "step": 513} +{"info/global_step": 514, "train_info/time_within_train_step": 24.859421014785767, "step": 514} +{"train_info/time_between_train_steps": 0.005125999450683594, "step": 514} +{"info/global_step": 515, "train_info/time_within_train_step": 24.758901357650757, "step": 515} +{"train_info/time_between_train_steps": 0.005644559860229492, "step": 515} +{"info/global_step": 516, "train_info/time_within_train_step": 25.13696575164795, "step": 516} +{"train_info/time_between_train_steps": 0.017844676971435547, "step": 516} +{"info/global_step": 517, "train_info/time_within_train_step": 24.742257356643677, "step": 517} +{"train_info/time_between_train_steps": 0.005228519439697266, "step": 517} +{"info/global_step": 518, "train_info/time_within_train_step": 24.959237813949585, "step": 518} +{"train_info/time_between_train_steps": 0.005273103713989258, "step": 518} +{"info/global_step": 519, "train_info/time_within_train_step": 24.741021394729614, "step": 519} +{"train_info/time_between_train_steps": 0.005228996276855469, "step": 519} +{"info/global_step": 520, "train_info/time_within_train_step": 24.86824631690979, "step": 520} +{"train_info/time_between_train_steps": 0.0052947998046875, "step": 520} +{"info/global_step": 521, "train_info/time_within_train_step": 24.745755672454834, "step": 521} +{"train_info/time_between_train_steps": 0.038246870040893555, "step": 521} +{"info/global_step": 522, "train_info/time_within_train_step": 24.733730792999268, "step": 522} +{"train_info/time_between_train_steps": 0.0050525665283203125, "step": 522} +{"info/global_step": 523, "train_info/time_within_train_step": 24.785008430480957, "step": 523} +{"train_info/time_between_train_steps": 0.004929065704345703, "step": 523} +{"info/global_step": 524, "train_info/time_within_train_step": 24.818209886550903, "step": 524} +{"train_info/time_between_train_steps": 0.005011558532714844, "step": 524} +{"info/global_step": 525, "train_info/time_within_train_step": 24.775924921035767, "step": 525} +{"train_info/time_between_train_steps": 0.005106925964355469, "step": 525} +{"info/global_step": 526, "train_info/time_within_train_step": 24.723657608032227, "step": 526} +{"train_info/time_between_train_steps": 0.004985809326171875, "step": 526} +{"info/global_step": 527, "train_info/time_within_train_step": 24.7371563911438, "step": 527} +{"train_info/time_between_train_steps": 0.005284786224365234, "step": 527} +{"info/global_step": 528, "train_info/time_within_train_step": 24.82113552093506, "step": 528} +{"train_info/time_between_train_steps": 0.005101203918457031, "step": 528} +{"info/global_step": 529, "train_info/time_within_train_step": 24.740662813186646, "step": 529} +{"train_info/time_between_train_steps": 0.0052661895751953125, "step": 529} +{"info/global_step": 530, "train_info/time_within_train_step": 24.809163331985474, "step": 530} +{"train_info/time_between_train_steps": 0.00512242317199707, "step": 530} +{"info/global_step": 531, "train_info/time_within_train_step": 24.735946893692017, "step": 531} +{"train_info/time_between_train_steps": 0.005233287811279297, "step": 531} +{"info/global_step": 532, "train_info/time_within_train_step": 24.758421182632446, "step": 532} +{"train_info/time_between_train_steps": 0.005112886428833008, "step": 532} +{"info/global_step": 533, "train_info/time_within_train_step": 24.776226043701172, "step": 533} +{"train_info/time_between_train_steps": 0.005076169967651367, "step": 533} +{"info/global_step": 534, "train_info/time_within_train_step": 24.73645043373108, "step": 534} +{"train_info/time_between_train_steps": 0.005029439926147461, "step": 534} +{"info/global_step": 535, "train_info/time_within_train_step": 24.801625728607178, "step": 535} +{"train_info/time_between_train_steps": 0.00525975227355957, "step": 535} +{"info/global_step": 536, "train_info/time_within_train_step": 24.731441736221313, "step": 536} +{"train_info/time_between_train_steps": 0.0051229000091552734, "step": 536} +{"info/global_step": 537, "train_info/time_within_train_step": 24.805036306381226, "step": 537} +{"train_info/time_between_train_steps": 0.005262851715087891, "step": 537} +{"info/global_step": 538, "train_info/time_within_train_step": 24.7374005317688, "step": 538} +{"train_info/time_between_train_steps": 0.00553131103515625, "step": 538} +{"info/global_step": 539, "train_info/time_within_train_step": 24.733449697494507, "step": 539} +{"train_info/time_between_train_steps": 0.005254268646240234, "step": 539} +{"info/global_step": 540, "train_info/time_within_train_step": 24.85087513923645, "step": 540} +{"train_info/time_between_train_steps": 0.005687236785888672, "step": 540} +{"train_info/time_between_train_steps": 16.840537786483765, "step": 540} +{"info/global_step": 541, "train_info/time_within_train_step": 24.714160680770874, "step": 541} +{"train_info/time_between_train_steps": 0.004914283752441406, "step": 541} +{"info/global_step": 542, "train_info/time_within_train_step": 24.827792406082153, "step": 542} +{"train_info/time_between_train_steps": 0.004952907562255859, "step": 542} +{"info/global_step": 543, "train_info/time_within_train_step": 24.729306936264038, "step": 543} +{"train_info/time_between_train_steps": 0.005327463150024414, "step": 543} +{"info/global_step": 544, "train_info/time_within_train_step": 24.889456510543823, "step": 544} +{"train_info/time_between_train_steps": 0.005284309387207031, "step": 544} +{"info/global_step": 545, "train_info/time_within_train_step": 24.723475694656372, "step": 545} +{"train_info/time_between_train_steps": 0.005091190338134766, "step": 545} +{"info/global_step": 546, "train_info/time_within_train_step": 24.922597885131836, "step": 546} +{"train_info/time_between_train_steps": 0.005240917205810547, "step": 546} +{"info/global_step": 547, "train_info/time_within_train_step": 24.736469745635986, "step": 547} +{"train_info/time_between_train_steps": 0.0052225589752197266, "step": 547} +{"info/global_step": 548, "train_info/time_within_train_step": 24.858200550079346, "step": 548} +{"train_info/time_between_train_steps": 0.0050966739654541016, "step": 548} +{"info/global_step": 549, "train_info/time_within_train_step": 24.74137830734253, "step": 549} +{"train_info/time_between_train_steps": 0.005570411682128906, "step": 549} +{"info/global_step": 550, "train_info/time_within_train_step": 24.871549606323242, "step": 550} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740936807, "_runtime": 14313}, "step": 550} +{"logs": {"train/loss": 3.1759, "train/learning_rate": 0.0003611111111111111, "train/epoch": 15.01, "_timestamp": 1740936807, "_runtime": 14313}, "step": 550} +{"train_info/time_between_train_steps": 0.026659011840820312, "step": 550} +{"info/global_step": 551, "train_info/time_within_train_step": 24.750704526901245, "step": 551} +{"train_info/time_between_train_steps": 0.005346059799194336, "step": 551} +{"info/global_step": 552, "train_info/time_within_train_step": 24.894230365753174, "step": 552} +{"train_info/time_between_train_steps": 0.005414009094238281, "step": 552} +{"info/global_step": 553, "train_info/time_within_train_step": 24.899888515472412, "step": 553} +{"train_info/time_between_train_steps": 0.005043506622314453, "step": 553} +{"info/global_step": 554, "train_info/time_within_train_step": 24.927033185958862, "step": 554} +{"train_info/time_between_train_steps": 0.005267620086669922, "step": 554} +{"info/global_step": 555, "train_info/time_within_train_step": 24.83765196800232, "step": 555} +{"train_info/time_between_train_steps": 0.005189180374145508, "step": 555} +{"info/global_step": 556, "train_info/time_within_train_step": 24.912099838256836, "step": 556} +{"train_info/time_between_train_steps": 0.005266666412353516, "step": 556} +{"info/global_step": 557, "train_info/time_within_train_step": 24.748265266418457, "step": 557} +{"train_info/time_between_train_steps": 0.04094266891479492, "step": 557} +{"info/global_step": 558, "train_info/time_within_train_step": 24.72436547279358, "step": 558} +{"train_info/time_between_train_steps": 0.005109548568725586, "step": 558} +{"info/global_step": 559, "train_info/time_within_train_step": 24.717060804367065, "step": 559} +{"train_info/time_between_train_steps": 0.004926919937133789, "step": 559} +{"info/global_step": 560, "train_info/time_within_train_step": 24.743820190429688, "step": 560} +{"train_info/time_between_train_steps": 0.005059480667114258, "step": 560} +{"info/global_step": 561, "train_info/time_within_train_step": 24.79579520225525, "step": 561} +{"train_info/time_between_train_steps": 0.005166769027709961, "step": 561} +{"info/global_step": 562, "train_info/time_within_train_step": 24.728024005889893, "step": 562} +{"train_info/time_between_train_steps": 0.004977703094482422, "step": 562} +{"info/global_step": 563, "train_info/time_within_train_step": 24.80263376235962, "step": 563} +{"train_info/time_between_train_steps": 0.005216121673583984, "step": 563} +{"info/global_step": 564, "train_info/time_within_train_step": 24.717312574386597, "step": 564} +{"train_info/time_between_train_steps": 0.0050601959228515625, "step": 564} +{"info/global_step": 565, "train_info/time_within_train_step": 24.72675895690918, "step": 565} +{"train_info/time_between_train_steps": 0.005162715911865234, "step": 565} +{"info/global_step": 566, "train_info/time_within_train_step": 24.80698251724243, "step": 566} +{"train_info/time_between_train_steps": 0.0051190853118896484, "step": 566} +{"info/global_step": 567, "train_info/time_within_train_step": 24.731475353240967, "step": 567} +{"train_info/time_between_train_steps": 0.005063533782958984, "step": 567} +{"info/global_step": 568, "train_info/time_within_train_step": 24.809142112731934, "step": 568} +{"train_info/time_between_train_steps": 0.005088090896606445, "step": 568} +{"info/global_step": 569, "train_info/time_within_train_step": 24.727445125579834, "step": 569} +{"train_info/time_between_train_steps": 0.005234479904174805, "step": 569} +{"info/global_step": 570, "train_info/time_within_train_step": 24.82180905342102, "step": 570} +{"train_info/time_between_train_steps": 0.010399580001831055, "step": 570} +{"info/global_step": 571, "train_info/time_within_train_step": 24.739838361740112, "step": 571} +{"train_info/time_between_train_steps": 0.005330324172973633, "step": 571} +{"info/global_step": 572, "train_info/time_within_train_step": 24.726688861846924, "step": 572} +{"train_info/time_between_train_steps": 0.0052356719970703125, "step": 572} +{"info/global_step": 573, "train_info/time_within_train_step": 24.76980209350586, "step": 573} +{"train_info/time_between_train_steps": 0.005288362503051758, "step": 573} +{"info/global_step": 574, "train_info/time_within_train_step": 24.744027376174927, "step": 574} +{"train_info/time_between_train_steps": 0.005441188812255859, "step": 574} +{"info/global_step": 575, "train_info/time_within_train_step": 24.805593967437744, "step": 575} +{"train_info/time_between_train_steps": 0.00535273551940918, "step": 575} +{"info/global_step": 576, "train_info/time_within_train_step": 24.761144876480103, "step": 576} +{"train_info/time_between_train_steps": 0.0057871341705322266, "step": 576} +{"train_info/time_between_train_steps": 17.09349822998047, "step": 576} +{"info/global_step": 577, "train_info/time_within_train_step": 24.789525508880615, "step": 577} +{"train_info/time_between_train_steps": 0.0048520565032958984, "step": 577} +{"info/global_step": 578, "train_info/time_within_train_step": 24.847193241119385, "step": 578} +{"train_info/time_between_train_steps": 0.004972696304321289, "step": 578} +{"info/global_step": 579, "train_info/time_within_train_step": 24.738926649093628, "step": 579} +{"train_info/time_between_train_steps": 0.005404233932495117, "step": 579} +{"info/global_step": 580, "train_info/time_within_train_step": 24.908841371536255, "step": 580} +{"train_info/time_between_train_steps": 0.005373477935791016, "step": 580} +{"info/global_step": 581, "train_info/time_within_train_step": 24.73165273666382, "step": 581} +{"train_info/time_between_train_steps": 0.0073587894439697266, "step": 581} +{"info/global_step": 582, "train_info/time_within_train_step": 24.897483348846436, "step": 582} +{"train_info/time_between_train_steps": 0.0052869319915771484, "step": 582} +{"info/global_step": 583, "train_info/time_within_train_step": 24.73743772506714, "step": 583} +{"train_info/time_between_train_steps": 0.00531768798828125, "step": 583} +{"info/global_step": 584, "train_info/time_within_train_step": 24.91905903816223, "step": 584} +{"train_info/time_between_train_steps": 0.005275726318359375, "step": 584} +{"info/global_step": 585, "train_info/time_within_train_step": 24.83060574531555, "step": 585} +{"train_info/time_between_train_steps": 0.009479045867919922, "step": 585} +{"info/global_step": 586, "train_info/time_within_train_step": 24.907548189163208, "step": 586} +{"train_info/time_between_train_steps": 0.005182743072509766, "step": 586} +{"info/global_step": 587, "train_info/time_within_train_step": 24.732327222824097, "step": 587} +{"train_info/time_between_train_steps": 0.00530695915222168, "step": 587} +{"info/global_step": 588, "train_info/time_within_train_step": 24.894566297531128, "step": 588} +{"train_info/time_between_train_steps": 0.00545811653137207, "step": 588} +{"info/global_step": 589, "train_info/time_within_train_step": 24.76949691772461, "step": 589} +{"train_info/time_between_train_steps": 0.00535273551940918, "step": 589} +{"info/global_step": 590, "train_info/time_within_train_step": 24.863776445388794, "step": 590} +{"train_info/time_between_train_steps": 0.005330801010131836, "step": 590} +{"info/global_step": 591, "train_info/time_within_train_step": 25.032857179641724, "step": 591} +{"train_info/time_between_train_steps": 0.0050661563873291016, "step": 591} +{"info/global_step": 592, "train_info/time_within_train_step": 24.881978273391724, "step": 592} +{"train_info/time_between_train_steps": 0.009473562240600586, "step": 592} +{"info/global_step": 593, "train_info/time_within_train_step": 24.742043256759644, "step": 593} +{"train_info/time_between_train_steps": 0.031224489212036133, "step": 593} +{"info/global_step": 594, "train_info/time_within_train_step": 24.723555326461792, "step": 594} +{"train_info/time_between_train_steps": 0.005147695541381836, "step": 594} +{"info/global_step": 595, "train_info/time_within_train_step": 24.724490880966187, "step": 595} +{"train_info/time_between_train_steps": 0.005269289016723633, "step": 595} +{"info/global_step": 596, "train_info/time_within_train_step": 24.751523971557617, "step": 596} +{"train_info/time_between_train_steps": 0.005094289779663086, "step": 596} +{"info/global_step": 597, "train_info/time_within_train_step": 24.771713733673096, "step": 597} +{"train_info/time_between_train_steps": 0.005249500274658203, "step": 597} +{"info/global_step": 598, "train_info/time_within_train_step": 24.726884365081787, "step": 598} +{"train_info/time_between_train_steps": 0.005002021789550781, "step": 598} +{"info/global_step": 599, "train_info/time_within_train_step": 24.799149990081787, "step": 599} +{"train_info/time_between_train_steps": 0.005224704742431641, "step": 599} +{"info/global_step": 600, "train_info/time_within_train_step": 24.738183975219727, "step": 600} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740938065, "_runtime": 15571}, "step": 600} +{"logs": {"train/loss": 3.0702, "train/learning_rate": 0.0003333333333333333, "train/epoch": 16.02, "_timestamp": 1740938065, "_runtime": 15571}, "step": 600} +{"train_info/time_between_train_steps": 39.62007737159729, "step": 600} +{"info/global_step": 601, "train_info/time_within_train_step": 24.83381986618042, "step": 601} +{"train_info/time_between_train_steps": 0.005057334899902344, "step": 601} +{"info/global_step": 602, "train_info/time_within_train_step": 24.753774166107178, "step": 602} +{"train_info/time_between_train_steps": 0.01006937026977539, "step": 602} +{"info/global_step": 603, "train_info/time_within_train_step": 24.763505935668945, "step": 603} +{"train_info/time_between_train_steps": 0.005749940872192383, "step": 603} +{"info/global_step": 604, "train_info/time_within_train_step": 25.281919479370117, "step": 604} +{"train_info/time_between_train_steps": 0.005362033843994141, "step": 604} +{"info/global_step": 605, "train_info/time_within_train_step": 25.906700372695923, "step": 605} +{"train_info/time_between_train_steps": 0.0051631927490234375, "step": 605} +{"info/global_step": 606, "train_info/time_within_train_step": 24.730941772460938, "step": 606} +{"train_info/time_between_train_steps": 0.005227088928222656, "step": 606} +{"info/global_step": 607, "train_info/time_within_train_step": 24.73289728164673, "step": 607} +{"train_info/time_between_train_steps": 0.005211830139160156, "step": 607} +{"info/global_step": 608, "train_info/time_within_train_step": 24.72677731513977, "step": 608} +{"train_info/time_between_train_steps": 0.005094051361083984, "step": 608} +{"info/global_step": 609, "train_info/time_within_train_step": 24.731148958206177, "step": 609} +{"train_info/time_between_train_steps": 0.0052530765533447266, "step": 609} +{"info/global_step": 610, "train_info/time_within_train_step": 24.73710036277771, "step": 610} +{"train_info/time_between_train_steps": 0.0055654048919677734, "step": 610} +{"info/global_step": 611, "train_info/time_within_train_step": 24.750377655029297, "step": 611} +{"train_info/time_between_train_steps": 0.005277156829833984, "step": 611} +{"info/global_step": 612, "train_info/time_within_train_step": 24.809725522994995, "step": 612} +{"train_info/time_between_train_steps": 0.005953550338745117, "step": 612} +{"train_info/time_between_train_steps": 16.971450328826904, "step": 612} +{"info/global_step": 613, "train_info/time_within_train_step": 24.713895320892334, "step": 613} +{"train_info/time_between_train_steps": 0.0049762725830078125, "step": 613} +{"info/global_step": 614, "train_info/time_within_train_step": 24.830535411834717, "step": 614} +{"train_info/time_between_train_steps": 0.0050258636474609375, "step": 614} +{"info/global_step": 615, "train_info/time_within_train_step": 24.73689317703247, "step": 615} +{"train_info/time_between_train_steps": 0.005335330963134766, "step": 615} +{"info/global_step": 616, "train_info/time_within_train_step": 24.966907024383545, "step": 616} +{"train_info/time_between_train_steps": 0.006162405014038086, "step": 616} +{"info/global_step": 617, "train_info/time_within_train_step": 24.812106370925903, "step": 617} +{"train_info/time_between_train_steps": 0.005246400833129883, "step": 617} +{"info/global_step": 618, "train_info/time_within_train_step": 24.94482970237732, "step": 618} +{"train_info/time_between_train_steps": 0.005304098129272461, "step": 618} +{"info/global_step": 619, "train_info/time_within_train_step": 24.735955476760864, "step": 619} +{"train_info/time_between_train_steps": 0.005332469940185547, "step": 619} +{"info/global_step": 620, "train_info/time_within_train_step": 24.874086380004883, "step": 620} +{"train_info/time_between_train_steps": 0.005135059356689453, "step": 620} +{"info/global_step": 621, "train_info/time_within_train_step": 24.77277159690857, "step": 621} +{"train_info/time_between_train_steps": 0.005584001541137695, "step": 621} +{"info/global_step": 622, "train_info/time_within_train_step": 24.917343378067017, "step": 622} +{"train_info/time_between_train_steps": 0.005173206329345703, "step": 622} +{"info/global_step": 623, "train_info/time_within_train_step": 24.75916886329651, "step": 623} +{"train_info/time_between_train_steps": 0.0052776336669921875, "step": 623} +{"info/global_step": 624, "train_info/time_within_train_step": 24.907995462417603, "step": 624} +{"train_info/time_between_train_steps": 0.005354642868041992, "step": 624} +{"info/global_step": 625, "train_info/time_within_train_step": 24.748514890670776, "step": 625} +{"train_info/time_between_train_steps": 0.005214214324951172, "step": 625} +{"info/global_step": 626, "train_info/time_within_train_step": 24.91618800163269, "step": 626} +{"train_info/time_between_train_steps": 0.005409955978393555, "step": 626} +{"info/global_step": 627, "train_info/time_within_train_step": 24.76179599761963, "step": 627} +{"train_info/time_between_train_steps": 0.0056264400482177734, "step": 627} +{"info/global_step": 628, "train_info/time_within_train_step": 24.847851514816284, "step": 628} +{"train_info/time_between_train_steps": 0.0053157806396484375, "step": 628} +{"info/global_step": 629, "train_info/time_within_train_step": 24.7463276386261, "step": 629} +{"train_info/time_between_train_steps": 0.032669782638549805, "step": 629} +{"info/global_step": 630, "train_info/time_within_train_step": 24.769511938095093, "step": 630} +{"train_info/time_between_train_steps": 0.005184173583984375, "step": 630} +{"info/global_step": 631, "train_info/time_within_train_step": 24.719733238220215, "step": 631} +{"train_info/time_between_train_steps": 0.005030393600463867, "step": 631} +{"info/global_step": 632, "train_info/time_within_train_step": 24.83031964302063, "step": 632} +{"train_info/time_between_train_steps": 0.00500035285949707, "step": 632} +{"info/global_step": 633, "train_info/time_within_train_step": 24.79925298690796, "step": 633} +{"train_info/time_between_train_steps": 0.005192279815673828, "step": 633} +{"info/global_step": 634, "train_info/time_within_train_step": 24.72847867012024, "step": 634} +{"train_info/time_between_train_steps": 0.009909629821777344, "step": 634} +{"info/global_step": 635, "train_info/time_within_train_step": 24.800881147384644, "step": 635} +{"train_info/time_between_train_steps": 0.010198116302490234, "step": 635} +{"info/global_step": 636, "train_info/time_within_train_step": 24.725451231002808, "step": 636} +{"train_info/time_between_train_steps": 0.009677886962890625, "step": 636} +{"info/global_step": 637, "train_info/time_within_train_step": 24.723628282546997, "step": 637} +{"train_info/time_between_train_steps": 0.010143041610717773, "step": 637} +{"info/global_step": 638, "train_info/time_within_train_step": 24.78000807762146, "step": 638} +{"train_info/time_between_train_steps": 0.005227804183959961, "step": 638} +{"info/global_step": 639, "train_info/time_within_train_step": 24.74211859703064, "step": 639} +{"train_info/time_between_train_steps": 0.005118370056152344, "step": 639} +{"info/global_step": 640, "train_info/time_within_train_step": 24.78849220275879, "step": 640} +{"train_info/time_between_train_steps": 0.005128622055053711, "step": 640} +{"info/global_step": 641, "train_info/time_within_train_step": 24.736228704452515, "step": 641} +{"train_info/time_between_train_steps": 0.005179643630981445, "step": 641} +{"info/global_step": 642, "train_info/time_within_train_step": 24.74337410926819, "step": 642} +{"train_info/time_between_train_steps": 0.005146026611328125, "step": 642} +{"info/global_step": 643, "train_info/time_within_train_step": 24.76146674156189, "step": 643} +{"train_info/time_between_train_steps": 0.005639076232910156, "step": 643} +{"info/global_step": 644, "train_info/time_within_train_step": 24.741496801376343, "step": 644} +{"train_info/time_between_train_steps": 0.0051534175872802734, "step": 644} +{"info/global_step": 645, "train_info/time_within_train_step": 24.800020933151245, "step": 645} +{"train_info/time_between_train_steps": 0.005193471908569336, "step": 645} +{"info/global_step": 646, "train_info/time_within_train_step": 24.869752168655396, "step": 646} +{"train_info/time_between_train_steps": 0.00561070442199707, "step": 646} +{"info/global_step": 647, "train_info/time_within_train_step": 24.830118894577026, "step": 647} +{"train_info/time_between_train_steps": 0.00579833984375, "step": 647} +{"info/global_step": 648, "train_info/time_within_train_step": 24.759987115859985, "step": 648} +{"train_info/time_between_train_steps": 0.00577092170715332, "step": 648} +{"train_info/time_between_train_steps": 16.785557985305786, "step": 648} +{"info/global_step": 649, "train_info/time_within_train_step": 24.717110872268677, "step": 649} +{"train_info/time_between_train_steps": 0.004903554916381836, "step": 649} +{"info/global_step": 650, "train_info/time_within_train_step": 24.820815801620483, "step": 650} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740939382, "_runtime": 16888}, "step": 650} +{"logs": {"train/loss": 3.0373, "train/learning_rate": 0.00030555555555555555, "train/epoch": 18.0, "_timestamp": 1740939382, "_runtime": 16888}, "step": 650} +{"train_info/time_between_train_steps": 0.025553464889526367, "step": 650} +{"info/global_step": 651, "train_info/time_within_train_step": 24.727381706237793, "step": 651} +{"train_info/time_between_train_steps": 0.005402803421020508, "step": 651} +{"info/global_step": 652, "train_info/time_within_train_step": 24.877211093902588, "step": 652} +{"train_info/time_between_train_steps": 0.005325794219970703, "step": 652} +{"info/global_step": 653, "train_info/time_within_train_step": 24.76048755645752, "step": 653} +{"train_info/time_between_train_steps": 0.005133152008056641, "step": 653} +{"info/global_step": 654, "train_info/time_within_train_step": 24.88153839111328, "step": 654} +{"train_info/time_between_train_steps": 0.0052797794342041016, "step": 654} +{"info/global_step": 655, "train_info/time_within_train_step": 24.74543333053589, "step": 655} +{"train_info/time_between_train_steps": 0.007409334182739258, "step": 655} +{"info/global_step": 656, "train_info/time_within_train_step": 24.96385955810547, "step": 656} +{"train_info/time_between_train_steps": 0.0052640438079833984, "step": 656} +{"info/global_step": 657, "train_info/time_within_train_step": 24.736627340316772, "step": 657} +{"train_info/time_between_train_steps": 0.00750279426574707, "step": 657} +{"info/global_step": 658, "train_info/time_within_train_step": 24.88663125038147, "step": 658} +{"train_info/time_between_train_steps": 0.007463693618774414, "step": 658} +{"info/global_step": 659, "train_info/time_within_train_step": 24.752917766571045, "step": 659} +{"train_info/time_between_train_steps": 0.005343437194824219, "step": 659} +{"info/global_step": 660, "train_info/time_within_train_step": 24.88442325592041, "step": 660} +{"train_info/time_between_train_steps": 0.007360696792602539, "step": 660} +{"info/global_step": 661, "train_info/time_within_train_step": 24.76395010948181, "step": 661} +{"train_info/time_between_train_steps": 0.007914066314697266, "step": 661} +{"info/global_step": 662, "train_info/time_within_train_step": 24.85524034500122, "step": 662} +{"train_info/time_between_train_steps": 0.005171775817871094, "step": 662} +{"info/global_step": 663, "train_info/time_within_train_step": 24.849427223205566, "step": 663} +{"train_info/time_between_train_steps": 0.005410194396972656, "step": 663} +{"info/global_step": 664, "train_info/time_within_train_step": 24.90232801437378, "step": 664} +{"train_info/time_between_train_steps": 0.00540471076965332, "step": 664} +{"info/global_step": 665, "train_info/time_within_train_step": 24.74204111099243, "step": 665} +{"train_info/time_between_train_steps": 0.023869037628173828, "step": 665} +{"info/global_step": 666, "train_info/time_within_train_step": 24.741865158081055, "step": 666} +{"train_info/time_between_train_steps": 0.005227804183959961, "step": 666} +{"info/global_step": 667, "train_info/time_within_train_step": 24.72671937942505, "step": 667} +{"train_info/time_between_train_steps": 0.00977325439453125, "step": 667} +{"info/global_step": 668, "train_info/time_within_train_step": 24.7237446308136, "step": 668} +{"train_info/time_between_train_steps": 0.00499725341796875, "step": 668} +{"info/global_step": 669, "train_info/time_within_train_step": 24.726340770721436, "step": 669} +{"train_info/time_between_train_steps": 0.005145549774169922, "step": 669} +{"info/global_step": 670, "train_info/time_within_train_step": 24.734875440597534, "step": 670} +{"train_info/time_between_train_steps": 0.004986763000488281, "step": 670} +{"info/global_step": 671, "train_info/time_within_train_step": 24.859004497528076, "step": 671} +{"train_info/time_between_train_steps": 0.005230426788330078, "step": 671} +{"info/global_step": 672, "train_info/time_within_train_step": 24.728784799575806, "step": 672} +{"train_info/time_between_train_steps": 0.005253791809082031, "step": 672} +{"info/global_step": 673, "train_info/time_within_train_step": 24.736541509628296, "step": 673} +{"train_info/time_between_train_steps": 0.005090475082397461, "step": 673} +{"info/global_step": 674, "train_info/time_within_train_step": 24.72135591506958, "step": 674} +{"train_info/time_between_train_steps": 0.00507044792175293, "step": 674} +{"info/global_step": 675, "train_info/time_within_train_step": 24.74501371383667, "step": 675} +{"train_info/time_between_train_steps": 0.005533456802368164, "step": 675} +{"info/global_step": 676, "train_info/time_within_train_step": 24.736588716506958, "step": 676} +{"train_info/time_between_train_steps": 0.005073070526123047, "step": 676} +{"info/global_step": 677, "train_info/time_within_train_step": 24.72364616394043, "step": 677} +{"train_info/time_between_train_steps": 0.005086183547973633, "step": 677} +{"info/global_step": 678, "train_info/time_within_train_step": 24.817781448364258, "step": 678} +{"train_info/time_between_train_steps": 0.0051043033599853516, "step": 678} +{"info/global_step": 679, "train_info/time_within_train_step": 24.72780179977417, "step": 679} +{"train_info/time_between_train_steps": 0.005372285842895508, "step": 679} +{"info/global_step": 680, "train_info/time_within_train_step": 24.736652612686157, "step": 680} +{"train_info/time_between_train_steps": 0.005323171615600586, "step": 680} +{"info/global_step": 681, "train_info/time_within_train_step": 24.73065972328186, "step": 681} +{"train_info/time_between_train_steps": 0.005162715911865234, "step": 681} +{"info/global_step": 682, "train_info/time_within_train_step": 24.732932329177856, "step": 682} +{"train_info/time_between_train_steps": 0.005327463150024414, "step": 682} +{"info/global_step": 683, "train_info/time_within_train_step": 24.749197721481323, "step": 683} +{"train_info/time_between_train_steps": 0.005759477615356445, "step": 683} +{"info/global_step": 684, "train_info/time_within_train_step": 24.76283597946167, "step": 684} +{"train_info/time_between_train_steps": 0.005583763122558594, "step": 684} +{"train_info/time_between_train_steps": 17.39242458343506, "step": 684} +{"info/global_step": 685, "train_info/time_within_train_step": 24.726581573486328, "step": 685} +{"train_info/time_between_train_steps": 0.0052585601806640625, "step": 685} +{"info/global_step": 686, "train_info/time_within_train_step": 24.894516468048096, "step": 686} +{"train_info/time_between_train_steps": 0.005342245101928711, "step": 686} +{"info/global_step": 687, "train_info/time_within_train_step": 24.72626829147339, "step": 687} +{"train_info/time_between_train_steps": 0.00513148307800293, "step": 687} +{"info/global_step": 688, "train_info/time_within_train_step": 24.847247838974, "step": 688} +{"train_info/time_between_train_steps": 0.005245208740234375, "step": 688} +{"info/global_step": 689, "train_info/time_within_train_step": 24.753804206848145, "step": 689} +{"train_info/time_between_train_steps": 0.005299806594848633, "step": 689} +{"info/global_step": 690, "train_info/time_within_train_step": 24.853449821472168, "step": 690} +{"train_info/time_between_train_steps": 0.00519108772277832, "step": 690} +{"info/global_step": 691, "train_info/time_within_train_step": 24.737238883972168, "step": 691} +{"train_info/time_between_train_steps": 0.005361795425415039, "step": 691} +{"info/global_step": 692, "train_info/time_within_train_step": 24.909313440322876, "step": 692} +{"train_info/time_between_train_steps": 0.0052928924560546875, "step": 692} +{"info/global_step": 693, "train_info/time_within_train_step": 24.947462558746338, "step": 693} +{"train_info/time_between_train_steps": 0.0054094791412353516, "step": 693} +{"info/global_step": 694, "train_info/time_within_train_step": 24.900713443756104, "step": 694} +{"train_info/time_between_train_steps": 0.005281209945678711, "step": 694} +{"info/global_step": 695, "train_info/time_within_train_step": 24.753678798675537, "step": 695} +{"train_info/time_between_train_steps": 0.005476713180541992, "step": 695} +{"info/global_step": 696, "train_info/time_within_train_step": 24.878489017486572, "step": 696} +{"train_info/time_between_train_steps": 0.00524139404296875, "step": 696} +{"info/global_step": 697, "train_info/time_within_train_step": 24.79323172569275, "step": 697} +{"train_info/time_between_train_steps": 0.010657548904418945, "step": 697} +{"info/global_step": 698, "train_info/time_within_train_step": 24.883511066436768, "step": 698} +{"train_info/time_between_train_steps": 0.005227088928222656, "step": 698} +{"info/global_step": 699, "train_info/time_within_train_step": 24.738216400146484, "step": 699} +{"train_info/time_between_train_steps": 0.005158185958862305, "step": 699} +{"info/global_step": 700, "train_info/time_within_train_step": 24.870187520980835, "step": 700} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740940641, "_runtime": 18147}, "step": 700} +{"logs": {"train/loss": 2.9519, "train/learning_rate": 0.0002777777777777778, "train/epoch": 19.01, "_timestamp": 1740940641, "_runtime": 18147}, "step": 700} +{"train_info/time_between_train_steps": 35.66048455238342, "step": 700} +{"info/global_step": 701, "train_info/time_within_train_step": 24.74456024169922, "step": 701} +{"train_info/time_between_train_steps": 0.02018570899963379, "step": 701} +{"info/global_step": 702, "train_info/time_within_train_step": 24.725228786468506, "step": 702} +{"train_info/time_between_train_steps": 0.00501704216003418, "step": 702} +{"info/global_step": 703, "train_info/time_within_train_step": 24.746031999588013, "step": 703} +{"train_info/time_between_train_steps": 0.005163669586181641, "step": 703} +{"info/global_step": 704, "train_info/time_within_train_step": 25.035950422286987, "step": 704} +{"train_info/time_between_train_steps": 0.00525665283203125, "step": 704} +{"info/global_step": 705, "train_info/time_within_train_step": 26.60981059074402, "step": 705} +{"train_info/time_between_train_steps": 0.005349159240722656, "step": 705} +{"info/global_step": 706, "train_info/time_within_train_step": 25.25563073158264, "step": 706} +{"train_info/time_between_train_steps": 0.010217905044555664, "step": 706} +{"info/global_step": 707, "train_info/time_within_train_step": 24.76907444000244, "step": 707} +{"train_info/time_between_train_steps": 0.005998134613037109, "step": 707} +{"info/global_step": 708, "train_info/time_within_train_step": 24.72873592376709, "step": 708} +{"train_info/time_between_train_steps": 0.005183219909667969, "step": 708} +{"info/global_step": 709, "train_info/time_within_train_step": 24.815311908721924, "step": 709} +{"train_info/time_between_train_steps": 0.005053520202636719, "step": 709} +{"info/global_step": 710, "train_info/time_within_train_step": 24.72923231124878, "step": 710} +{"train_info/time_between_train_steps": 0.005084514617919922, "step": 710} +{"info/global_step": 711, "train_info/time_within_train_step": 24.73186159133911, "step": 711} +{"train_info/time_between_train_steps": 0.005202054977416992, "step": 711} +{"info/global_step": 712, "train_info/time_within_train_step": 24.791756629943848, "step": 712} +{"train_info/time_between_train_steps": 0.005011796951293945, "step": 712} +{"info/global_step": 713, "train_info/time_within_train_step": 24.736477851867676, "step": 713} +{"train_info/time_between_train_steps": 0.005075693130493164, "step": 713} +{"info/global_step": 714, "train_info/time_within_train_step": 24.743391275405884, "step": 714} +{"train_info/time_between_train_steps": 0.005237102508544922, "step": 714} +{"info/global_step": 715, "train_info/time_within_train_step": 24.78901171684265, "step": 715} +{"train_info/time_between_train_steps": 0.005066633224487305, "step": 715} +{"info/global_step": 716, "train_info/time_within_train_step": 24.741719961166382, "step": 716} +{"train_info/time_between_train_steps": 0.005087137222290039, "step": 716} +{"info/global_step": 717, "train_info/time_within_train_step": 24.80318331718445, "step": 717} +{"train_info/time_between_train_steps": 0.0052950382232666016, "step": 717} +{"info/global_step": 718, "train_info/time_within_train_step": 24.738154411315918, "step": 718} +{"train_info/time_between_train_steps": 0.005348682403564453, "step": 718} +{"info/global_step": 719, "train_info/time_within_train_step": 24.769057035446167, "step": 719} +{"train_info/time_between_train_steps": 0.005800008773803711, "step": 719} +{"info/global_step": 720, "train_info/time_within_train_step": 24.811625242233276, "step": 720} +{"train_info/time_between_train_steps": 0.00563359260559082, "step": 720} +{"train_info/time_between_train_steps": 16.818358421325684, "step": 720} +{"info/global_step": 721, "train_info/time_within_train_step": 24.727837800979614, "step": 721} +{"train_info/time_between_train_steps": 0.004906177520751953, "step": 721} +{"info/global_step": 722, "train_info/time_within_train_step": 24.86798119544983, "step": 722} +{"train_info/time_between_train_steps": 0.011068344116210938, "step": 722} +{"info/global_step": 723, "train_info/time_within_train_step": 24.73280358314514, "step": 723} +{"train_info/time_between_train_steps": 0.010228872299194336, "step": 723} +{"info/global_step": 724, "train_info/time_within_train_step": 24.96015453338623, "step": 724} +{"train_info/time_between_train_steps": 0.0051920413970947266, "step": 724} +{"info/global_step": 725, "train_info/time_within_train_step": 24.748666763305664, "step": 725} +{"train_info/time_between_train_steps": 0.00542449951171875, "step": 725} +{"info/global_step": 726, "train_info/time_within_train_step": 24.988094568252563, "step": 726} +{"train_info/time_between_train_steps": 0.00629115104675293, "step": 726} +{"info/global_step": 727, "train_info/time_within_train_step": 24.732168674468994, "step": 727} +{"train_info/time_between_train_steps": 0.005228281021118164, "step": 727} +{"info/global_step": 728, "train_info/time_within_train_step": 24.893797874450684, "step": 728} +{"train_info/time_between_train_steps": 0.005185842514038086, "step": 728} +{"info/global_step": 729, "train_info/time_within_train_step": 24.739916563034058, "step": 729} +{"train_info/time_between_train_steps": 0.005321502685546875, "step": 729} +{"info/global_step": 730, "train_info/time_within_train_step": 24.8559889793396, "step": 730} +{"train_info/time_between_train_steps": 0.005069255828857422, "step": 730} +{"info/global_step": 731, "train_info/time_within_train_step": 24.812737703323364, "step": 731} +{"train_info/time_between_train_steps": 0.005670309066772461, "step": 731} +{"info/global_step": 732, "train_info/time_within_train_step": 24.891059160232544, "step": 732} +{"train_info/time_between_train_steps": 0.00525212287902832, "step": 732} +{"info/global_step": 733, "train_info/time_within_train_step": 24.80687665939331, "step": 733} +{"train_info/time_between_train_steps": 0.005295276641845703, "step": 733} +{"info/global_step": 734, "train_info/time_within_train_step": 24.907938718795776, "step": 734} +{"train_info/time_between_train_steps": 0.005321025848388672, "step": 734} +{"info/global_step": 735, "train_info/time_within_train_step": 24.736103057861328, "step": 735} +{"train_info/time_between_train_steps": 0.005146980285644531, "step": 735} +{"info/global_step": 736, "train_info/time_within_train_step": 24.868266344070435, "step": 736} +{"train_info/time_between_train_steps": 0.005350351333618164, "step": 736} +{"info/global_step": 737, "train_info/time_within_train_step": 24.76714587211609, "step": 737} +{"train_info/time_between_train_steps": 0.030611038208007812, "step": 737} +{"info/global_step": 738, "train_info/time_within_train_step": 24.73313283920288, "step": 738} +{"train_info/time_between_train_steps": 0.0050089359283447266, "step": 738} +{"info/global_step": 739, "train_info/time_within_train_step": 24.717146635055542, "step": 739} +{"train_info/time_between_train_steps": 0.005067110061645508, "step": 739} +{"info/global_step": 740, "train_info/time_within_train_step": 24.814554691314697, "step": 740} +{"train_info/time_between_train_steps": 0.005107879638671875, "step": 740} +{"info/global_step": 741, "train_info/time_within_train_step": 24.725518703460693, "step": 741} +{"train_info/time_between_train_steps": 0.005017280578613281, "step": 741} +{"info/global_step": 742, "train_info/time_within_train_step": 24.72419238090515, "step": 742} +{"train_info/time_between_train_steps": 0.00503993034362793, "step": 742} +{"info/global_step": 743, "train_info/time_within_train_step": 24.72203254699707, "step": 743} +{"train_info/time_between_train_steps": 0.005415916442871094, "step": 743} +{"info/global_step": 744, "train_info/time_within_train_step": 24.7182354927063, "step": 744} +{"train_info/time_between_train_steps": 0.005050182342529297, "step": 744} +{"info/global_step": 745, "train_info/time_within_train_step": 24.762098789215088, "step": 745} +{"train_info/time_between_train_steps": 0.005243062973022461, "step": 745} +{"info/global_step": 746, "train_info/time_within_train_step": 24.75697088241577, "step": 746} +{"train_info/time_between_train_steps": 0.005057096481323242, "step": 746} +{"info/global_step": 747, "train_info/time_within_train_step": 24.724122047424316, "step": 747} +{"train_info/time_between_train_steps": 0.005172014236450195, "step": 747} +{"info/global_step": 748, "train_info/time_within_train_step": 24.765623569488525, "step": 748} +{"train_info/time_between_train_steps": 0.00522923469543457, "step": 748} +{"info/global_step": 749, "train_info/time_within_train_step": 24.722686290740967, "step": 749} +{"train_info/time_between_train_steps": 0.005038738250732422, "step": 749} +{"info/global_step": 750, "train_info/time_within_train_step": 24.723270893096924, "step": 750} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740941937, "_runtime": 19443}, "step": 750} +{"logs": {"train/loss": 2.9004, "train/learning_rate": 0.00025, "train/epoch": 20.02, "_timestamp": 1740941937, "_runtime": 19443}, "step": 750} +{"train_info/time_between_train_steps": 0.02609109878540039, "step": 750} +{"info/global_step": 751, "train_info/time_within_train_step": 24.72675323486328, "step": 751} +{"train_info/time_between_train_steps": 0.00533604621887207, "step": 751} +{"info/global_step": 752, "train_info/time_within_train_step": 24.724544525146484, "step": 752} +{"train_info/time_between_train_steps": 0.005096912384033203, "step": 752} +{"info/global_step": 753, "train_info/time_within_train_step": 24.732876777648926, "step": 753} +{"train_info/time_between_train_steps": 0.005362987518310547, "step": 753} +{"info/global_step": 754, "train_info/time_within_train_step": 24.733770608901978, "step": 754} +{"train_info/time_between_train_steps": 0.005328178405761719, "step": 754} +{"info/global_step": 755, "train_info/time_within_train_step": 24.84368634223938, "step": 755} +{"train_info/time_between_train_steps": 0.005506992340087891, "step": 755} +{"info/global_step": 756, "train_info/time_within_train_step": 24.76642942428589, "step": 756} +{"train_info/time_between_train_steps": 0.005799293518066406, "step": 756} +{"train_info/time_between_train_steps": 16.7277991771698, "step": 756} +{"info/global_step": 757, "train_info/time_within_train_step": 24.712950944900513, "step": 757} +{"train_info/time_between_train_steps": 0.009311676025390625, "step": 757} +{"info/global_step": 758, "train_info/time_within_train_step": 24.82468295097351, "step": 758} +{"train_info/time_between_train_steps": 0.004947662353515625, "step": 758} +{"info/global_step": 759, "train_info/time_within_train_step": 24.729102849960327, "step": 759} +{"train_info/time_between_train_steps": 0.005240440368652344, "step": 759} +{"info/global_step": 760, "train_info/time_within_train_step": 24.86588764190674, "step": 760} +{"train_info/time_between_train_steps": 0.0053403377532958984, "step": 760} +{"info/global_step": 761, "train_info/time_within_train_step": 24.7279634475708, "step": 761} +{"train_info/time_between_train_steps": 0.005190610885620117, "step": 761} +{"info/global_step": 762, "train_info/time_within_train_step": 24.93362045288086, "step": 762} +{"train_info/time_between_train_steps": 0.0053212642669677734, "step": 762} +{"info/global_step": 763, "train_info/time_within_train_step": 24.741868019104004, "step": 763} +{"train_info/time_between_train_steps": 0.0052301883697509766, "step": 763} +{"info/global_step": 764, "train_info/time_within_train_step": 24.874112129211426, "step": 764} +{"train_info/time_between_train_steps": 0.005081892013549805, "step": 764} +{"info/global_step": 765, "train_info/time_within_train_step": 24.733646631240845, "step": 765} +{"train_info/time_between_train_steps": 0.005427122116088867, "step": 765} +{"info/global_step": 766, "train_info/time_within_train_step": 24.887482166290283, "step": 766} +{"train_info/time_between_train_steps": 0.005199909210205078, "step": 766} +{"info/global_step": 767, "train_info/time_within_train_step": 24.73005175590515, "step": 767} +{"train_info/time_between_train_steps": 0.005299568176269531, "step": 767} +{"info/global_step": 768, "train_info/time_within_train_step": 24.898708820343018, "step": 768} +{"train_info/time_between_train_steps": 0.0053670406341552734, "step": 768} +{"info/global_step": 769, "train_info/time_within_train_step": 24.744231700897217, "step": 769} +{"train_info/time_between_train_steps": 0.005475521087646484, "step": 769} +{"info/global_step": 770, "train_info/time_within_train_step": 24.950664281845093, "step": 770} +{"train_info/time_between_train_steps": 0.005111217498779297, "step": 770} +{"info/global_step": 771, "train_info/time_within_train_step": 24.758899688720703, "step": 771} +{"train_info/time_between_train_steps": 0.010241508483886719, "step": 771} +{"info/global_step": 772, "train_info/time_within_train_step": 24.82810616493225, "step": 772} +{"train_info/time_between_train_steps": 0.005230426788330078, "step": 772} +{"info/global_step": 773, "train_info/time_within_train_step": 24.749608278274536, "step": 773} +{"train_info/time_between_train_steps": 0.026113510131835938, "step": 773} +{"info/global_step": 774, "train_info/time_within_train_step": 24.774337768554688, "step": 774} +{"train_info/time_between_train_steps": 0.0051593780517578125, "step": 774} +{"info/global_step": 775, "train_info/time_within_train_step": 24.7336368560791, "step": 775} +{"train_info/time_between_train_steps": 0.004976034164428711, "step": 775} +{"info/global_step": 776, "train_info/time_within_train_step": 24.792726516723633, "step": 776} +{"train_info/time_between_train_steps": 0.005236625671386719, "step": 776} +{"info/global_step": 777, "train_info/time_within_train_step": 24.724923610687256, "step": 777} +{"train_info/time_between_train_steps": 0.005137443542480469, "step": 777} +{"info/global_step": 778, "train_info/time_within_train_step": 24.74237370491028, "step": 778} +{"train_info/time_between_train_steps": 0.005007028579711914, "step": 778} +{"info/global_step": 779, "train_info/time_within_train_step": 24.79455828666687, "step": 779} +{"train_info/time_between_train_steps": 0.005336761474609375, "step": 779} +{"info/global_step": 780, "train_info/time_within_train_step": 24.7446391582489, "step": 780} +{"train_info/time_between_train_steps": 0.0050907135009765625, "step": 780} +{"info/global_step": 781, "train_info/time_within_train_step": 24.794323921203613, "step": 781} +{"train_info/time_between_train_steps": 0.005179405212402344, "step": 781} +{"info/global_step": 782, "train_info/time_within_train_step": 24.743619441986084, "step": 782} +{"train_info/time_between_train_steps": 0.005484104156494141, "step": 782} +{"info/global_step": 783, "train_info/time_within_train_step": 24.747774124145508, "step": 783} +{"train_info/time_between_train_steps": 0.005097389221191406, "step": 783} +{"info/global_step": 784, "train_info/time_within_train_step": 24.784904956817627, "step": 784} +{"train_info/time_between_train_steps": 0.005140542984008789, "step": 784} +{"info/global_step": 785, "train_info/time_within_train_step": 24.75478196144104, "step": 785} +{"train_info/time_between_train_steps": 0.005162954330444336, "step": 785} +{"info/global_step": 786, "train_info/time_within_train_step": 24.850297689437866, "step": 786} +{"train_info/time_between_train_steps": 0.005082845687866211, "step": 786} +{"info/global_step": 787, "train_info/time_within_train_step": 24.741159439086914, "step": 787} +{"train_info/time_between_train_steps": 0.005219221115112305, "step": 787} +{"info/global_step": 788, "train_info/time_within_train_step": 24.7983615398407, "step": 788} +{"train_info/time_between_train_steps": 0.005199432373046875, "step": 788} +{"info/global_step": 789, "train_info/time_within_train_step": 24.730390787124634, "step": 789} +{"train_info/time_between_train_steps": 0.005277395248413086, "step": 789} +{"info/global_step": 790, "train_info/time_within_train_step": 24.75391697883606, "step": 790} +{"train_info/time_between_train_steps": 0.005681037902832031, "step": 790} +{"info/global_step": 791, "train_info/time_within_train_step": 24.743228435516357, "step": 791} +{"train_info/time_between_train_steps": 0.0055773258209228516, "step": 791} +{"info/global_step": 792, "train_info/time_within_train_step": 24.77616572380066, "step": 792} +{"train_info/time_between_train_steps": 0.005561351776123047, "step": 792} +{"train_info/time_between_train_steps": 16.88741135597229, "step": 792} +{"info/global_step": 793, "train_info/time_within_train_step": 24.719029903411865, "step": 793} +{"train_info/time_between_train_steps": 0.004995584487915039, "step": 793} +{"info/global_step": 794, "train_info/time_within_train_step": 24.815943241119385, "step": 794} +{"train_info/time_between_train_steps": 0.004842281341552734, "step": 794} +{"info/global_step": 795, "train_info/time_within_train_step": 24.71244978904724, "step": 795} +{"train_info/time_between_train_steps": 0.004967212677001953, "step": 795} +{"info/global_step": 796, "train_info/time_within_train_step": 24.88487982749939, "step": 796} +{"train_info/time_between_train_steps": 0.005194902420043945, "step": 796} +{"info/global_step": 797, "train_info/time_within_train_step": 24.738104820251465, "step": 797} +{"train_info/time_between_train_steps": 0.005329132080078125, "step": 797} +{"info/global_step": 798, "train_info/time_within_train_step": 24.951988697052002, "step": 798} +{"train_info/time_between_train_steps": 0.005085468292236328, "step": 798} +{"info/global_step": 799, "train_info/time_within_train_step": 24.732640504837036, "step": 799} +{"train_info/time_between_train_steps": 0.00549626350402832, "step": 799} +{"info/global_step": 800, "train_info/time_within_train_step": 24.917522192001343, "step": 800} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740943212, "_runtime": 20718}, "step": 800} +{"logs": {"train/loss": 2.8864, "train/learning_rate": 0.00022222222222222218, "train/epoch": 22.01, "_timestamp": 1740943212, "_runtime": 20718}, "step": 800} +{"train_info/time_between_train_steps": 42.437042474746704, "step": 800} +{"info/global_step": 801, "train_info/time_within_train_step": 25.052353143692017, "step": 801} +{"train_info/time_between_train_steps": 0.009632587432861328, "step": 801} +{"info/global_step": 802, "train_info/time_within_train_step": 24.980876922607422, "step": 802} +{"train_info/time_between_train_steps": 0.005341291427612305, "step": 802} +{"info/global_step": 803, "train_info/time_within_train_step": 24.755086660385132, "step": 803} +{"train_info/time_between_train_steps": 0.0055887699127197266, "step": 803} +{"info/global_step": 804, "train_info/time_within_train_step": 24.84954023361206, "step": 804} +{"train_info/time_between_train_steps": 0.005357980728149414, "step": 804} +{"info/global_step": 805, "train_info/time_within_train_step": 27.091053009033203, "step": 805} +{"train_info/time_between_train_steps": 0.005497932434082031, "step": 805} +{"info/global_step": 806, "train_info/time_within_train_step": 25.128865003585815, "step": 806} +{"train_info/time_between_train_steps": 0.005378007888793945, "step": 806} +{"info/global_step": 807, "train_info/time_within_train_step": 25.2085440158844, "step": 807} +{"train_info/time_between_train_steps": 0.005428314208984375, "step": 807} +{"info/global_step": 808, "train_info/time_within_train_step": 24.907221794128418, "step": 808} +{"train_info/time_between_train_steps": 0.00550389289855957, "step": 808} +{"info/global_step": 809, "train_info/time_within_train_step": 24.733978271484375, "step": 809} +{"train_info/time_between_train_steps": 0.019988536834716797, "step": 809} +{"info/global_step": 810, "train_info/time_within_train_step": 24.789525270462036, "step": 810} +{"train_info/time_between_train_steps": 0.0048868656158447266, "step": 810} +{"info/global_step": 811, "train_info/time_within_train_step": 24.738815307617188, "step": 811} +{"train_info/time_between_train_steps": 0.006630897521972656, "step": 811} +{"info/global_step": 812, "train_info/time_within_train_step": 24.786779403686523, "step": 812} +{"train_info/time_between_train_steps": 0.0049703121185302734, "step": 812} +{"info/global_step": 813, "train_info/time_within_train_step": 24.73047423362732, "step": 813} +{"train_info/time_between_train_steps": 0.00559544563293457, "step": 813} +{"info/global_step": 814, "train_info/time_within_train_step": 24.736085891723633, "step": 814} +{"train_info/time_between_train_steps": 0.00511932373046875, "step": 814} +{"info/global_step": 815, "train_info/time_within_train_step": 24.792728185653687, "step": 815} +{"train_info/time_between_train_steps": 0.005035400390625, "step": 815} +{"info/global_step": 816, "train_info/time_within_train_step": 24.73744225502014, "step": 816} +{"train_info/time_between_train_steps": 0.005098819732666016, "step": 816} +{"info/global_step": 817, "train_info/time_within_train_step": 24.90293312072754, "step": 817} +{"train_info/time_between_train_steps": 0.005214214324951172, "step": 817} +{"info/global_step": 818, "train_info/time_within_train_step": 24.72643232345581, "step": 818} +{"train_info/time_between_train_steps": 0.005068302154541016, "step": 818} +{"info/global_step": 819, "train_info/time_within_train_step": 24.732880353927612, "step": 819} +{"train_info/time_between_train_steps": 0.005208015441894531, "step": 819} +{"info/global_step": 820, "train_info/time_within_train_step": 24.796231985092163, "step": 820} +{"train_info/time_between_train_steps": 0.005064725875854492, "step": 820} +{"info/global_step": 821, "train_info/time_within_train_step": 24.743776082992554, "step": 821} +{"train_info/time_between_train_steps": 0.005031585693359375, "step": 821} +{"info/global_step": 822, "train_info/time_within_train_step": 24.813937664031982, "step": 822} +{"train_info/time_between_train_steps": 0.0051784515380859375, "step": 822} +{"info/global_step": 823, "train_info/time_within_train_step": 24.740906715393066, "step": 823} +{"train_info/time_between_train_steps": 0.005007266998291016, "step": 823} +{"info/global_step": 824, "train_info/time_within_train_step": 24.7535240650177, "step": 824} +{"train_info/time_between_train_steps": 0.0051097869873046875, "step": 824} +{"info/global_step": 825, "train_info/time_within_train_step": 24.790861129760742, "step": 825} +{"train_info/time_between_train_steps": 0.005384683609008789, "step": 825} +{"info/global_step": 826, "train_info/time_within_train_step": 24.7320396900177, "step": 826} +{"train_info/time_between_train_steps": 0.005423069000244141, "step": 826} +{"info/global_step": 827, "train_info/time_within_train_step": 24.776249408721924, "step": 827} +{"train_info/time_between_train_steps": 0.0057830810546875, "step": 827} +{"info/global_step": 828, "train_info/time_within_train_step": 24.770889282226562, "step": 828} +{"train_info/time_between_train_steps": 0.010804176330566406, "step": 828} +{"train_info/time_between_train_steps": 16.630046129226685, "step": 828} +{"info/global_step": 829, "train_info/time_within_train_step": 24.98806643486023, "step": 829} +{"train_info/time_between_train_steps": 0.0049228668212890625, "step": 829} +{"info/global_step": 830, "train_info/time_within_train_step": 24.865620374679565, "step": 830} +{"train_info/time_between_train_steps": 0.005074262619018555, "step": 830} +{"info/global_step": 831, "train_info/time_within_train_step": 24.74364733695984, "step": 831} +{"train_info/time_between_train_steps": 0.005377769470214844, "step": 831} +{"info/global_step": 832, "train_info/time_within_train_step": 24.97427988052368, "step": 832} +{"train_info/time_between_train_steps": 0.0053517818450927734, "step": 832} +{"info/global_step": 833, "train_info/time_within_train_step": 24.742409706115723, "step": 833} +{"train_info/time_between_train_steps": 0.005179405212402344, "step": 833} +{"info/global_step": 834, "train_info/time_within_train_step": 24.932499408721924, "step": 834} +{"train_info/time_between_train_steps": 0.005446910858154297, "step": 834} +{"info/global_step": 835, "train_info/time_within_train_step": 24.754385948181152, "step": 835} +{"train_info/time_between_train_steps": 0.005126237869262695, "step": 835} +{"info/global_step": 836, "train_info/time_within_train_step": 24.856759786605835, "step": 836} +{"train_info/time_between_train_steps": 0.0052530765533447266, "step": 836} +{"info/global_step": 837, "train_info/time_within_train_step": 24.753978490829468, "step": 837} +{"train_info/time_between_train_steps": 0.005577802658081055, "step": 837} +{"info/global_step": 838, "train_info/time_within_train_step": 24.92206907272339, "step": 838} +{"train_info/time_between_train_steps": 0.0051555633544921875, "step": 838} +{"info/global_step": 839, "train_info/time_within_train_step": 24.74101161956787, "step": 839} +{"train_info/time_between_train_steps": 0.005464076995849609, "step": 839} +{"info/global_step": 840, "train_info/time_within_train_step": 24.885008335113525, "step": 840} +{"train_info/time_between_train_steps": 0.005258083343505859, "step": 840} +{"info/global_step": 841, "train_info/time_within_train_step": 24.738019704818726, "step": 841} +{"train_info/time_between_train_steps": 0.00534510612487793, "step": 841} +{"info/global_step": 842, "train_info/time_within_train_step": 24.91239023208618, "step": 842} +{"train_info/time_between_train_steps": 0.005183696746826172, "step": 842} +{"info/global_step": 843, "train_info/time_within_train_step": 24.73810648918152, "step": 843} +{"train_info/time_between_train_steps": 0.005187511444091797, "step": 843} +{"info/global_step": 844, "train_info/time_within_train_step": 24.82477831840515, "step": 844} +{"train_info/time_between_train_steps": 0.0052661895751953125, "step": 844} +{"info/global_step": 845, "train_info/time_within_train_step": 24.75134563446045, "step": 845} +{"train_info/time_between_train_steps": 0.03960990905761719, "step": 845} +{"info/global_step": 846, "train_info/time_within_train_step": 24.722983360290527, "step": 846} +{"train_info/time_between_train_steps": 0.00496983528137207, "step": 846} +{"info/global_step": 847, "train_info/time_within_train_step": 24.814876079559326, "step": 847} +{"train_info/time_between_train_steps": 0.0049283504486083984, "step": 847} +{"info/global_step": 848, "train_info/time_within_train_step": 24.823065519332886, "step": 848} +{"train_info/time_between_train_steps": 0.01144719123840332, "step": 848} +{"info/global_step": 849, "train_info/time_within_train_step": 24.726343393325806, "step": 849} +{"train_info/time_between_train_steps": 0.005032777786254883, "step": 849} +{"info/global_step": 850, "train_info/time_within_train_step": 24.764631271362305, "step": 850} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740944518, "_runtime": 22024}, "step": 850} +{"logs": {"train/loss": 2.8145, "train/learning_rate": 0.00019444444444444443, "train/epoch": 23.02, "_timestamp": 1740944518, "_runtime": 22024}, "step": 850} +{"train_info/time_between_train_steps": 0.026914358139038086, "step": 850} +{"info/global_step": 851, "train_info/time_within_train_step": 24.75732159614563, "step": 851} +{"train_info/time_between_train_steps": 0.0051746368408203125, "step": 851} +{"info/global_step": 852, "train_info/time_within_train_step": 24.729362726211548, "step": 852} +{"train_info/time_between_train_steps": 0.005028963088989258, "step": 852} +{"info/global_step": 853, "train_info/time_within_train_step": 24.738094091415405, "step": 853} +{"train_info/time_between_train_steps": 0.0051593780517578125, "step": 853} +{"info/global_step": 854, "train_info/time_within_train_step": 24.735393524169922, "step": 854} +{"train_info/time_between_train_steps": 0.005089759826660156, "step": 854} +{"info/global_step": 855, "train_info/time_within_train_step": 24.722259283065796, "step": 855} +{"train_info/time_between_train_steps": 0.005051612854003906, "step": 855} +{"info/global_step": 856, "train_info/time_within_train_step": 24.74247670173645, "step": 856} +{"train_info/time_between_train_steps": 0.005130290985107422, "step": 856} +{"info/global_step": 857, "train_info/time_within_train_step": 24.73270845413208, "step": 857} +{"train_info/time_between_train_steps": 0.0052297115325927734, "step": 857} +{"info/global_step": 858, "train_info/time_within_train_step": 24.725228309631348, "step": 858} +{"train_info/time_between_train_steps": 0.005077838897705078, "step": 858} +{"info/global_step": 859, "train_info/time_within_train_step": 24.72916030883789, "step": 859} +{"train_info/time_between_train_steps": 0.0053250789642333984, "step": 859} +{"info/global_step": 860, "train_info/time_within_train_step": 24.794079303741455, "step": 860} +{"train_info/time_between_train_steps": 0.005107879638671875, "step": 860} +{"info/global_step": 861, "train_info/time_within_train_step": 24.73529052734375, "step": 861} +{"train_info/time_between_train_steps": 0.005443572998046875, "step": 861} +{"info/global_step": 862, "train_info/time_within_train_step": 24.73827815055847, "step": 862} +{"train_info/time_between_train_steps": 0.0054187774658203125, "step": 862} +{"info/global_step": 863, "train_info/time_within_train_step": 24.84064745903015, "step": 863} +{"train_info/time_between_train_steps": 0.0055315494537353516, "step": 863} +{"info/global_step": 864, "train_info/time_within_train_step": 24.7612202167511, "step": 864} +{"train_info/time_between_train_steps": 0.00580143928527832, "step": 864} +{"train_info/time_between_train_steps": 16.897115468978882, "step": 864} +{"info/global_step": 865, "train_info/time_within_train_step": 24.731802225112915, "step": 865} +{"train_info/time_between_train_steps": 0.004931926727294922, "step": 865} +{"info/global_step": 866, "train_info/time_within_train_step": 24.843568086624146, "step": 866} +{"train_info/time_between_train_steps": 0.005061149597167969, "step": 866} +{"info/global_step": 867, "train_info/time_within_train_step": 24.92856454849243, "step": 867} +{"train_info/time_between_train_steps": 0.009918689727783203, "step": 867} +{"info/global_step": 868, "train_info/time_within_train_step": 25.01024079322815, "step": 868} +{"train_info/time_between_train_steps": 0.005315065383911133, "step": 868} +{"info/global_step": 869, "train_info/time_within_train_step": 24.86758589744568, "step": 869} +{"train_info/time_between_train_steps": 0.005348682403564453, "step": 869} +{"info/global_step": 870, "train_info/time_within_train_step": 24.919381618499756, "step": 870} +{"train_info/time_between_train_steps": 0.0053327083587646484, "step": 870} +{"info/global_step": 871, "train_info/time_within_train_step": 24.748022079467773, "step": 871} +{"train_info/time_between_train_steps": 0.005083799362182617, "step": 871} +{"info/global_step": 872, "train_info/time_within_train_step": 24.84737515449524, "step": 872} +{"train_info/time_between_train_steps": 0.0051920413970947266, "step": 872} +{"info/global_step": 873, "train_info/time_within_train_step": 24.75995659828186, "step": 873} +{"train_info/time_between_train_steps": 0.0056378841400146484, "step": 873} +{"info/global_step": 874, "train_info/time_within_train_step": 24.924317836761475, "step": 874} +{"train_info/time_between_train_steps": 0.009913206100463867, "step": 874} +{"info/global_step": 875, "train_info/time_within_train_step": 24.735528469085693, "step": 875} +{"train_info/time_between_train_steps": 0.010497808456420898, "step": 875} +{"info/global_step": 876, "train_info/time_within_train_step": 24.90837073326111, "step": 876} +{"train_info/time_between_train_steps": 0.005293130874633789, "step": 876} +{"info/global_step": 877, "train_info/time_within_train_step": 24.728323936462402, "step": 877} +{"train_info/time_between_train_steps": 0.005308389663696289, "step": 877} +{"info/global_step": 878, "train_info/time_within_train_step": 24.9392831325531, "step": 878} +{"train_info/time_between_train_steps": 0.010026216506958008, "step": 878} +{"info/global_step": 879, "train_info/time_within_train_step": 24.952231884002686, "step": 879} +{"train_info/time_between_train_steps": 0.005426168441772461, "step": 879} +{"info/global_step": 880, "train_info/time_within_train_step": 24.851560831069946, "step": 880} +{"train_info/time_between_train_steps": 0.005367755889892578, "step": 880} +{"info/global_step": 881, "train_info/time_within_train_step": 24.822258472442627, "step": 881} +{"train_info/time_between_train_steps": 0.03595280647277832, "step": 881} +{"info/global_step": 882, "train_info/time_within_train_step": 24.734315156936646, "step": 882} +{"train_info/time_between_train_steps": 0.005110502243041992, "step": 882} +{"info/global_step": 883, "train_info/time_within_train_step": 24.72321844100952, "step": 883} +{"train_info/time_between_train_steps": 0.005001068115234375, "step": 883} +{"info/global_step": 884, "train_info/time_within_train_step": 24.79387664794922, "step": 884} +{"train_info/time_between_train_steps": 0.0050241947174072266, "step": 884} +{"info/global_step": 885, "train_info/time_within_train_step": 24.74095320701599, "step": 885} +{"train_info/time_between_train_steps": 0.005158662796020508, "step": 885} +{"info/global_step": 886, "train_info/time_within_train_step": 24.79170870780945, "step": 886} +{"train_info/time_between_train_steps": 0.005056142807006836, "step": 886} +{"info/global_step": 887, "train_info/time_within_train_step": 24.787779092788696, "step": 887} +{"train_info/time_between_train_steps": 0.014841556549072266, "step": 887} +{"info/global_step": 888, "train_info/time_within_train_step": 25.11621379852295, "step": 888} +{"train_info/time_between_train_steps": 0.005013704299926758, "step": 888} +{"info/global_step": 889, "train_info/time_within_train_step": 24.76721501350403, "step": 889} +{"train_info/time_between_train_steps": 0.014666318893432617, "step": 889} +{"info/global_step": 890, "train_info/time_within_train_step": 25.11317777633667, "step": 890} +{"train_info/time_between_train_steps": 0.014566183090209961, "step": 890} +{"info/global_step": 891, "train_info/time_within_train_step": 25.122668504714966, "step": 891} +{"train_info/time_between_train_steps": 0.00955343246459961, "step": 891} +{"info/global_step": 892, "train_info/time_within_train_step": 24.79699683189392, "step": 892} +{"train_info/time_between_train_steps": 0.014750003814697266, "step": 892} +{"info/global_step": 893, "train_info/time_within_train_step": 25.1306893825531, "step": 893} +{"train_info/time_between_train_steps": 0.0075571537017822266, "step": 893} +{"info/global_step": 894, "train_info/time_within_train_step": 25.14845371246338, "step": 894} +{"train_info/time_between_train_steps": 0.014394044876098633, "step": 894} +{"info/global_step": 895, "train_info/time_within_train_step": 24.726760387420654, "step": 895} +{"train_info/time_between_train_steps": 0.00531005859375, "step": 895} +{"info/global_step": 896, "train_info/time_within_train_step": 24.760884046554565, "step": 896} +{"train_info/time_between_train_steps": 0.00504612922668457, "step": 896} +{"info/global_step": 897, "train_info/time_within_train_step": 24.74559450149536, "step": 897} +{"train_info/time_between_train_steps": 0.005262851715087891, "step": 897} +{"info/global_step": 898, "train_info/time_within_train_step": 24.797462701797485, "step": 898} +{"train_info/time_between_train_steps": 0.005486011505126953, "step": 898} +{"info/global_step": 899, "train_info/time_within_train_step": 24.75855565071106, "step": 899} +{"train_info/time_between_train_steps": 0.005507707595825195, "step": 899} +{"info/global_step": 900, "train_info/time_within_train_step": 24.766134023666382, "step": 900} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740945778, "_runtime": 23284}, "step": 900} +{"logs": {"train/loss": 2.7771, "train/learning_rate": 0.00016666666666666666, "train/epoch": 24.03, "_timestamp": 1740945778, "_runtime": 23284}, "step": 900} +{"train_info/time_between_train_steps": 41.235666275024414, "step": 900} +{"train_info/time_between_train_steps": 58.015663146972656, "step": 900} +{"info/global_step": 901, "train_info/time_within_train_step": 24.758694171905518, "step": 901} +{"train_info/time_between_train_steps": 0.004852771759033203, "step": 901} +{"info/global_step": 902, "train_info/time_within_train_step": 24.871646881103516, "step": 902} +{"train_info/time_between_train_steps": 0.0052144527435302734, "step": 902} +{"info/global_step": 903, "train_info/time_within_train_step": 24.776259422302246, "step": 903} +{"train_info/time_between_train_steps": 0.0053288936614990234, "step": 903} +{"info/global_step": 904, "train_info/time_within_train_step": 24.856091737747192, "step": 904} +{"train_info/time_between_train_steps": 0.005385875701904297, "step": 904} +{"info/global_step": 905, "train_info/time_within_train_step": 27.0196795463562, "step": 905} +{"train_info/time_between_train_steps": 0.005720376968383789, "step": 905} +{"info/global_step": 906, "train_info/time_within_train_step": 25.075500965118408, "step": 906} +{"train_info/time_between_train_steps": 0.005583047866821289, "step": 906} +{"info/global_step": 907, "train_info/time_within_train_step": 25.124600648880005, "step": 907} +{"train_info/time_between_train_steps": 0.0054340362548828125, "step": 907} +{"info/global_step": 908, "train_info/time_within_train_step": 26.235055208206177, "step": 908} +{"train_info/time_between_train_steps": 0.006363630294799805, "step": 908} +{"info/global_step": 909, "train_info/time_within_train_step": 24.91216492652893, "step": 909} +{"train_info/time_between_train_steps": 0.007965803146362305, "step": 909} +{"info/global_step": 910, "train_info/time_within_train_step": 24.94987916946411, "step": 910} +{"train_info/time_between_train_steps": 0.005271196365356445, "step": 910} +{"info/global_step": 911, "train_info/time_within_train_step": 24.756555557250977, "step": 911} +{"train_info/time_between_train_steps": 0.005629062652587891, "step": 911} +{"info/global_step": 912, "train_info/time_within_train_step": 24.862255573272705, "step": 912} +{"train_info/time_between_train_steps": 0.00527191162109375, "step": 912} +{"info/global_step": 913, "train_info/time_within_train_step": 24.753435850143433, "step": 913} +{"train_info/time_between_train_steps": 0.015625953674316406, "step": 913} +{"info/global_step": 914, "train_info/time_within_train_step": 25.145651817321777, "step": 914} +{"train_info/time_between_train_steps": 0.007041454315185547, "step": 914} +{"info/global_step": 915, "train_info/time_within_train_step": 24.779462337493896, "step": 915} +{"train_info/time_between_train_steps": 0.005808353424072266, "step": 915} +{"info/global_step": 916, "train_info/time_within_train_step": 24.88184905052185, "step": 916} +{"train_info/time_between_train_steps": 0.00795888900756836, "step": 916} +{"info/global_step": 917, "train_info/time_within_train_step": 24.8205885887146, "step": 917} +{"train_info/time_between_train_steps": 0.019901514053344727, "step": 917} +{"info/global_step": 918, "train_info/time_within_train_step": 24.7221097946167, "step": 918} +{"train_info/time_between_train_steps": 0.005067348480224609, "step": 918} +{"info/global_step": 919, "train_info/time_within_train_step": 24.73346996307373, "step": 919} +{"train_info/time_between_train_steps": 0.005240201950073242, "step": 919} +{"info/global_step": 920, "train_info/time_within_train_step": 24.796372890472412, "step": 920} +{"train_info/time_between_train_steps": 0.005086660385131836, "step": 920} +{"info/global_step": 921, "train_info/time_within_train_step": 24.733612537384033, "step": 921} +{"train_info/time_between_train_steps": 0.0050389766693115234, "step": 921} +{"info/global_step": 922, "train_info/time_within_train_step": 24.812243700027466, "step": 922} +{"train_info/time_between_train_steps": 0.005752086639404297, "step": 922} +{"info/global_step": 923, "train_info/time_within_train_step": 24.740467309951782, "step": 923} +{"train_info/time_between_train_steps": 0.0054624080657958984, "step": 923} +{"info/global_step": 924, "train_info/time_within_train_step": 24.827512979507446, "step": 924} +{"train_info/time_between_train_steps": 0.0052318572998046875, "step": 924} +{"info/global_step": 925, "train_info/time_within_train_step": 24.73528289794922, "step": 925} +{"train_info/time_between_train_steps": 0.0056247711181640625, "step": 925} +{"info/global_step": 926, "train_info/time_within_train_step": 24.749306678771973, "step": 926} +{"train_info/time_between_train_steps": 0.0053899288177490234, "step": 926} +{"info/global_step": 927, "train_info/time_within_train_step": 24.869916439056396, "step": 927} +{"train_info/time_between_train_steps": 0.0053386688232421875, "step": 927} +{"info/global_step": 928, "train_info/time_within_train_step": 24.729745626449585, "step": 928} +{"train_info/time_between_train_steps": 0.005110740661621094, "step": 928} +{"info/global_step": 929, "train_info/time_within_train_step": 24.81226348876953, "step": 929} +{"train_info/time_between_train_steps": 0.005056619644165039, "step": 929} +{"info/global_step": 930, "train_info/time_within_train_step": 24.74826693534851, "step": 930} +{"train_info/time_between_train_steps": 0.006301164627075195, "step": 930} +{"info/global_step": 931, "train_info/time_within_train_step": 24.73659658432007, "step": 931} +{"train_info/time_between_train_steps": 0.004988908767700195, "step": 931} +{"info/global_step": 932, "train_info/time_within_train_step": 24.741992473602295, "step": 932} +{"train_info/time_between_train_steps": 0.005137205123901367, "step": 932} +{"info/global_step": 933, "train_info/time_within_train_step": 24.757463693618774, "step": 933} +{"train_info/time_between_train_steps": 0.005371809005737305, "step": 933} +{"info/global_step": 934, "train_info/time_within_train_step": 24.808403491973877, "step": 934} +{"train_info/time_between_train_steps": 0.010364294052124023, "step": 934} +{"info/global_step": 935, "train_info/time_within_train_step": 24.757939338684082, "step": 935} +{"train_info/time_between_train_steps": 0.005812406539916992, "step": 935} +{"info/global_step": 936, "train_info/time_within_train_step": 24.76593041419983, "step": 936} +{"train_info/time_between_train_steps": 0.00574183464050293, "step": 936} +{"train_info/time_between_train_steps": 16.969599962234497, "step": 936} +{"info/global_step": 937, "train_info/time_within_train_step": 24.7241108417511, "step": 937} +{"train_info/time_between_train_steps": 0.0049571990966796875, "step": 937} +{"info/global_step": 938, "train_info/time_within_train_step": 24.910980939865112, "step": 938} +{"train_info/time_between_train_steps": 0.00526738166809082, "step": 938} +{"info/global_step": 939, "train_info/time_within_train_step": 24.844417333602905, "step": 939} +{"train_info/time_between_train_steps": 0.005338907241821289, "step": 939} +{"info/global_step": 940, "train_info/time_within_train_step": 24.862175941467285, "step": 940} +{"train_info/time_between_train_steps": 0.005240678787231445, "step": 940} +{"info/global_step": 941, "train_info/time_within_train_step": 24.731878519058228, "step": 941} +{"train_info/time_between_train_steps": 0.005239725112915039, "step": 941} +{"info/global_step": 942, "train_info/time_within_train_step": 24.87251615524292, "step": 942} +{"train_info/time_between_train_steps": 0.0052793025970458984, "step": 942} +{"info/global_step": 943, "train_info/time_within_train_step": 24.734192371368408, "step": 943} +{"train_info/time_between_train_steps": 0.005230903625488281, "step": 943} +{"info/global_step": 944, "train_info/time_within_train_step": 24.929403066635132, "step": 944} +{"train_info/time_between_train_steps": 0.005247592926025391, "step": 944} +{"info/global_step": 945, "train_info/time_within_train_step": 24.72746205329895, "step": 945} +{"train_info/time_between_train_steps": 0.005064487457275391, "step": 945} +{"info/global_step": 946, "train_info/time_within_train_step": 24.847947597503662, "step": 946} +{"train_info/time_between_train_steps": 0.005189657211303711, "step": 946} +{"info/global_step": 947, "train_info/time_within_train_step": 24.751914501190186, "step": 947} +{"train_info/time_between_train_steps": 0.0056116580963134766, "step": 947} +{"info/global_step": 948, "train_info/time_within_train_step": 24.90005588531494, "step": 948} +{"train_info/time_between_train_steps": 0.005216121673583984, "step": 948} +{"info/global_step": 949, "train_info/time_within_train_step": 24.73913264274597, "step": 949} +{"train_info/time_between_train_steps": 0.0054128170013427734, "step": 949} +{"info/global_step": 950, "train_info/time_within_train_step": 24.90783977508545, "step": 950} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740947101, "_runtime": 24607}, "step": 950} +{"logs": {"train/loss": 2.7731, "train/learning_rate": 0.0001388888888888889, "train/epoch": 26.01, "_timestamp": 1740947101, "_runtime": 24607}, "step": 950} +{"train_info/time_between_train_steps": 0.03133702278137207, "step": 950} +{"info/global_step": 951, "train_info/time_within_train_step": 24.939258575439453, "step": 951} +{"train_info/time_between_train_steps": 0.004946470260620117, "step": 951} +{"info/global_step": 952, "train_info/time_within_train_step": 24.84143567085266, "step": 952} +{"train_info/time_between_train_steps": 0.005498170852661133, "step": 952} +{"info/global_step": 953, "train_info/time_within_train_step": 24.7537202835083, "step": 953} +{"train_info/time_between_train_steps": 0.021650075912475586, "step": 953} +{"info/global_step": 954, "train_info/time_within_train_step": 24.71685767173767, "step": 954} +{"train_info/time_between_train_steps": 0.00502467155456543, "step": 954} +{"info/global_step": 955, "train_info/time_within_train_step": 24.863976001739502, "step": 955} +{"train_info/time_between_train_steps": 0.005370616912841797, "step": 955} +{"info/global_step": 956, "train_info/time_within_train_step": 24.73933696746826, "step": 956} +{"train_info/time_between_train_steps": 0.005258321762084961, "step": 956} +{"info/global_step": 957, "train_info/time_within_train_step": 24.746155500411987, "step": 957} +{"train_info/time_between_train_steps": 0.005596637725830078, "step": 957} +{"info/global_step": 958, "train_info/time_within_train_step": 24.748040199279785, "step": 958} +{"train_info/time_between_train_steps": 0.0052890777587890625, "step": 958} +{"info/global_step": 959, "train_info/time_within_train_step": 24.73845410346985, "step": 959} +{"train_info/time_between_train_steps": 0.005265235900878906, "step": 959} +{"info/global_step": 960, "train_info/time_within_train_step": 24.738687992095947, "step": 960} +{"train_info/time_between_train_steps": 0.0053272247314453125, "step": 960} +{"info/global_step": 961, "train_info/time_within_train_step": 24.74533987045288, "step": 961} +{"train_info/time_between_train_steps": 0.005397319793701172, "step": 961} +{"info/global_step": 962, "train_info/time_within_train_step": 24.72673773765564, "step": 962} +{"train_info/time_between_train_steps": 0.005103349685668945, "step": 962} +{"info/global_step": 963, "train_info/time_within_train_step": 24.799788236618042, "step": 963} +{"train_info/time_between_train_steps": 0.005211353302001953, "step": 963} +{"info/global_step": 964, "train_info/time_within_train_step": 24.754240036010742, "step": 964} +{"train_info/time_between_train_steps": 0.005174160003662109, "step": 964} +{"info/global_step": 965, "train_info/time_within_train_step": 24.81948208808899, "step": 965} +{"train_info/time_between_train_steps": 0.005200386047363281, "step": 965} +{"info/global_step": 966, "train_info/time_within_train_step": 24.74086570739746, "step": 966} +{"train_info/time_between_train_steps": 0.005432844161987305, "step": 966} +{"info/global_step": 967, "train_info/time_within_train_step": 24.79111361503601, "step": 967} +{"train_info/time_between_train_steps": 0.0052568912506103516, "step": 967} +{"info/global_step": 968, "train_info/time_within_train_step": 24.7299222946167, "step": 968} +{"train_info/time_between_train_steps": 0.005232810974121094, "step": 968} +{"info/global_step": 969, "train_info/time_within_train_step": 24.73884630203247, "step": 969} +{"train_info/time_between_train_steps": 0.005269050598144531, "step": 969} +{"info/global_step": 970, "train_info/time_within_train_step": 24.8039608001709, "step": 970} +{"train_info/time_between_train_steps": 0.0062177181243896484, "step": 970} +{"info/global_step": 971, "train_info/time_within_train_step": 24.837851524353027, "step": 971} +{"train_info/time_between_train_steps": 0.005538225173950195, "step": 971} +{"info/global_step": 972, "train_info/time_within_train_step": 24.76129126548767, "step": 972} +{"train_info/time_between_train_steps": 0.005683422088623047, "step": 972} +{"train_info/time_between_train_steps": 16.58024525642395, "step": 972} +{"info/global_step": 973, "train_info/time_within_train_step": 24.729690551757812, "step": 973} +{"train_info/time_between_train_steps": 0.004855632781982422, "step": 973} +{"info/global_step": 974, "train_info/time_within_train_step": 24.828939199447632, "step": 974} +{"train_info/time_between_train_steps": 0.005277395248413086, "step": 974} +{"info/global_step": 975, "train_info/time_within_train_step": 24.73785138130188, "step": 975} +{"train_info/time_between_train_steps": 0.010343790054321289, "step": 975} +{"info/global_step": 976, "train_info/time_within_train_step": 24.873166799545288, "step": 976} +{"train_info/time_between_train_steps": 0.005306720733642578, "step": 976} +{"info/global_step": 977, "train_info/time_within_train_step": 24.728734254837036, "step": 977} +{"train_info/time_between_train_steps": 0.005237579345703125, "step": 977} +{"info/global_step": 978, "train_info/time_within_train_step": 24.89982318878174, "step": 978} +{"train_info/time_between_train_steps": 0.005884647369384766, "step": 978} +{"info/global_step": 979, "train_info/time_within_train_step": 24.801409244537354, "step": 979} +{"train_info/time_between_train_steps": 0.0051767826080322266, "step": 979} +{"info/global_step": 980, "train_info/time_within_train_step": 24.88764715194702, "step": 980} +{"train_info/time_between_train_steps": 0.005053520202636719, "step": 980} +{"info/global_step": 981, "train_info/time_within_train_step": 24.754468202590942, "step": 981} +{"train_info/time_between_train_steps": 0.005682945251464844, "step": 981} +{"info/global_step": 982, "train_info/time_within_train_step": 24.883609533309937, "step": 982} +{"train_info/time_between_train_steps": 0.0051615238189697266, "step": 982} +{"info/global_step": 983, "train_info/time_within_train_step": 24.734298944473267, "step": 983} +{"train_info/time_between_train_steps": 0.005302906036376953, "step": 983} +{"info/global_step": 984, "train_info/time_within_train_step": 24.904595375061035, "step": 984} +{"train_info/time_between_train_steps": 0.005380868911743164, "step": 984} +{"info/global_step": 985, "train_info/time_within_train_step": 24.73311161994934, "step": 985} +{"train_info/time_between_train_steps": 0.005326747894287109, "step": 985} +{"info/global_step": 986, "train_info/time_within_train_step": 25.019901037216187, "step": 986} +{"train_info/time_between_train_steps": 0.005370616912841797, "step": 986} +{"info/global_step": 987, "train_info/time_within_train_step": 24.742554426193237, "step": 987} +{"train_info/time_between_train_steps": 0.005396366119384766, "step": 987} +{"info/global_step": 988, "train_info/time_within_train_step": 24.827547550201416, "step": 988} +{"train_info/time_between_train_steps": 0.005346059799194336, "step": 988} +{"info/global_step": 989, "train_info/time_within_train_step": 24.73936676979065, "step": 989} +{"train_info/time_between_train_steps": 0.03734159469604492, "step": 989} +{"info/global_step": 990, "train_info/time_within_train_step": 24.724319458007812, "step": 990} +{"train_info/time_between_train_steps": 0.005114316940307617, "step": 990} +{"info/global_step": 991, "train_info/time_within_train_step": 24.79459047317505, "step": 991} +{"train_info/time_between_train_steps": 0.0050656795501708984, "step": 991} +{"info/global_step": 992, "train_info/time_within_train_step": 24.724730014801025, "step": 992} +{"train_info/time_between_train_steps": 0.005238533020019531, "step": 992} +{"info/global_step": 993, "train_info/time_within_train_step": 24.79643177986145, "step": 993} +{"train_info/time_between_train_steps": 0.005318880081176758, "step": 993} +{"info/global_step": 994, "train_info/time_within_train_step": 24.72629737854004, "step": 994} +{"train_info/time_between_train_steps": 0.00502467155456543, "step": 994} +{"info/global_step": 995, "train_info/time_within_train_step": 24.724307537078857, "step": 995} +{"train_info/time_between_train_steps": 0.005175113677978516, "step": 995} +{"info/global_step": 996, "train_info/time_within_train_step": 24.796952724456787, "step": 996} +{"train_info/time_between_train_steps": 0.0050449371337890625, "step": 996} +{"info/global_step": 997, "train_info/time_within_train_step": 24.742318391799927, "step": 997} +{"train_info/time_between_train_steps": 0.005236625671386719, "step": 997} +{"info/global_step": 998, "train_info/time_within_train_step": 24.80823254585266, "step": 998} +{"train_info/time_between_train_steps": 0.00518488883972168, "step": 998} +{"info/global_step": 999, "train_info/time_within_train_step": 24.73631191253662, "step": 999} +{"train_info/time_between_train_steps": 0.005114078521728516, "step": 999} +{"info/global_step": 1000, "train_info/time_within_train_step": 24.731943130493164, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 19104.0, "train_info/memory_max_reserved": 19104.0, "_timestamp": 1740948359, "_runtime": 25865}, "step": 1000} +{"logs": {"train/loss": 2.709, "train/learning_rate": 0.00011111111111111109, "train/epoch": 27.02, "_timestamp": 1740948359, "_runtime": 25865}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740948362, "_runtime": 25868}, "step": 1000} +{"logs": {"eval/loss": 3.4275035858154297, "eval/runtime": 2.4722, "eval/samples_per_second": 53.393, "eval/steps_per_second": 3.64, "train/epoch": 27.02, "_timestamp": 1740948362, "_runtime": 25868}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740948362, "_runtime": 25868}, "step": 1000} +{"logs": {"eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 3.4275035858154297, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 30.7996579936446, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 2.4722, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 53.393, "train/epoch": 27.02, "_timestamp": 1740948362, "_runtime": 25868}, "step": 1000} +{"train_info/time_between_train_steps": 59.96675896644592, "step": 1000} +{"info/global_step": 1001, "train_info/time_within_train_step": 24.805634260177612, "step": 1001} +{"train_info/time_between_train_steps": 0.004919767379760742, "step": 1001} +{"info/global_step": 1002, "train_info/time_within_train_step": 24.711248874664307, "step": 1002} +{"train_info/time_between_train_steps": 0.00493311882019043, "step": 1002} +{"info/global_step": 1003, "train_info/time_within_train_step": 24.737095594406128, "step": 1003} +{"train_info/time_between_train_steps": 0.005265951156616211, "step": 1003} +{"info/global_step": 1004, "train_info/time_within_train_step": 24.761247396469116, "step": 1004} +{"train_info/time_between_train_steps": 0.00506281852722168, "step": 1004} +{"info/global_step": 1005, "train_info/time_within_train_step": 24.75206732749939, "step": 1005} +{"train_info/time_between_train_steps": 0.005234479904174805, "step": 1005} +{"info/global_step": 1006, "train_info/time_within_train_step": 24.806004762649536, "step": 1006} +{"train_info/time_between_train_steps": 0.005641937255859375, "step": 1006} +{"info/global_step": 1007, "train_info/time_within_train_step": 25.077558994293213, "step": 1007} +{"train_info/time_between_train_steps": 0.006220102310180664, "step": 1007} +{"info/global_step": 1008, "train_info/time_within_train_step": 28.10293436050415, "step": 1008} +{"train_info/time_between_train_steps": 0.005970954895019531, "step": 1008} +{"train_info/time_between_train_steps": 21.63965129852295, "step": 1008} +{"info/global_step": 1009, "train_info/time_within_train_step": 27.353113889694214, "step": 1009} +{"train_info/time_between_train_steps": 0.004888057708740234, "step": 1009} +{"info/global_step": 1010, "train_info/time_within_train_step": 24.827672719955444, "step": 1010} +{"train_info/time_between_train_steps": 0.00488591194152832, "step": 1010} +{"info/global_step": 1011, "train_info/time_within_train_step": 24.73745560646057, "step": 1011} +{"train_info/time_between_train_steps": 0.0051996707916259766, "step": 1011} +{"info/global_step": 1012, "train_info/time_within_train_step": 24.905038595199585, "step": 1012} +{"train_info/time_between_train_steps": 0.005365610122680664, "step": 1012} +{"info/global_step": 1013, "train_info/time_within_train_step": 24.73007893562317, "step": 1013} +{"train_info/time_between_train_steps": 0.005198001861572266, "step": 1013} +{"info/global_step": 1014, "train_info/time_within_train_step": 24.87281107902527, "step": 1014} +{"train_info/time_between_train_steps": 0.005472421646118164, "step": 1014} +{"info/global_step": 1015, "train_info/time_within_train_step": 24.83662176132202, "step": 1015} +{"train_info/time_between_train_steps": 0.005199432373046875, "step": 1015} +{"info/global_step": 1016, "train_info/time_within_train_step": 24.991981506347656, "step": 1016} +{"train_info/time_between_train_steps": 0.005101680755615234, "step": 1016} +{"info/global_step": 1017, "train_info/time_within_train_step": 24.751673698425293, "step": 1017} +{"train_info/time_between_train_steps": 0.005563259124755859, "step": 1017} +{"info/global_step": 1018, "train_info/time_within_train_step": 24.891634702682495, "step": 1018} +{"train_info/time_between_train_steps": 0.005158424377441406, "step": 1018} +{"info/global_step": 1019, "train_info/time_within_train_step": 24.728071689605713, "step": 1019} +{"train_info/time_between_train_steps": 0.005362272262573242, "step": 1019} +{"info/global_step": 1020, "train_info/time_within_train_step": 24.91531467437744, "step": 1020} +{"train_info/time_between_train_steps": 0.005462646484375, "step": 1020} +{"info/global_step": 1021, "train_info/time_within_train_step": 24.819575548171997, "step": 1021} +{"train_info/time_between_train_steps": 0.005188941955566406, "step": 1021} +{"info/global_step": 1022, "train_info/time_within_train_step": 24.846196174621582, "step": 1022} +{"train_info/time_between_train_steps": 0.005194425582885742, "step": 1022} +{"info/global_step": 1023, "train_info/time_within_train_step": 24.744730234146118, "step": 1023} +{"train_info/time_between_train_steps": 0.005238056182861328, "step": 1023} +{"info/global_step": 1024, "train_info/time_within_train_step": 24.82241129875183, "step": 1024} +{"train_info/time_between_train_steps": 0.0052490234375, "step": 1024} +{"info/global_step": 1025, "train_info/time_within_train_step": 24.75742769241333, "step": 1025} +{"train_info/time_between_train_steps": 0.03538990020751953, "step": 1025} +{"info/global_step": 1026, "train_info/time_within_train_step": 24.724424600601196, "step": 1026} +{"train_info/time_between_train_steps": 0.005207538604736328, "step": 1026} +{"info/global_step": 1027, "train_info/time_within_train_step": 24.734216690063477, "step": 1027} +{"train_info/time_between_train_steps": 0.004978179931640625, "step": 1027} +{"info/global_step": 1028, "train_info/time_within_train_step": 24.75236964225769, "step": 1028} +{"train_info/time_between_train_steps": 0.005109310150146484, "step": 1028} +{"info/global_step": 1029, "train_info/time_within_train_step": 24.745452404022217, "step": 1029} +{"train_info/time_between_train_steps": 0.005129814147949219, "step": 1029} +{"info/global_step": 1030, "train_info/time_within_train_step": 24.73179602622986, "step": 1030} +{"train_info/time_between_train_steps": 0.004993915557861328, "step": 1030} +{"info/global_step": 1031, "train_info/time_within_train_step": 24.778099060058594, "step": 1031} +{"train_info/time_between_train_steps": 0.005214691162109375, "step": 1031} +{"info/global_step": 1032, "train_info/time_within_train_step": 24.82324457168579, "step": 1032} +{"train_info/time_between_train_steps": 0.005042314529418945, "step": 1032} +{"info/global_step": 1033, "train_info/time_within_train_step": 24.80175757408142, "step": 1033} +{"train_info/time_between_train_steps": 0.005109071731567383, "step": 1033} +{"info/global_step": 1034, "train_info/time_within_train_step": 24.739139795303345, "step": 1034} +{"train_info/time_between_train_steps": 0.005136728286743164, "step": 1034} +{"info/global_step": 1035, "train_info/time_within_train_step": 24.728676557540894, "step": 1035} +{"train_info/time_between_train_steps": 0.00887918472290039, "step": 1035} +{"info/global_step": 1036, "train_info/time_within_train_step": 24.87920880317688, "step": 1036} +{"train_info/time_between_train_steps": 0.005143642425537109, "step": 1036} +{"info/global_step": 1037, "train_info/time_within_train_step": 24.72463369369507, "step": 1037} +{"train_info/time_between_train_steps": 0.0051441192626953125, "step": 1037} +{"info/global_step": 1038, "train_info/time_within_train_step": 24.750692129135132, "step": 1038} +{"train_info/time_between_train_steps": 0.005025386810302734, "step": 1038} +{"info/global_step": 1039, "train_info/time_within_train_step": 24.730239868164062, "step": 1039} +{"train_info/time_between_train_steps": 0.00546717643737793, "step": 1039} +{"info/global_step": 1040, "train_info/time_within_train_step": 24.735581874847412, "step": 1040} +{"train_info/time_between_train_steps": 0.005023002624511719, "step": 1040} +{"info/global_step": 1041, "train_info/time_within_train_step": 24.7682204246521, "step": 1041} +{"train_info/time_between_train_steps": 0.005105257034301758, "step": 1041} +{"info/global_step": 1042, "train_info/time_within_train_step": 24.748202323913574, "step": 1042} +{"train_info/time_between_train_steps": 0.0054700374603271484, "step": 1042} +{"info/global_step": 1043, "train_info/time_within_train_step": 24.81070113182068, "step": 1043} +{"train_info/time_between_train_steps": 0.005334138870239258, "step": 1043} +{"info/global_step": 1044, "train_info/time_within_train_step": 24.757614850997925, "step": 1044} +{"train_info/time_between_train_steps": 0.005574703216552734, "step": 1044} +{"train_info/time_between_train_steps": 16.839760780334473, "step": 1044} +{"info/global_step": 1045, "train_info/time_within_train_step": 24.71796488761902, "step": 1045} +{"train_info/time_between_train_steps": 0.004846811294555664, "step": 1045} +{"info/global_step": 1046, "train_info/time_within_train_step": 24.820902347564697, "step": 1046} +{"train_info/time_between_train_steps": 0.004925966262817383, "step": 1046} +{"info/global_step": 1047, "train_info/time_within_train_step": 24.810945510864258, "step": 1047} +{"train_info/time_between_train_steps": 0.005060434341430664, "step": 1047} +{"info/global_step": 1048, "train_info/time_within_train_step": 24.880175828933716, "step": 1048} +{"train_info/time_between_train_steps": 0.005260467529296875, "step": 1048} +{"info/global_step": 1049, "train_info/time_within_train_step": 24.747223377227783, "step": 1049} +{"train_info/time_between_train_steps": 0.005336761474609375, "step": 1049} +{"info/global_step": 1050, "train_info/time_within_train_step": 24.861138582229614, "step": 1050} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740949708, "_runtime": 27214}, "step": 1050} +{"logs": {"train/loss": 2.7083, "train/learning_rate": 8.333333333333333e-05, "train/epoch": 29.0, "_timestamp": 1740949708, "_runtime": 27214}, "step": 1050} +{"train_info/time_between_train_steps": 0.02612471580505371, "step": 1050} +{"info/global_step": 1051, "train_info/time_within_train_step": 24.7545747756958, "step": 1051} +{"train_info/time_between_train_steps": 0.005415439605712891, "step": 1051} +{"info/global_step": 1052, "train_info/time_within_train_step": 24.976486921310425, "step": 1052} +{"train_info/time_between_train_steps": 0.005179643630981445, "step": 1052} +{"info/global_step": 1053, "train_info/time_within_train_step": 24.731539726257324, "step": 1053} +{"train_info/time_between_train_steps": 0.005267620086669922, "step": 1053} +{"info/global_step": 1054, "train_info/time_within_train_step": 24.953437328338623, "step": 1054} +{"train_info/time_between_train_steps": 0.00995945930480957, "step": 1054} +{"info/global_step": 1055, "train_info/time_within_train_step": 24.74444007873535, "step": 1055} +{"train_info/time_between_train_steps": 0.005545854568481445, "step": 1055} +{"info/global_step": 1056, "train_info/time_within_train_step": 24.872103691101074, "step": 1056} +{"train_info/time_between_train_steps": 0.0053937435150146484, "step": 1056} +{"info/global_step": 1057, "train_info/time_within_train_step": 24.823122024536133, "step": 1057} +{"train_info/time_between_train_steps": 0.005405902862548828, "step": 1057} +{"info/global_step": 1058, "train_info/time_within_train_step": 24.85873818397522, "step": 1058} +{"train_info/time_between_train_steps": 0.007571220397949219, "step": 1058} +{"info/global_step": 1059, "train_info/time_within_train_step": 24.744768619537354, "step": 1059} +{"train_info/time_between_train_steps": 0.010312557220458984, "step": 1059} +{"info/global_step": 1060, "train_info/time_within_train_step": 24.871164560317993, "step": 1060} +{"train_info/time_between_train_steps": 0.010354042053222656, "step": 1060} +{"info/global_step": 1061, "train_info/time_within_train_step": 24.737029790878296, "step": 1061} +{"train_info/time_between_train_steps": 0.026823997497558594, "step": 1061} +{"info/global_step": 1062, "train_info/time_within_train_step": 24.724945306777954, "step": 1062} +{"train_info/time_between_train_steps": 0.0050601959228515625, "step": 1062} +{"info/global_step": 1063, "train_info/time_within_train_step": 24.82491111755371, "step": 1063} +{"train_info/time_between_train_steps": 0.005047798156738281, "step": 1063} +{"info/global_step": 1064, "train_info/time_within_train_step": 24.781314611434937, "step": 1064} +{"train_info/time_between_train_steps": 0.005091667175292969, "step": 1064} +{"info/global_step": 1065, "train_info/time_within_train_step": 24.742523908615112, "step": 1065} +{"train_info/time_between_train_steps": 0.006449699401855469, "step": 1065} +{"info/global_step": 1066, "train_info/time_within_train_step": 24.751348972320557, "step": 1066} +{"train_info/time_between_train_steps": 0.005095958709716797, "step": 1066} +{"info/global_step": 1067, "train_info/time_within_train_step": 24.782553911209106, "step": 1067} +{"train_info/time_between_train_steps": 0.005104780197143555, "step": 1067} +{"info/global_step": 1068, "train_info/time_within_train_step": 24.74960994720459, "step": 1068} +{"train_info/time_between_train_steps": 0.005227804183959961, "step": 1068} +{"info/global_step": 1069, "train_info/time_within_train_step": 24.7945499420166, "step": 1069} +{"train_info/time_between_train_steps": 0.005065441131591797, "step": 1069} +{"info/global_step": 1070, "train_info/time_within_train_step": 24.74192237854004, "step": 1070} +{"train_info/time_between_train_steps": 0.005103111267089844, "step": 1070} +{"info/global_step": 1071, "train_info/time_within_train_step": 24.809133768081665, "step": 1071} +{"train_info/time_between_train_steps": 0.0052530765533447266, "step": 1071} +{"info/global_step": 1072, "train_info/time_within_train_step": 24.724120140075684, "step": 1072} +{"train_info/time_between_train_steps": 0.005115985870361328, "step": 1072} +{"info/global_step": 1073, "train_info/time_within_train_step": 24.74308204650879, "step": 1073} +{"train_info/time_between_train_steps": 0.00511622428894043, "step": 1073} +{"info/global_step": 1074, "train_info/time_within_train_step": 24.7983341217041, "step": 1074} +{"train_info/time_between_train_steps": 0.0051610469818115234, "step": 1074} +{"info/global_step": 1075, "train_info/time_within_train_step": 24.73676609992981, "step": 1075} +{"train_info/time_between_train_steps": 0.005159616470336914, "step": 1075} +{"info/global_step": 1076, "train_info/time_within_train_step": 24.795570373535156, "step": 1076} +{"train_info/time_between_train_steps": 0.00516510009765625, "step": 1076} +{"info/global_step": 1077, "train_info/time_within_train_step": 24.7444748878479, "step": 1077} +{"train_info/time_between_train_steps": 0.005123138427734375, "step": 1077} +{"info/global_step": 1078, "train_info/time_within_train_step": 24.826818227767944, "step": 1078} +{"train_info/time_between_train_steps": 0.00523686408996582, "step": 1078} +{"info/global_step": 1079, "train_info/time_within_train_step": 24.743022441864014, "step": 1079} +{"train_info/time_between_train_steps": 0.0056154727935791016, "step": 1079} +{"info/global_step": 1080, "train_info/time_within_train_step": 24.74998188018799, "step": 1080} +{"train_info/time_between_train_steps": 0.00548863410949707, "step": 1080} +{"train_info/time_between_train_steps": 16.57440757751465, "step": 1080} +{"info/global_step": 1081, "train_info/time_within_train_step": 24.709579706192017, "step": 1081} +{"train_info/time_between_train_steps": 0.0048520565032958984, "step": 1081} +{"info/global_step": 1082, "train_info/time_within_train_step": 24.86597990989685, "step": 1082} +{"train_info/time_between_train_steps": 0.005227804183959961, "step": 1082} +{"info/global_step": 1083, "train_info/time_within_train_step": 24.807761907577515, "step": 1083} +{"train_info/time_between_train_steps": 0.0053844451904296875, "step": 1083} +{"info/global_step": 1084, "train_info/time_within_train_step": 24.849445819854736, "step": 1084} +{"train_info/time_between_train_steps": 0.0051326751708984375, "step": 1084} +{"info/global_step": 1085, "train_info/time_within_train_step": 24.725461959838867, "step": 1085} +{"train_info/time_between_train_steps": 0.005259513854980469, "step": 1085} +{"info/global_step": 1086, "train_info/time_within_train_step": 24.893353939056396, "step": 1086} +{"train_info/time_between_train_steps": 0.0052051544189453125, "step": 1086} +{"info/global_step": 1087, "train_info/time_within_train_step": 24.737226963043213, "step": 1087} +{"train_info/time_between_train_steps": 0.005277156829833984, "step": 1087} +{"info/global_step": 1088, "train_info/time_within_train_step": 24.844319581985474, "step": 1088} +{"train_info/time_between_train_steps": 0.010450363159179688, "step": 1088} +{"info/global_step": 1089, "train_info/time_within_train_step": 24.73810839653015, "step": 1089} +{"train_info/time_between_train_steps": 0.005765676498413086, "step": 1089} +{"info/global_step": 1090, "train_info/time_within_train_step": 24.913383960723877, "step": 1090} +{"train_info/time_between_train_steps": 0.005266427993774414, "step": 1090} +{"info/global_step": 1091, "train_info/time_within_train_step": 24.739511013031006, "step": 1091} +{"train_info/time_between_train_steps": 0.005346059799194336, "step": 1091} +{"info/global_step": 1092, "train_info/time_within_train_step": 24.912596464157104, "step": 1092} +{"train_info/time_between_train_steps": 0.01047205924987793, "step": 1092} +{"info/global_step": 1093, "train_info/time_within_train_step": 24.822775840759277, "step": 1093} +{"train_info/time_between_train_steps": 0.0052165985107421875, "step": 1093} +{"info/global_step": 1094, "train_info/time_within_train_step": 24.8618426322937, "step": 1094} +{"train_info/time_between_train_steps": 0.005054950714111328, "step": 1094} +{"info/global_step": 1095, "train_info/time_within_train_step": 24.772202014923096, "step": 1095} +{"train_info/time_between_train_steps": 0.00559234619140625, "step": 1095} +{"info/global_step": 1096, "train_info/time_within_train_step": 24.863402605056763, "step": 1096} +{"train_info/time_between_train_steps": 0.006360530853271484, "step": 1096} +{"info/global_step": 1097, "train_info/time_within_train_step": 24.770942211151123, "step": 1097} +{"train_info/time_between_train_steps": 0.02685070037841797, "step": 1097} +{"info/global_step": 1098, "train_info/time_within_train_step": 24.749961137771606, "step": 1098} +{"train_info/time_between_train_steps": 0.0052640438079833984, "step": 1098} +{"info/global_step": 1099, "train_info/time_within_train_step": 24.739070177078247, "step": 1099} +{"train_info/time_between_train_steps": 0.004931211471557617, "step": 1099} +{"info/global_step": 1100, "train_info/time_within_train_step": 24.80786156654358, "step": 1100} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740950966, "_runtime": 28472}, "step": 1100} +{"logs": {"train/loss": 2.6528, "train/learning_rate": 5.5555555555555545e-05, "train/epoch": 30.02, "_timestamp": 1740950966, "_runtime": 28472}, "step": 1100} +{"train_info/time_between_train_steps": 34.805805921554565, "step": 1100} +{"info/global_step": 1101, "train_info/time_within_train_step": 24.816805601119995, "step": 1101} +{"train_info/time_between_train_steps": 0.00519108772277832, "step": 1101} +{"info/global_step": 1102, "train_info/time_within_train_step": 24.74382758140564, "step": 1102} +{"train_info/time_between_train_steps": 0.005143165588378906, "step": 1102} +{"info/global_step": 1103, "train_info/time_within_train_step": 24.806084871292114, "step": 1103} +{"train_info/time_between_train_steps": 0.005144357681274414, "step": 1103} +{"info/global_step": 1104, "train_info/time_within_train_step": 24.742785692214966, "step": 1104} +{"train_info/time_between_train_steps": 0.005198240280151367, "step": 1104} +{"info/global_step": 1105, "train_info/time_within_train_step": 24.733038902282715, "step": 1105} +{"train_info/time_between_train_steps": 0.005172014236450195, "step": 1105} +{"info/global_step": 1106, "train_info/time_within_train_step": 24.789259433746338, "step": 1106} +{"train_info/time_between_train_steps": 0.005219936370849609, "step": 1106} +{"info/global_step": 1107, "train_info/time_within_train_step": 24.74314856529236, "step": 1107} +{"train_info/time_between_train_steps": 0.005031585693359375, "step": 1107} +{"info/global_step": 1108, "train_info/time_within_train_step": 24.807196855545044, "step": 1108} +{"train_info/time_between_train_steps": 0.005663633346557617, "step": 1108} +{"info/global_step": 1109, "train_info/time_within_train_step": 25.25753664970398, "step": 1109} +{"train_info/time_between_train_steps": 0.005536556243896484, "step": 1109} +{"info/global_step": 1110, "train_info/time_within_train_step": 26.268389225006104, "step": 1110} +{"train_info/time_between_train_steps": 0.005204200744628906, "step": 1110} +{"info/global_step": 1111, "train_info/time_within_train_step": 24.731446266174316, "step": 1111} +{"train_info/time_between_train_steps": 0.005151987075805664, "step": 1111} +{"info/global_step": 1112, "train_info/time_within_train_step": 24.728302478790283, "step": 1112} +{"train_info/time_between_train_steps": 0.005238056182861328, "step": 1112} +{"info/global_step": 1113, "train_info/time_within_train_step": 24.738046884536743, "step": 1113} +{"train_info/time_between_train_steps": 0.005203962326049805, "step": 1113} +{"info/global_step": 1114, "train_info/time_within_train_step": 24.728402853012085, "step": 1114} +{"train_info/time_between_train_steps": 0.005233287811279297, "step": 1114} +{"info/global_step": 1115, "train_info/time_within_train_step": 24.771979331970215, "step": 1115} +{"train_info/time_between_train_steps": 0.005598545074462891, "step": 1115} +{"info/global_step": 1116, "train_info/time_within_train_step": 24.75585126876831, "step": 1116} +{"train_info/time_between_train_steps": 0.0056035518646240234, "step": 1116} +{"train_info/time_between_train_steps": 16.64189076423645, "step": 1116} +{"info/global_step": 1117, "train_info/time_within_train_step": 24.78183150291443, "step": 1117} +{"train_info/time_between_train_steps": 0.0049855709075927734, "step": 1117} +{"info/global_step": 1118, "train_info/time_within_train_step": 24.82562780380249, "step": 1118} +{"train_info/time_between_train_steps": 0.0049533843994140625, "step": 1118} +{"info/global_step": 1119, "train_info/time_within_train_step": 24.773446559906006, "step": 1119} +{"train_info/time_between_train_steps": 0.004873752593994141, "step": 1119} +{"info/global_step": 1120, "train_info/time_within_train_step": 24.832030057907104, "step": 1120} +{"train_info/time_between_train_steps": 0.005203723907470703, "step": 1120} +{"info/global_step": 1121, "train_info/time_within_train_step": 24.74799370765686, "step": 1121} +{"train_info/time_between_train_steps": 0.005170345306396484, "step": 1121} +{"info/global_step": 1122, "train_info/time_within_train_step": 24.92631459236145, "step": 1122} +{"train_info/time_between_train_steps": 0.005306720733642578, "step": 1122} +{"info/global_step": 1123, "train_info/time_within_train_step": 24.740368127822876, "step": 1123} +{"train_info/time_between_train_steps": 0.005251407623291016, "step": 1123} +{"info/global_step": 1124, "train_info/time_within_train_step": 25.001675605773926, "step": 1124} +{"train_info/time_between_train_steps": 0.005189180374145508, "step": 1124} +{"info/global_step": 1125, "train_info/time_within_train_step": 24.742433071136475, "step": 1125} +{"train_info/time_between_train_steps": 0.005151271820068359, "step": 1125} +{"info/global_step": 1126, "train_info/time_within_train_step": 24.868855237960815, "step": 1126} +{"train_info/time_between_train_steps": 0.00503849983215332, "step": 1126} +{"info/global_step": 1127, "train_info/time_within_train_step": 24.82495403289795, "step": 1127} +{"train_info/time_between_train_steps": 0.01104283332824707, "step": 1127} +{"info/global_step": 1128, "train_info/time_within_train_step": 24.883254051208496, "step": 1128} +{"train_info/time_between_train_steps": 0.005433559417724609, "step": 1128} +{"info/global_step": 1129, "train_info/time_within_train_step": 24.746419429779053, "step": 1129} +{"train_info/time_between_train_steps": 0.005294322967529297, "step": 1129} +{"info/global_step": 1130, "train_info/time_within_train_step": 24.906259775161743, "step": 1130} +{"train_info/time_between_train_steps": 0.0055196285247802734, "step": 1130} +{"info/global_step": 1131, "train_info/time_within_train_step": 24.747795581817627, "step": 1131} +{"train_info/time_between_train_steps": 0.005077362060546875, "step": 1131} +{"info/global_step": 1132, "train_info/time_within_train_step": 24.82970356941223, "step": 1132} +{"train_info/time_between_train_steps": 0.005272626876831055, "step": 1132} +{"info/global_step": 1133, "train_info/time_within_train_step": 24.760803937911987, "step": 1133} +{"train_info/time_between_train_steps": 0.02848672866821289, "step": 1133} +{"info/global_step": 1134, "train_info/time_within_train_step": 24.78481912612915, "step": 1134} +{"train_info/time_between_train_steps": 0.004965782165527344, "step": 1134} +{"info/global_step": 1135, "train_info/time_within_train_step": 24.722097873687744, "step": 1135} +{"train_info/time_between_train_steps": 0.004907369613647461, "step": 1135} +{"info/global_step": 1136, "train_info/time_within_train_step": 24.71745252609253, "step": 1136} +{"train_info/time_between_train_steps": 0.005046844482421875, "step": 1136} +{"info/global_step": 1137, "train_info/time_within_train_step": 24.722620487213135, "step": 1137} +{"train_info/time_between_train_steps": 0.00503849983215332, "step": 1137} +{"info/global_step": 1138, "train_info/time_within_train_step": 24.7175772190094, "step": 1138} +{"train_info/time_between_train_steps": 0.005013704299926758, "step": 1138} +{"info/global_step": 1139, "train_info/time_within_train_step": 24.725415229797363, "step": 1139} +{"train_info/time_between_train_steps": 0.0051648616790771484, "step": 1139} +{"info/global_step": 1140, "train_info/time_within_train_step": 24.817137241363525, "step": 1140} +{"train_info/time_between_train_steps": 0.005052804946899414, "step": 1140} +{"info/global_step": 1141, "train_info/time_within_train_step": 24.785269737243652, "step": 1141} +{"train_info/time_between_train_steps": 0.00516057014465332, "step": 1141} +{"info/global_step": 1142, "train_info/time_within_train_step": 24.73087215423584, "step": 1142} +{"train_info/time_between_train_steps": 0.005149126052856445, "step": 1142} +{"info/global_step": 1143, "train_info/time_within_train_step": 24.734865188598633, "step": 1143} +{"train_info/time_between_train_steps": 0.005008697509765625, "step": 1143} +{"info/global_step": 1144, "train_info/time_within_train_step": 24.776411771774292, "step": 1144} +{"train_info/time_between_train_steps": 0.005190134048461914, "step": 1144} +{"info/global_step": 1145, "train_info/time_within_train_step": 24.722304582595825, "step": 1145} +{"train_info/time_between_train_steps": 0.005591392517089844, "step": 1145} +{"info/global_step": 1146, "train_info/time_within_train_step": 24.726880073547363, "step": 1146} +{"train_info/time_between_train_steps": 0.0050601959228515625, "step": 1146} +{"info/global_step": 1147, "train_info/time_within_train_step": 24.735869884490967, "step": 1147} +{"train_info/time_between_train_steps": 0.007042646408081055, "step": 1147} +{"info/global_step": 1148, "train_info/time_within_train_step": 24.79403257369995, "step": 1148} +{"train_info/time_between_train_steps": 0.00634312629699707, "step": 1148} +{"info/global_step": 1149, "train_info/time_within_train_step": 24.731130361557007, "step": 1149} +{"train_info/time_between_train_steps": 0.010357379913330078, "step": 1149} +{"info/global_step": 1150, "train_info/time_within_train_step": 24.76551127433777, "step": 1150} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740952261, "_runtime": 29767}, "step": 1150} +{"logs": {"train/loss": 2.629, "train/learning_rate": 2.7777777777777772e-05, "train/epoch": 31.03, "_timestamp": 1740952261, "_runtime": 29767}, "step": 1150} +{"train_info/time_between_train_steps": 0.025704622268676758, "step": 1150} +{"info/global_step": 1151, "train_info/time_within_train_step": 24.80884552001953, "step": 1151} +{"train_info/time_between_train_steps": 0.005454540252685547, "step": 1151} +{"info/global_step": 1152, "train_info/time_within_train_step": 24.765774726867676, "step": 1152} +{"train_info/time_between_train_steps": 0.005803823471069336, "step": 1152} +{"train_info/time_between_train_steps": 16.807632446289062, "step": 1152} +{"info/global_step": 1153, "train_info/time_within_train_step": 24.713743209838867, "step": 1153} +{"train_info/time_between_train_steps": 0.0048313140869140625, "step": 1153} +{"info/global_step": 1154, "train_info/time_within_train_step": 24.834060430526733, "step": 1154} +{"train_info/time_between_train_steps": 0.004850625991821289, "step": 1154} +{"info/global_step": 1155, "train_info/time_within_train_step": 24.84324312210083, "step": 1155} +{"train_info/time_between_train_steps": 0.00505518913269043, "step": 1155} +{"info/global_step": 1156, "train_info/time_within_train_step": 24.927403688430786, "step": 1156} +{"train_info/time_between_train_steps": 0.005181312561035156, "step": 1156} +{"info/global_step": 1157, "train_info/time_within_train_step": 24.788060903549194, "step": 1157} +{"train_info/time_between_train_steps": 0.005110502243041992, "step": 1157} +{"info/global_step": 1158, "train_info/time_within_train_step": 24.842493772506714, "step": 1158} +{"train_info/time_between_train_steps": 0.005603313446044922, "step": 1158} +{"info/global_step": 1159, "train_info/time_within_train_step": 24.74914789199829, "step": 1159} +{"train_info/time_between_train_steps": 0.00532841682434082, "step": 1159} +{"info/global_step": 1160, "train_info/time_within_train_step": 24.866842031478882, "step": 1160} +{"train_info/time_between_train_steps": 0.005164623260498047, "step": 1160} +{"info/global_step": 1161, "train_info/time_within_train_step": 24.736641883850098, "step": 1161} +{"train_info/time_between_train_steps": 0.010268688201904297, "step": 1161} +{"info/global_step": 1162, "train_info/time_within_train_step": 24.958576202392578, "step": 1162} +{"train_info/time_between_train_steps": 0.010168313980102539, "step": 1162} +{"info/global_step": 1163, "train_info/time_within_train_step": 24.740421295166016, "step": 1163} +{"train_info/time_between_train_steps": 0.0052492618560791016, "step": 1163} +{"info/global_step": 1164, "train_info/time_within_train_step": 24.861274242401123, "step": 1164} +{"train_info/time_between_train_steps": 0.0054721832275390625, "step": 1164} +{"info/global_step": 1165, "train_info/time_within_train_step": 24.82253074645996, "step": 1165} +{"train_info/time_between_train_steps": 0.005545616149902344, "step": 1165} +{"info/global_step": 1166, "train_info/time_within_train_step": 24.874204635620117, "step": 1166} +{"train_info/time_between_train_steps": 0.005129098892211914, "step": 1166} +{"info/global_step": 1167, "train_info/time_within_train_step": 24.748067140579224, "step": 1167} +{"train_info/time_between_train_steps": 0.005484819412231445, "step": 1167} +{"info/global_step": 1168, "train_info/time_within_train_step": 24.839110612869263, "step": 1168} +{"train_info/time_between_train_steps": 0.005232572555541992, "step": 1168} +{"info/global_step": 1169, "train_info/time_within_train_step": 24.749142169952393, "step": 1169} +{"train_info/time_between_train_steps": 0.021255016326904297, "step": 1169} +{"info/global_step": 1170, "train_info/time_within_train_step": 24.81792688369751, "step": 1170} +{"train_info/time_between_train_steps": 0.005225419998168945, "step": 1170} +{"info/global_step": 1171, "train_info/time_within_train_step": 24.73210883140564, "step": 1171} +{"train_info/time_between_train_steps": 0.005075931549072266, "step": 1171} +{"info/global_step": 1172, "train_info/time_within_train_step": 24.803800344467163, "step": 1172} +{"train_info/time_between_train_steps": 0.005003929138183594, "step": 1172} +{"info/global_step": 1173, "train_info/time_within_train_step": 24.76242733001709, "step": 1173} +{"train_info/time_between_train_steps": 0.005265474319458008, "step": 1173} +{"info/global_step": 1174, "train_info/time_within_train_step": 24.799837589263916, "step": 1174} +{"train_info/time_between_train_steps": 0.005013465881347656, "step": 1174} +{"info/global_step": 1175, "train_info/time_within_train_step": 24.751444101333618, "step": 1175} +{"train_info/time_between_train_steps": 0.0052607059478759766, "step": 1175} +{"info/global_step": 1176, "train_info/time_within_train_step": 24.752215147018433, "step": 1176} +{"train_info/time_between_train_steps": 0.005179405212402344, "step": 1176} +{"info/global_step": 1177, "train_info/time_within_train_step": 24.80241823196411, "step": 1177} +{"train_info/time_between_train_steps": 0.005099296569824219, "step": 1177} +{"info/global_step": 1178, "train_info/time_within_train_step": 24.754267692565918, "step": 1178} +{"train_info/time_between_train_steps": 0.0051593780517578125, "step": 1178} +{"info/global_step": 1179, "train_info/time_within_train_step": 24.801555156707764, "step": 1179} +{"train_info/time_between_train_steps": 0.005480527877807617, "step": 1179} +{"info/global_step": 1180, "train_info/time_within_train_step": 24.74228549003601, "step": 1180} +{"train_info/time_between_train_steps": 0.005070209503173828, "step": 1180} +{"info/global_step": 1181, "train_info/time_within_train_step": 24.744771003723145, "step": 1181} +{"train_info/time_between_train_steps": 0.010239839553833008, "step": 1181} +{"info/global_step": 1182, "train_info/time_within_train_step": 24.792629718780518, "step": 1182} +{"train_info/time_between_train_steps": 0.0050601959228515625, "step": 1182} +{"info/global_step": 1183, "train_info/time_within_train_step": 24.746148586273193, "step": 1183} +{"train_info/time_between_train_steps": 0.005112886428833008, "step": 1183} +{"info/global_step": 1184, "train_info/time_within_train_step": 24.8037211894989, "step": 1184} +{"train_info/time_between_train_steps": 0.005294322967529297, "step": 1184} +{"info/global_step": 1185, "train_info/time_within_train_step": 24.747845888137817, "step": 1185} +{"train_info/time_between_train_steps": 0.005210161209106445, "step": 1185} +{"info/global_step": 1186, "train_info/time_within_train_step": 24.854100942611694, "step": 1186} +{"train_info/time_between_train_steps": 0.010578393936157227, "step": 1186} +{"info/global_step": 1187, "train_info/time_within_train_step": 24.745999336242676, "step": 1187} +{"train_info/time_between_train_steps": 0.005411386489868164, "step": 1187} +{"info/global_step": 1188, "train_info/time_within_train_step": 24.75995945930481, "step": 1188} +{"train_info/time_between_train_steps": 0.005435466766357422, "step": 1188} +{"train_info/time_between_train_steps": 16.962584018707275, "step": 1188} +{"info/global_step": 1189, "train_info/time_within_train_step": 24.717910289764404, "step": 1189} +{"train_info/time_between_train_steps": 0.014841794967651367, "step": 1189} +{"info/global_step": 1190, "train_info/time_within_train_step": 24.825624227523804, "step": 1190} +{"train_info/time_between_train_steps": 0.004895925521850586, "step": 1190} +{"info/global_step": 1191, "train_info/time_within_train_step": 24.77292490005493, "step": 1191} +{"train_info/time_between_train_steps": 0.004847526550292969, "step": 1191} +{"info/global_step": 1192, "train_info/time_within_train_step": 24.845539331436157, "step": 1192} +{"train_info/time_between_train_steps": 0.0049664974212646484, "step": 1192} +{"info/global_step": 1193, "train_info/time_within_train_step": 24.79357409477234, "step": 1193} +{"train_info/time_between_train_steps": 0.005080223083496094, "step": 1193} +{"info/global_step": 1194, "train_info/time_within_train_step": 24.864415407180786, "step": 1194} +{"train_info/time_between_train_steps": 0.005257606506347656, "step": 1194} +{"info/global_step": 1195, "train_info/time_within_train_step": 24.73192048072815, "step": 1195} +{"train_info/time_between_train_steps": 0.005224943161010742, "step": 1195} +{"info/global_step": 1196, "train_info/time_within_train_step": 24.909140825271606, "step": 1196} +{"train_info/time_between_train_steps": 0.005295515060424805, "step": 1196} +{"info/global_step": 1197, "train_info/time_within_train_step": 24.732125282287598, "step": 1197} +{"train_info/time_between_train_steps": 0.00511932373046875, "step": 1197} +{"info/global_step": 1198, "train_info/time_within_train_step": 24.91923451423645, "step": 1198} +{"train_info/time_between_train_steps": 0.00538325309753418, "step": 1198} +{"info/global_step": 1199, "train_info/time_within_train_step": 24.753721714019775, "step": 1199} +{"train_info/time_between_train_steps": 0.005598545074462891, "step": 1199} +{"info/global_step": 1200, "train_info/time_within_train_step": 24.898101568222046, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740953536, "_runtime": 31042}, "step": 1200} +{"logs": {"train/loss": 2.639, "train/learning_rate": 0.0, "train/epoch": 33.01, "_timestamp": 1740953536, "_runtime": 31042}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1581.5546875, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740953593, "_runtime": 31099}, "step": 1200} +{"logs": {"train/train_runtime": 31100.6233, "train/train_samples_per_second": 19.755, "train/train_steps_per_second": 0.039, "train/total_flos": 3.258180993024e+17, "train/train_loss": 3.3956481122970583, "train/epoch": 33.01, "_timestamp": 1740953593, "_runtime": 31099}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1581.5537109375, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740953614, "_runtime": 31120}, "step": 1200} +{"logs": {"eval/loss": 3.4539854526519775, "eval/runtime": 2.3608, "eval/samples_per_second": 55.913, "eval/steps_per_second": 3.812, "train/epoch": 33.01, "_timestamp": 1740953614, "_runtime": 31120}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1581.5537109375, "train_info/memory_max_allocated": 18057.0224609375, "train_info/memory_reserved": 21960.0, "train_info/memory_max_reserved": 21960.0, "_timestamp": 1740953614, "_runtime": 31120}, "step": 1200} +{"logs": {"eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_loss": 3.4539854526519775, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_ppl": 31.62618613697903, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_runtime": 2.3608, "eval//scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 55.913, "train/epoch": 33.01, "_timestamp": 1740953614, "_runtime": 31120}, "step": 1200} diff --git a/perturb_adj_num_np_det_zh_ZH_randinit_seed53.log b/perturb_adj_num_np_det_zh_ZH_randinit_seed53.log new file mode 100755 index 0000000000000000000000000000000000000000..061aafb63a71e791b9a646bc56707b8ba2d4aa9d --- /dev/null +++ b/perturb_adj_num_np_det_zh_ZH_randinit_seed53.log @@ -0,0 +1,121 @@ +|=>> 03/02 [14:32:54] - mistral - INFO :: Starting Run: perturb_adj_num_np_det_zh_ZH_randinit_seed53... +|=>> 03/02 [14:32:54] - mistral - INFO :: Setting Random Seed to 53! +|=>> 03/02 [14:32:54] - mistral - INFO :: Building Tokenize and Initializing `gpt2-small` via AutoModel/AutoConfig... +|=>> 03/02 [14:32:54] - mistral - INFO :: Using Configs For Model From: /scratch/xiulyang/multilingual-LM/mistral/conf/models/gpt2-small-ZH.json ... +|=>> 03/02 [14:32:54] - mistral.models.auto - INFO :: Building Hugging Face GPT2Config from provided configs: {'activation_function': 'gelu_new', 'architectures': ['GPT2LMHeadModel'], 'attn_pdrop': 0.1, 'embd_pdrop': 0.1, 'initializer_range': 0.02, 'layer_norm_epsilon': 1e-05, 'model_type': 'gpt2', 'n_ctx': 1024, 'n_embd': 768, 'n_head': 12, 'n_inner': None, 'n_layer': 12, 'n_positions': 1024, 'reorder_and_upcast_attn': True, 'resid_pdrop': 0.1, 'scale_attn_by_inverse_layer_idx': True, 'scale_attn_weights': True, 'summary_activation': None, 'summary_first_dropout': 0.2, 'summary_proj_to_labels': True, 'summary_type': 'cls_index', 'summary_use_proj': True, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 1024}}, 'torch_dtype': 'float32', 'transformers_version': '4.35.2', 'use_cache': False, 'vocab_size': 21128} ... +|=>> 03/02 [14:32:54] - mistral.models.auto - INFO :: Fetching Hugging Face [Fast] AutoTokenizer for Model: `gpt2`... +|=>> 03/02 [14:32:54] - mistral.models.auto - INFO :: Using a Pretokenized Dataset +|=>> 03/02 [14:32:54] - mistral.models.auto - INFO :: Initializing Custom GPT-2 Model from Configuration: `gpt2`... +|=>> 03/02 [14:32:57] - mistral - INFO :: Setting Training Arguments from Quinfig... +|=>> 03/02 [14:32:57] - mistral.args.training - INFO :: Setting Gradient Accumulation Steps = `64` [BSZ: 512 World Size: 1 Device BSZ: 8] +|=>> 03/02 [14:32:57] - mistral - INFO :: Downloading and Preprocessing Dataset `/scratch/xiulyang/multilingual-LM/training/multilingual_dataset.py`... +|=>> 03/02 [14:32:58] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Generating examples from = /scratch/xiulyang/multilingual-LM/data/multilingual/multilingual_data_perturbed/perturb_adj_num_np_det_zh/train +|=>> 03/02 [14:33:00] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Total sentences: 1059468 +|=>> 03/02 [14:33:01] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Loading pre-tokenized data +|=>> 03/02 [14:33:07] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 03/02 [14:33:07] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 03/02 [14:33:08] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 03/02 [14:33:17] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Generating examples from = /scratch/xiulyang/multilingual-LM/data/multilingual/multilingual_data_perturbed/perturb_adj_num_np_det_zh/dev +|=>> 03/02 [14:33:17] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Total sentences: 5553 +|=>> 03/02 [14:33:17] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Loading pre-tokenized data +|=>> 03/02 [14:33:17] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 03/02 [14:33:17] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 03/02 [14:33:17] - datasets_modules.datasets.multilingual_dataset.7438cc84b432d1f96804ae60491b88719fc49ba5f7f0153bff87402ac918d8d3.multilingual_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 03/02 [14:33:18] - mistral.corpora.auto - INFO :: Building Tokenized Indexed Dataset for {dataset_id}/{dataset_name}... +|=>> 03/02 [14:33:18] - mistral.corpora.auto - INFO :: Building Indexed Dataset for train +|=>> 03/02 [14:34:00] - mistral.corpora.auto - INFO :: Building Indexed Dataset for validation +|=>> 03/02 [14:34:01] - mistral - INFO :: Initializing Model Trainer... +|=>> 03/02 [14:34:01] - mistral - INFO :: Training Arguments: TrainingArguments( +_n_gpu=1, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +bf16=False, +bf16_full_eval=False, +data_seed=53, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=IntervalStrategy.STEPS, +fp16=True, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +gradient_accumulation_steps=64, +gradient_checkpointing=False, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_model_id=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0006, +length_column_name=length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=logs, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=50, +logging_strategy=IntervalStrategy.STEPS, +lr_scheduler_type=SchedulerType.LINEAR, +max_grad_norm=1.0, +max_steps=1200, +metric_for_best_model=None, +mp_parameters=, +no_cuda=False, +num_train_epochs=3.0, +optim=OptimizerNames.ADAMW_HF, +output_dir=//scratch/xiulyang/multilingual_models/perturb_adj_num_np_det_zh_ZH_randinit/babylm_perturb_adj_num_np_det_zh_ZH_randinit_seed53/runs/perturb_adj_num_np_det_zh_ZH_randinit_seed53, +overwrite_output_dir=False, +past_index=-1, +per_device_eval_batch_size=16, +per_device_train_batch_size=8, +prediction_loss_only=True, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +remove_unused_columns=True, +report_to=[], +resume_from_checkpoint=None, +run_name=perturb_adj_num_np_det_zh_ZH_randinit_seed53, +save_on_each_node=False, +save_steps=1000, +save_strategy=IntervalStrategy.STEPS, +save_total_limit=None, +seed=53, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=120, +weight_decay=0.1, +xpu_backend=None, +) +|=>> 03/02 [14:34:09] - mistral.core.callbacks - INFO :: Setting W&B Project: xiulin-yang-compling +|=>> 03/02 [14:34:52] - mistral - INFO :: Training... +|=>> 03/02 [14:34:53] - mistral.core.callbacks - INFO :: Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" +|=>> 03/02 [23:13:31] - mistral - INFO :: ...and that's all folks! +|=>> 03/02 [23:13:31] - mistral - INFO :: Running final evaluation... diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..e216e9d8c88d8042920198c6f7f9cd95f87ee885 --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0908c891e567424ff7bbc9444303ea848ea1170463cec3e63b427ff1560a1f48 +size 420912233 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/training_args.bin b/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..6c86fbe0cf655f84c9ffa9639b21631e3e33c06f --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3956944c191fac4dee6f5ef2e1b97a38ffc9f86d0d1b30990539a8111ac333 +size 3183