diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/checkpoint-0/config.json b/checkpoint-0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..010a39070324d45f3a6fa1fe9997539307c8f44b --- /dev/null +++ b/checkpoint-0/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 64000 +} diff --git a/checkpoint-0/pytorch_model.bin b/checkpoint-0/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..fa893f2ccbb9d7e06b71d530a16fca95fa6d3731 --- /dev/null +++ b/checkpoint-0/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d9938f9842ab67a4ffdb8192f0ef1a92afea4592e39c9cede29e616f6bb617f +size 552615017 diff --git a/checkpoint-0/special_tokens_map.json b/checkpoint-0/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-0/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-0/tokenizer_config.json b/checkpoint-0/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-0/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-0/training_args.bin b/checkpoint-0/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-0/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-100/config.json b/checkpoint-100/config.json new file mode 100644 index 0000000000000000000000000000000000000000..010a39070324d45f3a6fa1fe9997539307c8f44b --- /dev/null +++ b/checkpoint-100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 64000 +} diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e35d5f3213bac9922db60fe1d2fc886df1f35a9a --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4547f92d7100e31e3deb39b44f6f22305a0894d90aab2a883a4d15394b930012 +size 1080040817 diff --git a/checkpoint-100/pytorch_model.bin b/checkpoint-100/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8df722d3b68fe210aa7d3cc6bdccb3f1b29734fe --- /dev/null +++ b/checkpoint-100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6606650d0527e0a3ac6af190f2cf114bc9c9df37604f2f0ea2c56bf7b88d8711 +size 552615017 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bc84d288cc5248143e6e485f91cad5e598a5dc29 --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58496e945c16a650ef4a460d1ac1592df28955ae14889c83910e110247a11e11 +size 14567 diff --git a/checkpoint-100/scaler.pt b/checkpoint-100/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..45cc4a33e17645cb0ed4a911b11c77cb2e7ce7f3 --- /dev/null +++ b/checkpoint-100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a3423b2fe42f204bc8fe2c666ff379f9fd753a0f13613064a5e71e86b519e8 +size 559 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..595eec9094e91b3eb24c2de88c461df2c22026ab --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e9b9d31d11c624d89b0c04ad496adf4b5addd3e703848d2583972c703e8da6 +size 623 diff --git a/checkpoint-100/special_tokens_map.json b/checkpoint-100/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-100/tokenizer_config.json b/checkpoint-100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bf2f2fde1a06e7970e8bc55d7d35f314a4598343 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,34 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0083333333333333, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.1049, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1845, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.9529, + "step": 100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.7041634975744e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..010a39070324d45f3a6fa1fe9997539307c8f44b --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 64000 +} diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..158066f4f7a995350f2ae0e4bc640aa832829502 --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d57f7023ea2fc223853366c846fc06cfd4e9af3565ac1f92228e07364e5b12b7 +size 1080041009 diff --git a/checkpoint-1000/pytorch_model.bin b/checkpoint-1000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..4595beddb8da778ba7a0a49e5c48afc3f97a9d66 --- /dev/null +++ b/checkpoint-1000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feaf65ae0b5bc37c29dd98d0ce7aa049d828220e3602831b83266de5f7856fb8 +size 552615017 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b496b3c5f0ffccc3c184d52660fc5fc060d3c7f7 --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394d180f63a23adf48a4ef738fe5b39b3b50aa94348457023737b14f18b9c037 +size 14567 diff --git a/checkpoint-1000/scaler.pt b/checkpoint-1000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b04695d3a30e4bab2b78883d9c849c25c37ef7d7 --- /dev/null +++ b/checkpoint-1000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f810fc7b695697c440d8985f6042b4ba23a9e1027604c265718b518ca29f1b2b +size 559 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a616899591a36b66ac0bd1ceeb324087c181a2a --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0691206f4bd9ca409d6e7104087a4e0eb05df8f8f555a400f6ecc532edba52d8 +size 623 diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..443fabc5dacb6907dce4e082cd3d8e6e9937d4db --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,158 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 33.00833333333333, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.1049, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1845, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.9529, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 5.1965, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.8925, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7182, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 4.561, + "step": 300 + }, + { + "epoch": 11.02, + "learning_rate": 0.00047222222222222224, + "loss": 4.4282, + "step": 350 + }, + { + "epoch": 13.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.2476, + "step": 400 + }, + { + "epoch": 14.03, + "learning_rate": 0.00041666666666666664, + "loss": 4.0259, + "step": 450 + }, + { + "epoch": 16.02, + "learning_rate": 0.00038888888888888887, + "loss": 3.8942, + "step": 500 + }, + { + "epoch": 18.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.7583, + "step": 550 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.6224, + "step": 600 + }, + { + "epoch": 21.02, + "learning_rate": 0.00030555555555555555, + "loss": 3.55, + "step": 650 + }, + { + "epoch": 23.01, + "learning_rate": 0.0002777777777777778, + "loss": 3.4646, + "step": 700 + }, + { + "epoch": 24.02, + "learning_rate": 0.00025, + "loss": 3.3628, + "step": 750 + }, + { + "epoch": 26.02, + "learning_rate": 0.00022222222222222218, + "loss": 3.3208, + "step": 800 + }, + { + "epoch": 28.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.2586, + "step": 850 + }, + { + "epoch": 29.02, + "learning_rate": 0.00016666666666666666, + "loss": 3.179, + "step": 900 + }, + { + "epoch": 31.02, + "learning_rate": 0.0001388888888888889, + "loss": 3.1549, + "step": 950 + }, + { + "epoch": 33.01, + "learning_rate": 0.00011111111111111109, + "loss": 3.109, + "step": 1000 + }, + { + "epoch": 33.01, + "eval_loss": 3.771193027496338, + "eval_runtime": 2.3153, + "eval_samples_per_second": 51.828, + "eval_steps_per_second": 3.455, + "step": 1000 + }, + { + "epoch": 33.01, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 3.771193027496338, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 43.43184933001428, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 2.3153, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 51.828, + "step": 1000 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.70701680656384e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-1100/config.json b/checkpoint-1100/config.json new file mode 100644 index 0000000000000000000000000000000000000000..010a39070324d45f3a6fa1fe9997539307c8f44b --- /dev/null +++ b/checkpoint-1100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 64000 +} diff --git a/checkpoint-1100/optimizer.pt b/checkpoint-1100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3a60e95b5b382f19d9e3d866079ab553967d18d --- /dev/null +++ b/checkpoint-1100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d50cb83c99564e1e5d3175eea21dacb67ba376d216c36c391dd2180aa4f075 +size 1080041009 diff --git a/checkpoint-1100/pytorch_model.bin b/checkpoint-1100/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a38dcfc99310c6a462a36fd2d69123784148c625 --- /dev/null +++ b/checkpoint-1100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:467d06af9b109ba6e6d6491715b596472f2392acad5ef29a5dd5227f028ea538 +size 552615017 diff --git a/checkpoint-1100/rng_state.pth b/checkpoint-1100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a1837dd3329a6dd7865a960c2f9c61fcd57e613f --- /dev/null +++ b/checkpoint-1100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e93443d06884eb77ceedb13ea088d03d6d30a18ae355271b718160fb29419aa5 +size 14567 diff --git a/checkpoint-1100/scaler.pt b/checkpoint-1100/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..37bf049fbd5fd721203bf0238edc8ff67dbd8f94 --- /dev/null +++ b/checkpoint-1100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb16c30b686aa43e110b0d33f9d46bf3127b7124542ca8dc34831233d4675a0 +size 559 diff --git a/checkpoint-1100/scheduler.pt b/checkpoint-1100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c158169ec0bb09620952d544fd4b4edea0cc9cf4 --- /dev/null +++ b/checkpoint-1100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1b7713e4bb40428f29080b7d08d4a52f779ac863737861e4724292b2cf6c59 +size 623 diff --git a/checkpoint-1100/special_tokens_map.json b/checkpoint-1100/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1100/tokenizer_config.json b/checkpoint-1100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1100/trainer_state.json b/checkpoint-1100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7a04f0cc3e47f8afaf092b563b5abd4f65aa4b63 --- /dev/null +++ b/checkpoint-1100/trainer_state.json @@ -0,0 +1,170 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 36.016666666666666, + "global_step": 1100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.1049, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1845, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.9529, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 5.1965, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.8925, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7182, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 4.561, + "step": 300 + }, + { + "epoch": 11.02, + "learning_rate": 0.00047222222222222224, + "loss": 4.4282, + "step": 350 + }, + { + "epoch": 13.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.2476, + "step": 400 + }, + { + "epoch": 14.03, + "learning_rate": 0.00041666666666666664, + "loss": 4.0259, + "step": 450 + }, + { + "epoch": 16.02, + "learning_rate": 0.00038888888888888887, + "loss": 3.8942, + "step": 500 + }, + { + "epoch": 18.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.7583, + "step": 550 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.6224, + "step": 600 + }, + { + "epoch": 21.02, + "learning_rate": 0.00030555555555555555, + "loss": 3.55, + "step": 650 + }, + { + "epoch": 23.01, + "learning_rate": 0.0002777777777777778, + "loss": 3.4646, + "step": 700 + }, + { + "epoch": 24.02, + "learning_rate": 0.00025, + "loss": 3.3628, + "step": 750 + }, + { + "epoch": 26.02, + "learning_rate": 0.00022222222222222218, + "loss": 3.3208, + "step": 800 + }, + { + "epoch": 28.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.2586, + "step": 850 + }, + { + "epoch": 29.02, + "learning_rate": 0.00016666666666666666, + "loss": 3.179, + "step": 900 + }, + { + "epoch": 31.02, + "learning_rate": 0.0001388888888888889, + "loss": 3.1549, + "step": 950 + }, + { + "epoch": 33.01, + "learning_rate": 0.00011111111111111109, + "loss": 3.109, + "step": 1000 + }, + { + "epoch": 33.01, + "eval_loss": 3.771193027496338, + "eval_runtime": 2.3153, + "eval_samples_per_second": 51.828, + "eval_steps_per_second": 3.455, + "step": 1000 + }, + { + "epoch": 33.01, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 3.771193027496338, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 43.43184933001428, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 2.3153, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 51.828, + "step": 1000 + }, + { + "epoch": 34.02, + "learning_rate": 8.333333333333333e-05, + "loss": 3.0475, + "step": 1050 + }, + { + "epoch": 36.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 3.0353, + "step": 1100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.97743315632128e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1100/training_args.bin b/checkpoint-1100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-1100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-1200/config.json b/checkpoint-1200/config.json new file mode 100644 index 0000000000000000000000000000000000000000..010a39070324d45f3a6fa1fe9997539307c8f44b --- /dev/null +++ b/checkpoint-1200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 64000 +} diff --git a/checkpoint-1200/optimizer.pt b/checkpoint-1200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6350cd5e3d6f3a7f261c864fd3a70c7200cf2f92 --- /dev/null +++ b/checkpoint-1200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c77ebd7b3e810bc9abfed02f11a55b5c7c56c2f4218b2d712cda53ac3f984ab +size 1080041009 diff --git a/checkpoint-1200/pytorch_model.bin b/checkpoint-1200/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd1ed8166e639c1aa03e2f6d4738d036a1abc54f --- /dev/null +++ b/checkpoint-1200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d481dd90e0112c7b86510b69821042c684cc4f5d27820394af11badd3314e8a +size 552615017 diff --git a/checkpoint-1200/rng_state.pth b/checkpoint-1200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8d0270cf0096a75cba1ca4bef9d344eeb0855098 --- /dev/null +++ b/checkpoint-1200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f9ebfe865a5282871ec45b1fd3fdc036c41e34c0448643d1196546f03780d0e +size 14567 diff --git a/checkpoint-1200/scaler.pt b/checkpoint-1200/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8953dddccbefc4703c09dcda27d83c15add2bade --- /dev/null +++ b/checkpoint-1200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c7277eaca0850ae3e9b6790b3d002d820169cce0671185e672c28c8ae8e056 +size 559 diff --git a/checkpoint-1200/scheduler.pt b/checkpoint-1200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..310d39c17fc616a9c83286ed00f0f4cefba9f5df --- /dev/null +++ b/checkpoint-1200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:935a8fb09a6e9698d9894853b05e181b3f56098deaaecddde08e55f06bf000c4 +size 623 diff --git a/checkpoint-1200/special_tokens_map.json b/checkpoint-1200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1200/tokenizer_config.json b/checkpoint-1200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1200/trainer_state.json b/checkpoint-1200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f4edac425d24b11f23ed7c32e17d274adcc69236 --- /dev/null +++ b/checkpoint-1200/trainer_state.json @@ -0,0 +1,182 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 39.025, + "global_step": 1200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.1049, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1845, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.9529, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 5.1965, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.8925, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7182, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 4.561, + "step": 300 + }, + { + "epoch": 11.02, + "learning_rate": 0.00047222222222222224, + "loss": 4.4282, + "step": 350 + }, + { + "epoch": 13.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.2476, + "step": 400 + }, + { + "epoch": 14.03, + "learning_rate": 0.00041666666666666664, + "loss": 4.0259, + "step": 450 + }, + { + "epoch": 16.02, + "learning_rate": 0.00038888888888888887, + "loss": 3.8942, + "step": 500 + }, + { + "epoch": 18.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.7583, + "step": 550 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.6224, + "step": 600 + }, + { + "epoch": 21.02, + "learning_rate": 0.00030555555555555555, + "loss": 3.55, + "step": 650 + }, + { + "epoch": 23.01, + "learning_rate": 0.0002777777777777778, + "loss": 3.4646, + "step": 700 + }, + { + "epoch": 24.02, + "learning_rate": 0.00025, + "loss": 3.3628, + "step": 750 + }, + { + "epoch": 26.02, + "learning_rate": 0.00022222222222222218, + "loss": 3.3208, + "step": 800 + }, + { + "epoch": 28.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.2586, + "step": 850 + }, + { + "epoch": 29.02, + "learning_rate": 0.00016666666666666666, + "loss": 3.179, + "step": 900 + }, + { + "epoch": 31.02, + "learning_rate": 0.0001388888888888889, + "loss": 3.1549, + "step": 950 + }, + { + "epoch": 33.01, + "learning_rate": 0.00011111111111111109, + "loss": 3.109, + "step": 1000 + }, + { + "epoch": 33.01, + "eval_loss": 3.771193027496338, + "eval_runtime": 2.3153, + "eval_samples_per_second": 51.828, + "eval_steps_per_second": 3.455, + "step": 1000 + }, + { + "epoch": 33.01, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 3.771193027496338, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 43.43184933001428, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 2.3153, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 51.828, + "step": 1000 + }, + { + "epoch": 34.02, + "learning_rate": 8.333333333333333e-05, + "loss": 3.0475, + "step": 1050 + }, + { + "epoch": 36.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 3.0353, + "step": 1100 + }, + { + "epoch": 38.01, + "learning_rate": 2.7777777777777772e-05, + "loss": 3.0056, + "step": 1150 + }, + { + "epoch": 39.02, + "learning_rate": 0.0, + "loss": 2.9624, + "step": 1200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 3.24784950607872e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1200/training_args.bin b/checkpoint-1200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-1200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-200/config.json b/checkpoint-200/config.json new file mode 100644 index 0000000000000000000000000000000000000000..010a39070324d45f3a6fa1fe9997539307c8f44b --- /dev/null +++ b/checkpoint-200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 64000 +} diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2086bef33eaffcd8119f3fd9a664c8cdb7804604 --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69570f0d843d941da8491c5d608d1c4f7ed6d0693201944b4b80231337f2fcae +size 1080040817 diff --git a/checkpoint-200/pytorch_model.bin b/checkpoint-200/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3392c2941993cf34fbba0cea4fbfd49df00191af --- /dev/null +++ b/checkpoint-200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9f133ee7219637c8594a9cf12dc29a56c6075c91e046268d9fedf14d8308b1a +size 552615017 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..22cd0135ffc7ab85d082fc0f85d3f6d6d27aaac2 --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0af37345c427ea9f754fbb6c9c6176b458022161879b0d521e1d1ef82b4fa5d6 +size 14567 diff --git a/checkpoint-200/scaler.pt b/checkpoint-200/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..365b52ebf376498237a843f6d7332e5a49b14902 --- /dev/null +++ b/checkpoint-200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb6982c29cd162f49aeb531674acf574eccd46a8f556bec596040d7c3b95200a +size 559 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..36cb8da739c80a63971a62f06f781c40ac0fceb2 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a992625ada6d884e508ff9392d16738b4a4163147f8fcbf9f46be82ecae9888 +size 623 diff --git a/checkpoint-200/special_tokens_map.json b/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-200/tokenizer_config.json b/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..03849d596c4799734ca19ffd338e971d21a10ee9 --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,46 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.016666666666667, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.1049, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1845, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.9529, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 5.1965, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.8925, + "step": 200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 5.4083269951488e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-300/config.json b/checkpoint-300/config.json new file mode 100644 index 0000000000000000000000000000000000000000..010a39070324d45f3a6fa1fe9997539307c8f44b --- /dev/null +++ b/checkpoint-300/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 64000 +} diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2962a4b02d67a9aa3e734d77716b8d9d927e357d --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f4169338abeb5c8ba45e441ce758f63f1a058500d8d087a5d9c91ebc9429f3d +size 1080041009 diff --git a/checkpoint-300/pytorch_model.bin b/checkpoint-300/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..286b73ee7e13e3646ae888991882959f4003e835 --- /dev/null +++ b/checkpoint-300/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af9070f9a05180b6cc42a1b5f42009bb2319d340750890de345c906fd921c11a +size 552615017 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5d62df0c97c01951947779d84d8109d825124411 --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:018724a2f440f12b5a7054fcfa8060620bc62de303490f66d8ef4547af89fcf6 +size 14567 diff --git a/checkpoint-300/scaler.pt b/checkpoint-300/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..01066cf4761ea9d2f7962f5181762f7b08690b79 --- /dev/null +++ b/checkpoint-300/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0967b9f865f16344c55f5ccc3cf7d6e8e97ca61dda304e931ca6bad130f48dd1 +size 559 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..681987f21e79cd4685aac32a5e6b74341e25d936 --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1db899b266916f792a0898ceb27a87eaf76647f10c29cc0c13ce22f12a12efd +size 623 diff --git a/checkpoint-300/special_tokens_map.json b/checkpoint-300/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-300/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-300/tokenizer_config.json b/checkpoint-300/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-300/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..40f021c20a54181e69f74fd85acd931a2b207bab --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,58 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.025, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.1049, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1845, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.9529, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 5.1965, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.8925, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7182, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 4.561, + "step": 300 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 8.1124904927232e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-400/config.json b/checkpoint-400/config.json new file mode 100644 index 0000000000000000000000000000000000000000..010a39070324d45f3a6fa1fe9997539307c8f44b --- /dev/null +++ b/checkpoint-400/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 64000 +} diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d8f0c8393d523ef8ef4f7f412f78dddcb6149a2 --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec22280f4d6afdc6eef106ada78d494d6e6eb6b6fed495cba3e4899f4212e49b +size 1080041009 diff --git a/checkpoint-400/pytorch_model.bin b/checkpoint-400/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..aaf4abc648081dfad386f053cee4f077073f761d --- /dev/null +++ b/checkpoint-400/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90addc1aca5ac2c2699f9110de6958cc8bfdd1de0567bd2aa257901acab5ee7c +size 552615017 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..62ef51cb9defb50587c6e4c5bba1b5d9d11199ce --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ec55f5ec9cf328f19d01b858dd0ea64f7c89eb0969a3126fea48055b9544033 +size 14567 diff --git a/checkpoint-400/scaler.pt b/checkpoint-400/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c7aef4199e98d81152810b661dbaffc01963383 --- /dev/null +++ b/checkpoint-400/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476e510c8ea7edbd2b51d1e76a4e037820a5639381c0d8b5d32dafa492795a1e +size 559 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..967673cfc91836d239beb5a7ede06992232ba309 --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db087d678047b5c346bbb8511936612c1fdf223c6fd70321e97369bc31ed76a8 +size 623 diff --git a/checkpoint-400/special_tokens_map.json b/checkpoint-400/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-400/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-400/tokenizer_config.json b/checkpoint-400/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-400/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b7d2ba7a904405bd054fe29dce758326c11bf051 --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,70 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 13.008333333333333, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.1049, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1845, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.9529, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 5.1965, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.8925, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7182, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 4.561, + "step": 300 + }, + { + "epoch": 11.02, + "learning_rate": 0.00047222222222222224, + "loss": 4.4282, + "step": 350 + }, + { + "epoch": 13.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.2476, + "step": 400 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.08261650202624e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-500/config.json b/checkpoint-500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..010a39070324d45f3a6fa1fe9997539307c8f44b --- /dev/null +++ b/checkpoint-500/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 64000 +} diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..27bb197ba1c2240eaabe4d956a6e5716c5744ce9 --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba6cc30d0ad0f036be99dfd213d2bf6786fc7118af704a9bfffc4475173eb5a6 +size 1080041009 diff --git a/checkpoint-500/pytorch_model.bin b/checkpoint-500/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..4d56b995519240e1268b7c77ee52b2a8ed526770 --- /dev/null +++ b/checkpoint-500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6028c895f6f719a4e0be2d5e6d7e0316de431f5add30d79a1a15bd6266ca25db +size 552615017 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a0d910572a29db8e5ea8e0a0767a27a848526939 --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d508ab4e0e8664cbe63918582aa09e378415f9e99a2d7c2ddabbb1abcd680a1c +size 14567 diff --git a/checkpoint-500/scaler.pt b/checkpoint-500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8b96c9c2837f2c95b1d07d4fc3f245f9ad1ef62 --- /dev/null +++ b/checkpoint-500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa4c7be44c959599b8b43bb9bc3371e9e4e5bbc5758b3ab5afcccfda3e72e67 +size 559 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..77b958181a5b47b022b96b38a6207f274a1b6604 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026fae4a90d56c24de94b10dfa7a75b6ba4e43bd5c1a3fdb2d77356b81cd6f8a +size 623 diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..323afd6e051776cd780e5168a057b987f6baae06 --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,82 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 16.016666666666666, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.1049, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1845, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.9529, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 5.1965, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.8925, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7182, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 4.561, + "step": 300 + }, + { + "epoch": 11.02, + "learning_rate": 0.00047222222222222224, + "loss": 4.4282, + "step": 350 + }, + { + "epoch": 13.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.2476, + "step": 400 + }, + { + "epoch": 14.03, + "learning_rate": 0.00041666666666666664, + "loss": 4.0259, + "step": 450 + }, + { + "epoch": 16.02, + "learning_rate": 0.00038888888888888887, + "loss": 3.8942, + "step": 500 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.35303285178368e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-600/config.json b/checkpoint-600/config.json new file mode 100644 index 0000000000000000000000000000000000000000..010a39070324d45f3a6fa1fe9997539307c8f44b --- /dev/null +++ b/checkpoint-600/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 64000 +} diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..42842e1e1c1429914d2400bc9bd9b4136116f567 --- /dev/null +++ b/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7935893c43199ba15d40b8eab4e41b16af47f9d694450f6e1d186fddb39816cc +size 1080041009 diff --git a/checkpoint-600/pytorch_model.bin b/checkpoint-600/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..17115c06d4b70b1d50ca3d3bcb9939f05879b818 --- /dev/null +++ b/checkpoint-600/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c8f409958fcaac3812f47a6e7c1064af8fb40e85e21438fcfb224816d17e5ae +size 552615017 diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b5463d82b0ab2e953af21d85ddb19e80c79d2897 --- /dev/null +++ b/checkpoint-600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5ad0d794bfbca1c6d54a71b6f1da0f821f83347b55dbf7f04fac7fa41807803 +size 14567 diff --git a/checkpoint-600/scaler.pt b/checkpoint-600/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f95b3e36da01561ec333a83ee8419ad225633e06 --- /dev/null +++ b/checkpoint-600/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8415b86bbce347c0df306b84a695add049c2a3b2d0b6f4dda3bf036d341150 +size 559 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..62e5359badd619e7dad2c51d98fa8043d9948f0b --- /dev/null +++ b/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700940432b1c2117248896e2ce5a58d93c051d92ea97707f74d76bf1ef24deee +size 623 diff --git a/checkpoint-600/special_tokens_map.json b/checkpoint-600/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-600/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-600/tokenizer_config.json b/checkpoint-600/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-600/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2abaa85cdd3ea407b94a983927b17d6b8132e266 --- /dev/null +++ b/checkpoint-600/trainer_state.json @@ -0,0 +1,94 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 19.025, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.1049, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1845, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.9529, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 5.1965, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.8925, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7182, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 4.561, + "step": 300 + }, + { + "epoch": 11.02, + "learning_rate": 0.00047222222222222224, + "loss": 4.4282, + "step": 350 + }, + { + "epoch": 13.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.2476, + "step": 400 + }, + { + "epoch": 14.03, + "learning_rate": 0.00041666666666666664, + "loss": 4.0259, + "step": 450 + }, + { + "epoch": 16.02, + "learning_rate": 0.00038888888888888887, + "loss": 3.8942, + "step": 500 + }, + { + "epoch": 18.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.7583, + "step": 550 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.6224, + "step": 600 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.62344920154112e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-700/config.json b/checkpoint-700/config.json new file mode 100644 index 0000000000000000000000000000000000000000..010a39070324d45f3a6fa1fe9997539307c8f44b --- /dev/null +++ b/checkpoint-700/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 64000 +} diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f48f1f29c572664007896cf7c75c20c9aeeb1a70 --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b23383a23155a444d82226704b6a1164ea792940ad769ab4a3acee590537531 +size 1080041009 diff --git a/checkpoint-700/pytorch_model.bin b/checkpoint-700/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..add6b9d48db6cfad4472d61b7aebf2162eeb2f3d --- /dev/null +++ b/checkpoint-700/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ee32adb55a1c21fc9d0bae56d470d156e46b22d14ea188876b63c1bf1dd7018 +size 552615017 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4b24352b05f808f03eaafcdecf2084b32c8d212e --- /dev/null +++ b/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bfed0babf076b0350b84ee3f036a298bcfc04460a53b3e4edd201918d89c316 +size 14567 diff --git a/checkpoint-700/scaler.pt b/checkpoint-700/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..12f2b8ec834e54a2bd7cfdd0e07b0c6e125b6490 --- /dev/null +++ b/checkpoint-700/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb213daf5cce18a5f92167ca14da9df084d907f2b9796efc4666630f312b58c +size 559 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..42408b6253265af34ae78746144fbba9316e0d7e --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d4910fd408e002ebeff50d62bfb043dcae5ef658777d0c3ee4a3bbb515ec15 +size 623 diff --git a/checkpoint-700/special_tokens_map.json b/checkpoint-700/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-700/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-700/tokenizer_config.json b/checkpoint-700/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-700/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..70ee114f0119e2fd6a520f684b8d3282a3e185cb --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,106 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 23.008333333333333, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.1049, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1845, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.9529, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 5.1965, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.8925, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7182, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 4.561, + "step": 300 + }, + { + "epoch": 11.02, + "learning_rate": 0.00047222222222222224, + "loss": 4.4282, + "step": 350 + }, + { + "epoch": 13.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.2476, + "step": 400 + }, + { + "epoch": 14.03, + "learning_rate": 0.00041666666666666664, + "loss": 4.0259, + "step": 450 + }, + { + "epoch": 16.02, + "learning_rate": 0.00038888888888888887, + "loss": 3.8942, + "step": 500 + }, + { + "epoch": 18.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.7583, + "step": 550 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.6224, + "step": 600 + }, + { + "epoch": 21.02, + "learning_rate": 0.00030555555555555555, + "loss": 3.55, + "step": 650 + }, + { + "epoch": 23.01, + "learning_rate": 0.0002777777777777778, + "loss": 3.4646, + "step": 700 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.89481665429504e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-800/config.json b/checkpoint-800/config.json new file mode 100644 index 0000000000000000000000000000000000000000..010a39070324d45f3a6fa1fe9997539307c8f44b --- /dev/null +++ b/checkpoint-800/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 64000 +} diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..06f5a734744a59ce733d54ba8a0c86ae33f9c327 --- /dev/null +++ b/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d1ce6599d35f561e0e5374ba43b5e33cc85db75818ec8cd6b20abd7395859cf +size 1080041009 diff --git a/checkpoint-800/pytorch_model.bin b/checkpoint-800/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6cb41f3ce0d07231d41aba48f68d953284a16f46 --- /dev/null +++ b/checkpoint-800/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c953ceea7f02656fc9db258867126dae03fbe5c386a223666fa8e2c72c191a5 +size 552615017 diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..425cb1b10425483d71a381de50e4aeffcdbab7c1 --- /dev/null +++ b/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f1a7344f6a17d8035cd2eaa557cfecaaa4b61edba5a29de025c4ced841e58f2 +size 14567 diff --git a/checkpoint-800/scaler.pt b/checkpoint-800/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3c73372264156b02df8dada2192ee3c96dd5fc4 --- /dev/null +++ b/checkpoint-800/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2074cdcefbaa0a39f736d6b0f7bf018c350d49e85648bc8accc4f756ad816e +size 559 diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..75713dbce3771306ca00343ecc497c4f19a01d03 --- /dev/null +++ b/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27fb255c84833fb6ab5d93679cb236a569de9a1c4f805f72a2f60a2bc7c7499 +size 623 diff --git a/checkpoint-800/special_tokens_map.json b/checkpoint-800/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-800/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-800/tokenizer_config.json b/checkpoint-800/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-800/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..efa79faa80e4abd2ed2692c00f04b4b358c3e930 --- /dev/null +++ b/checkpoint-800/trainer_state.json @@ -0,0 +1,118 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 26.016666666666666, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.1049, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1845, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.9529, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 5.1965, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.8925, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7182, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 4.561, + "step": 300 + }, + { + "epoch": 11.02, + "learning_rate": 0.00047222222222222224, + "loss": 4.4282, + "step": 350 + }, + { + "epoch": 13.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.2476, + "step": 400 + }, + { + "epoch": 14.03, + "learning_rate": 0.00041666666666666664, + "loss": 4.0259, + "step": 450 + }, + { + "epoch": 16.02, + "learning_rate": 0.00038888888888888887, + "loss": 3.8942, + "step": 500 + }, + { + "epoch": 18.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.7583, + "step": 550 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.6224, + "step": 600 + }, + { + "epoch": 21.02, + "learning_rate": 0.00030555555555555555, + "loss": 3.55, + "step": 650 + }, + { + "epoch": 23.01, + "learning_rate": 0.0002777777777777778, + "loss": 3.4646, + "step": 700 + }, + { + "epoch": 24.02, + "learning_rate": 0.00025, + "loss": 3.3628, + "step": 750 + }, + { + "epoch": 26.02, + "learning_rate": 0.00022222222222222218, + "loss": 3.3208, + "step": 800 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.16523300405248e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/checkpoint-900/config.json b/checkpoint-900/config.json new file mode 100644 index 0000000000000000000000000000000000000000..010a39070324d45f3a6fa1fe9997539307c8f44b --- /dev/null +++ b/checkpoint-900/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 64000 +} diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed3c36f5a502340f5c52e87a2968c43136ae35fa --- /dev/null +++ b/checkpoint-900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c86827e02ea49bb0429c0c5272644637f795890b27cdd8995a2e2847ca0310d +size 1080041009 diff --git a/checkpoint-900/pytorch_model.bin b/checkpoint-900/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..f2619ea7a83ae3881319f43531f99f4f5c44cb87 --- /dev/null +++ b/checkpoint-900/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df20ef365a0896b40d0ccf8a290563751cb9b669ffacecfe7b2993c989f1c0f9 +size 552615017 diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ca844e93e388c4d0dee3f17dfdc4829ed8422e3b --- /dev/null +++ b/checkpoint-900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1bf272e1fa66e5dfbfef265fee76d54c70d1c76ca329871eaff37cb1bc2f404 +size 14567 diff --git a/checkpoint-900/scaler.pt b/checkpoint-900/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..be54cb13c777bc6feccb478ff218e7e21fad482a --- /dev/null +++ b/checkpoint-900/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8695f57df923e22b943b0b0f2b9cc7007008e80b53ccee275b3a35963fe67e9 +size 559 diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..539d7c83ea252818dc9cbffac08cf340bb05a454 --- /dev/null +++ b/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cb09e5db72772a15094286e93cbb61d745d9b63863703cf53da0bcb9827821 +size 623 diff --git a/checkpoint-900/special_tokens_map.json b/checkpoint-900/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-900/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-900/tokenizer_config.json b/checkpoint-900/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-900/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..96a9924f497cf8269ea82388fc4873c6483bbd58 --- /dev/null +++ b/checkpoint-900/trainer_state.json @@ -0,0 +1,130 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 29.025, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.1049, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1845, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 5.9529, + "step": 100 + }, + { + "epoch": 4.03, + "learning_rate": 0.0005833333333333333, + "loss": 5.1965, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 4.8925, + "step": 200 + }, + { + "epoch": 8.01, + "learning_rate": 0.0005277777777777777, + "loss": 4.7182, + "step": 250 + }, + { + "epoch": 9.03, + "learning_rate": 0.0005, + "loss": 4.561, + "step": 300 + }, + { + "epoch": 11.02, + "learning_rate": 0.00047222222222222224, + "loss": 4.4282, + "step": 350 + }, + { + "epoch": 13.01, + "learning_rate": 0.00044444444444444436, + "loss": 4.2476, + "step": 400 + }, + { + "epoch": 14.03, + "learning_rate": 0.00041666666666666664, + "loss": 4.0259, + "step": 450 + }, + { + "epoch": 16.02, + "learning_rate": 0.00038888888888888887, + "loss": 3.8942, + "step": 500 + }, + { + "epoch": 18.01, + "learning_rate": 0.0003611111111111111, + "loss": 3.7583, + "step": 550 + }, + { + "epoch": 19.02, + "learning_rate": 0.0003333333333333333, + "loss": 3.6224, + "step": 600 + }, + { + "epoch": 21.02, + "learning_rate": 0.00030555555555555555, + "loss": 3.55, + "step": 650 + }, + { + "epoch": 23.01, + "learning_rate": 0.0002777777777777778, + "loss": 3.4646, + "step": 700 + }, + { + "epoch": 24.02, + "learning_rate": 0.00025, + "loss": 3.3628, + "step": 750 + }, + { + "epoch": 26.02, + "learning_rate": 0.00022222222222222218, + "loss": 3.3208, + "step": 800 + }, + { + "epoch": 28.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.2586, + "step": 850 + }, + { + "epoch": 29.02, + "learning_rate": 0.00016666666666666666, + "loss": 3.179, + "step": 900 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.43564935380992e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183 diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..010a39070324d45f3a6fa1fe9997539307c8f44b --- /dev/null +++ b/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 64000 +} diff --git a/metrics.json b/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..ff4164a7c7d0725d0ae7214b1bd14267e794f9c0 --- /dev/null +++ b/metrics.json @@ -0,0 +1,2500 @@ +{"num_parameters": 134994432, "trainable_parameters": 134994432, "step": 0} +{"train_info/time_between_train_steps": 4.106205224990845, "step": 0} +{"info/global_step": 1, "train_info/time_within_train_step": 55.638500690460205, "step": 1} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 20933.1708984375, "train_info/memory_reserved": 24596.0, "train_info/memory_max_reserved": 24596.0, "_timestamp": 1733956679, "_runtime": 64}, "step": 1} +{"logs": {"train/loss": 11.1049, "train/learning_rate": 4.9999999999999996e-06, "train/epoch": 0.0, "_timestamp": 1733956679, "_runtime": 64}, "step": 1} +{"train_info/time_between_train_steps": 0.015156030654907227, "step": 1} +{"info/global_step": 2, "train_info/time_within_train_step": 44.155834913253784, "step": 2} +{"train_info/time_between_train_steps": 0.006728410720825195, "step": 2} +{"info/global_step": 3, "train_info/time_within_train_step": 27.749959468841553, "step": 3} +{"train_info/time_between_train_steps": 0.0069539546966552734, "step": 3} +{"info/global_step": 4, "train_info/time_within_train_step": 27.94842839241028, "step": 4} +{"train_info/time_between_train_steps": 0.007470369338989258, "step": 4} +{"info/global_step": 5, "train_info/time_within_train_step": 27.790029764175415, "step": 5} +{"train_info/time_between_train_steps": 0.00843501091003418, "step": 5} +{"info/global_step": 6, "train_info/time_within_train_step": 27.957459449768066, "step": 6} +{"train_info/time_between_train_steps": 0.0063550472259521484, "step": 6} +{"info/global_step": 7, "train_info/time_within_train_step": 27.897032260894775, "step": 7} +{"train_info/time_between_train_steps": 0.006489753723144531, "step": 7} +{"info/global_step": 8, "train_info/time_within_train_step": 27.913813591003418, "step": 8} +{"train_info/time_between_train_steps": 0.00612950325012207, "step": 8} +{"info/global_step": 9, "train_info/time_within_train_step": 27.691263437271118, "step": 9} +{"train_info/time_between_train_steps": 0.006307840347290039, "step": 9} +{"info/global_step": 10, "train_info/time_within_train_step": 27.922094345092773, "step": 10} +{"train_info/time_between_train_steps": 0.005937814712524414, "step": 10} +{"info/global_step": 11, "train_info/time_within_train_step": 27.69540524482727, "step": 11} +{"train_info/time_between_train_steps": 0.005867719650268555, "step": 11} +{"info/global_step": 12, "train_info/time_within_train_step": 27.69241976737976, "step": 12} +{"train_info/time_between_train_steps": 0.0063016414642333984, "step": 12} +{"info/global_step": 13, "train_info/time_within_train_step": 27.844805240631104, "step": 13} +{"train_info/time_between_train_steps": 0.006446361541748047, "step": 13} +{"info/global_step": 14, "train_info/time_within_train_step": 27.80065655708313, "step": 14} +{"train_info/time_between_train_steps": 0.007195472717285156, "step": 14} +{"info/global_step": 15, "train_info/time_within_train_step": 27.85075283050537, "step": 15} +{"train_info/time_between_train_steps": 0.006064176559448242, "step": 15} +{"info/global_step": 16, "train_info/time_within_train_step": 27.91296935081482, "step": 16} +{"train_info/time_between_train_steps": 0.005666494369506836, "step": 16} +{"info/global_step": 17, "train_info/time_within_train_step": 27.79947066307068, "step": 17} +{"train_info/time_between_train_steps": 0.005967378616333008, "step": 17} +{"info/global_step": 18, "train_info/time_within_train_step": 27.9170823097229, "step": 18} +{"train_info/time_between_train_steps": 0.015068769454956055, "step": 18} +{"info/global_step": 19, "train_info/time_within_train_step": 27.904056549072266, "step": 19} +{"train_info/time_between_train_steps": 0.005846261978149414, "step": 19} +{"info/global_step": 20, "train_info/time_within_train_step": 27.790440320968628, "step": 20} +{"train_info/time_between_train_steps": 0.013386011123657227, "step": 20} +{"info/global_step": 21, "train_info/time_within_train_step": 27.76068091392517, "step": 21} +{"train_info/time_between_train_steps": 0.0061304569244384766, "step": 21} +{"info/global_step": 22, "train_info/time_within_train_step": 27.698904037475586, "step": 22} +{"train_info/time_between_train_steps": 0.011159896850585938, "step": 22} +{"info/global_step": 23, "train_info/time_within_train_step": 27.75078296661377, "step": 23} +{"train_info/time_between_train_steps": 0.011287689208984375, "step": 23} +{"info/global_step": 24, "train_info/time_within_train_step": 27.832661390304565, "step": 24} +{"train_info/time_between_train_steps": 0.010951042175292969, "step": 24} +{"info/global_step": 25, "train_info/time_within_train_step": 27.743431091308594, "step": 25} +{"train_info/time_between_train_steps": 0.012615203857421875, "step": 25} +{"info/global_step": 26, "train_info/time_within_train_step": 27.741028547286987, "step": 26} +{"train_info/time_between_train_steps": 0.010920286178588867, "step": 26} +{"info/global_step": 27, "train_info/time_within_train_step": 27.809218883514404, "step": 27} +{"train_info/time_between_train_steps": 0.015062570571899414, "step": 27} +{"info/global_step": 28, "train_info/time_within_train_step": 27.674880981445312, "step": 28} +{"train_info/time_between_train_steps": 0.0064237117767333984, "step": 28} +{"info/global_step": 29, "train_info/time_within_train_step": 27.807714223861694, "step": 29} +{"train_info/time_between_train_steps": 0.006396293640136719, "step": 29} +{"info/global_step": 30, "train_info/time_within_train_step": 27.718828439712524, "step": 30} +{"train_info/time_between_train_steps": 0.007136344909667969, "step": 30} +{"train_info/time_between_train_steps": 13.546359539031982, "step": 30} +{"info/global_step": 31, "train_info/time_within_train_step": 27.901495695114136, "step": 31} +{"train_info/time_between_train_steps": 0.006453752517700195, "step": 31} +{"info/global_step": 32, "train_info/time_within_train_step": 27.99795174598694, "step": 32} +{"train_info/time_between_train_steps": 0.013457298278808594, "step": 32} +{"info/global_step": 33, "train_info/time_within_train_step": 27.78549838066101, "step": 33} +{"train_info/time_between_train_steps": 0.0066030025482177734, "step": 33} +{"info/global_step": 34, "train_info/time_within_train_step": 28.078301906585693, "step": 34} +{"train_info/time_between_train_steps": 0.010805606842041016, "step": 34} +{"info/global_step": 35, "train_info/time_within_train_step": 27.838693380355835, "step": 35} +{"train_info/time_between_train_steps": 0.006285667419433594, "step": 35} +{"info/global_step": 36, "train_info/time_within_train_step": 27.80355668067932, "step": 36} +{"train_info/time_between_train_steps": 0.0054988861083984375, "step": 36} +{"info/global_step": 37, "train_info/time_within_train_step": 27.616259336471558, "step": 37} +{"train_info/time_between_train_steps": 0.005904436111450195, "step": 37} +{"info/global_step": 38, "train_info/time_within_train_step": 27.783040046691895, "step": 38} +{"train_info/time_between_train_steps": 0.0058748722076416016, "step": 38} +{"info/global_step": 39, "train_info/time_within_train_step": 27.60239577293396, "step": 39} +{"train_info/time_between_train_steps": 0.00578761100769043, "step": 39} +{"info/global_step": 40, "train_info/time_within_train_step": 27.796000719070435, "step": 40} +{"train_info/time_between_train_steps": 0.0060193538665771484, "step": 40} +{"info/global_step": 41, "train_info/time_within_train_step": 27.97760033607483, "step": 41} +{"train_info/time_between_train_steps": 0.005834817886352539, "step": 41} +{"info/global_step": 42, "train_info/time_within_train_step": 27.87429666519165, "step": 42} +{"train_info/time_between_train_steps": 0.010584831237792969, "step": 42} +{"info/global_step": 43, "train_info/time_within_train_step": 27.754517316818237, "step": 43} +{"train_info/time_between_train_steps": 0.01523280143737793, "step": 43} +{"info/global_step": 44, "train_info/time_within_train_step": 27.675339221954346, "step": 44} +{"train_info/time_between_train_steps": 0.0053632259368896484, "step": 44} +{"info/global_step": 45, "train_info/time_within_train_step": 27.67046093940735, "step": 45} +{"train_info/time_between_train_steps": 0.005659341812133789, "step": 45} +{"info/global_step": 46, "train_info/time_within_train_step": 27.674985885620117, "step": 46} +{"train_info/time_between_train_steps": 0.005831241607666016, "step": 46} +{"info/global_step": 47, "train_info/time_within_train_step": 27.875956296920776, "step": 47} +{"train_info/time_between_train_steps": 0.005751132965087891, "step": 47} +{"info/global_step": 48, "train_info/time_within_train_step": 27.884608030319214, "step": 48} +{"train_info/time_between_train_steps": 0.0056018829345703125, "step": 48} +{"info/global_step": 49, "train_info/time_within_train_step": 27.697052001953125, "step": 49} +{"train_info/time_between_train_steps": 0.0059773921966552734, "step": 49} +{"info/global_step": 50, "train_info/time_within_train_step": 27.700554132461548, "step": 50} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733958072, "_runtime": 1457}, "step": 50} +{"logs": {"train/loss": 8.1845, "train/learning_rate": 0.00025, "train/epoch": 1.02, "_timestamp": 1733958072, "_runtime": 1457}, "step": 50} +{"train_info/time_between_train_steps": 0.012320280075073242, "step": 50} +{"info/global_step": 51, "train_info/time_within_train_step": 27.667256832122803, "step": 51} +{"train_info/time_between_train_steps": 0.01021575927734375, "step": 51} +{"info/global_step": 52, "train_info/time_within_train_step": 27.725027561187744, "step": 52} +{"train_info/time_between_train_steps": 0.010195255279541016, "step": 52} +{"info/global_step": 53, "train_info/time_within_train_step": 27.631553411483765, "step": 53} +{"train_info/time_between_train_steps": 0.0056951045989990234, "step": 53} +{"info/global_step": 54, "train_info/time_within_train_step": 27.768081426620483, "step": 54} +{"train_info/time_between_train_steps": 0.005728721618652344, "step": 54} +{"info/global_step": 55, "train_info/time_within_train_step": 27.747591733932495, "step": 55} +{"train_info/time_between_train_steps": 0.006039142608642578, "step": 55} +{"info/global_step": 56, "train_info/time_within_train_step": 27.805298566818237, "step": 56} +{"train_info/time_between_train_steps": 0.005762338638305664, "step": 56} +{"info/global_step": 57, "train_info/time_within_train_step": 27.780779600143433, "step": 57} +{"train_info/time_between_train_steps": 0.008799314498901367, "step": 57} +{"info/global_step": 58, "train_info/time_within_train_step": 27.70436930656433, "step": 58} +{"train_info/time_between_train_steps": 0.01125478744506836, "step": 58} +{"info/global_step": 59, "train_info/time_within_train_step": 27.89015007019043, "step": 59} +{"train_info/time_between_train_steps": 0.006330728530883789, "step": 59} +{"info/global_step": 60, "train_info/time_within_train_step": 27.70854425430298, "step": 60} +{"train_info/time_between_train_steps": 0.007187604904174805, "step": 60} +{"train_info/time_between_train_steps": 13.70842957496643, "step": 60} +{"info/global_step": 61, "train_info/time_within_train_step": 27.778825759887695, "step": 61} +{"train_info/time_between_train_steps": 0.048720359802246094, "step": 61} +{"info/global_step": 62, "train_info/time_within_train_step": 51.81101989746094, "step": 62} +{"train_info/time_between_train_steps": 0.006165027618408203, "step": 62} +{"info/global_step": 63, "train_info/time_within_train_step": 54.42509365081787, "step": 63} +{"train_info/time_between_train_steps": 0.006384849548339844, "step": 63} +{"info/global_step": 64, "train_info/time_within_train_step": 54.35990834236145, "step": 64} +{"train_info/time_between_train_steps": 0.006142377853393555, "step": 64} +{"info/global_step": 65, "train_info/time_within_train_step": 53.184218883514404, "step": 65} +{"train_info/time_between_train_steps": 0.016016244888305664, "step": 65} +{"info/global_step": 66, "train_info/time_within_train_step": 54.377769231796265, "step": 66} +{"train_info/time_between_train_steps": 0.00587153434753418, "step": 66} +{"info/global_step": 67, "train_info/time_within_train_step": 54.01585578918457, "step": 67} +{"train_info/time_between_train_steps": 0.008272409439086914, "step": 67} +{"info/global_step": 68, "train_info/time_within_train_step": 54.263906478881836, "step": 68} +{"train_info/time_between_train_steps": 0.017701148986816406, "step": 68} +{"info/global_step": 69, "train_info/time_within_train_step": 53.92489409446716, "step": 69} +{"train_info/time_between_train_steps": 0.006424665451049805, "step": 69} +{"info/global_step": 70, "train_info/time_within_train_step": 54.56406617164612, "step": 70} +{"train_info/time_between_train_steps": 0.006624460220336914, "step": 70} +{"info/global_step": 71, "train_info/time_within_train_step": 54.12198829650879, "step": 71} +{"train_info/time_between_train_steps": 0.005784273147583008, "step": 71} +{"info/global_step": 72, "train_info/time_within_train_step": 54.26566815376282, "step": 72} +{"train_info/time_between_train_steps": 0.005818843841552734, "step": 72} +{"info/global_step": 73, "train_info/time_within_train_step": 54.444339990615845, "step": 73} +{"train_info/time_between_train_steps": 0.005994319915771484, "step": 73} +{"info/global_step": 74, "train_info/time_within_train_step": 31.408061504364014, "step": 74} +{"train_info/time_between_train_steps": 0.005980014801025391, "step": 74} +{"info/global_step": 75, "train_info/time_within_train_step": 27.75102162361145, "step": 75} +{"train_info/time_between_train_steps": 0.005534648895263672, "step": 75} +{"info/global_step": 76, "train_info/time_within_train_step": 27.81391406059265, "step": 76} +{"train_info/time_between_train_steps": 0.010278940200805664, "step": 76} +{"info/global_step": 77, "train_info/time_within_train_step": 27.837222814559937, "step": 77} +{"train_info/time_between_train_steps": 0.010209321975708008, "step": 77} +{"info/global_step": 78, "train_info/time_within_train_step": 27.86638355255127, "step": 78} +{"train_info/time_between_train_steps": 0.006301403045654297, "step": 78} +{"info/global_step": 79, "train_info/time_within_train_step": 27.71842646598816, "step": 79} +{"train_info/time_between_train_steps": 0.005865812301635742, "step": 79} +{"info/global_step": 80, "train_info/time_within_train_step": 27.64134645462036, "step": 80} +{"train_info/time_between_train_steps": 0.005696296691894531, "step": 80} +{"info/global_step": 81, "train_info/time_within_train_step": 27.841002941131592, "step": 81} +{"train_info/time_between_train_steps": 0.005906581878662109, "step": 81} +{"info/global_step": 82, "train_info/time_within_train_step": 27.80877184867859, "step": 82} +{"train_info/time_between_train_steps": 0.005754232406616211, "step": 82} +{"info/global_step": 83, "train_info/time_within_train_step": 27.78103518486023, "step": 83} +{"train_info/time_between_train_steps": 0.006067991256713867, "step": 83} +{"info/global_step": 84, "train_info/time_within_train_step": 27.745314121246338, "step": 84} +{"train_info/time_between_train_steps": 0.006033182144165039, "step": 84} +{"info/global_step": 85, "train_info/time_within_train_step": 27.759237051010132, "step": 85} +{"train_info/time_between_train_steps": 0.006045103073120117, "step": 85} +{"info/global_step": 86, "train_info/time_within_train_step": 27.86037015914917, "step": 86} +{"train_info/time_between_train_steps": 0.010587215423583984, "step": 86} +{"info/global_step": 87, "train_info/time_within_train_step": 27.811142683029175, "step": 87} +{"train_info/time_between_train_steps": 0.007851362228393555, "step": 87} +{"info/global_step": 88, "train_info/time_within_train_step": 27.91913414001465, "step": 88} +{"train_info/time_between_train_steps": 0.009026050567626953, "step": 88} +{"info/global_step": 89, "train_info/time_within_train_step": 27.85753607749939, "step": 89} +{"train_info/time_between_train_steps": 0.01610541343688965, "step": 89} +{"info/global_step": 90, "train_info/time_within_train_step": 27.81490421295166, "step": 90} +{"train_info/time_between_train_steps": 0.006623029708862305, "step": 90} +{"train_info/time_between_train_steps": 13.237101316452026, "step": 90} +{"info/global_step": 91, "train_info/time_within_train_step": 27.776098251342773, "step": 91} +{"train_info/time_between_train_steps": 0.0064890384674072266, "step": 91} +{"info/global_step": 92, "train_info/time_within_train_step": 27.912343978881836, "step": 92} +{"train_info/time_between_train_steps": 0.010866403579711914, "step": 92} +{"info/global_step": 93, "train_info/time_within_train_step": 27.92115092277527, "step": 93} +{"train_info/time_between_train_steps": 0.006035804748535156, "step": 93} +{"info/global_step": 94, "train_info/time_within_train_step": 28.062545776367188, "step": 94} +{"train_info/time_between_train_steps": 0.0062673091888427734, "step": 94} +{"info/global_step": 95, "train_info/time_within_train_step": 27.81592059135437, "step": 95} +{"train_info/time_between_train_steps": 0.007888317108154297, "step": 95} +{"info/global_step": 96, "train_info/time_within_train_step": 28.125441789627075, "step": 96} +{"train_info/time_between_train_steps": 0.0175168514251709, "step": 96} +{"info/global_step": 97, "train_info/time_within_train_step": 27.842041730880737, "step": 97} +{"train_info/time_between_train_steps": 0.005887031555175781, "step": 97} +{"info/global_step": 98, "train_info/time_within_train_step": 27.972550630569458, "step": 98} +{"train_info/time_between_train_steps": 0.006175041198730469, "step": 98} +{"info/global_step": 99, "train_info/time_within_train_step": 27.67421269416809, "step": 99} +{"train_info/time_between_train_steps": 0.00634765625, "step": 99} +{"info/global_step": 100, "train_info/time_within_train_step": 27.941592693328857, "step": 100} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733959808, "_runtime": 3193}, "step": 100} +{"logs": {"train/loss": 5.9529, "train/learning_rate": 0.0005, "train/epoch": 3.01, "_timestamp": 1733959808, "_runtime": 3193}, "step": 100} +{"train_info/time_between_train_steps": 3.314655065536499, "step": 100} +{"info/global_step": 101, "train_info/time_within_train_step": 27.69287419319153, "step": 101} +{"train_info/time_between_train_steps": 0.006288766860961914, "step": 101} +{"info/global_step": 102, "train_info/time_within_train_step": 27.829042673110962, "step": 102} +{"train_info/time_between_train_steps": 0.006063222885131836, "step": 102} +{"info/global_step": 103, "train_info/time_within_train_step": 27.774369716644287, "step": 103} +{"train_info/time_between_train_steps": 0.014254331588745117, "step": 103} +{"info/global_step": 104, "train_info/time_within_train_step": 27.800966501235962, "step": 104} +{"train_info/time_between_train_steps": 0.011023521423339844, "step": 104} +{"info/global_step": 105, "train_info/time_within_train_step": 27.823532342910767, "step": 105} +{"train_info/time_between_train_steps": 0.005832195281982422, "step": 105} +{"info/global_step": 106, "train_info/time_within_train_step": 27.709506034851074, "step": 106} +{"train_info/time_between_train_steps": 0.009172201156616211, "step": 106} +{"info/global_step": 107, "train_info/time_within_train_step": 27.837818145751953, "step": 107} +{"train_info/time_between_train_steps": 0.008645057678222656, "step": 107} +{"info/global_step": 108, "train_info/time_within_train_step": 27.812119483947754, "step": 108} +{"train_info/time_between_train_steps": 0.005430936813354492, "step": 108} +{"info/global_step": 109, "train_info/time_within_train_step": 27.850063800811768, "step": 109} +{"train_info/time_between_train_steps": 0.005965471267700195, "step": 109} +{"info/global_step": 110, "train_info/time_within_train_step": 27.841004133224487, "step": 110} +{"train_info/time_between_train_steps": 0.006174802780151367, "step": 110} +{"info/global_step": 111, "train_info/time_within_train_step": 27.817366123199463, "step": 111} +{"train_info/time_between_train_steps": 0.005971431732177734, "step": 111} +{"info/global_step": 112, "train_info/time_within_train_step": 27.828080892562866, "step": 112} +{"train_info/time_between_train_steps": 0.0062084197998046875, "step": 112} +{"info/global_step": 113, "train_info/time_within_train_step": 27.739346742630005, "step": 113} +{"train_info/time_between_train_steps": 0.0061872005462646484, "step": 113} +{"info/global_step": 114, "train_info/time_within_train_step": 27.855175971984863, "step": 114} +{"train_info/time_between_train_steps": 0.01163172721862793, "step": 114} +{"info/global_step": 115, "train_info/time_within_train_step": 27.80539059638977, "step": 115} +{"train_info/time_between_train_steps": 0.00648188591003418, "step": 115} +{"info/global_step": 116, "train_info/time_within_train_step": 27.785078763961792, "step": 116} +{"train_info/time_between_train_steps": 0.005668163299560547, "step": 116} +{"info/global_step": 117, "train_info/time_within_train_step": 27.85329270362854, "step": 117} +{"train_info/time_between_train_steps": 0.008055448532104492, "step": 117} +{"info/global_step": 118, "train_info/time_within_train_step": 27.815340042114258, "step": 118} +{"train_info/time_between_train_steps": 0.0060842037200927734, "step": 118} +{"info/global_step": 119, "train_info/time_within_train_step": 27.82577085494995, "step": 119} +{"train_info/time_between_train_steps": 0.010663509368896484, "step": 119} +{"info/global_step": 120, "train_info/time_within_train_step": 27.899940013885498, "step": 120} +{"train_info/time_between_train_steps": 0.007538318634033203, "step": 120} +{"train_info/time_between_train_steps": 13.380017280578613, "step": 120} +{"info/global_step": 121, "train_info/time_within_train_step": 27.75424575805664, "step": 121} +{"train_info/time_between_train_steps": 0.005679607391357422, "step": 121} +{"info/global_step": 122, "train_info/time_within_train_step": 27.94442892074585, "step": 122} +{"train_info/time_between_train_steps": 0.006423473358154297, "step": 122} +{"info/global_step": 123, "train_info/time_within_train_step": 27.85331392288208, "step": 123} +{"train_info/time_between_train_steps": 0.005940675735473633, "step": 123} +{"info/global_step": 124, "train_info/time_within_train_step": 27.988090753555298, "step": 124} +{"train_info/time_between_train_steps": 0.015156984329223633, "step": 124} +{"info/global_step": 125, "train_info/time_within_train_step": 27.654208183288574, "step": 125} +{"train_info/time_between_train_steps": 0.00587153434753418, "step": 125} +{"info/global_step": 126, "train_info/time_within_train_step": 28.012844562530518, "step": 126} +{"train_info/time_between_train_steps": 0.006111621856689453, "step": 126} +{"info/global_step": 127, "train_info/time_within_train_step": 27.78542709350586, "step": 127} +{"train_info/time_between_train_steps": 0.011827707290649414, "step": 127} +{"info/global_step": 128, "train_info/time_within_train_step": 39.110700845718384, "step": 128} +{"train_info/time_between_train_steps": 0.006665468215942383, "step": 128} +{"info/global_step": 129, "train_info/time_within_train_step": 53.88328981399536, "step": 129} +{"train_info/time_between_train_steps": 0.010950803756713867, "step": 129} +{"info/global_step": 130, "train_info/time_within_train_step": 54.45392632484436, "step": 130} +{"train_info/time_between_train_steps": 0.006281137466430664, "step": 130} +{"info/global_step": 131, "train_info/time_within_train_step": 53.857338666915894, "step": 131} +{"train_info/time_between_train_steps": 0.0075550079345703125, "step": 131} +{"info/global_step": 132, "train_info/time_within_train_step": 53.44796109199524, "step": 132} +{"train_info/time_between_train_steps": 0.005778074264526367, "step": 132} +{"info/global_step": 133, "train_info/time_within_train_step": 53.15878987312317, "step": 133} +{"train_info/time_between_train_steps": 0.005812644958496094, "step": 133} +{"info/global_step": 134, "train_info/time_within_train_step": 53.876200437545776, "step": 134} +{"train_info/time_between_train_steps": 0.0058367252349853516, "step": 134} +{"info/global_step": 135, "train_info/time_within_train_step": 53.25370264053345, "step": 135} +{"train_info/time_between_train_steps": 0.005783796310424805, "step": 135} +{"info/global_step": 136, "train_info/time_within_train_step": 52.92320442199707, "step": 136} +{"train_info/time_between_train_steps": 0.0060901641845703125, "step": 136} +{"info/global_step": 137, "train_info/time_within_train_step": 54.35932445526123, "step": 137} +{"train_info/time_between_train_steps": 0.006820201873779297, "step": 137} +{"info/global_step": 138, "train_info/time_within_train_step": 54.323583126068115, "step": 138} +{"train_info/time_between_train_steps": 0.005935192108154297, "step": 138} +{"info/global_step": 139, "train_info/time_within_train_step": 54.03407430648804, "step": 139} +{"train_info/time_between_train_steps": 0.006556034088134766, "step": 139} +{"info/global_step": 140, "train_info/time_within_train_step": 49.80035138130188, "step": 140} +{"train_info/time_between_train_steps": 0.0054836273193359375, "step": 140} +{"info/global_step": 141, "train_info/time_within_train_step": 27.68494415283203, "step": 141} +{"train_info/time_between_train_steps": 0.005647420883178711, "step": 141} +{"info/global_step": 142, "train_info/time_within_train_step": 27.861995458602905, "step": 142} +{"train_info/time_between_train_steps": 0.010114908218383789, "step": 142} +{"info/global_step": 143, "train_info/time_within_train_step": 27.84875178337097, "step": 143} +{"train_info/time_between_train_steps": 0.0060689449310302734, "step": 143} +{"info/global_step": 144, "train_info/time_within_train_step": 27.985862016677856, "step": 144} +{"train_info/time_between_train_steps": 0.016655445098876953, "step": 144} +{"info/global_step": 145, "train_info/time_within_train_step": 27.75160551071167, "step": 145} +{"train_info/time_between_train_steps": 0.010397911071777344, "step": 145} +{"info/global_step": 146, "train_info/time_within_train_step": 27.918733596801758, "step": 146} +{"train_info/time_between_train_steps": 0.010675191879272461, "step": 146} +{"info/global_step": 147, "train_info/time_within_train_step": 28.027594804763794, "step": 147} +{"train_info/time_between_train_steps": 0.010971784591674805, "step": 147} +{"info/global_step": 148, "train_info/time_within_train_step": 27.906408548355103, "step": 148} +{"train_info/time_between_train_steps": 0.016373634338378906, "step": 148} +{"info/global_step": 149, "train_info/time_within_train_step": 27.791343212127686, "step": 149} +{"train_info/time_between_train_steps": 0.010632514953613281, "step": 149} +{"info/global_step": 150, "train_info/time_within_train_step": 27.854562282562256, "step": 150} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733961535, "_runtime": 4920}, "step": 150} +{"logs": {"train/loss": 5.1965, "train/learning_rate": 0.0005833333333333333, "train/epoch": 4.03, "_timestamp": 1733961535, "_runtime": 4920}, "step": 150} +{"train_info/time_between_train_steps": 0.00867009162902832, "step": 150} +{"train_info/time_between_train_steps": 13.5707106590271, "step": 150} +{"info/global_step": 151, "train_info/time_within_train_step": 27.812833309173584, "step": 151} +{"train_info/time_between_train_steps": 0.00642704963684082, "step": 151} +{"info/global_step": 152, "train_info/time_within_train_step": 28.08791160583496, "step": 152} +{"train_info/time_between_train_steps": 0.01153874397277832, "step": 152} +{"info/global_step": 153, "train_info/time_within_train_step": 27.791382789611816, "step": 153} +{"train_info/time_between_train_steps": 0.006551980972290039, "step": 153} +{"info/global_step": 154, "train_info/time_within_train_step": 28.016944408416748, "step": 154} +{"train_info/time_between_train_steps": 0.015728473663330078, "step": 154} +{"info/global_step": 155, "train_info/time_within_train_step": 27.924715995788574, "step": 155} +{"train_info/time_between_train_steps": 0.00705718994140625, "step": 155} +{"info/global_step": 156, "train_info/time_within_train_step": 28.094449043273926, "step": 156} +{"train_info/time_between_train_steps": 0.005786418914794922, "step": 156} +{"info/global_step": 157, "train_info/time_within_train_step": 27.788631439208984, "step": 157} +{"train_info/time_between_train_steps": 0.011610746383666992, "step": 157} +{"info/global_step": 158, "train_info/time_within_train_step": 27.897611141204834, "step": 158} +{"train_info/time_between_train_steps": 0.005773782730102539, "step": 158} +{"info/global_step": 159, "train_info/time_within_train_step": 27.688583374023438, "step": 159} +{"train_info/time_between_train_steps": 0.005872488021850586, "step": 159} +{"info/global_step": 160, "train_info/time_within_train_step": 27.810391664505005, "step": 160} +{"train_info/time_between_train_steps": 0.010644674301147461, "step": 160} +{"info/global_step": 161, "train_info/time_within_train_step": 27.779433488845825, "step": 161} +{"train_info/time_between_train_steps": 0.005665779113769531, "step": 161} +{"info/global_step": 162, "train_info/time_within_train_step": 27.885613441467285, "step": 162} +{"train_info/time_between_train_steps": 0.005633115768432617, "step": 162} +{"info/global_step": 163, "train_info/time_within_train_step": 27.699871301651, "step": 163} +{"train_info/time_between_train_steps": 0.005846977233886719, "step": 163} +{"info/global_step": 164, "train_info/time_within_train_step": 27.779162406921387, "step": 164} +{"train_info/time_between_train_steps": 0.007500648498535156, "step": 164} +{"info/global_step": 165, "train_info/time_within_train_step": 27.828368186950684, "step": 165} +{"train_info/time_between_train_steps": 0.005623579025268555, "step": 165} +{"info/global_step": 166, "train_info/time_within_train_step": 27.67505121231079, "step": 166} +{"train_info/time_between_train_steps": 0.01784825325012207, "step": 166} +{"info/global_step": 167, "train_info/time_within_train_step": 28.001372575759888, "step": 167} +{"train_info/time_between_train_steps": 0.006084918975830078, "step": 167} +{"info/global_step": 168, "train_info/time_within_train_step": 27.721922159194946, "step": 168} +{"train_info/time_between_train_steps": 0.0059549808502197266, "step": 168} +{"info/global_step": 169, "train_info/time_within_train_step": 27.781718730926514, "step": 169} +{"train_info/time_between_train_steps": 0.016095638275146484, "step": 169} +{"info/global_step": 170, "train_info/time_within_train_step": 46.85828447341919, "step": 170} +{"train_info/time_between_train_steps": 0.0055904388427734375, "step": 170} +{"info/global_step": 171, "train_info/time_within_train_step": 54.246044874191284, "step": 171} +{"train_info/time_between_train_steps": 0.006341457366943359, "step": 171} +{"info/global_step": 172, "train_info/time_within_train_step": 54.21403980255127, "step": 172} +{"train_info/time_between_train_steps": 0.016600847244262695, "step": 172} +{"info/global_step": 173, "train_info/time_within_train_step": 54.089211225509644, "step": 173} +{"train_info/time_between_train_steps": 0.006027936935424805, "step": 173} +{"info/global_step": 174, "train_info/time_within_train_step": 53.79531764984131, "step": 174} +{"train_info/time_between_train_steps": 0.006109952926635742, "step": 174} +{"info/global_step": 175, "train_info/time_within_train_step": 54.12112832069397, "step": 175} +{"train_info/time_between_train_steps": 0.006002187728881836, "step": 175} +{"info/global_step": 176, "train_info/time_within_train_step": 53.97997260093689, "step": 176} +{"train_info/time_between_train_steps": 0.009214401245117188, "step": 176} +{"info/global_step": 177, "train_info/time_within_train_step": 53.5904324054718, "step": 177} +{"train_info/time_between_train_steps": 0.01183772087097168, "step": 177} +{"info/global_step": 178, "train_info/time_within_train_step": 54.29189133644104, "step": 178} +{"train_info/time_between_train_steps": 0.006339073181152344, "step": 178} +{"info/global_step": 179, "train_info/time_within_train_step": 54.562190532684326, "step": 179} +{"train_info/time_between_train_steps": 0.016388416290283203, "step": 179} +{"info/global_step": 180, "train_info/time_within_train_step": 53.397642612457275, "step": 180} +{"train_info/time_between_train_steps": 0.00659632682800293, "step": 180} +{"train_info/time_between_train_steps": 22.500911235809326, "step": 180} +{"info/global_step": 181, "train_info/time_within_train_step": 52.34414982795715, "step": 181} +{"train_info/time_between_train_steps": 0.015932798385620117, "step": 181} +{"info/global_step": 182, "train_info/time_within_train_step": 27.99179744720459, "step": 182} +{"train_info/time_between_train_steps": 0.0062253475189208984, "step": 182} +{"info/global_step": 183, "train_info/time_within_train_step": 27.759420156478882, "step": 183} +{"train_info/time_between_train_steps": 0.011705636978149414, "step": 183} +{"info/global_step": 184, "train_info/time_within_train_step": 27.998771905899048, "step": 184} +{"train_info/time_between_train_steps": 0.005980014801025391, "step": 184} +{"info/global_step": 185, "train_info/time_within_train_step": 27.7993106842041, "step": 185} +{"train_info/time_between_train_steps": 0.008838176727294922, "step": 185} +{"info/global_step": 186, "train_info/time_within_train_step": 28.20785403251648, "step": 186} +{"train_info/time_between_train_steps": 0.011545181274414062, "step": 186} +{"info/global_step": 187, "train_info/time_within_train_step": 27.835708379745483, "step": 187} +{"train_info/time_between_train_steps": 0.016390323638916016, "step": 187} +{"info/global_step": 188, "train_info/time_within_train_step": 27.84834337234497, "step": 188} +{"train_info/time_between_train_steps": 0.005542278289794922, "step": 188} +{"info/global_step": 189, "train_info/time_within_train_step": 27.815850496292114, "step": 189} +{"train_info/time_between_train_steps": 0.0063283443450927734, "step": 189} +{"info/global_step": 190, "train_info/time_within_train_step": 27.99030113220215, "step": 190} +{"train_info/time_between_train_steps": 0.012018442153930664, "step": 190} +{"info/global_step": 191, "train_info/time_within_train_step": 27.97746467590332, "step": 191} +{"train_info/time_between_train_steps": 0.005896806716918945, "step": 191} +{"info/global_step": 192, "train_info/time_within_train_step": 27.73121166229248, "step": 192} +{"train_info/time_between_train_steps": 0.0058901309967041016, "step": 192} +{"info/global_step": 193, "train_info/time_within_train_step": 27.655580520629883, "step": 193} +{"train_info/time_between_train_steps": 0.005944252014160156, "step": 193} +{"info/global_step": 194, "train_info/time_within_train_step": 27.6849946975708, "step": 194} +{"train_info/time_between_train_steps": 0.005884885787963867, "step": 194} +{"info/global_step": 195, "train_info/time_within_train_step": 27.844414949417114, "step": 195} +{"train_info/time_between_train_steps": 0.009695291519165039, "step": 195} +{"info/global_step": 196, "train_info/time_within_train_step": 38.303139448165894, "step": 196} +{"train_info/time_between_train_steps": 0.00562286376953125, "step": 196} +{"info/global_step": 197, "train_info/time_within_train_step": 54.316733598709106, "step": 197} +{"train_info/time_between_train_steps": 0.010778665542602539, "step": 197} +{"info/global_step": 198, "train_info/time_within_train_step": 54.487852811813354, "step": 198} +{"train_info/time_between_train_steps": 0.005962371826171875, "step": 198} +{"info/global_step": 199, "train_info/time_within_train_step": 54.318706035614014, "step": 199} +{"train_info/time_between_train_steps": 0.0064697265625, "step": 199} +{"info/global_step": 200, "train_info/time_within_train_step": 54.31040382385254, "step": 200} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733963386, "_runtime": 6771}, "step": 200} +{"logs": {"train/loss": 4.8925, "train/learning_rate": 0.0005555555555555556, "train/epoch": 6.02, "_timestamp": 1733963386, "_runtime": 6771}, "step": 200} +{"train_info/time_between_train_steps": 3.1863906383514404, "step": 200} +{"info/global_step": 201, "train_info/time_within_train_step": 54.83799505233765, "step": 201} +{"train_info/time_between_train_steps": 0.009647607803344727, "step": 201} +{"info/global_step": 202, "train_info/time_within_train_step": 54.12679052352905, "step": 202} +{"train_info/time_between_train_steps": 0.010491132736206055, "step": 202} +{"info/global_step": 203, "train_info/time_within_train_step": 54.47493863105774, "step": 203} +{"train_info/time_between_train_steps": 0.005967617034912109, "step": 203} +{"info/global_step": 204, "train_info/time_within_train_step": 54.41353464126587, "step": 204} +{"train_info/time_between_train_steps": 0.00905752182006836, "step": 204} +{"info/global_step": 205, "train_info/time_within_train_step": 54.29864978790283, "step": 205} +{"train_info/time_between_train_steps": 0.006131410598754883, "step": 205} +{"info/global_step": 206, "train_info/time_within_train_step": 54.47253084182739, "step": 206} +{"train_info/time_between_train_steps": 0.014448165893554688, "step": 206} +{"info/global_step": 207, "train_info/time_within_train_step": 54.49239897727966, "step": 207} +{"train_info/time_between_train_steps": 0.010034322738647461, "step": 207} +{"info/global_step": 208, "train_info/time_within_train_step": 40.44391369819641, "step": 208} +{"train_info/time_between_train_steps": 0.006513833999633789, "step": 208} +{"info/global_step": 209, "train_info/time_within_train_step": 27.7657949924469, "step": 209} +{"train_info/time_between_train_steps": 0.006357669830322266, "step": 209} +{"info/global_step": 210, "train_info/time_within_train_step": 27.73036217689514, "step": 210} +{"train_info/time_between_train_steps": 0.012407541275024414, "step": 210} +{"train_info/time_between_train_steps": 13.243790626525879, "step": 210} +{"info/global_step": 211, "train_info/time_within_train_step": 27.62899684906006, "step": 211} +{"train_info/time_between_train_steps": 0.005076885223388672, "step": 211} +{"info/global_step": 212, "train_info/time_within_train_step": 27.746211767196655, "step": 212} +{"train_info/time_between_train_steps": 0.005815744400024414, "step": 212} +{"info/global_step": 213, "train_info/time_within_train_step": 27.757548093795776, "step": 213} +{"train_info/time_between_train_steps": 0.0060749053955078125, "step": 213} +{"info/global_step": 214, "train_info/time_within_train_step": 27.80703854560852, "step": 214} +{"train_info/time_between_train_steps": 0.011651992797851562, "step": 214} +{"info/global_step": 215, "train_info/time_within_train_step": 27.829574823379517, "step": 215} +{"train_info/time_between_train_steps": 0.005515575408935547, "step": 215} +{"info/global_step": 216, "train_info/time_within_train_step": 27.89458703994751, "step": 216} +{"train_info/time_between_train_steps": 0.0058786869049072266, "step": 216} +{"info/global_step": 217, "train_info/time_within_train_step": 27.811165809631348, "step": 217} +{"train_info/time_between_train_steps": 0.005663871765136719, "step": 217} +{"info/global_step": 218, "train_info/time_within_train_step": 27.851775884628296, "step": 218} +{"train_info/time_between_train_steps": 0.005741596221923828, "step": 218} +{"info/global_step": 219, "train_info/time_within_train_step": 27.696925163269043, "step": 219} +{"train_info/time_between_train_steps": 0.006080150604248047, "step": 219} +{"info/global_step": 220, "train_info/time_within_train_step": 27.734262466430664, "step": 220} +{"train_info/time_between_train_steps": 0.0057446956634521484, "step": 220} +{"info/global_step": 221, "train_info/time_within_train_step": 27.829602241516113, "step": 221} +{"train_info/time_between_train_steps": 0.00521087646484375, "step": 221} +{"info/global_step": 222, "train_info/time_within_train_step": 27.778075218200684, "step": 222} +{"train_info/time_between_train_steps": 0.005871772766113281, "step": 222} +{"info/global_step": 223, "train_info/time_within_train_step": 27.713732481002808, "step": 223} +{"train_info/time_between_train_steps": 0.010820388793945312, "step": 223} +{"info/global_step": 224, "train_info/time_within_train_step": 27.755195140838623, "step": 224} +{"train_info/time_between_train_steps": 0.005722761154174805, "step": 224} +{"info/global_step": 225, "train_info/time_within_train_step": 27.777117252349854, "step": 225} +{"train_info/time_between_train_steps": 0.005641460418701172, "step": 225} +{"info/global_step": 226, "train_info/time_within_train_step": 27.73544931411743, "step": 226} +{"train_info/time_between_train_steps": 0.0053942203521728516, "step": 226} +{"info/global_step": 227, "train_info/time_within_train_step": 27.74194359779358, "step": 227} +{"train_info/time_between_train_steps": 0.007765054702758789, "step": 227} +{"info/global_step": 228, "train_info/time_within_train_step": 27.60815668106079, "step": 228} +{"train_info/time_between_train_steps": 0.005348920822143555, "step": 228} +{"info/global_step": 229, "train_info/time_within_train_step": 27.687494039535522, "step": 229} +{"train_info/time_between_train_steps": 0.006696224212646484, "step": 229} +{"info/global_step": 230, "train_info/time_within_train_step": 27.69114923477173, "step": 230} +{"train_info/time_between_train_steps": 0.005860328674316406, "step": 230} +{"info/global_step": 231, "train_info/time_within_train_step": 27.708129405975342, "step": 231} +{"train_info/time_between_train_steps": 0.016385316848754883, "step": 231} +{"info/global_step": 232, "train_info/time_within_train_step": 27.822168588638306, "step": 232} +{"train_info/time_between_train_steps": 0.0054209232330322266, "step": 232} +{"info/global_step": 233, "train_info/time_within_train_step": 27.71164870262146, "step": 233} +{"train_info/time_between_train_steps": 0.006044149398803711, "step": 233} +{"info/global_step": 234, "train_info/time_within_train_step": 27.80525803565979, "step": 234} +{"train_info/time_between_train_steps": 0.015587091445922852, "step": 234} +{"info/global_step": 235, "train_info/time_within_train_step": 27.73707151412964, "step": 235} +{"train_info/time_between_train_steps": 0.005854606628417969, "step": 235} +{"info/global_step": 236, "train_info/time_within_train_step": 27.657917976379395, "step": 236} +{"train_info/time_between_train_steps": 0.005720615386962891, "step": 236} +{"info/global_step": 237, "train_info/time_within_train_step": 27.716636180877686, "step": 237} +{"train_info/time_between_train_steps": 0.005875587463378906, "step": 237} +{"info/global_step": 238, "train_info/time_within_train_step": 27.714812517166138, "step": 238} +{"train_info/time_between_train_steps": 0.0060765743255615234, "step": 238} +{"info/global_step": 239, "train_info/time_within_train_step": 27.69533348083496, "step": 239} +{"train_info/time_between_train_steps": 0.00732111930847168, "step": 239} +{"info/global_step": 240, "train_info/time_within_train_step": 27.73845887184143, "step": 240} +{"train_info/time_between_train_steps": 0.00683283805847168, "step": 240} +{"train_info/time_between_train_steps": 13.52467942237854, "step": 240} +{"info/global_step": 241, "train_info/time_within_train_step": 27.738077402114868, "step": 241} +{"train_info/time_between_train_steps": 0.006529569625854492, "step": 241} +{"info/global_step": 242, "train_info/time_within_train_step": 28.010706424713135, "step": 242} +{"train_info/time_between_train_steps": 0.006075382232666016, "step": 242} +{"info/global_step": 243, "train_info/time_within_train_step": 27.74017596244812, "step": 243} +{"train_info/time_between_train_steps": 0.005839824676513672, "step": 243} +{"info/global_step": 244, "train_info/time_within_train_step": 28.02750563621521, "step": 244} +{"train_info/time_between_train_steps": 0.006679534912109375, "step": 244} +{"info/global_step": 245, "train_info/time_within_train_step": 27.73571515083313, "step": 245} +{"train_info/time_between_train_steps": 0.006352901458740234, "step": 245} +{"info/global_step": 246, "train_info/time_within_train_step": 27.933237552642822, "step": 246} +{"train_info/time_between_train_steps": 0.010570764541625977, "step": 246} +{"info/global_step": 247, "train_info/time_within_train_step": 27.76559591293335, "step": 247} +{"train_info/time_between_train_steps": 0.011055707931518555, "step": 247} +{"info/global_step": 248, "train_info/time_within_train_step": 27.901782274246216, "step": 248} +{"train_info/time_between_train_steps": 0.005847930908203125, "step": 248} +{"info/global_step": 249, "train_info/time_within_train_step": 27.804653644561768, "step": 249} +{"train_info/time_between_train_steps": 0.011582136154174805, "step": 249} +{"info/global_step": 250, "train_info/time_within_train_step": 27.915799617767334, "step": 250} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733965004, "_runtime": 8389}, "step": 250} +{"logs": {"train/loss": 4.7182, "train/learning_rate": 0.0005277777777777777, "train/epoch": 8.01, "_timestamp": 1733965004, "_runtime": 8389}, "step": 250} +{"train_info/time_between_train_steps": 0.00833892822265625, "step": 250} +{"info/global_step": 251, "train_info/time_within_train_step": 27.72269105911255, "step": 251} +{"train_info/time_between_train_steps": 0.005638599395751953, "step": 251} +{"info/global_step": 252, "train_info/time_within_train_step": 27.64944815635681, "step": 252} +{"train_info/time_between_train_steps": 0.005771160125732422, "step": 252} +{"info/global_step": 253, "train_info/time_within_train_step": 27.619992971420288, "step": 253} +{"train_info/time_between_train_steps": 0.005660057067871094, "step": 253} +{"info/global_step": 254, "train_info/time_within_train_step": 44.69500946998596, "step": 254} +{"train_info/time_between_train_steps": 0.008535623550415039, "step": 254} +{"info/global_step": 255, "train_info/time_within_train_step": 54.30206847190857, "step": 255} +{"train_info/time_between_train_steps": 0.015364885330200195, "step": 255} +{"info/global_step": 256, "train_info/time_within_train_step": 54.20555233955383, "step": 256} +{"train_info/time_between_train_steps": 0.00590205192565918, "step": 256} +{"info/global_step": 257, "train_info/time_within_train_step": 54.21761918067932, "step": 257} +{"train_info/time_between_train_steps": 0.005814790725708008, "step": 257} +{"info/global_step": 258, "train_info/time_within_train_step": 37.673126220703125, "step": 258} +{"train_info/time_between_train_steps": 0.005945920944213867, "step": 258} +{"info/global_step": 259, "train_info/time_within_train_step": 27.638524532318115, "step": 259} +{"train_info/time_between_train_steps": 0.015553951263427734, "step": 259} +{"info/global_step": 260, "train_info/time_within_train_step": 27.833617687225342, "step": 260} +{"train_info/time_between_train_steps": 0.014345645904541016, "step": 260} +{"info/global_step": 261, "train_info/time_within_train_step": 27.817631721496582, "step": 261} +{"train_info/time_between_train_steps": 0.006115436553955078, "step": 261} +{"info/global_step": 262, "train_info/time_within_train_step": 27.6916606426239, "step": 262} +{"train_info/time_between_train_steps": 0.005962371826171875, "step": 262} +{"info/global_step": 263, "train_info/time_within_train_step": 27.72902250289917, "step": 263} +{"train_info/time_between_train_steps": 0.00589442253112793, "step": 263} +{"info/global_step": 264, "train_info/time_within_train_step": 27.74381685256958, "step": 264} +{"train_info/time_between_train_steps": 0.010672807693481445, "step": 264} +{"info/global_step": 265, "train_info/time_within_train_step": 27.79442548751831, "step": 265} +{"train_info/time_between_train_steps": 0.010669469833374023, "step": 265} +{"info/global_step": 266, "train_info/time_within_train_step": 27.76536226272583, "step": 266} +{"train_info/time_between_train_steps": 0.005953550338745117, "step": 266} +{"info/global_step": 267, "train_info/time_within_train_step": 27.73024010658264, "step": 267} +{"train_info/time_between_train_steps": 0.011027812957763672, "step": 267} +{"info/global_step": 268, "train_info/time_within_train_step": 27.69843363761902, "step": 268} +{"train_info/time_between_train_steps": 0.006147146224975586, "step": 268} +{"info/global_step": 269, "train_info/time_within_train_step": 27.791983127593994, "step": 269} +{"train_info/time_between_train_steps": 0.006003379821777344, "step": 269} +{"info/global_step": 270, "train_info/time_within_train_step": 27.962215662002563, "step": 270} +{"train_info/time_between_train_steps": 0.017444133758544922, "step": 270} +{"train_info/time_between_train_steps": 14.001527070999146, "step": 270} +{"info/global_step": 271, "train_info/time_within_train_step": 27.764512062072754, "step": 271} +{"train_info/time_between_train_steps": 0.005460262298583984, "step": 271} +{"info/global_step": 272, "train_info/time_within_train_step": 27.754381895065308, "step": 272} +{"train_info/time_between_train_steps": 0.016681194305419922, "step": 272} +{"info/global_step": 273, "train_info/time_within_train_step": 27.68398928642273, "step": 273} +{"train_info/time_between_train_steps": 0.005322456359863281, "step": 273} +{"info/global_step": 274, "train_info/time_within_train_step": 27.809432983398438, "step": 274} +{"train_info/time_between_train_steps": 0.005563974380493164, "step": 274} +{"info/global_step": 275, "train_info/time_within_train_step": 27.724205017089844, "step": 275} +{"train_info/time_between_train_steps": 0.01119375228881836, "step": 275} +{"info/global_step": 276, "train_info/time_within_train_step": 27.93588900566101, "step": 276} +{"train_info/time_between_train_steps": 0.005966663360595703, "step": 276} +{"info/global_step": 277, "train_info/time_within_train_step": 27.8436119556427, "step": 277} +{"train_info/time_between_train_steps": 0.01618671417236328, "step": 277} +{"info/global_step": 278, "train_info/time_within_train_step": 27.943702220916748, "step": 278} +{"train_info/time_between_train_steps": 0.006134510040283203, "step": 278} +{"info/global_step": 279, "train_info/time_within_train_step": 27.88318395614624, "step": 279} +{"train_info/time_between_train_steps": 0.010977983474731445, "step": 279} +{"info/global_step": 280, "train_info/time_within_train_step": 27.878722667694092, "step": 280} +{"train_info/time_between_train_steps": 0.006528139114379883, "step": 280} +{"info/global_step": 281, "train_info/time_within_train_step": 27.78325343132019, "step": 281} +{"train_info/time_between_train_steps": 0.00615692138671875, "step": 281} +{"info/global_step": 282, "train_info/time_within_train_step": 27.73922300338745, "step": 282} +{"train_info/time_between_train_steps": 0.0060002803802490234, "step": 282} +{"info/global_step": 283, "train_info/time_within_train_step": 27.829620838165283, "step": 283} +{"train_info/time_between_train_steps": 0.010543346405029297, "step": 283} +{"info/global_step": 284, "train_info/time_within_train_step": 41.94568967819214, "step": 284} +{"train_info/time_between_train_steps": 0.010660171508789062, "step": 284} +{"info/global_step": 285, "train_info/time_within_train_step": 54.33507704734802, "step": 285} +{"train_info/time_between_train_steps": 0.006143093109130859, "step": 285} +{"info/global_step": 286, "train_info/time_within_train_step": 54.36223316192627, "step": 286} +{"train_info/time_between_train_steps": 0.006963968276977539, "step": 286} +{"info/global_step": 287, "train_info/time_within_train_step": 54.031171560287476, "step": 287} +{"train_info/time_between_train_steps": 0.00634002685546875, "step": 287} +{"info/global_step": 288, "train_info/time_within_train_step": 39.930402994155884, "step": 288} +{"train_info/time_between_train_steps": 0.011588811874389648, "step": 288} +{"info/global_step": 289, "train_info/time_within_train_step": 27.76127290725708, "step": 289} +{"train_info/time_between_train_steps": 0.010579109191894531, "step": 289} +{"info/global_step": 290, "train_info/time_within_train_step": 27.788963556289673, "step": 290} +{"train_info/time_between_train_steps": 0.00647735595703125, "step": 290} +{"info/global_step": 291, "train_info/time_within_train_step": 27.66207504272461, "step": 291} +{"train_info/time_between_train_steps": 0.0061321258544921875, "step": 291} +{"info/global_step": 292, "train_info/time_within_train_step": 27.764567136764526, "step": 292} +{"train_info/time_between_train_steps": 0.006444454193115234, "step": 292} +{"info/global_step": 293, "train_info/time_within_train_step": 27.63410711288452, "step": 293} +{"train_info/time_between_train_steps": 0.006563425064086914, "step": 293} +{"info/global_step": 294, "train_info/time_within_train_step": 27.765902042388916, "step": 294} +{"train_info/time_between_train_steps": 0.006217002868652344, "step": 294} +{"info/global_step": 295, "train_info/time_within_train_step": 27.74680233001709, "step": 295} +{"train_info/time_between_train_steps": 0.015888690948486328, "step": 295} +{"info/global_step": 296, "train_info/time_within_train_step": 27.80127501487732, "step": 296} +{"train_info/time_between_train_steps": 0.005919218063354492, "step": 296} +{"info/global_step": 297, "train_info/time_within_train_step": 27.750747203826904, "step": 297} +{"train_info/time_between_train_steps": 0.006156444549560547, "step": 297} +{"info/global_step": 298, "train_info/time_within_train_step": 27.737479209899902, "step": 298} +{"train_info/time_between_train_steps": 0.011828422546386719, "step": 298} +{"info/global_step": 299, "train_info/time_within_train_step": 27.73746371269226, "step": 299} +{"train_info/time_between_train_steps": 0.00691533088684082, "step": 299} +{"info/global_step": 300, "train_info/time_within_train_step": 27.7461154460907, "step": 300} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733966619, "_runtime": 10004}, "step": 300} +{"logs": {"train/loss": 4.561, "train/learning_rate": 0.0005, "train/epoch": 9.03, "_timestamp": 1733966619, "_runtime": 10004}, "step": 300} +{"train_info/time_between_train_steps": 2.6562917232513428, "step": 300} +{"train_info/time_between_train_steps": 15.841637372970581, "step": 300} +{"info/global_step": 301, "train_info/time_within_train_step": 27.688621997833252, "step": 301} +{"train_info/time_between_train_steps": 0.011479616165161133, "step": 301} +{"info/global_step": 302, "train_info/time_within_train_step": 27.922460794448853, "step": 302} +{"train_info/time_between_train_steps": 0.0061876773834228516, "step": 302} +{"info/global_step": 303, "train_info/time_within_train_step": 27.776045322418213, "step": 303} +{"train_info/time_between_train_steps": 0.006347179412841797, "step": 303} +{"info/global_step": 304, "train_info/time_within_train_step": 27.852389812469482, "step": 304} +{"train_info/time_between_train_steps": 0.0059642791748046875, "step": 304} +{"info/global_step": 305, "train_info/time_within_train_step": 27.75718641281128, "step": 305} +{"train_info/time_between_train_steps": 0.011015892028808594, "step": 305} +{"info/global_step": 306, "train_info/time_within_train_step": 27.850164890289307, "step": 306} +{"train_info/time_between_train_steps": 0.008825540542602539, "step": 306} +{"info/global_step": 307, "train_info/time_within_train_step": 27.789681673049927, "step": 307} +{"train_info/time_between_train_steps": 0.007130146026611328, "step": 307} +{"info/global_step": 308, "train_info/time_within_train_step": 49.82365918159485, "step": 308} +{"train_info/time_between_train_steps": 0.0063092708587646484, "step": 308} +{"info/global_step": 309, "train_info/time_within_train_step": 54.95783448219299, "step": 309} +{"train_info/time_between_train_steps": 0.005985260009765625, "step": 309} +{"info/global_step": 310, "train_info/time_within_train_step": 54.50884509086609, "step": 310} +{"train_info/time_between_train_steps": 0.006071329116821289, "step": 310} +{"info/global_step": 311, "train_info/time_within_train_step": 54.376550912857056, "step": 311} +{"train_info/time_between_train_steps": 0.006373405456542969, "step": 311} +{"info/global_step": 312, "train_info/time_within_train_step": 34.87429141998291, "step": 312} +{"train_info/time_between_train_steps": 0.006195545196533203, "step": 312} +{"info/global_step": 313, "train_info/time_within_train_step": 54.030680894851685, "step": 313} +{"train_info/time_between_train_steps": 0.005946636199951172, "step": 313} +{"info/global_step": 314, "train_info/time_within_train_step": 54.45879817008972, "step": 314} +{"train_info/time_between_train_steps": 0.0058917999267578125, "step": 314} +{"info/global_step": 315, "train_info/time_within_train_step": 54.39293122291565, "step": 315} +{"train_info/time_between_train_steps": 0.00606226921081543, "step": 315} +{"info/global_step": 316, "train_info/time_within_train_step": 49.984798192977905, "step": 316} +{"train_info/time_between_train_steps": 0.006064414978027344, "step": 316} +{"info/global_step": 317, "train_info/time_within_train_step": 27.64174485206604, "step": 317} +{"train_info/time_between_train_steps": 0.00570225715637207, "step": 317} +{"info/global_step": 318, "train_info/time_within_train_step": 27.723054885864258, "step": 318} +{"train_info/time_between_train_steps": 0.011948108673095703, "step": 318} +{"info/global_step": 319, "train_info/time_within_train_step": 27.81644320487976, "step": 319} +{"train_info/time_between_train_steps": 0.006142377853393555, "step": 319} +{"info/global_step": 320, "train_info/time_within_train_step": 27.71909236907959, "step": 320} +{"train_info/time_between_train_steps": 0.005850315093994141, "step": 320} +{"info/global_step": 321, "train_info/time_within_train_step": 27.9124653339386, "step": 321} +{"train_info/time_between_train_steps": 0.009892463684082031, "step": 321} +{"info/global_step": 322, "train_info/time_within_train_step": 27.722506284713745, "step": 322} +{"train_info/time_between_train_steps": 0.005423069000244141, "step": 322} +{"info/global_step": 323, "train_info/time_within_train_step": 27.683324337005615, "step": 323} +{"train_info/time_between_train_steps": 0.006015777587890625, "step": 323} +{"info/global_step": 324, "train_info/time_within_train_step": 27.69211983680725, "step": 324} +{"train_info/time_between_train_steps": 0.005483388900756836, "step": 324} +{"info/global_step": 325, "train_info/time_within_train_step": 27.69292664527893, "step": 325} +{"train_info/time_between_train_steps": 0.00838613510131836, "step": 325} +{"info/global_step": 326, "train_info/time_within_train_step": 27.70467257499695, "step": 326} +{"train_info/time_between_train_steps": 0.00566411018371582, "step": 326} +{"info/global_step": 327, "train_info/time_within_train_step": 27.691073417663574, "step": 327} +{"train_info/time_between_train_steps": 0.0059926509857177734, "step": 327} +{"info/global_step": 328, "train_info/time_within_train_step": 27.79425311088562, "step": 328} +{"train_info/time_between_train_steps": 0.01595306396484375, "step": 328} +{"info/global_step": 329, "train_info/time_within_train_step": 27.86769723892212, "step": 329} +{"train_info/time_between_train_steps": 0.009947061538696289, "step": 329} +{"info/global_step": 330, "train_info/time_within_train_step": 27.925908088684082, "step": 330} +{"train_info/time_between_train_steps": 0.007344961166381836, "step": 330} +{"train_info/time_between_train_steps": 13.250865697860718, "step": 330} +{"info/global_step": 331, "train_info/time_within_train_step": 27.6057288646698, "step": 331} +{"train_info/time_between_train_steps": 0.005429267883300781, "step": 331} +{"info/global_step": 332, "train_info/time_within_train_step": 27.869133949279785, "step": 332} +{"train_info/time_between_train_steps": 0.010701894760131836, "step": 332} +{"info/global_step": 333, "train_info/time_within_train_step": 27.85802936553955, "step": 333} +{"train_info/time_between_train_steps": 0.014362573623657227, "step": 333} +{"info/global_step": 334, "train_info/time_within_train_step": 28.091150522232056, "step": 334} +{"train_info/time_between_train_steps": 0.005916595458984375, "step": 334} +{"info/global_step": 335, "train_info/time_within_train_step": 27.71184539794922, "step": 335} +{"train_info/time_between_train_steps": 0.006114482879638672, "step": 335} +{"info/global_step": 336, "train_info/time_within_train_step": 27.947248458862305, "step": 336} +{"train_info/time_between_train_steps": 0.0058095455169677734, "step": 336} +{"info/global_step": 337, "train_info/time_within_train_step": 27.838329315185547, "step": 337} +{"train_info/time_between_train_steps": 0.005889892578125, "step": 337} +{"info/global_step": 338, "train_info/time_within_train_step": 27.942563772201538, "step": 338} +{"train_info/time_between_train_steps": 0.010648012161254883, "step": 338} +{"info/global_step": 339, "train_info/time_within_train_step": 27.860271692276, "step": 339} +{"train_info/time_between_train_steps": 0.011496543884277344, "step": 339} +{"info/global_step": 340, "train_info/time_within_train_step": 28.066102504730225, "step": 340} +{"train_info/time_between_train_steps": 0.010134696960449219, "step": 340} +{"info/global_step": 341, "train_info/time_within_train_step": 27.76882839202881, "step": 341} +{"train_info/time_between_train_steps": 0.005846977233886719, "step": 341} +{"info/global_step": 342, "train_info/time_within_train_step": 27.691083669662476, "step": 342} +{"train_info/time_between_train_steps": 0.015765905380249023, "step": 342} +{"info/global_step": 343, "train_info/time_within_train_step": 27.765966415405273, "step": 343} +{"train_info/time_between_train_steps": 0.00542449951171875, "step": 343} +{"info/global_step": 344, "train_info/time_within_train_step": 27.812068223953247, "step": 344} +{"train_info/time_between_train_steps": 0.006027936935424805, "step": 344} +{"info/global_step": 345, "train_info/time_within_train_step": 27.862281799316406, "step": 345} +{"train_info/time_between_train_steps": 0.01032400131225586, "step": 345} +{"info/global_step": 346, "train_info/time_within_train_step": 27.714285135269165, "step": 346} +{"train_info/time_between_train_steps": 0.005877017974853516, "step": 346} +{"info/global_step": 347, "train_info/time_within_train_step": 27.768972158432007, "step": 347} +{"train_info/time_between_train_steps": 0.0058667659759521484, "step": 347} +{"info/global_step": 348, "train_info/time_within_train_step": 27.71208119392395, "step": 348} +{"train_info/time_between_train_steps": 0.0060138702392578125, "step": 348} +{"info/global_step": 349, "train_info/time_within_train_step": 27.740867137908936, "step": 349} +{"train_info/time_between_train_steps": 0.006530046463012695, "step": 349} +{"info/global_step": 350, "train_info/time_within_train_step": 27.789271116256714, "step": 350} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733968250, "_runtime": 11635}, "step": 350} +{"logs": {"train/loss": 4.4282, "train/learning_rate": 0.00047222222222222224, "train/epoch": 11.02, "_timestamp": 1733968250, "_runtime": 11635}, "step": 350} +{"train_info/time_between_train_steps": 0.00809621810913086, "step": 350} +{"info/global_step": 351, "train_info/time_within_train_step": 27.928489446640015, "step": 351} +{"train_info/time_between_train_steps": 0.015790224075317383, "step": 351} +{"info/global_step": 352, "train_info/time_within_train_step": 27.6972599029541, "step": 352} +{"train_info/time_between_train_steps": 0.005921363830566406, "step": 352} +{"info/global_step": 353, "train_info/time_within_train_step": 45.822428941726685, "step": 353} +{"train_info/time_between_train_steps": 0.006464719772338867, "step": 353} +{"info/global_step": 354, "train_info/time_within_train_step": 54.60851287841797, "step": 354} +{"train_info/time_between_train_steps": 0.006296873092651367, "step": 354} +{"info/global_step": 355, "train_info/time_within_train_step": 54.31814360618591, "step": 355} +{"train_info/time_between_train_steps": 0.011215925216674805, "step": 355} +{"info/global_step": 356, "train_info/time_within_train_step": 54.35549449920654, "step": 356} +{"train_info/time_between_train_steps": 0.015149116516113281, "step": 356} +{"info/global_step": 357, "train_info/time_within_train_step": 35.80047392845154, "step": 357} +{"train_info/time_between_train_steps": 0.00566411018371582, "step": 357} +{"info/global_step": 358, "train_info/time_within_train_step": 54.34126877784729, "step": 358} +{"train_info/time_between_train_steps": 0.012398958206176758, "step": 358} +{"info/global_step": 359, "train_info/time_within_train_step": 54.428139209747314, "step": 359} +{"train_info/time_between_train_steps": 0.012626171112060547, "step": 359} +{"info/global_step": 360, "train_info/time_within_train_step": 54.404032945632935, "step": 360} +{"train_info/time_between_train_steps": 0.0069427490234375, "step": 360} +{"train_info/time_between_train_steps": 23.18848705291748, "step": 360} +{"info/global_step": 361, "train_info/time_within_train_step": 40.927688121795654, "step": 361} +{"train_info/time_between_train_steps": 0.0066792964935302734, "step": 361} +{"info/global_step": 362, "train_info/time_within_train_step": 28.077155590057373, "step": 362} +{"train_info/time_between_train_steps": 0.00603175163269043, "step": 362} +{"info/global_step": 363, "train_info/time_within_train_step": 27.700568675994873, "step": 363} +{"train_info/time_between_train_steps": 0.0062596797943115234, "step": 363} +{"info/global_step": 364, "train_info/time_within_train_step": 27.96953272819519, "step": 364} +{"train_info/time_between_train_steps": 0.005868673324584961, "step": 364} +{"info/global_step": 365, "train_info/time_within_train_step": 27.74070382118225, "step": 365} +{"train_info/time_between_train_steps": 0.006406068801879883, "step": 365} +{"info/global_step": 366, "train_info/time_within_train_step": 28.008312940597534, "step": 366} +{"train_info/time_between_train_steps": 0.006344795227050781, "step": 366} +{"info/global_step": 367, "train_info/time_within_train_step": 27.813109636306763, "step": 367} +{"train_info/time_between_train_steps": 0.010907173156738281, "step": 367} +{"info/global_step": 368, "train_info/time_within_train_step": 28.622294425964355, "step": 368} +{"train_info/time_between_train_steps": 0.006330013275146484, "step": 368} +{"info/global_step": 369, "train_info/time_within_train_step": 27.884724617004395, "step": 369} +{"train_info/time_between_train_steps": 0.008777141571044922, "step": 369} +{"info/global_step": 370, "train_info/time_within_train_step": 27.99242639541626, "step": 370} +{"train_info/time_between_train_steps": 0.0069620609283447266, "step": 370} +{"info/global_step": 371, "train_info/time_within_train_step": 27.888184309005737, "step": 371} +{"train_info/time_between_train_steps": 0.005877256393432617, "step": 371} +{"info/global_step": 372, "train_info/time_within_train_step": 27.667420387268066, "step": 372} +{"train_info/time_between_train_steps": 0.006439924240112305, "step": 372} +{"info/global_step": 373, "train_info/time_within_train_step": 27.804932594299316, "step": 373} +{"train_info/time_between_train_steps": 0.006670475006103516, "step": 373} +{"info/global_step": 374, "train_info/time_within_train_step": 27.8652184009552, "step": 374} +{"train_info/time_between_train_steps": 0.011247396469116211, "step": 374} +{"info/global_step": 375, "train_info/time_within_train_step": 27.78943395614624, "step": 375} +{"train_info/time_between_train_steps": 0.005663633346557617, "step": 375} +{"info/global_step": 376, "train_info/time_within_train_step": 27.652876138687134, "step": 376} +{"train_info/time_between_train_steps": 0.006306886672973633, "step": 376} +{"info/global_step": 377, "train_info/time_within_train_step": 27.828999757766724, "step": 377} +{"train_info/time_between_train_steps": 0.006449460983276367, "step": 377} +{"info/global_step": 378, "train_info/time_within_train_step": 27.794545650482178, "step": 378} +{"train_info/time_between_train_steps": 0.011348724365234375, "step": 378} +{"info/global_step": 379, "train_info/time_within_train_step": 27.759270191192627, "step": 379} +{"train_info/time_between_train_steps": 0.013381481170654297, "step": 379} +{"info/global_step": 380, "train_info/time_within_train_step": 27.75152897834778, "step": 380} +{"train_info/time_between_train_steps": 0.0061228275299072266, "step": 380} +{"info/global_step": 381, "train_info/time_within_train_step": 27.74927806854248, "step": 381} +{"train_info/time_between_train_steps": 0.0061414241790771484, "step": 381} +{"info/global_step": 382, "train_info/time_within_train_step": 27.845038175582886, "step": 382} +{"train_info/time_between_train_steps": 0.006204843521118164, "step": 382} +{"info/global_step": 383, "train_info/time_within_train_step": 27.674057722091675, "step": 383} +{"train_info/time_between_train_steps": 0.006104469299316406, "step": 383} +{"info/global_step": 384, "train_info/time_within_train_step": 27.715273141860962, "step": 384} +{"train_info/time_between_train_steps": 0.0060198307037353516, "step": 384} +{"info/global_step": 385, "train_info/time_within_train_step": 27.74674105644226, "step": 385} +{"train_info/time_between_train_steps": 0.006419658660888672, "step": 385} +{"info/global_step": 386, "train_info/time_within_train_step": 27.672905921936035, "step": 386} +{"train_info/time_between_train_steps": 0.006100654602050781, "step": 386} +{"info/global_step": 387, "train_info/time_within_train_step": 27.859463691711426, "step": 387} +{"train_info/time_between_train_steps": 0.01663351058959961, "step": 387} +{"info/global_step": 388, "train_info/time_within_train_step": 27.904030323028564, "step": 388} +{"train_info/time_between_train_steps": 0.0064160823822021484, "step": 388} +{"info/global_step": 389, "train_info/time_within_train_step": 27.821632146835327, "step": 389} +{"train_info/time_between_train_steps": 0.011303424835205078, "step": 389} +{"info/global_step": 390, "train_info/time_within_train_step": 27.861936569213867, "step": 390} +{"train_info/time_between_train_steps": 0.009689807891845703, "step": 390} +{"train_info/time_between_train_steps": 13.518119096755981, "step": 390} +{"info/global_step": 391, "train_info/time_within_train_step": 27.729304790496826, "step": 391} +{"train_info/time_between_train_steps": 0.006691455841064453, "step": 391} +{"info/global_step": 392, "train_info/time_within_train_step": 28.059534311294556, "step": 392} +{"train_info/time_between_train_steps": 0.006161212921142578, "step": 392} +{"info/global_step": 393, "train_info/time_within_train_step": 27.704055070877075, "step": 393} +{"train_info/time_between_train_steps": 0.006600379943847656, "step": 393} +{"info/global_step": 394, "train_info/time_within_train_step": 28.006839752197266, "step": 394} +{"train_info/time_between_train_steps": 0.010818958282470703, "step": 394} +{"info/global_step": 395, "train_info/time_within_train_step": 27.818893909454346, "step": 395} +{"train_info/time_between_train_steps": 0.010761260986328125, "step": 395} +{"info/global_step": 396, "train_info/time_within_train_step": 27.90057349205017, "step": 396} +{"train_info/time_between_train_steps": 0.005815982818603516, "step": 396} +{"info/global_step": 397, "train_info/time_within_train_step": 27.708080053329468, "step": 397} +{"train_info/time_between_train_steps": 0.006330728530883789, "step": 397} +{"info/global_step": 398, "train_info/time_within_train_step": 27.877116441726685, "step": 398} +{"train_info/time_between_train_steps": 0.005963325500488281, "step": 398} +{"info/global_step": 399, "train_info/time_within_train_step": 27.871591806411743, "step": 399} +{"train_info/time_between_train_steps": 0.00626063346862793, "step": 399} +{"info/global_step": 400, "train_info/time_within_train_step": 27.757174253463745, "step": 400} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733969878, "_runtime": 13263}, "step": 400} +{"logs": {"train/loss": 4.2476, "train/learning_rate": 0.00044444444444444436, "train/epoch": 13.01, "_timestamp": 1733969878, "_runtime": 13263}, "step": 400} +{"train_info/time_between_train_steps": 2.868028402328491, "step": 400} +{"info/global_step": 401, "train_info/time_within_train_step": 27.809846878051758, "step": 401} +{"train_info/time_between_train_steps": 0.005440950393676758, "step": 401} +{"info/global_step": 402, "train_info/time_within_train_step": 27.880712270736694, "step": 402} +{"train_info/time_between_train_steps": 0.010190725326538086, "step": 402} +{"info/global_step": 403, "train_info/time_within_train_step": 27.874064207077026, "step": 403} +{"train_info/time_between_train_steps": 0.005667686462402344, "step": 403} +{"info/global_step": 404, "train_info/time_within_train_step": 27.61025381088257, "step": 404} +{"train_info/time_between_train_steps": 0.005692243576049805, "step": 404} +{"info/global_step": 405, "train_info/time_within_train_step": 27.62332272529602, "step": 405} +{"train_info/time_between_train_steps": 0.0056569576263427734, "step": 405} +{"info/global_step": 406, "train_info/time_within_train_step": 27.77263045310974, "step": 406} +{"train_info/time_between_train_steps": 0.006319522857666016, "step": 406} +{"info/global_step": 407, "train_info/time_within_train_step": 27.759774208068848, "step": 407} +{"train_info/time_between_train_steps": 0.009692192077636719, "step": 407} +{"info/global_step": 408, "train_info/time_within_train_step": 27.818920373916626, "step": 408} +{"train_info/time_between_train_steps": 0.005978107452392578, "step": 408} +{"info/global_step": 409, "train_info/time_within_train_step": 27.701004028320312, "step": 409} +{"train_info/time_between_train_steps": 0.005799055099487305, "step": 409} +{"info/global_step": 410, "train_info/time_within_train_step": 27.72974705696106, "step": 410} +{"train_info/time_between_train_steps": 0.012514352798461914, "step": 410} +{"info/global_step": 411, "train_info/time_within_train_step": 27.706538438796997, "step": 411} +{"train_info/time_between_train_steps": 0.011153459548950195, "step": 411} +{"info/global_step": 412, "train_info/time_within_train_step": 27.70378565788269, "step": 412} +{"train_info/time_between_train_steps": 0.005995988845825195, "step": 412} +{"info/global_step": 413, "train_info/time_within_train_step": 27.713860988616943, "step": 413} +{"train_info/time_between_train_steps": 0.011234760284423828, "step": 413} +{"info/global_step": 414, "train_info/time_within_train_step": 27.79231309890747, "step": 414} +{"train_info/time_between_train_steps": 0.005655527114868164, "step": 414} +{"info/global_step": 415, "train_info/time_within_train_step": 27.72198486328125, "step": 415} +{"train_info/time_between_train_steps": 0.00568079948425293, "step": 415} +{"info/global_step": 416, "train_info/time_within_train_step": 27.745269775390625, "step": 416} +{"train_info/time_between_train_steps": 0.005858182907104492, "step": 416} +{"info/global_step": 417, "train_info/time_within_train_step": 27.668802499771118, "step": 417} +{"train_info/time_between_train_steps": 0.00610041618347168, "step": 417} +{"info/global_step": 418, "train_info/time_within_train_step": 27.878153800964355, "step": 418} +{"train_info/time_between_train_steps": 0.010271310806274414, "step": 418} +{"info/global_step": 419, "train_info/time_within_train_step": 27.794392585754395, "step": 419} +{"train_info/time_between_train_steps": 0.010573148727416992, "step": 419} +{"info/global_step": 420, "train_info/time_within_train_step": 27.84177541732788, "step": 420} +{"train_info/time_between_train_steps": 0.006578683853149414, "step": 420} +{"train_info/time_between_train_steps": 13.453232049942017, "step": 420} +{"info/global_step": 421, "train_info/time_within_train_step": 27.73345160484314, "step": 421} +{"train_info/time_between_train_steps": 0.005237102508544922, "step": 421} +{"info/global_step": 422, "train_info/time_within_train_step": 27.89042592048645, "step": 422} +{"train_info/time_between_train_steps": 0.016015052795410156, "step": 422} +{"info/global_step": 423, "train_info/time_within_train_step": 27.769320726394653, "step": 423} +{"train_info/time_between_train_steps": 0.005889892578125, "step": 423} +{"info/global_step": 424, "train_info/time_within_train_step": 27.830569982528687, "step": 424} +{"train_info/time_between_train_steps": 0.0059757232666015625, "step": 424} +{"info/global_step": 425, "train_info/time_within_train_step": 27.70796489715576, "step": 425} +{"train_info/time_between_train_steps": 0.010316133499145508, "step": 425} +{"info/global_step": 426, "train_info/time_within_train_step": 27.889817714691162, "step": 426} +{"train_info/time_between_train_steps": 0.00591731071472168, "step": 426} +{"info/global_step": 427, "train_info/time_within_train_step": 27.756574630737305, "step": 427} +{"train_info/time_between_train_steps": 0.015305757522583008, "step": 427} +{"info/global_step": 428, "train_info/time_within_train_step": 27.865479230880737, "step": 428} +{"train_info/time_between_train_steps": 0.005743503570556641, "step": 428} +{"info/global_step": 429, "train_info/time_within_train_step": 27.69153356552124, "step": 429} +{"train_info/time_between_train_steps": 0.00534367561340332, "step": 429} +{"info/global_step": 430, "train_info/time_within_train_step": 27.737135410308838, "step": 430} +{"train_info/time_between_train_steps": 0.0053195953369140625, "step": 430} +{"info/global_step": 431, "train_info/time_within_train_step": 27.784223556518555, "step": 431} +{"train_info/time_between_train_steps": 0.010534286499023438, "step": 431} +{"info/global_step": 432, "train_info/time_within_train_step": 27.768905639648438, "step": 432} +{"train_info/time_between_train_steps": 0.010363340377807617, "step": 432} +{"info/global_step": 433, "train_info/time_within_train_step": 27.785654067993164, "step": 433} +{"train_info/time_between_train_steps": 0.005596637725830078, "step": 433} +{"info/global_step": 434, "train_info/time_within_train_step": 27.734164476394653, "step": 434} +{"train_info/time_between_train_steps": 0.01097249984741211, "step": 434} +{"info/global_step": 435, "train_info/time_within_train_step": 27.760231733322144, "step": 435} +{"train_info/time_between_train_steps": 0.008650064468383789, "step": 435} +{"info/global_step": 436, "train_info/time_within_train_step": 27.730223417282104, "step": 436} +{"train_info/time_between_train_steps": 0.005744218826293945, "step": 436} +{"info/global_step": 437, "train_info/time_within_train_step": 27.628295183181763, "step": 437} +{"train_info/time_between_train_steps": 0.0056324005126953125, "step": 437} +{"info/global_step": 438, "train_info/time_within_train_step": 27.698010206222534, "step": 438} +{"train_info/time_between_train_steps": 0.005697488784790039, "step": 438} +{"info/global_step": 439, "train_info/time_within_train_step": 27.784691095352173, "step": 439} +{"train_info/time_between_train_steps": 0.005978584289550781, "step": 439} +{"info/global_step": 440, "train_info/time_within_train_step": 27.746862649917603, "step": 440} +{"train_info/time_between_train_steps": 0.011119365692138672, "step": 440} +{"info/global_step": 441, "train_info/time_within_train_step": 27.764392137527466, "step": 441} +{"train_info/time_between_train_steps": 0.014762639999389648, "step": 441} +{"info/global_step": 442, "train_info/time_within_train_step": 27.63633155822754, "step": 442} +{"train_info/time_between_train_steps": 0.012098073959350586, "step": 442} +{"info/global_step": 443, "train_info/time_within_train_step": 27.780639171600342, "step": 443} +{"train_info/time_between_train_steps": 0.009796619415283203, "step": 443} +{"info/global_step": 444, "train_info/time_within_train_step": 27.654852867126465, "step": 444} +{"train_info/time_between_train_steps": 0.006026744842529297, "step": 444} +{"info/global_step": 445, "train_info/time_within_train_step": 27.664255380630493, "step": 445} +{"train_info/time_between_train_steps": 0.005451202392578125, "step": 445} +{"info/global_step": 446, "train_info/time_within_train_step": 27.768407106399536, "step": 446} +{"train_info/time_between_train_steps": 0.005561351776123047, "step": 446} +{"info/global_step": 447, "train_info/time_within_train_step": 27.718058109283447, "step": 447} +{"train_info/time_between_train_steps": 0.006095409393310547, "step": 447} +{"info/global_step": 448, "train_info/time_within_train_step": 27.75456476211548, "step": 448} +{"train_info/time_between_train_steps": 0.006163358688354492, "step": 448} +{"info/global_step": 449, "train_info/time_within_train_step": 27.86053967475891, "step": 449} +{"train_info/time_between_train_steps": 0.006144046783447266, "step": 449} +{"info/global_step": 450, "train_info/time_within_train_step": 27.80449652671814, "step": 450} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733971282, "_runtime": 14667}, "step": 450} +{"logs": {"train/loss": 4.0259, "train/learning_rate": 0.00041666666666666664, "train/epoch": 14.03, "_timestamp": 1733971282, "_runtime": 14667}, "step": 450} +{"train_info/time_between_train_steps": 0.015682220458984375, "step": 450} +{"train_info/time_between_train_steps": 13.723492860794067, "step": 450} +{"info/global_step": 451, "train_info/time_within_train_step": 27.864367485046387, "step": 451} +{"train_info/time_between_train_steps": 0.00969076156616211, "step": 451} +{"info/global_step": 452, "train_info/time_within_train_step": 27.85411787033081, "step": 452} +{"train_info/time_between_train_steps": 0.015432119369506836, "step": 452} +{"info/global_step": 453, "train_info/time_within_train_step": 27.824738025665283, "step": 453} +{"train_info/time_between_train_steps": 0.005974531173706055, "step": 453} +{"info/global_step": 454, "train_info/time_within_train_step": 27.967385053634644, "step": 454} +{"train_info/time_between_train_steps": 0.005872488021850586, "step": 454} +{"info/global_step": 455, "train_info/time_within_train_step": 27.73613953590393, "step": 455} +{"train_info/time_between_train_steps": 0.005959987640380859, "step": 455} +{"info/global_step": 456, "train_info/time_within_train_step": 27.826356410980225, "step": 456} +{"train_info/time_between_train_steps": 0.005793571472167969, "step": 456} +{"info/global_step": 457, "train_info/time_within_train_step": 27.714534044265747, "step": 457} +{"train_info/time_between_train_steps": 0.0054836273193359375, "step": 457} +{"info/global_step": 458, "train_info/time_within_train_step": 28.09937834739685, "step": 458} +{"train_info/time_between_train_steps": 0.006148815155029297, "step": 458} +{"info/global_step": 459, "train_info/time_within_train_step": 27.899020433425903, "step": 459} +{"train_info/time_between_train_steps": 0.01045536994934082, "step": 459} +{"info/global_step": 460, "train_info/time_within_train_step": 27.849149703979492, "step": 460} +{"train_info/time_between_train_steps": 0.005298614501953125, "step": 460} +{"info/global_step": 461, "train_info/time_within_train_step": 27.76468849182129, "step": 461} +{"train_info/time_between_train_steps": 0.0055735111236572266, "step": 461} +{"info/global_step": 462, "train_info/time_within_train_step": 27.77669382095337, "step": 462} +{"train_info/time_between_train_steps": 0.00982809066772461, "step": 462} +{"info/global_step": 463, "train_info/time_within_train_step": 27.703883171081543, "step": 463} +{"train_info/time_between_train_steps": 0.01000070571899414, "step": 463} +{"info/global_step": 464, "train_info/time_within_train_step": 27.739341497421265, "step": 464} +{"train_info/time_between_train_steps": 0.005412578582763672, "step": 464} +{"info/global_step": 465, "train_info/time_within_train_step": 27.643715858459473, "step": 465} +{"train_info/time_between_train_steps": 0.005573749542236328, "step": 465} +{"info/global_step": 466, "train_info/time_within_train_step": 27.803536653518677, "step": 466} +{"train_info/time_between_train_steps": 0.005731105804443359, "step": 466} +{"info/global_step": 467, "train_info/time_within_train_step": 27.720780849456787, "step": 467} +{"train_info/time_between_train_steps": 0.005358457565307617, "step": 467} +{"info/global_step": 468, "train_info/time_within_train_step": 27.736836671829224, "step": 468} +{"train_info/time_between_train_steps": 0.008594274520874023, "step": 468} +{"info/global_step": 469, "train_info/time_within_train_step": 27.704147815704346, "step": 469} +{"train_info/time_between_train_steps": 0.005725860595703125, "step": 469} +{"info/global_step": 470, "train_info/time_within_train_step": 27.666210412979126, "step": 470} +{"train_info/time_between_train_steps": 0.005663633346557617, "step": 470} +{"info/global_step": 471, "train_info/time_within_train_step": 27.70126223564148, "step": 471} +{"train_info/time_between_train_steps": 0.014991283416748047, "step": 471} +{"info/global_step": 472, "train_info/time_within_train_step": 27.674954175949097, "step": 472} +{"train_info/time_between_train_steps": 0.005982875823974609, "step": 472} +{"info/global_step": 473, "train_info/time_within_train_step": 27.730921268463135, "step": 473} +{"train_info/time_between_train_steps": 0.010573148727416992, "step": 473} +{"info/global_step": 474, "train_info/time_within_train_step": 27.740994215011597, "step": 474} +{"train_info/time_between_train_steps": 0.00586390495300293, "step": 474} +{"info/global_step": 475, "train_info/time_within_train_step": 27.784860849380493, "step": 475} +{"train_info/time_between_train_steps": 0.00582432746887207, "step": 475} +{"info/global_step": 476, "train_info/time_within_train_step": 27.730767488479614, "step": 476} +{"train_info/time_between_train_steps": 0.0054264068603515625, "step": 476} +{"info/global_step": 477, "train_info/time_within_train_step": 27.754969596862793, "step": 477} +{"train_info/time_between_train_steps": 0.0059473514556884766, "step": 477} +{"info/global_step": 478, "train_info/time_within_train_step": 27.677706480026245, "step": 478} +{"train_info/time_between_train_steps": 0.01115870475769043, "step": 478} +{"info/global_step": 479, "train_info/time_within_train_step": 27.8808491230011, "step": 479} +{"train_info/time_between_train_steps": 0.01067972183227539, "step": 479} +{"info/global_step": 480, "train_info/time_within_train_step": 27.826266527175903, "step": 480} +{"train_info/time_between_train_steps": 0.0070073604583740234, "step": 480} +{"train_info/time_between_train_steps": 13.347284317016602, "step": 480} +{"info/global_step": 481, "train_info/time_within_train_step": 27.70461392402649, "step": 481} +{"train_info/time_between_train_steps": 0.005587577819824219, "step": 481} +{"info/global_step": 482, "train_info/time_within_train_step": 27.874757766723633, "step": 482} +{"train_info/time_between_train_steps": 0.005838871002197266, "step": 482} +{"info/global_step": 483, "train_info/time_within_train_step": 27.681884765625, "step": 483} +{"train_info/time_between_train_steps": 0.010296344757080078, "step": 483} +{"info/global_step": 484, "train_info/time_within_train_step": 27.954837560653687, "step": 484} +{"train_info/time_between_train_steps": 0.010417461395263672, "step": 484} +{"info/global_step": 485, "train_info/time_within_train_step": 27.763883113861084, "step": 485} +{"train_info/time_between_train_steps": 0.007025480270385742, "step": 485} +{"info/global_step": 486, "train_info/time_within_train_step": 27.869061946868896, "step": 486} +{"train_info/time_between_train_steps": 0.005827903747558594, "step": 486} +{"info/global_step": 487, "train_info/time_within_train_step": 27.733506441116333, "step": 487} +{"train_info/time_between_train_steps": 0.00592803955078125, "step": 487} +{"info/global_step": 488, "train_info/time_within_train_step": 28.089458465576172, "step": 488} +{"train_info/time_between_train_steps": 0.005454063415527344, "step": 488} +{"info/global_step": 489, "train_info/time_within_train_step": 27.677315711975098, "step": 489} +{"train_info/time_between_train_steps": 0.00608372688293457, "step": 489} +{"info/global_step": 490, "train_info/time_within_train_step": 27.873469352722168, "step": 490} +{"train_info/time_between_train_steps": 0.005683183670043945, "step": 490} +{"info/global_step": 491, "train_info/time_within_train_step": 27.833791732788086, "step": 491} +{"train_info/time_between_train_steps": 0.005745410919189453, "step": 491} +{"info/global_step": 492, "train_info/time_within_train_step": 27.772815227508545, "step": 492} +{"train_info/time_between_train_steps": 0.0058896541595458984, "step": 492} +{"info/global_step": 493, "train_info/time_within_train_step": 27.819809675216675, "step": 493} +{"train_info/time_between_train_steps": 0.010570526123046875, "step": 493} +{"info/global_step": 494, "train_info/time_within_train_step": 27.770687103271484, "step": 494} +{"train_info/time_between_train_steps": 0.010813713073730469, "step": 494} +{"info/global_step": 495, "train_info/time_within_train_step": 27.857892990112305, "step": 495} +{"train_info/time_between_train_steps": 0.005818367004394531, "step": 495} +{"info/global_step": 496, "train_info/time_within_train_step": 27.694514989852905, "step": 496} +{"train_info/time_between_train_steps": 0.0055217742919921875, "step": 496} +{"info/global_step": 497, "train_info/time_within_train_step": 27.706361770629883, "step": 497} +{"train_info/time_between_train_steps": 0.005552768707275391, "step": 497} +{"info/global_step": 498, "train_info/time_within_train_step": 27.68122100830078, "step": 498} +{"train_info/time_between_train_steps": 0.005776405334472656, "step": 498} +{"info/global_step": 499, "train_info/time_within_train_step": 27.803741216659546, "step": 499} +{"train_info/time_between_train_steps": 0.010495424270629883, "step": 499} +{"info/global_step": 500, "train_info/time_within_train_step": 27.797527551651, "step": 500} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733972699, "_runtime": 16084}, "step": 500} +{"logs": {"train/loss": 3.8942, "train/learning_rate": 0.00038888888888888887, "train/epoch": 16.02, "_timestamp": 1733972699, "_runtime": 16084}, "step": 500} +{"train_info/time_between_train_steps": 2.682391881942749, "step": 500} +{"info/global_step": 501, "train_info/time_within_train_step": 27.674190998077393, "step": 501} +{"train_info/time_between_train_steps": 0.005510807037353516, "step": 501} +{"info/global_step": 502, "train_info/time_within_train_step": 27.700421810150146, "step": 502} +{"train_info/time_between_train_steps": 0.011112451553344727, "step": 502} +{"info/global_step": 503, "train_info/time_within_train_step": 27.768426418304443, "step": 503} +{"train_info/time_between_train_steps": 0.0056324005126953125, "step": 503} +{"info/global_step": 504, "train_info/time_within_train_step": 27.755396604537964, "step": 504} +{"train_info/time_between_train_steps": 0.005741596221923828, "step": 504} +{"info/global_step": 505, "train_info/time_within_train_step": 27.732746601104736, "step": 505} +{"train_info/time_between_train_steps": 0.005627870559692383, "step": 505} +{"info/global_step": 506, "train_info/time_within_train_step": 27.733548402786255, "step": 506} +{"train_info/time_between_train_steps": 0.005593538284301758, "step": 506} +{"info/global_step": 507, "train_info/time_within_train_step": 27.698081970214844, "step": 507} +{"train_info/time_between_train_steps": 0.006011486053466797, "step": 507} +{"info/global_step": 508, "train_info/time_within_train_step": 27.76463270187378, "step": 508} +{"train_info/time_between_train_steps": 0.010307073593139648, "step": 508} +{"info/global_step": 509, "train_info/time_within_train_step": 27.74957036972046, "step": 509} +{"train_info/time_between_train_steps": 0.006256103515625, "step": 509} +{"info/global_step": 510, "train_info/time_within_train_step": 27.901662349700928, "step": 510} +{"train_info/time_between_train_steps": 0.006492137908935547, "step": 510} +{"train_info/time_between_train_steps": 13.41673731803894, "step": 510} +{"info/global_step": 511, "train_info/time_within_train_step": 27.695417642593384, "step": 511} +{"train_info/time_between_train_steps": 0.005941867828369141, "step": 511} +{"info/global_step": 512, "train_info/time_within_train_step": 27.88218069076538, "step": 512} +{"train_info/time_between_train_steps": 0.005922555923461914, "step": 512} +{"info/global_step": 513, "train_info/time_within_train_step": 27.723479747772217, "step": 513} +{"train_info/time_between_train_steps": 0.005432844161987305, "step": 513} +{"info/global_step": 514, "train_info/time_within_train_step": 27.860788106918335, "step": 514} +{"train_info/time_between_train_steps": 0.011337518692016602, "step": 514} +{"info/global_step": 515, "train_info/time_within_train_step": 27.800992250442505, "step": 515} +{"train_info/time_between_train_steps": 0.005911111831665039, "step": 515} +{"info/global_step": 516, "train_info/time_within_train_step": 27.938113927841187, "step": 516} +{"train_info/time_between_train_steps": 0.006131410598754883, "step": 516} +{"info/global_step": 517, "train_info/time_within_train_step": 27.653474807739258, "step": 517} +{"train_info/time_between_train_steps": 0.005385398864746094, "step": 517} +{"info/global_step": 518, "train_info/time_within_train_step": 27.86531901359558, "step": 518} +{"train_info/time_between_train_steps": 0.006119489669799805, "step": 518} +{"info/global_step": 519, "train_info/time_within_train_step": 27.734635591506958, "step": 519} +{"train_info/time_between_train_steps": 0.0058460235595703125, "step": 519} +{"info/global_step": 520, "train_info/time_within_train_step": 27.918188095092773, "step": 520} +{"train_info/time_between_train_steps": 0.005760908126831055, "step": 520} +{"info/global_step": 521, "train_info/time_within_train_step": 27.794466972351074, "step": 521} +{"train_info/time_between_train_steps": 0.005849361419677734, "step": 521} +{"info/global_step": 522, "train_info/time_within_train_step": 27.805307865142822, "step": 522} +{"train_info/time_between_train_steps": 0.009900331497192383, "step": 522} +{"info/global_step": 523, "train_info/time_within_train_step": 27.674083948135376, "step": 523} +{"train_info/time_between_train_steps": 0.007156848907470703, "step": 523} +{"info/global_step": 524, "train_info/time_within_train_step": 27.775588512420654, "step": 524} +{"train_info/time_between_train_steps": 0.010339736938476562, "step": 524} +{"info/global_step": 525, "train_info/time_within_train_step": 27.746413469314575, "step": 525} +{"train_info/time_between_train_steps": 0.005799055099487305, "step": 525} +{"info/global_step": 526, "train_info/time_within_train_step": 27.82020878791809, "step": 526} +{"train_info/time_between_train_steps": 0.005740642547607422, "step": 526} +{"info/global_step": 527, "train_info/time_within_train_step": 27.717740058898926, "step": 527} +{"train_info/time_between_train_steps": 0.006518125534057617, "step": 527} +{"info/global_step": 528, "train_info/time_within_train_step": 27.745776891708374, "step": 528} +{"train_info/time_between_train_steps": 0.005945444107055664, "step": 528} +{"info/global_step": 529, "train_info/time_within_train_step": 27.662747859954834, "step": 529} +{"train_info/time_between_train_steps": 0.010205507278442383, "step": 529} +{"info/global_step": 530, "train_info/time_within_train_step": 27.783283710479736, "step": 530} +{"train_info/time_between_train_steps": 0.005711078643798828, "step": 530} +{"info/global_step": 531, "train_info/time_within_train_step": 27.78277611732483, "step": 531} +{"train_info/time_between_train_steps": 0.005406856536865234, "step": 531} +{"info/global_step": 532, "train_info/time_within_train_step": 27.759464502334595, "step": 532} +{"train_info/time_between_train_steps": 0.005697011947631836, "step": 532} +{"info/global_step": 533, "train_info/time_within_train_step": 27.739286422729492, "step": 533} +{"train_info/time_between_train_steps": 0.011017322540283203, "step": 533} +{"info/global_step": 534, "train_info/time_within_train_step": 27.716500997543335, "step": 534} +{"train_info/time_between_train_steps": 0.005639791488647461, "step": 534} +{"info/global_step": 535, "train_info/time_within_train_step": 27.765321969985962, "step": 535} +{"train_info/time_between_train_steps": 0.009623289108276367, "step": 535} +{"info/global_step": 536, "train_info/time_within_train_step": 27.71252703666687, "step": 536} +{"train_info/time_between_train_steps": 0.005630016326904297, "step": 536} +{"info/global_step": 537, "train_info/time_within_train_step": 27.81237292289734, "step": 537} +{"train_info/time_between_train_steps": 0.014818429946899414, "step": 537} +{"info/global_step": 538, "train_info/time_within_train_step": 27.794668436050415, "step": 538} +{"train_info/time_between_train_steps": 0.010638952255249023, "step": 538} +{"info/global_step": 539, "train_info/time_within_train_step": 27.652684211730957, "step": 539} +{"train_info/time_between_train_steps": 0.005858182907104492, "step": 539} +{"info/global_step": 540, "train_info/time_within_train_step": 27.862979412078857, "step": 540} +{"train_info/time_between_train_steps": 0.016155242919921875, "step": 540} +{"train_info/time_between_train_steps": 13.632779598236084, "step": 540} +{"info/global_step": 541, "train_info/time_within_train_step": 27.79082441329956, "step": 541} +{"train_info/time_between_train_steps": 0.0053424835205078125, "step": 541} +{"info/global_step": 542, "train_info/time_within_train_step": 27.832223653793335, "step": 542} +{"train_info/time_between_train_steps": 0.005748748779296875, "step": 542} +{"info/global_step": 543, "train_info/time_within_train_step": 27.794845819473267, "step": 543} +{"train_info/time_between_train_steps": 0.005951642990112305, "step": 543} +{"info/global_step": 544, "train_info/time_within_train_step": 27.933107376098633, "step": 544} +{"train_info/time_between_train_steps": 0.005513668060302734, "step": 544} +{"info/global_step": 545, "train_info/time_within_train_step": 27.749638080596924, "step": 545} +{"train_info/time_between_train_steps": 0.0058939456939697266, "step": 545} +{"info/global_step": 546, "train_info/time_within_train_step": 27.862308979034424, "step": 546} +{"train_info/time_between_train_steps": 0.011556625366210938, "step": 546} +{"info/global_step": 547, "train_info/time_within_train_step": 27.77101182937622, "step": 547} +{"train_info/time_between_train_steps": 0.005891084671020508, "step": 547} +{"info/global_step": 548, "train_info/time_within_train_step": 27.87828540802002, "step": 548} +{"train_info/time_between_train_steps": 0.00597834587097168, "step": 548} +{"info/global_step": 549, "train_info/time_within_train_step": 27.673101663589478, "step": 549} +{"train_info/time_between_train_steps": 0.011039018630981445, "step": 549} +{"info/global_step": 550, "train_info/time_within_train_step": 28.06051278114319, "step": 550} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733974118, "_runtime": 17503}, "step": 550} +{"logs": {"train/loss": 3.7583, "train/learning_rate": 0.0003611111111111111, "train/epoch": 18.01, "_timestamp": 1733974118, "_runtime": 17503}, "step": 550} +{"train_info/time_between_train_steps": 0.007516145706176758, "step": 550} +{"info/global_step": 551, "train_info/time_within_train_step": 27.805784463882446, "step": 551} +{"train_info/time_between_train_steps": 0.0055179595947265625, "step": 551} +{"info/global_step": 552, "train_info/time_within_train_step": 27.77694010734558, "step": 552} +{"train_info/time_between_train_steps": 0.0067179203033447266, "step": 552} +{"info/global_step": 553, "train_info/time_within_train_step": 27.732990264892578, "step": 553} +{"train_info/time_between_train_steps": 0.005652427673339844, "step": 553} +{"info/global_step": 554, "train_info/time_within_train_step": 27.73584532737732, "step": 554} +{"train_info/time_between_train_steps": 0.005927085876464844, "step": 554} +{"info/global_step": 555, "train_info/time_within_train_step": 27.72602391242981, "step": 555} +{"train_info/time_between_train_steps": 0.010283470153808594, "step": 555} +{"info/global_step": 556, "train_info/time_within_train_step": 27.830185890197754, "step": 556} +{"train_info/time_between_train_steps": 0.005517721176147461, "step": 556} +{"info/global_step": 557, "train_info/time_within_train_step": 27.8612220287323, "step": 557} +{"train_info/time_between_train_steps": 0.0058743953704833984, "step": 557} +{"info/global_step": 558, "train_info/time_within_train_step": 27.756335496902466, "step": 558} +{"train_info/time_between_train_steps": 0.01099538803100586, "step": 558} +{"info/global_step": 559, "train_info/time_within_train_step": 27.669925212860107, "step": 559} +{"train_info/time_between_train_steps": 0.00534367561340332, "step": 559} +{"info/global_step": 560, "train_info/time_within_train_step": 27.648232221603394, "step": 560} +{"train_info/time_between_train_steps": 0.005404949188232422, "step": 560} +{"info/global_step": 561, "train_info/time_within_train_step": 27.709359884262085, "step": 561} +{"train_info/time_between_train_steps": 0.011250495910644531, "step": 561} +{"info/global_step": 562, "train_info/time_within_train_step": 27.853013515472412, "step": 562} +{"train_info/time_between_train_steps": 0.010210037231445312, "step": 562} +{"info/global_step": 563, "train_info/time_within_train_step": 27.77680277824402, "step": 563} +{"train_info/time_between_train_steps": 0.005774736404418945, "step": 563} +{"info/global_step": 564, "train_info/time_within_train_step": 27.70507574081421, "step": 564} +{"train_info/time_between_train_steps": 0.0070781707763671875, "step": 564} +{"info/global_step": 565, "train_info/time_within_train_step": 27.86456274986267, "step": 565} +{"train_info/time_between_train_steps": 0.0052907466888427734, "step": 565} +{"info/global_step": 566, "train_info/time_within_train_step": 27.729377031326294, "step": 566} +{"train_info/time_between_train_steps": 0.006084442138671875, "step": 566} +{"info/global_step": 567, "train_info/time_within_train_step": 27.632325649261475, "step": 567} +{"train_info/time_between_train_steps": 0.0060231685638427734, "step": 567} +{"info/global_step": 568, "train_info/time_within_train_step": 27.75538420677185, "step": 568} +{"train_info/time_between_train_steps": 0.005857229232788086, "step": 568} +{"info/global_step": 569, "train_info/time_within_train_step": 27.85464119911194, "step": 569} +{"train_info/time_between_train_steps": 0.010679006576538086, "step": 569} +{"info/global_step": 570, "train_info/time_within_train_step": 27.698501348495483, "step": 570} +{"train_info/time_between_train_steps": 0.0065593719482421875, "step": 570} +{"train_info/time_between_train_steps": 13.720562934875488, "step": 570} +{"info/global_step": 571, "train_info/time_within_train_step": 27.69323754310608, "step": 571} +{"train_info/time_between_train_steps": 0.00552821159362793, "step": 571} +{"info/global_step": 572, "train_info/time_within_train_step": 28.079623460769653, "step": 572} +{"train_info/time_between_train_steps": 0.006306886672973633, "step": 572} +{"info/global_step": 573, "train_info/time_within_train_step": 27.795382022857666, "step": 573} +{"train_info/time_between_train_steps": 0.012556791305541992, "step": 573} +{"info/global_step": 574, "train_info/time_within_train_step": 27.896806001663208, "step": 574} +{"train_info/time_between_train_steps": 0.006288051605224609, "step": 574} +{"info/global_step": 575, "train_info/time_within_train_step": 27.690058708190918, "step": 575} +{"train_info/time_between_train_steps": 0.006327390670776367, "step": 575} +{"info/global_step": 576, "train_info/time_within_train_step": 27.864694595336914, "step": 576} +{"train_info/time_between_train_steps": 0.0061495304107666016, "step": 576} +{"info/global_step": 577, "train_info/time_within_train_step": 27.757843255996704, "step": 577} +{"train_info/time_between_train_steps": 0.006207704544067383, "step": 577} +{"info/global_step": 578, "train_info/time_within_train_step": 27.847391605377197, "step": 578} +{"train_info/time_between_train_steps": 0.005860567092895508, "step": 578} +{"info/global_step": 579, "train_info/time_within_train_step": 27.712068796157837, "step": 579} +{"train_info/time_between_train_steps": 0.006116628646850586, "step": 579} +{"info/global_step": 580, "train_info/time_within_train_step": 27.840449333190918, "step": 580} +{"train_info/time_between_train_steps": 0.005800008773803711, "step": 580} +{"info/global_step": 581, "train_info/time_within_train_step": 27.673487424850464, "step": 581} +{"train_info/time_between_train_steps": 0.005202293395996094, "step": 581} +{"info/global_step": 582, "train_info/time_within_train_step": 27.726983547210693, "step": 582} +{"train_info/time_between_train_steps": 0.005491733551025391, "step": 582} +{"info/global_step": 583, "train_info/time_within_train_step": 27.740482807159424, "step": 583} +{"train_info/time_between_train_steps": 0.005866289138793945, "step": 583} +{"info/global_step": 584, "train_info/time_within_train_step": 27.62533736228943, "step": 584} +{"train_info/time_between_train_steps": 0.005537748336791992, "step": 584} +{"info/global_step": 585, "train_info/time_within_train_step": 27.695152044296265, "step": 585} +{"train_info/time_between_train_steps": 0.005745410919189453, "step": 585} +{"info/global_step": 586, "train_info/time_within_train_step": 27.756885051727295, "step": 586} +{"train_info/time_between_train_steps": 0.007770538330078125, "step": 586} +{"info/global_step": 587, "train_info/time_within_train_step": 27.909470796585083, "step": 587} +{"train_info/time_between_train_steps": 0.015439033508300781, "step": 587} +{"info/global_step": 588, "train_info/time_within_train_step": 27.656196117401123, "step": 588} +{"train_info/time_between_train_steps": 0.007134675979614258, "step": 588} +{"info/global_step": 589, "train_info/time_within_train_step": 27.732350826263428, "step": 589} +{"train_info/time_between_train_steps": 0.01141357421875, "step": 589} +{"info/global_step": 590, "train_info/time_within_train_step": 27.63142418861389, "step": 590} +{"train_info/time_between_train_steps": 0.011248588562011719, "step": 590} +{"info/global_step": 591, "train_info/time_within_train_step": 27.618940114974976, "step": 591} +{"train_info/time_between_train_steps": 0.005420684814453125, "step": 591} +{"info/global_step": 592, "train_info/time_within_train_step": 27.70654821395874, "step": 592} +{"train_info/time_between_train_steps": 0.006140708923339844, "step": 592} +{"info/global_step": 593, "train_info/time_within_train_step": 27.714311122894287, "step": 593} +{"train_info/time_between_train_steps": 0.005385637283325195, "step": 593} +{"info/global_step": 594, "train_info/time_within_train_step": 27.73839259147644, "step": 594} +{"train_info/time_between_train_steps": 0.007935762405395508, "step": 594} +{"info/global_step": 595, "train_info/time_within_train_step": 27.679608821868896, "step": 595} +{"train_info/time_between_train_steps": 0.01562643051147461, "step": 595} +{"info/global_step": 596, "train_info/time_within_train_step": 27.683271646499634, "step": 596} +{"train_info/time_between_train_steps": 0.0056760311126708984, "step": 596} +{"info/global_step": 597, "train_info/time_within_train_step": 27.76629328727722, "step": 597} +{"train_info/time_between_train_steps": 0.006464242935180664, "step": 597} +{"info/global_step": 598, "train_info/time_within_train_step": 27.752265691757202, "step": 598} +{"train_info/time_between_train_steps": 0.014307022094726562, "step": 598} +{"info/global_step": 599, "train_info/time_within_train_step": 27.710466623306274, "step": 599} +{"train_info/time_between_train_steps": 0.006049394607543945, "step": 599} +{"info/global_step": 600, "train_info/time_within_train_step": 27.70166254043579, "step": 600} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733975520, "_runtime": 18905}, "step": 600} +{"logs": {"train/loss": 3.6224, "train/learning_rate": 0.0003333333333333333, "train/epoch": 19.02, "_timestamp": 1733975520, "_runtime": 18905}, "step": 600} +{"train_info/time_between_train_steps": 2.551180124282837, "step": 600} +{"train_info/time_between_train_steps": 15.795550107955933, "step": 600} +{"info/global_step": 601, "train_info/time_within_train_step": 27.74175500869751, "step": 601} +{"train_info/time_between_train_steps": 0.005598306655883789, "step": 601} +{"info/global_step": 602, "train_info/time_within_train_step": 27.732016801834106, "step": 602} +{"train_info/time_between_train_steps": 0.0056192874908447266, "step": 602} +{"info/global_step": 603, "train_info/time_within_train_step": 27.899710655212402, "step": 603} +{"train_info/time_between_train_steps": 0.006135225296020508, "step": 603} +{"info/global_step": 604, "train_info/time_within_train_step": 27.907378673553467, "step": 604} +{"train_info/time_between_train_steps": 0.005543947219848633, "step": 604} +{"info/global_step": 605, "train_info/time_within_train_step": 27.705161809921265, "step": 605} +{"train_info/time_between_train_steps": 0.0059375762939453125, "step": 605} +{"info/global_step": 606, "train_info/time_within_train_step": 27.871654272079468, "step": 606} +{"train_info/time_between_train_steps": 0.009852170944213867, "step": 606} +{"info/global_step": 607, "train_info/time_within_train_step": 27.702194929122925, "step": 607} +{"train_info/time_between_train_steps": 0.006136655807495117, "step": 607} +{"info/global_step": 608, "train_info/time_within_train_step": 27.78289484977722, "step": 608} +{"train_info/time_between_train_steps": 0.005718708038330078, "step": 608} +{"info/global_step": 609, "train_info/time_within_train_step": 27.70877194404602, "step": 609} +{"train_info/time_between_train_steps": 0.010715484619140625, "step": 609} +{"info/global_step": 610, "train_info/time_within_train_step": 27.77180004119873, "step": 610} +{"train_info/time_between_train_steps": 0.005639076232910156, "step": 610} +{"info/global_step": 611, "train_info/time_within_train_step": 27.724162101745605, "step": 611} +{"train_info/time_between_train_steps": 0.005840778350830078, "step": 611} +{"info/global_step": 612, "train_info/time_within_train_step": 27.58842897415161, "step": 612} +{"train_info/time_between_train_steps": 0.0057697296142578125, "step": 612} +{"info/global_step": 613, "train_info/time_within_train_step": 27.62577986717224, "step": 613} +{"train_info/time_between_train_steps": 0.010208368301391602, "step": 613} +{"info/global_step": 614, "train_info/time_within_train_step": 27.86578941345215, "step": 614} +{"train_info/time_between_train_steps": 0.011387109756469727, "step": 614} +{"info/global_step": 615, "train_info/time_within_train_step": 27.663437366485596, "step": 615} +{"train_info/time_between_train_steps": 0.005250692367553711, "step": 615} +{"info/global_step": 616, "train_info/time_within_train_step": 27.77779269218445, "step": 616} +{"train_info/time_between_train_steps": 0.005858421325683594, "step": 616} +{"info/global_step": 617, "train_info/time_within_train_step": 27.7773494720459, "step": 617} +{"train_info/time_between_train_steps": 0.00525665283203125, "step": 617} +{"info/global_step": 618, "train_info/time_within_train_step": 27.787336587905884, "step": 618} +{"train_info/time_between_train_steps": 0.005653858184814453, "step": 618} +{"info/global_step": 619, "train_info/time_within_train_step": 27.747944355010986, "step": 619} +{"train_info/time_between_train_steps": 0.010644197463989258, "step": 619} +{"info/global_step": 620, "train_info/time_within_train_step": 27.651322841644287, "step": 620} +{"train_info/time_between_train_steps": 0.005586385726928711, "step": 620} +{"info/global_step": 621, "train_info/time_within_train_step": 27.625121116638184, "step": 621} +{"train_info/time_between_train_steps": 0.005759000778198242, "step": 621} +{"info/global_step": 622, "train_info/time_within_train_step": 27.702893018722534, "step": 622} +{"train_info/time_between_train_steps": 0.011204719543457031, "step": 622} +{"info/global_step": 623, "train_info/time_within_train_step": 27.69914722442627, "step": 623} +{"train_info/time_between_train_steps": 0.005916118621826172, "step": 623} +{"info/global_step": 624, "train_info/time_within_train_step": 27.771838665008545, "step": 624} +{"train_info/time_between_train_steps": 0.011671066284179688, "step": 624} +{"info/global_step": 625, "train_info/time_within_train_step": 27.74812412261963, "step": 625} +{"train_info/time_between_train_steps": 0.006168842315673828, "step": 625} +{"info/global_step": 626, "train_info/time_within_train_step": 27.72345209121704, "step": 626} +{"train_info/time_between_train_steps": 0.005791425704956055, "step": 626} +{"info/global_step": 627, "train_info/time_within_train_step": 27.74526572227478, "step": 627} +{"train_info/time_between_train_steps": 0.0059545040130615234, "step": 627} +{"info/global_step": 628, "train_info/time_within_train_step": 27.69171714782715, "step": 628} +{"train_info/time_between_train_steps": 0.006159782409667969, "step": 628} +{"info/global_step": 629, "train_info/time_within_train_step": 27.65793251991272, "step": 629} +{"train_info/time_between_train_steps": 0.005990505218505859, "step": 629} +{"info/global_step": 630, "train_info/time_within_train_step": 27.701083421707153, "step": 630} +{"train_info/time_between_train_steps": 0.011880874633789062, "step": 630} +{"train_info/time_between_train_steps": 13.414457082748413, "step": 630} +{"info/global_step": 631, "train_info/time_within_train_step": 27.746797800064087, "step": 631} +{"train_info/time_between_train_steps": 0.005522727966308594, "step": 631} +{"info/global_step": 632, "train_info/time_within_train_step": 27.929680109024048, "step": 632} +{"train_info/time_between_train_steps": 0.005890369415283203, "step": 632} +{"info/global_step": 633, "train_info/time_within_train_step": 27.635141611099243, "step": 633} +{"train_info/time_between_train_steps": 0.01225733757019043, "step": 633} +{"info/global_step": 634, "train_info/time_within_train_step": 27.9134361743927, "step": 634} +{"train_info/time_between_train_steps": 0.006171703338623047, "step": 634} +{"info/global_step": 635, "train_info/time_within_train_step": 27.62370228767395, "step": 635} +{"train_info/time_between_train_steps": 0.0059833526611328125, "step": 635} +{"info/global_step": 636, "train_info/time_within_train_step": 27.862972736358643, "step": 636} +{"train_info/time_between_train_steps": 0.005753517150878906, "step": 636} +{"info/global_step": 637, "train_info/time_within_train_step": 27.739132404327393, "step": 637} +{"train_info/time_between_train_steps": 0.006323814392089844, "step": 637} +{"info/global_step": 638, "train_info/time_within_train_step": 27.83002495765686, "step": 638} +{"train_info/time_between_train_steps": 0.005972385406494141, "step": 638} +{"info/global_step": 639, "train_info/time_within_train_step": 27.759262323379517, "step": 639} +{"train_info/time_between_train_steps": 0.0062716007232666016, "step": 639} +{"info/global_step": 640, "train_info/time_within_train_step": 27.89661931991577, "step": 640} +{"train_info/time_between_train_steps": 0.00624394416809082, "step": 640} +{"info/global_step": 641, "train_info/time_within_train_step": 27.849056482315063, "step": 641} +{"train_info/time_between_train_steps": 0.0057680606842041016, "step": 641} +{"info/global_step": 642, "train_info/time_within_train_step": 27.807401418685913, "step": 642} +{"train_info/time_between_train_steps": 0.01005864143371582, "step": 642} +{"info/global_step": 643, "train_info/time_within_train_step": 27.689054012298584, "step": 643} +{"train_info/time_between_train_steps": 0.0058612823486328125, "step": 643} +{"info/global_step": 644, "train_info/time_within_train_step": 27.70509171485901, "step": 644} +{"train_info/time_between_train_steps": 0.005541563034057617, "step": 644} +{"info/global_step": 645, "train_info/time_within_train_step": 27.611226081848145, "step": 645} +{"train_info/time_between_train_steps": 0.0056955814361572266, "step": 645} +{"info/global_step": 646, "train_info/time_within_train_step": 27.616312742233276, "step": 646} +{"train_info/time_between_train_steps": 0.00561213493347168, "step": 646} +{"info/global_step": 647, "train_info/time_within_train_step": 27.795820236206055, "step": 647} +{"train_info/time_between_train_steps": 0.011059761047363281, "step": 647} +{"info/global_step": 648, "train_info/time_within_train_step": 27.688057899475098, "step": 648} +{"train_info/time_between_train_steps": 0.0056726932525634766, "step": 648} +{"info/global_step": 649, "train_info/time_within_train_step": 27.914807558059692, "step": 649} +{"train_info/time_between_train_steps": 0.005798816680908203, "step": 649} +{"info/global_step": 650, "train_info/time_within_train_step": 27.740928173065186, "step": 650} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733976937, "_runtime": 20322}, "step": 650} +{"logs": {"train/loss": 3.55, "train/learning_rate": 0.00030555555555555555, "train/epoch": 21.02, "_timestamp": 1733976937, "_runtime": 20322}, "step": 650} +{"train_info/time_between_train_steps": 0.012084722518920898, "step": 650} +{"info/global_step": 651, "train_info/time_within_train_step": 27.697684288024902, "step": 651} +{"train_info/time_between_train_steps": 0.005689859390258789, "step": 651} +{"info/global_step": 652, "train_info/time_within_train_step": 27.716506719589233, "step": 652} +{"train_info/time_between_train_steps": 0.0056536197662353516, "step": 652} +{"info/global_step": 653, "train_info/time_within_train_step": 27.691770792007446, "step": 653} +{"train_info/time_between_train_steps": 0.010854959487915039, "step": 653} +{"info/global_step": 654, "train_info/time_within_train_step": 27.71286106109619, "step": 654} +{"train_info/time_between_train_steps": 0.007861614227294922, "step": 654} +{"info/global_step": 655, "train_info/time_within_train_step": 27.667850494384766, "step": 655} +{"train_info/time_between_train_steps": 0.005708456039428711, "step": 655} +{"info/global_step": 656, "train_info/time_within_train_step": 27.599660396575928, "step": 656} +{"train_info/time_between_train_steps": 0.005692243576049805, "step": 656} +{"info/global_step": 657, "train_info/time_within_train_step": 27.628103256225586, "step": 657} +{"train_info/time_between_train_steps": 0.005658864974975586, "step": 657} +{"info/global_step": 658, "train_info/time_within_train_step": 27.742066621780396, "step": 658} +{"train_info/time_between_train_steps": 0.006135225296020508, "step": 658} +{"info/global_step": 659, "train_info/time_within_train_step": 27.684698581695557, "step": 659} +{"train_info/time_between_train_steps": 0.0058362483978271484, "step": 659} +{"info/global_step": 660, "train_info/time_within_train_step": 27.722445487976074, "step": 660} +{"train_info/time_between_train_steps": 0.01164555549621582, "step": 660} +{"train_info/time_between_train_steps": 13.580663442611694, "step": 660} +{"info/global_step": 661, "train_info/time_within_train_step": 27.656134128570557, "step": 661} +{"train_info/time_between_train_steps": 0.005370378494262695, "step": 661} +{"info/global_step": 662, "train_info/time_within_train_step": 27.768903255462646, "step": 662} +{"train_info/time_between_train_steps": 0.01024317741394043, "step": 662} +{"info/global_step": 663, "train_info/time_within_train_step": 27.68340015411377, "step": 663} +{"train_info/time_between_train_steps": 0.005367279052734375, "step": 663} +{"info/global_step": 664, "train_info/time_within_train_step": 27.968320608139038, "step": 664} +{"train_info/time_between_train_steps": 0.005833864212036133, "step": 664} +{"info/global_step": 665, "train_info/time_within_train_step": 27.734649419784546, "step": 665} +{"train_info/time_between_train_steps": 0.016719818115234375, "step": 665} +{"info/global_step": 666, "train_info/time_within_train_step": 27.867435693740845, "step": 666} +{"train_info/time_between_train_steps": 0.011445760726928711, "step": 666} +{"info/global_step": 667, "train_info/time_within_train_step": 27.691325187683105, "step": 667} +{"train_info/time_between_train_steps": 0.005799055099487305, "step": 667} +{"info/global_step": 668, "train_info/time_within_train_step": 27.94595241546631, "step": 668} +{"train_info/time_between_train_steps": 0.005512714385986328, "step": 668} +{"info/global_step": 669, "train_info/time_within_train_step": 27.74989128112793, "step": 669} +{"train_info/time_between_train_steps": 0.005750894546508789, "step": 669} +{"info/global_step": 670, "train_info/time_within_train_step": 27.752474069595337, "step": 670} +{"train_info/time_between_train_steps": 0.00609278678894043, "step": 670} +{"info/global_step": 671, "train_info/time_within_train_step": 27.663702726364136, "step": 671} +{"train_info/time_between_train_steps": 0.005574941635131836, "step": 671} +{"info/global_step": 672, "train_info/time_within_train_step": 27.68216896057129, "step": 672} +{"train_info/time_between_train_steps": 0.005479097366333008, "step": 672} +{"info/global_step": 673, "train_info/time_within_train_step": 27.736604690551758, "step": 673} +{"train_info/time_between_train_steps": 0.0056073665618896484, "step": 673} +{"info/global_step": 674, "train_info/time_within_train_step": 27.656479597091675, "step": 674} +{"train_info/time_between_train_steps": 0.011174917221069336, "step": 674} +{"info/global_step": 675, "train_info/time_within_train_step": 27.76354479789734, "step": 675} +{"train_info/time_between_train_steps": 0.005688667297363281, "step": 675} +{"info/global_step": 676, "train_info/time_within_train_step": 27.740622520446777, "step": 676} +{"train_info/time_between_train_steps": 0.0057332515716552734, "step": 676} +{"info/global_step": 677, "train_info/time_within_train_step": 27.660950899124146, "step": 677} +{"train_info/time_between_train_steps": 0.0053653717041015625, "step": 677} +{"info/global_step": 678, "train_info/time_within_train_step": 27.726885080337524, "step": 678} +{"train_info/time_between_train_steps": 0.015278100967407227, "step": 678} +{"info/global_step": 679, "train_info/time_within_train_step": 27.6668438911438, "step": 679} +{"train_info/time_between_train_steps": 0.0058329105377197266, "step": 679} +{"info/global_step": 680, "train_info/time_within_train_step": 27.78157353401184, "step": 680} +{"train_info/time_between_train_steps": 0.01054835319519043, "step": 680} +{"info/global_step": 681, "train_info/time_within_train_step": 27.757325887680054, "step": 681} +{"train_info/time_between_train_steps": 0.005791425704956055, "step": 681} +{"info/global_step": 682, "train_info/time_within_train_step": 27.647804737091064, "step": 682} +{"train_info/time_between_train_steps": 0.005562305450439453, "step": 682} +{"info/global_step": 683, "train_info/time_within_train_step": 27.776491403579712, "step": 683} +{"train_info/time_between_train_steps": 0.005650043487548828, "step": 683} +{"info/global_step": 684, "train_info/time_within_train_step": 27.69125533103943, "step": 684} +{"train_info/time_between_train_steps": 0.01059722900390625, "step": 684} +{"info/global_step": 685, "train_info/time_within_train_step": 27.730567693710327, "step": 685} +{"train_info/time_between_train_steps": 0.005892753601074219, "step": 685} +{"info/global_step": 686, "train_info/time_within_train_step": 27.665796279907227, "step": 686} +{"train_info/time_between_train_steps": 0.005689144134521484, "step": 686} +{"info/global_step": 687, "train_info/time_within_train_step": 27.760876417160034, "step": 687} +{"train_info/time_between_train_steps": 0.006653785705566406, "step": 687} +{"info/global_step": 688, "train_info/time_within_train_step": 27.73122262954712, "step": 688} +{"train_info/time_between_train_steps": 0.006009578704833984, "step": 688} +{"info/global_step": 689, "train_info/time_within_train_step": 27.77959895133972, "step": 689} +{"train_info/time_between_train_steps": 0.008211135864257812, "step": 689} +{"info/global_step": 690, "train_info/time_within_train_step": 27.77787184715271, "step": 690} +{"train_info/time_between_train_steps": 0.007442951202392578, "step": 690} +{"train_info/time_between_train_steps": 13.172189235687256, "step": 690} +{"info/global_step": 691, "train_info/time_within_train_step": 27.68615460395813, "step": 691} +{"train_info/time_between_train_steps": 0.006181478500366211, "step": 691} +{"info/global_step": 692, "train_info/time_within_train_step": 27.82380175590515, "step": 692} +{"train_info/time_between_train_steps": 0.007072925567626953, "step": 692} +{"info/global_step": 693, "train_info/time_within_train_step": 27.632786512374878, "step": 693} +{"train_info/time_between_train_steps": 0.005632162094116211, "step": 693} +{"info/global_step": 694, "train_info/time_within_train_step": 27.935469388961792, "step": 694} +{"train_info/time_between_train_steps": 0.010999202728271484, "step": 694} +{"info/global_step": 695, "train_info/time_within_train_step": 27.873196363449097, "step": 695} +{"train_info/time_between_train_steps": 0.011419534683227539, "step": 695} +{"info/global_step": 696, "train_info/time_within_train_step": 27.871302127838135, "step": 696} +{"train_info/time_between_train_steps": 0.005671262741088867, "step": 696} +{"info/global_step": 697, "train_info/time_within_train_step": 27.743674278259277, "step": 697} +{"train_info/time_between_train_steps": 0.0055544376373291016, "step": 697} +{"info/global_step": 698, "train_info/time_within_train_step": 27.807862997055054, "step": 698} +{"train_info/time_between_train_steps": 0.005815982818603516, "step": 698} +{"info/global_step": 699, "train_info/time_within_train_step": 27.645262718200684, "step": 699} +{"train_info/time_between_train_steps": 0.005584239959716797, "step": 699} +{"info/global_step": 700, "train_info/time_within_train_step": 27.818116903305054, "step": 700} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733978351, "_runtime": 21736}, "step": 700} +{"logs": {"train/loss": 3.4646, "train/learning_rate": 0.0002777777777777778, "train/epoch": 23.01, "_timestamp": 1733978351, "_runtime": 21736}, "step": 700} +{"train_info/time_between_train_steps": 2.9651315212249756, "step": 700} +{"info/global_step": 701, "train_info/time_within_train_step": 27.818822383880615, "step": 701} +{"train_info/time_between_train_steps": 0.006815433502197266, "step": 701} +{"info/global_step": 702, "train_info/time_within_train_step": 27.680028915405273, "step": 702} +{"train_info/time_between_train_steps": 0.005414009094238281, "step": 702} +{"info/global_step": 703, "train_info/time_within_train_step": 27.6410813331604, "step": 703} +{"train_info/time_between_train_steps": 0.0054547786712646484, "step": 703} +{"info/global_step": 704, "train_info/time_within_train_step": 27.716320514678955, "step": 704} +{"train_info/time_between_train_steps": 0.005757808685302734, "step": 704} +{"info/global_step": 705, "train_info/time_within_train_step": 27.69986367225647, "step": 705} +{"train_info/time_between_train_steps": 0.009846687316894531, "step": 705} +{"info/global_step": 706, "train_info/time_within_train_step": 27.66827964782715, "step": 706} +{"train_info/time_between_train_steps": 0.007157087326049805, "step": 706} +{"info/global_step": 707, "train_info/time_within_train_step": 27.67715334892273, "step": 707} +{"train_info/time_between_train_steps": 0.01078176498413086, "step": 707} +{"info/global_step": 708, "train_info/time_within_train_step": 27.79299259185791, "step": 708} +{"train_info/time_between_train_steps": 0.005725860595703125, "step": 708} +{"info/global_step": 709, "train_info/time_within_train_step": 27.73767900466919, "step": 709} +{"train_info/time_between_train_steps": 0.005747795104980469, "step": 709} +{"info/global_step": 710, "train_info/time_within_train_step": 27.65120220184326, "step": 710} +{"train_info/time_between_train_steps": 0.00568842887878418, "step": 710} +{"info/global_step": 711, "train_info/time_within_train_step": 27.781054258346558, "step": 711} +{"train_info/time_between_train_steps": 0.010595083236694336, "step": 711} +{"info/global_step": 712, "train_info/time_within_train_step": 27.658878564834595, "step": 712} +{"train_info/time_between_train_steps": 0.00552058219909668, "step": 712} +{"info/global_step": 713, "train_info/time_within_train_step": 27.656041860580444, "step": 713} +{"train_info/time_between_train_steps": 0.0054547786712646484, "step": 713} +{"info/global_step": 714, "train_info/time_within_train_step": 27.63808512687683, "step": 714} +{"train_info/time_between_train_steps": 0.005865573883056641, "step": 714} +{"info/global_step": 715, "train_info/time_within_train_step": 27.679398775100708, "step": 715} +{"train_info/time_between_train_steps": 0.005925893783569336, "step": 715} +{"info/global_step": 716, "train_info/time_within_train_step": 27.641112089157104, "step": 716} +{"train_info/time_between_train_steps": 0.005628347396850586, "step": 716} +{"info/global_step": 717, "train_info/time_within_train_step": 27.682828426361084, "step": 717} +{"train_info/time_between_train_steps": 0.005957365036010742, "step": 717} +{"info/global_step": 718, "train_info/time_within_train_step": 27.709927558898926, "step": 718} +{"train_info/time_between_train_steps": 0.006089210510253906, "step": 718} +{"info/global_step": 719, "train_info/time_within_train_step": 27.799814701080322, "step": 719} +{"train_info/time_between_train_steps": 0.006195068359375, "step": 719} +{"info/global_step": 720, "train_info/time_within_train_step": 27.75649333000183, "step": 720} +{"train_info/time_between_train_steps": 0.0062847137451171875, "step": 720} +{"train_info/time_between_train_steps": 13.424046277999878, "step": 720} +{"info/global_step": 721, "train_info/time_within_train_step": 27.73721718788147, "step": 721} +{"train_info/time_between_train_steps": 0.005574226379394531, "step": 721} +{"info/global_step": 722, "train_info/time_within_train_step": 27.800751209259033, "step": 722} +{"train_info/time_between_train_steps": 0.00558781623840332, "step": 722} +{"info/global_step": 723, "train_info/time_within_train_step": 27.669225215911865, "step": 723} +{"train_info/time_between_train_steps": 0.005864143371582031, "step": 723} +{"info/global_step": 724, "train_info/time_within_train_step": 27.82817840576172, "step": 724} +{"train_info/time_between_train_steps": 0.005826234817504883, "step": 724} +{"info/global_step": 725, "train_info/time_within_train_step": 27.828222036361694, "step": 725} +{"train_info/time_between_train_steps": 0.008164167404174805, "step": 725} +{"info/global_step": 726, "train_info/time_within_train_step": 27.962713718414307, "step": 726} +{"train_info/time_between_train_steps": 0.006028652191162109, "step": 726} +{"info/global_step": 727, "train_info/time_within_train_step": 27.647329092025757, "step": 727} +{"train_info/time_between_train_steps": 0.005920886993408203, "step": 727} +{"info/global_step": 728, "train_info/time_within_train_step": 27.81523323059082, "step": 728} +{"train_info/time_between_train_steps": 0.006032466888427734, "step": 728} +{"info/global_step": 729, "train_info/time_within_train_step": 27.67135739326477, "step": 729} +{"train_info/time_between_train_steps": 0.0056765079498291016, "step": 729} +{"info/global_step": 730, "train_info/time_within_train_step": 27.830031871795654, "step": 730} +{"train_info/time_between_train_steps": 0.010217666625976562, "step": 730} +{"info/global_step": 731, "train_info/time_within_train_step": 27.747998476028442, "step": 731} +{"train_info/time_between_train_steps": 0.005622148513793945, "step": 731} +{"info/global_step": 732, "train_info/time_within_train_step": 27.73781991004944, "step": 732} +{"train_info/time_between_train_steps": 0.00585174560546875, "step": 732} +{"info/global_step": 733, "train_info/time_within_train_step": 27.70465064048767, "step": 733} +{"train_info/time_between_train_steps": 0.005742788314819336, "step": 733} +{"info/global_step": 734, "train_info/time_within_train_step": 27.691694259643555, "step": 734} +{"train_info/time_between_train_steps": 0.005377531051635742, "step": 734} +{"info/global_step": 735, "train_info/time_within_train_step": 27.694148302078247, "step": 735} +{"train_info/time_between_train_steps": 0.005883216857910156, "step": 735} +{"info/global_step": 736, "train_info/time_within_train_step": 27.683672189712524, "step": 736} +{"train_info/time_between_train_steps": 0.005945920944213867, "step": 736} +{"info/global_step": 737, "train_info/time_within_train_step": 27.622187852859497, "step": 737} +{"train_info/time_between_train_steps": 0.011194705963134766, "step": 737} +{"info/global_step": 738, "train_info/time_within_train_step": 27.629430770874023, "step": 738} +{"train_info/time_between_train_steps": 0.005686759948730469, "step": 738} +{"info/global_step": 739, "train_info/time_within_train_step": 27.6383113861084, "step": 739} +{"train_info/time_between_train_steps": 0.0056896209716796875, "step": 739} +{"info/global_step": 740, "train_info/time_within_train_step": 27.660311460494995, "step": 740} +{"train_info/time_between_train_steps": 0.010091066360473633, "step": 740} +{"info/global_step": 741, "train_info/time_within_train_step": 27.76412796974182, "step": 741} +{"train_info/time_between_train_steps": 0.01573967933654785, "step": 741} +{"info/global_step": 742, "train_info/time_within_train_step": 27.887744665145874, "step": 742} +{"train_info/time_between_train_steps": 0.010259389877319336, "step": 742} +{"info/global_step": 743, "train_info/time_within_train_step": 27.646349906921387, "step": 743} +{"train_info/time_between_train_steps": 0.005904197692871094, "step": 743} +{"info/global_step": 744, "train_info/time_within_train_step": 27.680718898773193, "step": 744} +{"train_info/time_between_train_steps": 0.010364294052124023, "step": 744} +{"info/global_step": 745, "train_info/time_within_train_step": 27.689696073532104, "step": 745} +{"train_info/time_between_train_steps": 0.005568027496337891, "step": 745} +{"info/global_step": 746, "train_info/time_within_train_step": 27.75922465324402, "step": 746} +{"train_info/time_between_train_steps": 0.010212898254394531, "step": 746} +{"info/global_step": 747, "train_info/time_within_train_step": 27.669562816619873, "step": 747} +{"train_info/time_between_train_steps": 0.005788564682006836, "step": 747} +{"info/global_step": 748, "train_info/time_within_train_step": 27.694612741470337, "step": 748} +{"train_info/time_between_train_steps": 0.006340980529785156, "step": 748} +{"info/global_step": 749, "train_info/time_within_train_step": 27.825511693954468, "step": 749} +{"train_info/time_between_train_steps": 0.006199836730957031, "step": 749} +{"info/global_step": 750, "train_info/time_within_train_step": 27.77820920944214, "step": 750} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733979754, "_runtime": 23139}, "step": 750} +{"logs": {"train/loss": 3.3628, "train/learning_rate": 0.00025, "train/epoch": 24.02, "_timestamp": 1733979754, "_runtime": 23139}, "step": 750} +{"train_info/time_between_train_steps": 0.008583784103393555, "step": 750} +{"train_info/time_between_train_steps": 13.490075826644897, "step": 750} +{"info/global_step": 751, "train_info/time_within_train_step": 27.661409616470337, "step": 751} +{"train_info/time_between_train_steps": 0.0054471492767333984, "step": 751} +{"info/global_step": 752, "train_info/time_within_train_step": 27.89215087890625, "step": 752} +{"train_info/time_between_train_steps": 0.005647897720336914, "step": 752} +{"info/global_step": 753, "train_info/time_within_train_step": 27.680787086486816, "step": 753} +{"train_info/time_between_train_steps": 0.006018638610839844, "step": 753} +{"info/global_step": 754, "train_info/time_within_train_step": 27.789219617843628, "step": 754} +{"train_info/time_between_train_steps": 0.010463714599609375, "step": 754} +{"info/global_step": 755, "train_info/time_within_train_step": 27.804949522018433, "step": 755} +{"train_info/time_between_train_steps": 0.00578618049621582, "step": 755} +{"info/global_step": 756, "train_info/time_within_train_step": 27.879997491836548, "step": 756} +{"train_info/time_between_train_steps": 0.00597381591796875, "step": 756} +{"info/global_step": 757, "train_info/time_within_train_step": 27.805830717086792, "step": 757} +{"train_info/time_between_train_steps": 0.005838632583618164, "step": 757} +{"info/global_step": 758, "train_info/time_within_train_step": 27.792192459106445, "step": 758} +{"train_info/time_between_train_steps": 0.005338430404663086, "step": 758} +{"info/global_step": 759, "train_info/time_within_train_step": 27.65488314628601, "step": 759} +{"train_info/time_between_train_steps": 0.01081705093383789, "step": 759} +{"info/global_step": 760, "train_info/time_within_train_step": 27.742547035217285, "step": 760} +{"train_info/time_between_train_steps": 0.005876302719116211, "step": 760} +{"info/global_step": 761, "train_info/time_within_train_step": 27.71707010269165, "step": 761} +{"train_info/time_between_train_steps": 0.005749940872192383, "step": 761} +{"info/global_step": 762, "train_info/time_within_train_step": 27.594311237335205, "step": 762} +{"train_info/time_between_train_steps": 0.005743741989135742, "step": 762} +{"info/global_step": 763, "train_info/time_within_train_step": 27.688180923461914, "step": 763} +{"train_info/time_between_train_steps": 0.010639667510986328, "step": 763} +{"info/global_step": 764, "train_info/time_within_train_step": 27.62977147102356, "step": 764} +{"train_info/time_between_train_steps": 0.005557537078857422, "step": 764} +{"info/global_step": 765, "train_info/time_within_train_step": 27.63027596473694, "step": 765} +{"train_info/time_between_train_steps": 0.005704641342163086, "step": 765} +{"info/global_step": 766, "train_info/time_within_train_step": 27.704169511795044, "step": 766} +{"train_info/time_between_train_steps": 0.005877256393432617, "step": 766} +{"info/global_step": 767, "train_info/time_within_train_step": 27.636005878448486, "step": 767} +{"train_info/time_between_train_steps": 0.005545616149902344, "step": 767} +{"info/global_step": 768, "train_info/time_within_train_step": 27.68924331665039, "step": 768} +{"train_info/time_between_train_steps": 0.0055162906646728516, "step": 768} +{"info/global_step": 769, "train_info/time_within_train_step": 27.64470887184143, "step": 769} +{"train_info/time_between_train_steps": 0.005898952484130859, "step": 769} +{"info/global_step": 770, "train_info/time_within_train_step": 27.779354333877563, "step": 770} +{"train_info/time_between_train_steps": 0.005662202835083008, "step": 770} +{"info/global_step": 771, "train_info/time_within_train_step": 27.681386470794678, "step": 771} +{"train_info/time_between_train_steps": 0.005681037902832031, "step": 771} +{"info/global_step": 772, "train_info/time_within_train_step": 27.62212610244751, "step": 772} +{"train_info/time_between_train_steps": 0.010466814041137695, "step": 772} +{"info/global_step": 773, "train_info/time_within_train_step": 27.79470205307007, "step": 773} +{"train_info/time_between_train_steps": 0.011869430541992188, "step": 773} +{"info/global_step": 774, "train_info/time_within_train_step": 27.653990983963013, "step": 774} +{"train_info/time_between_train_steps": 0.005621194839477539, "step": 774} +{"info/global_step": 775, "train_info/time_within_train_step": 27.719215393066406, "step": 775} +{"train_info/time_between_train_steps": 0.0054967403411865234, "step": 775} +{"info/global_step": 776, "train_info/time_within_train_step": 27.719305753707886, "step": 776} +{"train_info/time_between_train_steps": 0.005440950393676758, "step": 776} +{"info/global_step": 777, "train_info/time_within_train_step": 27.79801082611084, "step": 777} +{"train_info/time_between_train_steps": 0.010783195495605469, "step": 777} +{"info/global_step": 778, "train_info/time_within_train_step": 27.631476879119873, "step": 778} +{"train_info/time_between_train_steps": 0.006345272064208984, "step": 778} +{"info/global_step": 779, "train_info/time_within_train_step": 27.6759295463562, "step": 779} +{"train_info/time_between_train_steps": 0.006864786148071289, "step": 779} +{"info/global_step": 780, "train_info/time_within_train_step": 27.728647470474243, "step": 780} +{"train_info/time_between_train_steps": 0.006793022155761719, "step": 780} +{"train_info/time_between_train_steps": 13.199149131774902, "step": 780} +{"info/global_step": 781, "train_info/time_within_train_step": 27.682620763778687, "step": 781} +{"train_info/time_between_train_steps": 0.0059757232666015625, "step": 781} +{"info/global_step": 782, "train_info/time_within_train_step": 27.86619544029236, "step": 782} +{"train_info/time_between_train_steps": 0.005902528762817383, "step": 782} +{"info/global_step": 783, "train_info/time_within_train_step": 27.6522855758667, "step": 783} +{"train_info/time_between_train_steps": 0.006068229675292969, "step": 783} +{"info/global_step": 784, "train_info/time_within_train_step": 27.727379083633423, "step": 784} +{"train_info/time_between_train_steps": 0.0056688785552978516, "step": 784} +{"info/global_step": 785, "train_info/time_within_train_step": 27.606261014938354, "step": 785} +{"train_info/time_between_train_steps": 0.010955810546875, "step": 785} +{"info/global_step": 786, "train_info/time_within_train_step": 27.94230031967163, "step": 786} +{"train_info/time_between_train_steps": 0.0059850215911865234, "step": 786} +{"info/global_step": 787, "train_info/time_within_train_step": 27.803919076919556, "step": 787} +{"train_info/time_between_train_steps": 0.005986213684082031, "step": 787} +{"info/global_step": 788, "train_info/time_within_train_step": 27.96482563018799, "step": 788} +{"train_info/time_between_train_steps": 0.005823850631713867, "step": 788} +{"info/global_step": 789, "train_info/time_within_train_step": 27.640608072280884, "step": 789} +{"train_info/time_between_train_steps": 0.005741596221923828, "step": 789} +{"info/global_step": 790, "train_info/time_within_train_step": 27.899807929992676, "step": 790} +{"train_info/time_between_train_steps": 0.005795478820800781, "step": 790} +{"info/global_step": 791, "train_info/time_within_train_step": 27.749598503112793, "step": 791} +{"train_info/time_between_train_steps": 0.005647182464599609, "step": 791} +{"info/global_step": 792, "train_info/time_within_train_step": 27.745633125305176, "step": 792} +{"train_info/time_between_train_steps": 0.01067972183227539, "step": 792} +{"info/global_step": 793, "train_info/time_within_train_step": 27.667644739151, "step": 793} +{"train_info/time_between_train_steps": 0.0054569244384765625, "step": 793} +{"info/global_step": 794, "train_info/time_within_train_step": 27.662135124206543, "step": 794} +{"train_info/time_between_train_steps": 0.009972095489501953, "step": 794} +{"info/global_step": 795, "train_info/time_within_train_step": 27.725597381591797, "step": 795} +{"train_info/time_between_train_steps": 0.005742073059082031, "step": 795} +{"info/global_step": 796, "train_info/time_within_train_step": 27.630472421646118, "step": 796} +{"train_info/time_between_train_steps": 0.005643367767333984, "step": 796} +{"info/global_step": 797, "train_info/time_within_train_step": 27.64261245727539, "step": 797} +{"train_info/time_between_train_steps": 0.0056765079498291016, "step": 797} +{"info/global_step": 798, "train_info/time_within_train_step": 27.701451778411865, "step": 798} +{"train_info/time_between_train_steps": 0.005730628967285156, "step": 798} +{"info/global_step": 799, "train_info/time_within_train_step": 27.74445152282715, "step": 799} +{"train_info/time_between_train_steps": 0.005496978759765625, "step": 799} +{"info/global_step": 800, "train_info/time_within_train_step": 27.696613311767578, "step": 800} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733981167, "_runtime": 24552}, "step": 800} +{"logs": {"train/loss": 3.3208, "train/learning_rate": 0.00022222222222222218, "train/epoch": 26.02, "_timestamp": 1733981167, "_runtime": 24552}, "step": 800} +{"train_info/time_between_train_steps": 2.8211076259613037, "step": 800} +{"info/global_step": 801, "train_info/time_within_train_step": 27.66553568840027, "step": 801} +{"train_info/time_between_train_steps": 0.005776166915893555, "step": 801} +{"info/global_step": 802, "train_info/time_within_train_step": 27.718165159225464, "step": 802} +{"train_info/time_between_train_steps": 0.010930061340332031, "step": 802} +{"info/global_step": 803, "train_info/time_within_train_step": 27.701069116592407, "step": 803} +{"train_info/time_between_train_steps": 0.010230064392089844, "step": 803} +{"info/global_step": 804, "train_info/time_within_train_step": 27.867878675460815, "step": 804} +{"train_info/time_between_train_steps": 0.005820035934448242, "step": 804} +{"info/global_step": 805, "train_info/time_within_train_step": 27.76823592185974, "step": 805} +{"train_info/time_between_train_steps": 0.005801200866699219, "step": 805} +{"info/global_step": 806, "train_info/time_within_train_step": 27.651950359344482, "step": 806} +{"train_info/time_between_train_steps": 0.005278587341308594, "step": 806} +{"info/global_step": 807, "train_info/time_within_train_step": 27.704047918319702, "step": 807} +{"train_info/time_between_train_steps": 0.010220527648925781, "step": 807} +{"info/global_step": 808, "train_info/time_within_train_step": 27.637772798538208, "step": 808} +{"train_info/time_between_train_steps": 0.005970478057861328, "step": 808} +{"info/global_step": 809, "train_info/time_within_train_step": 27.650770902633667, "step": 809} +{"train_info/time_between_train_steps": 0.006140947341918945, "step": 809} +{"info/global_step": 810, "train_info/time_within_train_step": 27.763694047927856, "step": 810} +{"train_info/time_between_train_steps": 0.006695985794067383, "step": 810} +{"train_info/time_between_train_steps": 13.345008134841919, "step": 810} +{"info/global_step": 811, "train_info/time_within_train_step": 27.643246173858643, "step": 811} +{"train_info/time_between_train_steps": 0.005951881408691406, "step": 811} +{"info/global_step": 812, "train_info/time_within_train_step": 27.840428590774536, "step": 812} +{"train_info/time_between_train_steps": 0.0054492950439453125, "step": 812} +{"info/global_step": 813, "train_info/time_within_train_step": 27.636142015457153, "step": 813} +{"train_info/time_between_train_steps": 0.005530118942260742, "step": 813} +{"info/global_step": 814, "train_info/time_within_train_step": 27.729491472244263, "step": 814} +{"train_info/time_between_train_steps": 0.005982637405395508, "step": 814} +{"info/global_step": 815, "train_info/time_within_train_step": 27.71056818962097, "step": 815} +{"train_info/time_between_train_steps": 0.005341291427612305, "step": 815} +{"info/global_step": 816, "train_info/time_within_train_step": 27.758596658706665, "step": 816} +{"train_info/time_between_train_steps": 0.010914325714111328, "step": 816} +{"info/global_step": 817, "train_info/time_within_train_step": 27.692230224609375, "step": 817} +{"train_info/time_between_train_steps": 0.0059871673583984375, "step": 817} +{"info/global_step": 818, "train_info/time_within_train_step": 27.935131311416626, "step": 818} +{"train_info/time_between_train_steps": 0.005904674530029297, "step": 818} +{"info/global_step": 819, "train_info/time_within_train_step": 27.816017389297485, "step": 819} +{"train_info/time_between_train_steps": 0.007085561752319336, "step": 819} +{"info/global_step": 820, "train_info/time_within_train_step": 27.81762933731079, "step": 820} +{"train_info/time_between_train_steps": 0.0057027339935302734, "step": 820} +{"info/global_step": 821, "train_info/time_within_train_step": 27.81919503211975, "step": 821} +{"train_info/time_between_train_steps": 0.005158901214599609, "step": 821} +{"info/global_step": 822, "train_info/time_within_train_step": 27.67662239074707, "step": 822} +{"train_info/time_between_train_steps": 0.005769491195678711, "step": 822} +{"info/global_step": 823, "train_info/time_within_train_step": 27.709397792816162, "step": 823} +{"train_info/time_between_train_steps": 0.006427288055419922, "step": 823} +{"info/global_step": 824, "train_info/time_within_train_step": 27.64100432395935, "step": 824} +{"train_info/time_between_train_steps": 0.005760908126831055, "step": 824} +{"info/global_step": 825, "train_info/time_within_train_step": 27.646275520324707, "step": 825} +{"train_info/time_between_train_steps": 0.015575408935546875, "step": 825} +{"info/global_step": 826, "train_info/time_within_train_step": 27.631701469421387, "step": 826} +{"train_info/time_between_train_steps": 0.005713939666748047, "step": 826} +{"info/global_step": 827, "train_info/time_within_train_step": 27.708857536315918, "step": 827} +{"train_info/time_between_train_steps": 0.005609750747680664, "step": 827} +{"info/global_step": 828, "train_info/time_within_train_step": 27.62601923942566, "step": 828} +{"train_info/time_between_train_steps": 0.010581731796264648, "step": 828} +{"info/global_step": 829, "train_info/time_within_train_step": 27.653817653656006, "step": 829} +{"train_info/time_between_train_steps": 0.005776166915893555, "step": 829} +{"info/global_step": 830, "train_info/time_within_train_step": 27.670743703842163, "step": 830} +{"train_info/time_between_train_steps": 0.005165815353393555, "step": 830} +{"info/global_step": 831, "train_info/time_within_train_step": 27.674190998077393, "step": 831} +{"train_info/time_between_train_steps": 0.0052416324615478516, "step": 831} +{"info/global_step": 832, "train_info/time_within_train_step": 27.657660484313965, "step": 832} +{"train_info/time_between_train_steps": 0.0053958892822265625, "step": 832} +{"info/global_step": 833, "train_info/time_within_train_step": 27.697426557540894, "step": 833} +{"train_info/time_between_train_steps": 0.005934715270996094, "step": 833} +{"info/global_step": 834, "train_info/time_within_train_step": 27.62494921684265, "step": 834} +{"train_info/time_between_train_steps": 0.0056629180908203125, "step": 834} +{"info/global_step": 835, "train_info/time_within_train_step": 27.77689003944397, "step": 835} +{"train_info/time_between_train_steps": 0.005703926086425781, "step": 835} +{"info/global_step": 836, "train_info/time_within_train_step": 27.690714359283447, "step": 836} +{"train_info/time_between_train_steps": 0.008344650268554688, "step": 836} +{"info/global_step": 837, "train_info/time_within_train_step": 27.69820785522461, "step": 837} +{"train_info/time_between_train_steps": 0.014090299606323242, "step": 837} +{"info/global_step": 838, "train_info/time_within_train_step": 27.7501003742218, "step": 838} +{"train_info/time_between_train_steps": 0.00616908073425293, "step": 838} +{"info/global_step": 839, "train_info/time_within_train_step": 27.767473936080933, "step": 839} +{"train_info/time_between_train_steps": 0.011206388473510742, "step": 839} +{"info/global_step": 840, "train_info/time_within_train_step": 27.826356410980225, "step": 840} +{"train_info/time_between_train_steps": 0.006383180618286133, "step": 840} +{"train_info/time_between_train_steps": 13.538064241409302, "step": 840} +{"info/global_step": 841, "train_info/time_within_train_step": 27.62559938430786, "step": 841} +{"train_info/time_between_train_steps": 0.005848407745361328, "step": 841} +{"info/global_step": 842, "train_info/time_within_train_step": 27.875704050064087, "step": 842} +{"train_info/time_between_train_steps": 0.01061248779296875, "step": 842} +{"info/global_step": 843, "train_info/time_within_train_step": 27.665048122406006, "step": 843} +{"train_info/time_between_train_steps": 0.005866050720214844, "step": 843} +{"info/global_step": 844, "train_info/time_within_train_step": 27.76052713394165, "step": 844} +{"train_info/time_between_train_steps": 0.006094932556152344, "step": 844} +{"info/global_step": 845, "train_info/time_within_train_step": 27.67101740837097, "step": 845} +{"train_info/time_between_train_steps": 0.005758047103881836, "step": 845} +{"info/global_step": 846, "train_info/time_within_train_step": 27.810913801193237, "step": 846} +{"train_info/time_between_train_steps": 0.0059490203857421875, "step": 846} +{"info/global_step": 847, "train_info/time_within_train_step": 27.655303955078125, "step": 847} +{"train_info/time_between_train_steps": 0.0060651302337646484, "step": 847} +{"info/global_step": 848, "train_info/time_within_train_step": 27.88160753250122, "step": 848} +{"train_info/time_between_train_steps": 0.00581669807434082, "step": 848} +{"info/global_step": 849, "train_info/time_within_train_step": 27.611141681671143, "step": 849} +{"train_info/time_between_train_steps": 0.0056514739990234375, "step": 849} +{"info/global_step": 850, "train_info/time_within_train_step": 27.85739231109619, "step": 850} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733982583, "_runtime": 25968}, "step": 850} +{"logs": {"train/loss": 3.2586, "train/learning_rate": 0.00019444444444444443, "train/epoch": 28.01, "_timestamp": 1733982583, "_runtime": 25968}, "step": 850} +{"train_info/time_between_train_steps": 0.007708549499511719, "step": 850} +{"info/global_step": 851, "train_info/time_within_train_step": 27.718570470809937, "step": 851} +{"train_info/time_between_train_steps": 0.010741949081420898, "step": 851} +{"info/global_step": 852, "train_info/time_within_train_step": 27.597532987594604, "step": 852} +{"train_info/time_between_train_steps": 0.005743265151977539, "step": 852} +{"info/global_step": 853, "train_info/time_within_train_step": 27.61179280281067, "step": 853} +{"train_info/time_between_train_steps": 0.005353450775146484, "step": 853} +{"info/global_step": 854, "train_info/time_within_train_step": 27.671139001846313, "step": 854} +{"train_info/time_between_train_steps": 0.005707502365112305, "step": 854} +{"info/global_step": 855, "train_info/time_within_train_step": 27.681315183639526, "step": 855} +{"train_info/time_between_train_steps": 0.005917787551879883, "step": 855} +{"info/global_step": 856, "train_info/time_within_train_step": 27.684030294418335, "step": 856} +{"train_info/time_between_train_steps": 0.005437135696411133, "step": 856} +{"info/global_step": 857, "train_info/time_within_train_step": 27.671525239944458, "step": 857} +{"train_info/time_between_train_steps": 0.005576133728027344, "step": 857} +{"info/global_step": 858, "train_info/time_within_train_step": 27.634629726409912, "step": 858} +{"train_info/time_between_train_steps": 0.005960941314697266, "step": 858} +{"info/global_step": 859, "train_info/time_within_train_step": 27.733899354934692, "step": 859} +{"train_info/time_between_train_steps": 0.00567626953125, "step": 859} +{"info/global_step": 860, "train_info/time_within_train_step": 27.688504219055176, "step": 860} +{"train_info/time_between_train_steps": 0.010727405548095703, "step": 860} +{"info/global_step": 861, "train_info/time_within_train_step": 27.677213668823242, "step": 861} +{"train_info/time_between_train_steps": 0.005843400955200195, "step": 861} +{"info/global_step": 862, "train_info/time_within_train_step": 27.59881591796875, "step": 862} +{"train_info/time_between_train_steps": 0.005678653717041016, "step": 862} +{"info/global_step": 863, "train_info/time_within_train_step": 27.64249038696289, "step": 863} +{"train_info/time_between_train_steps": 0.005686283111572266, "step": 863} +{"info/global_step": 864, "train_info/time_within_train_step": 27.71365761756897, "step": 864} +{"train_info/time_between_train_steps": 0.006322383880615234, "step": 864} +{"info/global_step": 865, "train_info/time_within_train_step": 27.92376446723938, "step": 865} +{"train_info/time_between_train_steps": 0.009647130966186523, "step": 865} +{"info/global_step": 866, "train_info/time_within_train_step": 27.692285537719727, "step": 866} +{"train_info/time_between_train_steps": 0.009467124938964844, "step": 866} +{"info/global_step": 867, "train_info/time_within_train_step": 27.74914288520813, "step": 867} +{"train_info/time_between_train_steps": 0.01268625259399414, "step": 867} +{"info/global_step": 868, "train_info/time_within_train_step": 27.783721446990967, "step": 868} +{"train_info/time_between_train_steps": 0.005659580230712891, "step": 868} +{"info/global_step": 869, "train_info/time_within_train_step": 27.7075457572937, "step": 869} +{"train_info/time_between_train_steps": 0.006285905838012695, "step": 869} +{"info/global_step": 870, "train_info/time_within_train_step": 27.708760023117065, "step": 870} +{"train_info/time_between_train_steps": 0.0064699649810791016, "step": 870} +{"train_info/time_between_train_steps": 13.205479621887207, "step": 870} +{"info/global_step": 871, "train_info/time_within_train_step": 27.66756319999695, "step": 871} +{"train_info/time_between_train_steps": 0.011016130447387695, "step": 871} +{"info/global_step": 872, "train_info/time_within_train_step": 27.80808687210083, "step": 872} +{"train_info/time_between_train_steps": 0.005182743072509766, "step": 872} +{"info/global_step": 873, "train_info/time_within_train_step": 27.703675270080566, "step": 873} +{"train_info/time_between_train_steps": 0.0060312747955322266, "step": 873} +{"info/global_step": 874, "train_info/time_within_train_step": 27.831284761428833, "step": 874} +{"train_info/time_between_train_steps": 0.006026744842529297, "step": 874} +{"info/global_step": 875, "train_info/time_within_train_step": 27.668376922607422, "step": 875} +{"train_info/time_between_train_steps": 0.006039142608642578, "step": 875} +{"info/global_step": 876, "train_info/time_within_train_step": 27.753189086914062, "step": 876} +{"train_info/time_between_train_steps": 0.0053937435150146484, "step": 876} +{"info/global_step": 877, "train_info/time_within_train_step": 27.704580783843994, "step": 877} +{"train_info/time_between_train_steps": 0.005661725997924805, "step": 877} +{"info/global_step": 878, "train_info/time_within_train_step": 27.776188373565674, "step": 878} +{"train_info/time_between_train_steps": 0.005625724792480469, "step": 878} +{"info/global_step": 879, "train_info/time_within_train_step": 27.693099975585938, "step": 879} +{"train_info/time_between_train_steps": 0.006074666976928711, "step": 879} +{"info/global_step": 880, "train_info/time_within_train_step": 27.814544677734375, "step": 880} +{"train_info/time_between_train_steps": 0.005965232849121094, "step": 880} +{"info/global_step": 881, "train_info/time_within_train_step": 27.83345937728882, "step": 881} +{"train_info/time_between_train_steps": 0.005776882171630859, "step": 881} +{"info/global_step": 882, "train_info/time_within_train_step": 27.823894739151, "step": 882} +{"train_info/time_between_train_steps": 0.010229825973510742, "step": 882} +{"info/global_step": 883, "train_info/time_within_train_step": 27.6579270362854, "step": 883} +{"train_info/time_between_train_steps": 0.005676984786987305, "step": 883} +{"info/global_step": 884, "train_info/time_within_train_step": 27.63091492652893, "step": 884} +{"train_info/time_between_train_steps": 0.0058460235595703125, "step": 884} +{"info/global_step": 885, "train_info/time_within_train_step": 27.626373291015625, "step": 885} +{"train_info/time_between_train_steps": 0.01099538803100586, "step": 885} +{"info/global_step": 886, "train_info/time_within_train_step": 27.720350742340088, "step": 886} +{"train_info/time_between_train_steps": 0.009690284729003906, "step": 886} +{"info/global_step": 887, "train_info/time_within_train_step": 27.627918004989624, "step": 887} +{"train_info/time_between_train_steps": 0.0059986114501953125, "step": 887} +{"info/global_step": 888, "train_info/time_within_train_step": 27.67137622833252, "step": 888} +{"train_info/time_between_train_steps": 0.0056765079498291016, "step": 888} +{"info/global_step": 889, "train_info/time_within_train_step": 27.618184566497803, "step": 889} +{"train_info/time_between_train_steps": 0.005201101303100586, "step": 889} +{"info/global_step": 890, "train_info/time_within_train_step": 27.75932550430298, "step": 890} +{"train_info/time_between_train_steps": 0.005738973617553711, "step": 890} +{"info/global_step": 891, "train_info/time_within_train_step": 27.746987104415894, "step": 891} +{"train_info/time_between_train_steps": 0.008961200714111328, "step": 891} +{"info/global_step": 892, "train_info/time_within_train_step": 27.6553795337677, "step": 892} +{"train_info/time_between_train_steps": 0.005747795104980469, "step": 892} +{"info/global_step": 893, "train_info/time_within_train_step": 27.599791049957275, "step": 893} +{"train_info/time_between_train_steps": 0.0052793025970458984, "step": 893} +{"info/global_step": 894, "train_info/time_within_train_step": 27.675915002822876, "step": 894} +{"train_info/time_between_train_steps": 0.010496377944946289, "step": 894} +{"info/global_step": 895, "train_info/time_within_train_step": 27.67742657661438, "step": 895} +{"train_info/time_between_train_steps": 0.005518436431884766, "step": 895} +{"info/global_step": 896, "train_info/time_within_train_step": 27.79856514930725, "step": 896} +{"train_info/time_between_train_steps": 0.0052530765533447266, "step": 896} +{"info/global_step": 897, "train_info/time_within_train_step": 27.760733366012573, "step": 897} +{"train_info/time_between_train_steps": 0.006810188293457031, "step": 897} +{"info/global_step": 898, "train_info/time_within_train_step": 27.643564701080322, "step": 898} +{"train_info/time_between_train_steps": 0.00606083869934082, "step": 898} +{"info/global_step": 899, "train_info/time_within_train_step": 27.691232681274414, "step": 899} +{"train_info/time_between_train_steps": 0.006108522415161133, "step": 899} +{"info/global_step": 900, "train_info/time_within_train_step": 27.708823442459106, "step": 900} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733983982, "_runtime": 27367}, "step": 900} +{"logs": {"train/loss": 3.179, "train/learning_rate": 0.00016666666666666666, "train/epoch": 29.02, "_timestamp": 1733983982, "_runtime": 27367}, "step": 900} +{"train_info/time_between_train_steps": 2.660590171813965, "step": 900} +{"train_info/time_between_train_steps": 15.8445565700531, "step": 900} +{"info/global_step": 901, "train_info/time_within_train_step": 27.73105478286743, "step": 901} +{"train_info/time_between_train_steps": 0.00522160530090332, "step": 901} +{"info/global_step": 902, "train_info/time_within_train_step": 27.95388674736023, "step": 902} +{"train_info/time_between_train_steps": 0.0058324337005615234, "step": 902} +{"info/global_step": 903, "train_info/time_within_train_step": 27.662715435028076, "step": 903} +{"train_info/time_between_train_steps": 0.009663820266723633, "step": 903} +{"info/global_step": 904, "train_info/time_within_train_step": 27.758185148239136, "step": 904} +{"train_info/time_between_train_steps": 0.06076955795288086, "step": 904} +{"info/global_step": 905, "train_info/time_within_train_step": 27.61523675918579, "step": 905} +{"train_info/time_between_train_steps": 0.011188030242919922, "step": 905} +{"info/global_step": 906, "train_info/time_within_train_step": 27.862079858779907, "step": 906} +{"train_info/time_between_train_steps": 0.005752086639404297, "step": 906} +{"info/global_step": 907, "train_info/time_within_train_step": 27.670915365219116, "step": 907} +{"train_info/time_between_train_steps": 0.005853891372680664, "step": 907} +{"info/global_step": 908, "train_info/time_within_train_step": 27.88821506500244, "step": 908} +{"train_info/time_between_train_steps": 0.005722522735595703, "step": 908} +{"info/global_step": 909, "train_info/time_within_train_step": 27.618356227874756, "step": 909} +{"train_info/time_between_train_steps": 0.005611419677734375, "step": 909} +{"info/global_step": 910, "train_info/time_within_train_step": 27.70417809486389, "step": 910} +{"train_info/time_between_train_steps": 0.005715131759643555, "step": 910} +{"info/global_step": 911, "train_info/time_within_train_step": 27.63526701927185, "step": 911} +{"train_info/time_between_train_steps": 0.005567312240600586, "step": 911} +{"info/global_step": 912, "train_info/time_within_train_step": 27.695374488830566, "step": 912} +{"train_info/time_between_train_steps": 0.005766630172729492, "step": 912} +{"info/global_step": 913, "train_info/time_within_train_step": 27.644776582717896, "step": 913} +{"train_info/time_between_train_steps": 0.0055866241455078125, "step": 913} +{"info/global_step": 914, "train_info/time_within_train_step": 27.66183042526245, "step": 914} +{"train_info/time_between_train_steps": 0.005616426467895508, "step": 914} +{"info/global_step": 915, "train_info/time_within_train_step": 27.749658584594727, "step": 915} +{"train_info/time_between_train_steps": 0.005978584289550781, "step": 915} +{"info/global_step": 916, "train_info/time_within_train_step": 27.70905613899231, "step": 916} +{"train_info/time_between_train_steps": 0.005859851837158203, "step": 916} +{"info/global_step": 917, "train_info/time_within_train_step": 27.64148712158203, "step": 917} +{"train_info/time_between_train_steps": 0.009392261505126953, "step": 917} +{"info/global_step": 918, "train_info/time_within_train_step": 27.602000951766968, "step": 918} +{"train_info/time_between_train_steps": 0.005628108978271484, "step": 918} +{"info/global_step": 919, "train_info/time_within_train_step": 27.621498107910156, "step": 919} +{"train_info/time_between_train_steps": 0.005420207977294922, "step": 919} +{"info/global_step": 920, "train_info/time_within_train_step": 27.599782705307007, "step": 920} +{"train_info/time_between_train_steps": 0.0056912899017333984, "step": 920} +{"info/global_step": 921, "train_info/time_within_train_step": 27.73608946800232, "step": 921} +{"train_info/time_between_train_steps": 0.015220165252685547, "step": 921} +{"info/global_step": 922, "train_info/time_within_train_step": 27.69664716720581, "step": 922} +{"train_info/time_between_train_steps": 0.010313272476196289, "step": 922} +{"info/global_step": 923, "train_info/time_within_train_step": 27.703861951828003, "step": 923} +{"train_info/time_between_train_steps": 0.006063699722290039, "step": 923} +{"info/global_step": 924, "train_info/time_within_train_step": 27.68170666694641, "step": 924} +{"train_info/time_between_train_steps": 0.009485483169555664, "step": 924} +{"info/global_step": 925, "train_info/time_within_train_step": 27.630741596221924, "step": 925} +{"train_info/time_between_train_steps": 0.006281852722167969, "step": 925} +{"info/global_step": 926, "train_info/time_within_train_step": 27.661811351776123, "step": 926} +{"train_info/time_between_train_steps": 0.005719184875488281, "step": 926} +{"info/global_step": 927, "train_info/time_within_train_step": 27.81045365333557, "step": 927} +{"train_info/time_between_train_steps": 0.005533933639526367, "step": 927} +{"info/global_step": 928, "train_info/time_within_train_step": 27.649951934814453, "step": 928} +{"train_info/time_between_train_steps": 0.006158351898193359, "step": 928} +{"info/global_step": 929, "train_info/time_within_train_step": 27.706718921661377, "step": 929} +{"train_info/time_between_train_steps": 0.010410785675048828, "step": 929} +{"info/global_step": 930, "train_info/time_within_train_step": 27.703025102615356, "step": 930} +{"train_info/time_between_train_steps": 0.011952877044677734, "step": 930} +{"train_info/time_between_train_steps": 13.486564874649048, "step": 930} +{"info/global_step": 931, "train_info/time_within_train_step": 27.716692447662354, "step": 931} +{"train_info/time_between_train_steps": 0.0052032470703125, "step": 931} +{"info/global_step": 932, "train_info/time_within_train_step": 27.831042289733887, "step": 932} +{"train_info/time_between_train_steps": 0.005364179611206055, "step": 932} +{"info/global_step": 933, "train_info/time_within_train_step": 27.72288990020752, "step": 933} +{"train_info/time_between_train_steps": 0.006115913391113281, "step": 933} +{"info/global_step": 934, "train_info/time_within_train_step": 27.89282774925232, "step": 934} +{"train_info/time_between_train_steps": 0.005473136901855469, "step": 934} +{"info/global_step": 935, "train_info/time_within_train_step": 27.79576563835144, "step": 935} +{"train_info/time_between_train_steps": 0.00571441650390625, "step": 935} +{"info/global_step": 936, "train_info/time_within_train_step": 27.78354001045227, "step": 936} +{"train_info/time_between_train_steps": 0.005900382995605469, "step": 936} +{"info/global_step": 937, "train_info/time_within_train_step": 27.661318063735962, "step": 937} +{"train_info/time_between_train_steps": 0.010620832443237305, "step": 937} +{"info/global_step": 938, "train_info/time_within_train_step": 27.807271242141724, "step": 938} +{"train_info/time_between_train_steps": 0.00588226318359375, "step": 938} +{"info/global_step": 939, "train_info/time_within_train_step": 27.711512804031372, "step": 939} +{"train_info/time_between_train_steps": 0.005963325500488281, "step": 939} +{"info/global_step": 940, "train_info/time_within_train_step": 27.800089836120605, "step": 940} +{"train_info/time_between_train_steps": 0.005915164947509766, "step": 940} +{"info/global_step": 941, "train_info/time_within_train_step": 27.782894372940063, "step": 941} +{"train_info/time_between_train_steps": 0.005295991897583008, "step": 941} +{"info/global_step": 942, "train_info/time_within_train_step": 27.772783279418945, "step": 942} +{"train_info/time_between_train_steps": 0.005496025085449219, "step": 942} +{"info/global_step": 943, "train_info/time_within_train_step": 27.628540754318237, "step": 943} +{"train_info/time_between_train_steps": 0.005318403244018555, "step": 943} +{"info/global_step": 944, "train_info/time_within_train_step": 27.711432218551636, "step": 944} +{"train_info/time_between_train_steps": 0.0063629150390625, "step": 944} +{"info/global_step": 945, "train_info/time_within_train_step": 27.69985318183899, "step": 945} +{"train_info/time_between_train_steps": 0.014289617538452148, "step": 945} +{"info/global_step": 946, "train_info/time_within_train_step": 27.62814235687256, "step": 946} +{"train_info/time_between_train_steps": 0.005244016647338867, "step": 946} +{"info/global_step": 947, "train_info/time_within_train_step": 27.63744854927063, "step": 947} +{"train_info/time_between_train_steps": 0.005648136138916016, "step": 947} +{"info/global_step": 948, "train_info/time_within_train_step": 27.630499601364136, "step": 948} +{"train_info/time_between_train_steps": 0.0058057308197021484, "step": 948} +{"info/global_step": 949, "train_info/time_within_train_step": 27.80899977684021, "step": 949} +{"train_info/time_between_train_steps": 0.016402482986450195, "step": 949} +{"info/global_step": 950, "train_info/time_within_train_step": 27.714191436767578, "step": 950} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733985398, "_runtime": 28783}, "step": 950} +{"logs": {"train/loss": 3.1549, "train/learning_rate": 0.0001388888888888889, "train/epoch": 31.02, "_timestamp": 1733985398, "_runtime": 28783}, "step": 950} +{"train_info/time_between_train_steps": 0.007576942443847656, "step": 950} +{"info/global_step": 951, "train_info/time_within_train_step": 27.781614303588867, "step": 951} +{"train_info/time_between_train_steps": 0.01015615463256836, "step": 951} +{"info/global_step": 952, "train_info/time_within_train_step": 27.73526954650879, "step": 952} +{"train_info/time_between_train_steps": 0.005347490310668945, "step": 952} +{"info/global_step": 953, "train_info/time_within_train_step": 27.65146231651306, "step": 953} +{"train_info/time_between_train_steps": 0.0056345462799072266, "step": 953} +{"info/global_step": 954, "train_info/time_within_train_step": 27.72733211517334, "step": 954} +{"train_info/time_between_train_steps": 0.00553131103515625, "step": 954} +{"info/global_step": 955, "train_info/time_within_train_step": 27.647069692611694, "step": 955} +{"train_info/time_between_train_steps": 0.010740041732788086, "step": 955} +{"info/global_step": 956, "train_info/time_within_train_step": 27.758431911468506, "step": 956} +{"train_info/time_between_train_steps": 0.01036977767944336, "step": 956} +{"info/global_step": 957, "train_info/time_within_train_step": 27.706799268722534, "step": 957} +{"train_info/time_between_train_steps": 0.01078939437866211, "step": 957} +{"info/global_step": 958, "train_info/time_within_train_step": 27.852243661880493, "step": 958} +{"train_info/time_between_train_steps": 0.005906581878662109, "step": 958} +{"info/global_step": 959, "train_info/time_within_train_step": 27.6240816116333, "step": 959} +{"train_info/time_between_train_steps": 0.006083965301513672, "step": 959} +{"info/global_step": 960, "train_info/time_within_train_step": 27.627175331115723, "step": 960} +{"train_info/time_between_train_steps": 0.0068874359130859375, "step": 960} +{"train_info/time_between_train_steps": 13.635254621505737, "step": 960} +{"info/global_step": 961, "train_info/time_within_train_step": 27.63496208190918, "step": 961} +{"train_info/time_between_train_steps": 0.0053043365478515625, "step": 961} +{"info/global_step": 962, "train_info/time_within_train_step": 27.721856832504272, "step": 962} +{"train_info/time_between_train_steps": 0.005639076232910156, "step": 962} +{"info/global_step": 963, "train_info/time_within_train_step": 27.63774800300598, "step": 963} +{"train_info/time_between_train_steps": 0.005252361297607422, "step": 963} +{"info/global_step": 964, "train_info/time_within_train_step": 27.818084716796875, "step": 964} +{"train_info/time_between_train_steps": 0.005486249923706055, "step": 964} +{"info/global_step": 965, "train_info/time_within_train_step": 27.741613149642944, "step": 965} +{"train_info/time_between_train_steps": 0.005602598190307617, "step": 965} +{"info/global_step": 966, "train_info/time_within_train_step": 27.754581451416016, "step": 966} +{"train_info/time_between_train_steps": 0.005858659744262695, "step": 966} +{"info/global_step": 967, "train_info/time_within_train_step": 27.67779278755188, "step": 967} +{"train_info/time_between_train_steps": 0.005708932876586914, "step": 967} +{"info/global_step": 968, "train_info/time_within_train_step": 27.900858640670776, "step": 968} +{"train_info/time_between_train_steps": 0.0058743953704833984, "step": 968} +{"info/global_step": 969, "train_info/time_within_train_step": 27.777782201766968, "step": 969} +{"train_info/time_between_train_steps": 0.006230354309082031, "step": 969} +{"info/global_step": 970, "train_info/time_within_train_step": 27.85850691795349, "step": 970} +{"train_info/time_between_train_steps": 0.005839347839355469, "step": 970} +{"info/global_step": 971, "train_info/time_within_train_step": 27.640284776687622, "step": 971} +{"train_info/time_between_train_steps": 0.0057146549224853516, "step": 971} +{"info/global_step": 972, "train_info/time_within_train_step": 27.73103356361389, "step": 972} +{"train_info/time_between_train_steps": 0.0058362483978271484, "step": 972} +{"info/global_step": 973, "train_info/time_within_train_step": 27.814488172531128, "step": 973} +{"train_info/time_between_train_steps": 0.010936498641967773, "step": 973} +{"info/global_step": 974, "train_info/time_within_train_step": 27.673176050186157, "step": 974} +{"train_info/time_between_train_steps": 0.005655050277709961, "step": 974} +{"info/global_step": 975, "train_info/time_within_train_step": 27.595670700073242, "step": 975} +{"train_info/time_between_train_steps": 0.005731105804443359, "step": 975} +{"info/global_step": 976, "train_info/time_within_train_step": 27.633810997009277, "step": 976} +{"train_info/time_between_train_steps": 0.005312681198120117, "step": 976} +{"info/global_step": 977, "train_info/time_within_train_step": 27.674505710601807, "step": 977} +{"train_info/time_between_train_steps": 0.005585908889770508, "step": 977} +{"info/global_step": 978, "train_info/time_within_train_step": 27.639204263687134, "step": 978} +{"train_info/time_between_train_steps": 0.010927677154541016, "step": 978} +{"info/global_step": 979, "train_info/time_within_train_step": 27.669069528579712, "step": 979} +{"train_info/time_between_train_steps": 0.0059278011322021484, "step": 979} +{"info/global_step": 980, "train_info/time_within_train_step": 27.67775011062622, "step": 980} +{"train_info/time_between_train_steps": 0.0056972503662109375, "step": 980} +{"info/global_step": 981, "train_info/time_within_train_step": 27.678637266159058, "step": 981} +{"train_info/time_between_train_steps": 0.01063227653503418, "step": 981} +{"info/global_step": 982, "train_info/time_within_train_step": 27.688905239105225, "step": 982} +{"train_info/time_between_train_steps": 0.011188507080078125, "step": 982} +{"info/global_step": 983, "train_info/time_within_train_step": 27.731500387191772, "step": 983} +{"train_info/time_between_train_steps": 0.005640745162963867, "step": 983} +{"info/global_step": 984, "train_info/time_within_train_step": 27.648539543151855, "step": 984} +{"train_info/time_between_train_steps": 0.0060198307037353516, "step": 984} +{"info/global_step": 985, "train_info/time_within_train_step": 27.691577434539795, "step": 985} +{"train_info/time_between_train_steps": 0.012291431427001953, "step": 985} +{"info/global_step": 986, "train_info/time_within_train_step": 27.687376737594604, "step": 986} +{"train_info/time_between_train_steps": 0.005702495574951172, "step": 986} +{"info/global_step": 987, "train_info/time_within_train_step": 27.69130825996399, "step": 987} +{"train_info/time_between_train_steps": 0.005896806716918945, "step": 987} +{"info/global_step": 988, "train_info/time_within_train_step": 27.645320653915405, "step": 988} +{"train_info/time_between_train_steps": 0.006063222885131836, "step": 988} +{"info/global_step": 989, "train_info/time_within_train_step": 27.763041973114014, "step": 989} +{"train_info/time_between_train_steps": 0.005593776702880859, "step": 989} +{"info/global_step": 990, "train_info/time_within_train_step": 27.675376415252686, "step": 990} +{"train_info/time_between_train_steps": 0.011799097061157227, "step": 990} +{"train_info/time_between_train_steps": 13.140018463134766, "step": 990} +{"info/global_step": 991, "train_info/time_within_train_step": 27.66829490661621, "step": 991} +{"train_info/time_between_train_steps": 0.005905628204345703, "step": 991} +{"info/global_step": 992, "train_info/time_within_train_step": 27.80946135520935, "step": 992} +{"train_info/time_between_train_steps": 0.01124262809753418, "step": 992} +{"info/global_step": 993, "train_info/time_within_train_step": 27.67364239692688, "step": 993} +{"train_info/time_between_train_steps": 0.010602712631225586, "step": 993} +{"info/global_step": 994, "train_info/time_within_train_step": 27.832902908325195, "step": 994} +{"train_info/time_between_train_steps": 0.005676746368408203, "step": 994} +{"info/global_step": 995, "train_info/time_within_train_step": 27.621479511260986, "step": 995} +{"train_info/time_between_train_steps": 0.005772829055786133, "step": 995} +{"info/global_step": 996, "train_info/time_within_train_step": 27.885953664779663, "step": 996} +{"train_info/time_between_train_steps": 0.005971431732177734, "step": 996} +{"info/global_step": 997, "train_info/time_within_train_step": 27.65652370452881, "step": 997} +{"train_info/time_between_train_steps": 0.006066322326660156, "step": 997} +{"info/global_step": 998, "train_info/time_within_train_step": 27.779054880142212, "step": 998} +{"train_info/time_between_train_steps": 0.01052403450012207, "step": 998} +{"info/global_step": 999, "train_info/time_within_train_step": 27.724469423294067, "step": 999} +{"train_info/time_between_train_steps": 0.010540962219238281, "step": 999} +{"info/global_step": 1000, "train_info/time_within_train_step": 27.750713348388672, "step": 1000} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 24598.0, "train_info/memory_max_reserved": 24598.0, "_timestamp": 1733986811, "_runtime": 30196}, "step": 1000} +{"logs": {"train/loss": 3.109, "train/learning_rate": 0.00011111111111111109, "train/epoch": 33.01, "_timestamp": 1733986811, "_runtime": 30196}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 28596.0, "train_info/memory_max_reserved": 28596.0, "_timestamp": 1733986813, "_runtime": 30198}, "step": 1000} +{"logs": {"eval/loss": 3.771193027496338, "eval/runtime": 2.3153, "eval/samples_per_second": 51.828, "eval/steps_per_second": 3.455, "train/epoch": 33.01, "_timestamp": 1733986813, "_runtime": 30198}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 28596.0, "train_info/memory_max_reserved": 28596.0, "_timestamp": 1733986813, "_runtime": 30198}, "step": 1000} +{"logs": {"eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 3.771193027496338, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 43.43184933001428, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 2.3153, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 51.828, "train/epoch": 33.01, "_timestamp": 1733986813, "_runtime": 30198}, "step": 1000} +{"train_info/time_between_train_steps": 5.069730758666992, "step": 1000} +{"info/global_step": 1001, "train_info/time_within_train_step": 27.699512720108032, "step": 1001} +{"train_info/time_between_train_steps": 0.005894660949707031, "step": 1001} +{"info/global_step": 1002, "train_info/time_within_train_step": 27.604471683502197, "step": 1002} +{"train_info/time_between_train_steps": 0.005524635314941406, "step": 1002} +{"info/global_step": 1003, "train_info/time_within_train_step": 27.6469886302948, "step": 1003} +{"train_info/time_between_train_steps": 0.0055730342864990234, "step": 1003} +{"info/global_step": 1004, "train_info/time_within_train_step": 27.859611749649048, "step": 1004} +{"train_info/time_between_train_steps": 0.005871295928955078, "step": 1004} +{"info/global_step": 1005, "train_info/time_within_train_step": 27.700226068496704, "step": 1005} +{"train_info/time_between_train_steps": 0.0062596797943115234, "step": 1005} +{"info/global_step": 1006, "train_info/time_within_train_step": 27.685474634170532, "step": 1006} +{"train_info/time_between_train_steps": 0.00604557991027832, "step": 1006} +{"info/global_step": 1007, "train_info/time_within_train_step": 27.64798140525818, "step": 1007} +{"train_info/time_between_train_steps": 0.006103515625, "step": 1007} +{"info/global_step": 1008, "train_info/time_within_train_step": 27.663928985595703, "step": 1008} +{"train_info/time_between_train_steps": 0.005775928497314453, "step": 1008} +{"info/global_step": 1009, "train_info/time_within_train_step": 27.62166976928711, "step": 1009} +{"train_info/time_between_train_steps": 0.005658626556396484, "step": 1009} +{"info/global_step": 1010, "train_info/time_within_train_step": 27.618370294570923, "step": 1010} +{"train_info/time_between_train_steps": 0.0058746337890625, "step": 1010} +{"info/global_step": 1011, "train_info/time_within_train_step": 27.606687784194946, "step": 1011} +{"train_info/time_between_train_steps": 0.005725860595703125, "step": 1011} +{"info/global_step": 1012, "train_info/time_within_train_step": 27.665467977523804, "step": 1012} +{"train_info/time_between_train_steps": 0.010460376739501953, "step": 1012} +{"info/global_step": 1013, "train_info/time_within_train_step": 27.677388668060303, "step": 1013} +{"train_info/time_between_train_steps": 0.005822896957397461, "step": 1013} +{"info/global_step": 1014, "train_info/time_within_train_step": 27.644346952438354, "step": 1014} +{"train_info/time_between_train_steps": 0.005778312683105469, "step": 1014} +{"info/global_step": 1015, "train_info/time_within_train_step": 27.74756622314453, "step": 1015} +{"train_info/time_between_train_steps": 0.005916118621826172, "step": 1015} +{"info/global_step": 1016, "train_info/time_within_train_step": 27.63974905014038, "step": 1016} +{"train_info/time_between_train_steps": 0.005663633346557617, "step": 1016} +{"info/global_step": 1017, "train_info/time_within_train_step": 27.69257426261902, "step": 1017} +{"train_info/time_between_train_steps": 0.0066318511962890625, "step": 1017} +{"info/global_step": 1018, "train_info/time_within_train_step": 27.69157648086548, "step": 1018} +{"train_info/time_between_train_steps": 0.005999088287353516, "step": 1018} +{"info/global_step": 1019, "train_info/time_within_train_step": 27.689592123031616, "step": 1019} +{"train_info/time_between_train_steps": 0.011631965637207031, "step": 1019} +{"info/global_step": 1020, "train_info/time_within_train_step": 27.800703287124634, "step": 1020} +{"train_info/time_between_train_steps": 0.011857748031616211, "step": 1020} +{"train_info/time_between_train_steps": 13.370172262191772, "step": 1020} +{"info/global_step": 1021, "train_info/time_within_train_step": 27.629762649536133, "step": 1021} +{"train_info/time_between_train_steps": 0.005563020706176758, "step": 1021} +{"info/global_step": 1022, "train_info/time_within_train_step": 27.85153102874756, "step": 1022} +{"train_info/time_between_train_steps": 0.006085634231567383, "step": 1022} +{"info/global_step": 1023, "train_info/time_within_train_step": 27.849653244018555, "step": 1023} +{"train_info/time_between_train_steps": 0.005708932876586914, "step": 1023} +{"info/global_step": 1024, "train_info/time_within_train_step": 27.831469535827637, "step": 1024} +{"train_info/time_between_train_steps": 0.006080150604248047, "step": 1024} +{"info/global_step": 1025, "train_info/time_within_train_step": 27.668826818466187, "step": 1025} +{"train_info/time_between_train_steps": 0.00648808479309082, "step": 1025} +{"info/global_step": 1026, "train_info/time_within_train_step": 27.835281133651733, "step": 1026} +{"train_info/time_between_train_steps": 0.005782127380371094, "step": 1026} +{"info/global_step": 1027, "train_info/time_within_train_step": 27.702819347381592, "step": 1027} +{"train_info/time_between_train_steps": 0.0059795379638671875, "step": 1027} +{"info/global_step": 1028, "train_info/time_within_train_step": 27.90062975883484, "step": 1028} +{"train_info/time_between_train_steps": 0.005995988845825195, "step": 1028} +{"info/global_step": 1029, "train_info/time_within_train_step": 27.8054678440094, "step": 1029} +{"train_info/time_between_train_steps": 0.0057697296142578125, "step": 1029} +{"info/global_step": 1030, "train_info/time_within_train_step": 27.73228168487549, "step": 1030} +{"train_info/time_between_train_steps": 0.0057146549224853516, "step": 1030} +{"info/global_step": 1031, "train_info/time_within_train_step": 27.694977045059204, "step": 1031} +{"train_info/time_between_train_steps": 0.008240461349487305, "step": 1031} +{"info/global_step": 1032, "train_info/time_within_train_step": 27.689348220825195, "step": 1032} +{"train_info/time_between_train_steps": 0.0057277679443359375, "step": 1032} +{"info/global_step": 1033, "train_info/time_within_train_step": 27.755364656448364, "step": 1033} +{"train_info/time_between_train_steps": 0.005169868469238281, "step": 1033} +{"info/global_step": 1034, "train_info/time_within_train_step": 27.67505121231079, "step": 1034} +{"train_info/time_between_train_steps": 0.005664825439453125, "step": 1034} +{"info/global_step": 1035, "train_info/time_within_train_step": 27.855881929397583, "step": 1035} +{"train_info/time_between_train_steps": 0.005771160125732422, "step": 1035} +{"info/global_step": 1036, "train_info/time_within_train_step": 27.71006441116333, "step": 1036} +{"train_info/time_between_train_steps": 0.005808591842651367, "step": 1036} +{"info/global_step": 1037, "train_info/time_within_train_step": 27.6796293258667, "step": 1037} +{"train_info/time_between_train_steps": 0.005964756011962891, "step": 1037} +{"info/global_step": 1038, "train_info/time_within_train_step": 27.73997163772583, "step": 1038} +{"train_info/time_between_train_steps": 0.005989789962768555, "step": 1038} +{"info/global_step": 1039, "train_info/time_within_train_step": 27.686118602752686, "step": 1039} +{"train_info/time_between_train_steps": 0.00570368766784668, "step": 1039} +{"info/global_step": 1040, "train_info/time_within_train_step": 27.738727807998657, "step": 1040} +{"train_info/time_between_train_steps": 0.00579524040222168, "step": 1040} +{"info/global_step": 1041, "train_info/time_within_train_step": 27.6304612159729, "step": 1041} +{"train_info/time_between_train_steps": 0.005736827850341797, "step": 1041} +{"info/global_step": 1042, "train_info/time_within_train_step": 27.69089937210083, "step": 1042} +{"train_info/time_between_train_steps": 0.009948253631591797, "step": 1042} +{"info/global_step": 1043, "train_info/time_within_train_step": 27.783607959747314, "step": 1043} +{"train_info/time_between_train_steps": 0.006125211715698242, "step": 1043} +{"info/global_step": 1044, "train_info/time_within_train_step": 27.610169172286987, "step": 1044} +{"train_info/time_between_train_steps": 0.005336284637451172, "step": 1044} +{"info/global_step": 1045, "train_info/time_within_train_step": 27.603317975997925, "step": 1045} +{"train_info/time_between_train_steps": 0.005897045135498047, "step": 1045} +{"info/global_step": 1046, "train_info/time_within_train_step": 27.646758317947388, "step": 1046} +{"train_info/time_between_train_steps": 0.005536079406738281, "step": 1046} +{"info/global_step": 1047, "train_info/time_within_train_step": 27.676657915115356, "step": 1047} +{"train_info/time_between_train_steps": 0.005621671676635742, "step": 1047} +{"info/global_step": 1048, "train_info/time_within_train_step": 27.771257877349854, "step": 1048} +{"train_info/time_between_train_steps": 0.014121770858764648, "step": 1048} +{"info/global_step": 1049, "train_info/time_within_train_step": 27.71352744102478, "step": 1049} +{"train_info/time_between_train_steps": 0.0059070587158203125, "step": 1049} +{"info/global_step": 1050, "train_info/time_within_train_step": 27.72890019416809, "step": 1050} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 28596.0, "train_info/memory_max_reserved": 28596.0, "_timestamp": 1733988215, "_runtime": 31600}, "step": 1050} +{"logs": {"train/loss": 3.0475, "train/learning_rate": 8.333333333333333e-05, "train/epoch": 34.02, "_timestamp": 1733988215, "_runtime": 31600}, "step": 1050} +{"train_info/time_between_train_steps": 0.00863194465637207, "step": 1050} +{"train_info/time_between_train_steps": 13.75635814666748, "step": 1050} +{"info/global_step": 1051, "train_info/time_within_train_step": 27.74818468093872, "step": 1051} +{"train_info/time_between_train_steps": 0.006246805191040039, "step": 1051} +{"info/global_step": 1052, "train_info/time_within_train_step": 27.865265607833862, "step": 1052} +{"train_info/time_between_train_steps": 0.010149478912353516, "step": 1052} +{"info/global_step": 1053, "train_info/time_within_train_step": 27.678677558898926, "step": 1053} +{"train_info/time_between_train_steps": 0.005952358245849609, "step": 1053} +{"info/global_step": 1054, "train_info/time_within_train_step": 27.74115753173828, "step": 1054} +{"train_info/time_between_train_steps": 0.010132074356079102, "step": 1054} +{"info/global_step": 1055, "train_info/time_within_train_step": 27.690956830978394, "step": 1055} +{"train_info/time_between_train_steps": 0.010764360427856445, "step": 1055} +{"info/global_step": 1056, "train_info/time_within_train_step": 27.835668087005615, "step": 1056} +{"train_info/time_between_train_steps": 0.005857706069946289, "step": 1056} +{"info/global_step": 1057, "train_info/time_within_train_step": 27.654621601104736, "step": 1057} +{"train_info/time_between_train_steps": 0.00629425048828125, "step": 1057} +{"info/global_step": 1058, "train_info/time_within_train_step": 27.829525470733643, "step": 1058} +{"train_info/time_between_train_steps": 0.0056531429290771484, "step": 1058} +{"info/global_step": 1059, "train_info/time_within_train_step": 27.742645740509033, "step": 1059} +{"train_info/time_between_train_steps": 0.00589442253112793, "step": 1059} +{"info/global_step": 1060, "train_info/time_within_train_step": 27.800498962402344, "step": 1060} +{"train_info/time_between_train_steps": 0.00560450553894043, "step": 1060} +{"info/global_step": 1061, "train_info/time_within_train_step": 27.65975522994995, "step": 1061} +{"train_info/time_between_train_steps": 0.0057544708251953125, "step": 1061} +{"info/global_step": 1062, "train_info/time_within_train_step": 27.710456609725952, "step": 1062} +{"train_info/time_between_train_steps": 0.005543708801269531, "step": 1062} +{"info/global_step": 1063, "train_info/time_within_train_step": 27.72316861152649, "step": 1063} +{"train_info/time_between_train_steps": 0.010350942611694336, "step": 1063} +{"info/global_step": 1064, "train_info/time_within_train_step": 27.783321857452393, "step": 1064} +{"train_info/time_between_train_steps": 0.0058596134185791016, "step": 1064} +{"info/global_step": 1065, "train_info/time_within_train_step": 27.7919864654541, "step": 1065} +{"train_info/time_between_train_steps": 0.012386083602905273, "step": 1065} +{"info/global_step": 1066, "train_info/time_within_train_step": 27.71608543395996, "step": 1066} +{"train_info/time_between_train_steps": 0.005591869354248047, "step": 1066} +{"info/global_step": 1067, "train_info/time_within_train_step": 27.60114598274231, "step": 1067} +{"train_info/time_between_train_steps": 0.005971431732177734, "step": 1067} +{"info/global_step": 1068, "train_info/time_within_train_step": 27.615582704544067, "step": 1068} +{"train_info/time_between_train_steps": 0.005549430847167969, "step": 1068} +{"info/global_step": 1069, "train_info/time_within_train_step": 27.664326667785645, "step": 1069} +{"train_info/time_between_train_steps": 0.005692243576049805, "step": 1069} +{"info/global_step": 1070, "train_info/time_within_train_step": 27.747320890426636, "step": 1070} +{"train_info/time_between_train_steps": 0.00543975830078125, "step": 1070} +{"info/global_step": 1071, "train_info/time_within_train_step": 27.7101571559906, "step": 1071} +{"train_info/time_between_train_steps": 0.005936861038208008, "step": 1071} +{"info/global_step": 1072, "train_info/time_within_train_step": 27.68065643310547, "step": 1072} +{"train_info/time_between_train_steps": 0.006018400192260742, "step": 1072} +{"info/global_step": 1073, "train_info/time_within_train_step": 27.693690061569214, "step": 1073} +{"train_info/time_between_train_steps": 0.011269569396972656, "step": 1073} +{"info/global_step": 1074, "train_info/time_within_train_step": 27.77188539505005, "step": 1074} +{"train_info/time_between_train_steps": 0.015792369842529297, "step": 1074} +{"info/global_step": 1075, "train_info/time_within_train_step": 27.67374563217163, "step": 1075} +{"train_info/time_between_train_steps": 0.005690336227416992, "step": 1075} +{"info/global_step": 1076, "train_info/time_within_train_step": 27.791025161743164, "step": 1076} +{"train_info/time_between_train_steps": 0.005976438522338867, "step": 1076} +{"info/global_step": 1077, "train_info/time_within_train_step": 27.678840398788452, "step": 1077} +{"train_info/time_between_train_steps": 0.005956888198852539, "step": 1077} +{"info/global_step": 1078, "train_info/time_within_train_step": 27.750925302505493, "step": 1078} +{"train_info/time_between_train_steps": 0.011492729187011719, "step": 1078} +{"info/global_step": 1079, "train_info/time_within_train_step": 27.649028062820435, "step": 1079} +{"train_info/time_between_train_steps": 0.006345510482788086, "step": 1079} +{"info/global_step": 1080, "train_info/time_within_train_step": 27.704766035079956, "step": 1080} +{"train_info/time_between_train_steps": 0.006574153900146484, "step": 1080} +{"train_info/time_between_train_steps": 13.202275514602661, "step": 1080} +{"info/global_step": 1081, "train_info/time_within_train_step": 27.854581356048584, "step": 1081} +{"train_info/time_between_train_steps": 0.005165815353393555, "step": 1081} +{"info/global_step": 1082, "train_info/time_within_train_step": 27.786242961883545, "step": 1082} +{"train_info/time_between_train_steps": 0.006355762481689453, "step": 1082} +{"info/global_step": 1083, "train_info/time_within_train_step": 27.64443850517273, "step": 1083} +{"train_info/time_between_train_steps": 0.0055959224700927734, "step": 1083} +{"info/global_step": 1084, "train_info/time_within_train_step": 27.830227613449097, "step": 1084} +{"train_info/time_between_train_steps": 0.005785942077636719, "step": 1084} +{"info/global_step": 1085, "train_info/time_within_train_step": 27.699605464935303, "step": 1085} +{"train_info/time_between_train_steps": 0.010434389114379883, "step": 1085} +{"info/global_step": 1086, "train_info/time_within_train_step": 27.760987281799316, "step": 1086} +{"train_info/time_between_train_steps": 0.0055637359619140625, "step": 1086} +{"info/global_step": 1087, "train_info/time_within_train_step": 27.60963010787964, "step": 1087} +{"train_info/time_between_train_steps": 0.0058786869049072266, "step": 1087} +{"info/global_step": 1088, "train_info/time_within_train_step": 27.755130290985107, "step": 1088} +{"train_info/time_between_train_steps": 0.0056858062744140625, "step": 1088} +{"info/global_step": 1089, "train_info/time_within_train_step": 27.67775797843933, "step": 1089} +{"train_info/time_between_train_steps": 0.005855560302734375, "step": 1089} +{"info/global_step": 1090, "train_info/time_within_train_step": 27.80557346343994, "step": 1090} +{"train_info/time_between_train_steps": 0.005780696868896484, "step": 1090} +{"info/global_step": 1091, "train_info/time_within_train_step": 27.712743997573853, "step": 1091} +{"train_info/time_between_train_steps": 0.009945154190063477, "step": 1091} +{"info/global_step": 1092, "train_info/time_within_train_step": 27.66876769065857, "step": 1092} +{"train_info/time_between_train_steps": 0.015380859375, "step": 1092} +{"info/global_step": 1093, "train_info/time_within_train_step": 27.73447608947754, "step": 1093} +{"train_info/time_between_train_steps": 0.005186319351196289, "step": 1093} +{"info/global_step": 1094, "train_info/time_within_train_step": 27.650252103805542, "step": 1094} +{"train_info/time_between_train_steps": 0.0053997039794921875, "step": 1094} +{"info/global_step": 1095, "train_info/time_within_train_step": 27.72205376625061, "step": 1095} +{"train_info/time_between_train_steps": 0.00582575798034668, "step": 1095} +{"info/global_step": 1096, "train_info/time_within_train_step": 27.69259524345398, "step": 1096} +{"train_info/time_between_train_steps": 0.0058345794677734375, "step": 1096} +{"info/global_step": 1097, "train_info/time_within_train_step": 27.899667263031006, "step": 1097} +{"train_info/time_between_train_steps": 0.009984254837036133, "step": 1097} +{"info/global_step": 1098, "train_info/time_within_train_step": 27.729508876800537, "step": 1098} +{"train_info/time_between_train_steps": 0.0056416988372802734, "step": 1098} +{"info/global_step": 1099, "train_info/time_within_train_step": 27.692224502563477, "step": 1099} +{"train_info/time_between_train_steps": 0.011142492294311523, "step": 1099} +{"info/global_step": 1100, "train_info/time_within_train_step": 27.704168558120728, "step": 1100} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 28596.0, "train_info/memory_max_reserved": 28596.0, "_timestamp": 1733989629, "_runtime": 33014}, "step": 1100} +{"logs": {"train/loss": 3.0353, "train/learning_rate": 5.5555555555555545e-05, "train/epoch": 36.02, "_timestamp": 1733989629, "_runtime": 33014}, "step": 1100} +{"train_info/time_between_train_steps": 2.604116678237915, "step": 1100} +{"info/global_step": 1101, "train_info/time_within_train_step": 27.640926361083984, "step": 1101} +{"train_info/time_between_train_steps": 0.006031036376953125, "step": 1101} +{"info/global_step": 1102, "train_info/time_within_train_step": 27.628812074661255, "step": 1102} +{"train_info/time_between_train_steps": 0.00609588623046875, "step": 1102} +{"info/global_step": 1103, "train_info/time_within_train_step": 27.685484886169434, "step": 1103} +{"train_info/time_between_train_steps": 0.006142377853393555, "step": 1103} +{"info/global_step": 1104, "train_info/time_within_train_step": 27.7051203250885, "step": 1104} +{"train_info/time_between_train_steps": 0.005904674530029297, "step": 1104} +{"info/global_step": 1105, "train_info/time_within_train_step": 27.82415533065796, "step": 1105} +{"train_info/time_between_train_steps": 0.0059604644775390625, "step": 1105} +{"info/global_step": 1106, "train_info/time_within_train_step": 27.73003578186035, "step": 1106} +{"train_info/time_between_train_steps": 0.010421514511108398, "step": 1106} +{"info/global_step": 1107, "train_info/time_within_train_step": 27.683637857437134, "step": 1107} +{"train_info/time_between_train_steps": 0.005821704864501953, "step": 1107} +{"info/global_step": 1108, "train_info/time_within_train_step": 27.644197463989258, "step": 1108} +{"train_info/time_between_train_steps": 0.0064351558685302734, "step": 1108} +{"info/global_step": 1109, "train_info/time_within_train_step": 27.758477687835693, "step": 1109} +{"train_info/time_between_train_steps": 0.0061151981353759766, "step": 1109} +{"info/global_step": 1110, "train_info/time_within_train_step": 27.697195768356323, "step": 1110} +{"train_info/time_between_train_steps": 0.006868600845336914, "step": 1110} +{"train_info/time_between_train_steps": 13.467293977737427, "step": 1110} +{"info/global_step": 1111, "train_info/time_within_train_step": 27.63030743598938, "step": 1111} +{"train_info/time_between_train_steps": 0.005671977996826172, "step": 1111} +{"info/global_step": 1112, "train_info/time_within_train_step": 27.901524543762207, "step": 1112} +{"train_info/time_between_train_steps": 0.0055844783782958984, "step": 1112} +{"info/global_step": 1113, "train_info/time_within_train_step": 27.608649253845215, "step": 1113} +{"train_info/time_between_train_steps": 0.005742788314819336, "step": 1113} +{"info/global_step": 1114, "train_info/time_within_train_step": 27.783468008041382, "step": 1114} +{"train_info/time_between_train_steps": 0.006273031234741211, "step": 1114} +{"info/global_step": 1115, "train_info/time_within_train_step": 27.813008069992065, "step": 1115} +{"train_info/time_between_train_steps": 0.010641098022460938, "step": 1115} +{"info/global_step": 1116, "train_info/time_within_train_step": 27.913946390151978, "step": 1116} +{"train_info/time_between_train_steps": 0.011171579360961914, "step": 1116} +{"info/global_step": 1117, "train_info/time_within_train_step": 27.662792205810547, "step": 1117} +{"train_info/time_between_train_steps": 0.00577235221862793, "step": 1117} +{"info/global_step": 1118, "train_info/time_within_train_step": 27.796602249145508, "step": 1118} +{"train_info/time_between_train_steps": 0.010258913040161133, "step": 1118} +{"info/global_step": 1119, "train_info/time_within_train_step": 27.65818500518799, "step": 1119} +{"train_info/time_between_train_steps": 0.015189170837402344, "step": 1119} +{"info/global_step": 1120, "train_info/time_within_train_step": 27.866528749465942, "step": 1120} +{"train_info/time_between_train_steps": 0.0057985782623291016, "step": 1120} +{"info/global_step": 1121, "train_info/time_within_train_step": 27.709141492843628, "step": 1121} +{"train_info/time_between_train_steps": 0.005796909332275391, "step": 1121} +{"info/global_step": 1122, "train_info/time_within_train_step": 27.612977743148804, "step": 1122} +{"train_info/time_between_train_steps": 0.005221366882324219, "step": 1122} +{"info/global_step": 1123, "train_info/time_within_train_step": 27.63252019882202, "step": 1123} +{"train_info/time_between_train_steps": 0.005635261535644531, "step": 1123} +{"info/global_step": 1124, "train_info/time_within_train_step": 27.67782497406006, "step": 1124} +{"train_info/time_between_train_steps": 0.005622386932373047, "step": 1124} +{"info/global_step": 1125, "train_info/time_within_train_step": 27.658196210861206, "step": 1125} +{"train_info/time_between_train_steps": 0.0058820247650146484, "step": 1125} +{"info/global_step": 1126, "train_info/time_within_train_step": 27.742108583450317, "step": 1126} +{"train_info/time_between_train_steps": 0.005616903305053711, "step": 1126} +{"info/global_step": 1127, "train_info/time_within_train_step": 27.69471001625061, "step": 1127} +{"train_info/time_between_train_steps": 0.005697965621948242, "step": 1127} +{"info/global_step": 1128, "train_info/time_within_train_step": 27.74132466316223, "step": 1128} +{"train_info/time_between_train_steps": 0.005343914031982422, "step": 1128} +{"info/global_step": 1129, "train_info/time_within_train_step": 27.6448814868927, "step": 1129} +{"train_info/time_between_train_steps": 0.00597691535949707, "step": 1129} +{"info/global_step": 1130, "train_info/time_within_train_step": 27.619864463806152, "step": 1130} +{"train_info/time_between_train_steps": 0.00568389892578125, "step": 1130} +{"info/global_step": 1131, "train_info/time_within_train_step": 27.601613521575928, "step": 1131} +{"train_info/time_between_train_steps": 0.005362749099731445, "step": 1131} +{"info/global_step": 1132, "train_info/time_within_train_step": 27.670620679855347, "step": 1132} +{"train_info/time_between_train_steps": 0.011550664901733398, "step": 1132} +{"info/global_step": 1133, "train_info/time_within_train_step": 27.68560767173767, "step": 1133} +{"train_info/time_between_train_steps": 0.005965471267700195, "step": 1133} +{"info/global_step": 1134, "train_info/time_within_train_step": 27.78325319290161, "step": 1134} +{"train_info/time_between_train_steps": 0.010512590408325195, "step": 1134} +{"info/global_step": 1135, "train_info/time_within_train_step": 27.68183994293213, "step": 1135} +{"train_info/time_between_train_steps": 0.0067272186279296875, "step": 1135} +{"info/global_step": 1136, "train_info/time_within_train_step": 27.693676233291626, "step": 1136} +{"train_info/time_between_train_steps": 0.005839347839355469, "step": 1136} +{"info/global_step": 1137, "train_info/time_within_train_step": 27.679414987564087, "step": 1137} +{"train_info/time_between_train_steps": 0.015659093856811523, "step": 1137} +{"info/global_step": 1138, "train_info/time_within_train_step": 27.725311517715454, "step": 1138} +{"train_info/time_between_train_steps": 0.005801677703857422, "step": 1138} +{"info/global_step": 1139, "train_info/time_within_train_step": 27.743845224380493, "step": 1139} +{"train_info/time_between_train_steps": 0.006040811538696289, "step": 1139} +{"info/global_step": 1140, "train_info/time_within_train_step": 27.72423219680786, "step": 1140} +{"train_info/time_between_train_steps": 0.013887882232666016, "step": 1140} +{"train_info/time_between_train_steps": 13.506225824356079, "step": 1140} +{"info/global_step": 1141, "train_info/time_within_train_step": 27.614355087280273, "step": 1141} +{"train_info/time_between_train_steps": 0.010040044784545898, "step": 1141} +{"info/global_step": 1142, "train_info/time_within_train_step": 27.773178577423096, "step": 1142} +{"train_info/time_between_train_steps": 0.010596275329589844, "step": 1142} +{"info/global_step": 1143, "train_info/time_within_train_step": 27.83446741104126, "step": 1143} +{"train_info/time_between_train_steps": 0.005570411682128906, "step": 1143} +{"info/global_step": 1144, "train_info/time_within_train_step": 27.81984543800354, "step": 1144} +{"train_info/time_between_train_steps": 0.005635738372802734, "step": 1144} +{"info/global_step": 1145, "train_info/time_within_train_step": 27.661415815353394, "step": 1145} +{"train_info/time_between_train_steps": 0.005877256393432617, "step": 1145} +{"info/global_step": 1146, "train_info/time_within_train_step": 27.88739800453186, "step": 1146} +{"train_info/time_between_train_steps": 0.005778789520263672, "step": 1146} +{"info/global_step": 1147, "train_info/time_within_train_step": 27.624659061431885, "step": 1147} +{"train_info/time_between_train_steps": 0.011123180389404297, "step": 1147} +{"info/global_step": 1148, "train_info/time_within_train_step": 27.81012988090515, "step": 1148} +{"train_info/time_between_train_steps": 0.005815029144287109, "step": 1148} +{"info/global_step": 1149, "train_info/time_within_train_step": 27.84186100959778, "step": 1149} +{"train_info/time_between_train_steps": 0.011845588684082031, "step": 1149} +{"info/global_step": 1150, "train_info/time_within_train_step": 27.934028387069702, "step": 1150} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 28596.0, "train_info/memory_max_reserved": 28596.0, "_timestamp": 1733991045, "_runtime": 34430}, "step": 1150} +{"logs": {"train/loss": 3.0056, "train/learning_rate": 2.7777777777777772e-05, "train/epoch": 38.01, "_timestamp": 1733991045, "_runtime": 34430}, "step": 1150} +{"train_info/time_between_train_steps": 0.007683992385864258, "step": 1150} +{"info/global_step": 1151, "train_info/time_within_train_step": 27.698110103607178, "step": 1151} +{"train_info/time_between_train_steps": 0.0056154727935791016, "step": 1151} +{"info/global_step": 1152, "train_info/time_within_train_step": 27.740365028381348, "step": 1152} +{"train_info/time_between_train_steps": 0.005557537078857422, "step": 1152} +{"info/global_step": 1153, "train_info/time_within_train_step": 27.660119771957397, "step": 1153} +{"train_info/time_between_train_steps": 0.010641813278198242, "step": 1153} +{"info/global_step": 1154, "train_info/time_within_train_step": 27.702433824539185, "step": 1154} +{"train_info/time_between_train_steps": 0.008876323699951172, "step": 1154} +{"info/global_step": 1155, "train_info/time_within_train_step": 27.796197414398193, "step": 1155} +{"train_info/time_between_train_steps": 0.005737781524658203, "step": 1155} +{"info/global_step": 1156, "train_info/time_within_train_step": 27.677065134048462, "step": 1156} +{"train_info/time_between_train_steps": 0.0053691864013671875, "step": 1156} +{"info/global_step": 1157, "train_info/time_within_train_step": 27.646057605743408, "step": 1157} +{"train_info/time_between_train_steps": 0.0054168701171875, "step": 1157} +{"info/global_step": 1158, "train_info/time_within_train_step": 27.59878706932068, "step": 1158} +{"train_info/time_between_train_steps": 0.0055806636810302734, "step": 1158} +{"info/global_step": 1159, "train_info/time_within_train_step": 27.78149437904358, "step": 1159} +{"train_info/time_between_train_steps": 0.0052874088287353516, "step": 1159} +{"info/global_step": 1160, "train_info/time_within_train_step": 27.646048307418823, "step": 1160} +{"train_info/time_between_train_steps": 0.010015487670898438, "step": 1160} +{"info/global_step": 1161, "train_info/time_within_train_step": 27.62010169029236, "step": 1161} +{"train_info/time_between_train_steps": 0.0056629180908203125, "step": 1161} +{"info/global_step": 1162, "train_info/time_within_train_step": 27.619399785995483, "step": 1162} +{"train_info/time_between_train_steps": 0.005957603454589844, "step": 1162} +{"info/global_step": 1163, "train_info/time_within_train_step": 27.67317533493042, "step": 1163} +{"train_info/time_between_train_steps": 0.0058002471923828125, "step": 1163} +{"info/global_step": 1164, "train_info/time_within_train_step": 27.63097620010376, "step": 1164} +{"train_info/time_between_train_steps": 0.005500078201293945, "step": 1164} +{"info/global_step": 1165, "train_info/time_within_train_step": 27.654283046722412, "step": 1165} +{"train_info/time_between_train_steps": 0.005680084228515625, "step": 1165} +{"info/global_step": 1166, "train_info/time_within_train_step": 27.748389720916748, "step": 1166} +{"train_info/time_between_train_steps": 0.00556635856628418, "step": 1166} +{"info/global_step": 1167, "train_info/time_within_train_step": 27.68030595779419, "step": 1167} +{"train_info/time_between_train_steps": 0.005777597427368164, "step": 1167} +{"info/global_step": 1168, "train_info/time_within_train_step": 27.6846661567688, "step": 1168} +{"train_info/time_between_train_steps": 0.008233308792114258, "step": 1168} +{"info/global_step": 1169, "train_info/time_within_train_step": 27.694804668426514, "step": 1169} +{"train_info/time_between_train_steps": 0.006621122360229492, "step": 1169} +{"info/global_step": 1170, "train_info/time_within_train_step": 27.698646068572998, "step": 1170} +{"train_info/time_between_train_steps": 0.006445646286010742, "step": 1170} +{"train_info/time_between_train_steps": 13.210197448730469, "step": 1170} +{"info/global_step": 1171, "train_info/time_within_train_step": 27.633400440216064, "step": 1171} +{"train_info/time_between_train_steps": 0.005545616149902344, "step": 1171} +{"info/global_step": 1172, "train_info/time_within_train_step": 27.811994314193726, "step": 1172} +{"train_info/time_between_train_steps": 0.005899906158447266, "step": 1172} +{"info/global_step": 1173, "train_info/time_within_train_step": 27.662935256958008, "step": 1173} +{"train_info/time_between_train_steps": 0.005833148956298828, "step": 1173} +{"info/global_step": 1174, "train_info/time_within_train_step": 28.018747329711914, "step": 1174} +{"train_info/time_between_train_steps": 0.0059359073638916016, "step": 1174} +{"info/global_step": 1175, "train_info/time_within_train_step": 27.91778588294983, "step": 1175} +{"train_info/time_between_train_steps": 0.011384725570678711, "step": 1175} +{"info/global_step": 1176, "train_info/time_within_train_step": 27.936585187911987, "step": 1176} +{"train_info/time_between_train_steps": 0.006087303161621094, "step": 1176} +{"info/global_step": 1177, "train_info/time_within_train_step": 27.691871166229248, "step": 1177} +{"train_info/time_between_train_steps": 0.0056269168853759766, "step": 1177} +{"info/global_step": 1178, "train_info/time_within_train_step": 27.906352758407593, "step": 1178} +{"train_info/time_between_train_steps": 0.010942220687866211, "step": 1178} +{"info/global_step": 1179, "train_info/time_within_train_step": 27.656354904174805, "step": 1179} +{"train_info/time_between_train_steps": 0.006750583648681641, "step": 1179} +{"info/global_step": 1180, "train_info/time_within_train_step": 27.795297145843506, "step": 1180} +{"train_info/time_between_train_steps": 0.011961698532104492, "step": 1180} +{"info/global_step": 1181, "train_info/time_within_train_step": 27.748061418533325, "step": 1181} +{"train_info/time_between_train_steps": 0.005961418151855469, "step": 1181} +{"info/global_step": 1182, "train_info/time_within_train_step": 27.62927556037903, "step": 1182} +{"train_info/time_between_train_steps": 0.005774259567260742, "step": 1182} +{"info/global_step": 1183, "train_info/time_within_train_step": 27.647262573242188, "step": 1183} +{"train_info/time_between_train_steps": 0.010522603988647461, "step": 1183} +{"info/global_step": 1184, "train_info/time_within_train_step": 27.796785593032837, "step": 1184} +{"train_info/time_between_train_steps": 0.005886554718017578, "step": 1184} +{"info/global_step": 1185, "train_info/time_within_train_step": 27.64915919303894, "step": 1185} +{"train_info/time_between_train_steps": 0.005767345428466797, "step": 1185} +{"info/global_step": 1186, "train_info/time_within_train_step": 27.65510082244873, "step": 1186} +{"train_info/time_between_train_steps": 0.010391473770141602, "step": 1186} +{"info/global_step": 1187, "train_info/time_within_train_step": 27.754868030548096, "step": 1187} +{"train_info/time_between_train_steps": 0.005922079086303711, "step": 1187} +{"info/global_step": 1188, "train_info/time_within_train_step": 27.682817459106445, "step": 1188} +{"train_info/time_between_train_steps": 0.005724430084228516, "step": 1188} +{"info/global_step": 1189, "train_info/time_within_train_step": 27.727694034576416, "step": 1189} +{"train_info/time_between_train_steps": 0.006698131561279297, "step": 1189} +{"info/global_step": 1190, "train_info/time_within_train_step": 27.767825603485107, "step": 1190} +{"train_info/time_between_train_steps": 0.005738735198974609, "step": 1190} +{"info/global_step": 1191, "train_info/time_within_train_step": 27.68353247642517, "step": 1191} +{"train_info/time_between_train_steps": 0.0059814453125, "step": 1191} +{"info/global_step": 1192, "train_info/time_within_train_step": 27.751811504364014, "step": 1192} +{"train_info/time_between_train_steps": 0.005967378616333008, "step": 1192} +{"info/global_step": 1193, "train_info/time_within_train_step": 27.70653200149536, "step": 1193} +{"train_info/time_between_train_steps": 0.0057125091552734375, "step": 1193} +{"info/global_step": 1194, "train_info/time_within_train_step": 27.73143196105957, "step": 1194} +{"train_info/time_between_train_steps": 0.005815744400024414, "step": 1194} +{"info/global_step": 1195, "train_info/time_within_train_step": 27.68502974510193, "step": 1195} +{"train_info/time_between_train_steps": 0.0058515071868896484, "step": 1195} +{"info/global_step": 1196, "train_info/time_within_train_step": 27.82064437866211, "step": 1196} +{"train_info/time_between_train_steps": 0.01062154769897461, "step": 1196} +{"info/global_step": 1197, "train_info/time_within_train_step": 27.69415259361267, "step": 1197} +{"train_info/time_between_train_steps": 0.005507946014404297, "step": 1197} +{"info/global_step": 1198, "train_info/time_within_train_step": 27.693443536758423, "step": 1198} +{"train_info/time_between_train_steps": 0.006059169769287109, "step": 1198} +{"info/global_step": 1199, "train_info/time_within_train_step": 27.6530282497406, "step": 1199} +{"train_info/time_between_train_steps": 0.0063364505767822266, "step": 1199} +{"info/global_step": 1200, "train_info/time_within_train_step": 27.691134691238403, "step": 1200} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 28596.0, "train_info/memory_max_reserved": 28596.0, "_timestamp": 1733992444, "_runtime": 35829}, "step": 1200} +{"logs": {"train/loss": 2.9624, "train/learning_rate": 0.0, "train/epoch": 39.02, "_timestamp": 1733992444, "_runtime": 35829}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 2082.109375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 28596.0, "train_info/memory_max_reserved": 28596.0, "_timestamp": 1733992447, "_runtime": 35832}, "step": 1200} +{"logs": {"train/train_runtime": 35831.9904, "train/train_samples_per_second": 17.147, "train/train_steps_per_second": 0.033, "train/total_flos": 3.24784950607872e+17, "train/train_loss": 3.999629693031311, "train/epoch": 39.02, "_timestamp": 1733992447, "_runtime": 35832}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 2082.1083984375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 28596.0, "train_info/memory_max_reserved": 28596.0, "_timestamp": 1733992450, "_runtime": 35835}, "step": 1200} +{"logs": {"eval/loss": 3.7953500747680664, "eval/runtime": 2.3593, "eval/samples_per_second": 50.863, "eval/steps_per_second": 3.391, "train/epoch": 39.02, "_timestamp": 1733992450, "_runtime": 35835}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 2082.1083984375, "train_info/memory_max_allocated": 21965.9951171875, "train_info/memory_reserved": 28596.0, "train_info/memory_max_reserved": 28596.0, "_timestamp": 1733992450, "_runtime": 35835}, "step": 1200} +{"logs": {"eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 3.7953500747680664, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 44.493809839596516, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 2.3593, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 50.863, "train/epoch": 39.02, "_timestamp": 1733992450, "_runtime": 35835}, "step": 1200} diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd1ed8166e639c1aa03e2f6d4738d036a1abc54f --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d481dd90e0112c7b86510b69821042c684cc4f5d27820394af11badd3314e8a +size 552615017 diff --git a/shuffle_control_ro_RO_randinit_seed53.log b/shuffle_control_ro_RO_randinit_seed53.log new file mode 100755 index 0000000000000000000000000000000000000000..87eafcbc490c2d5293be005a9670e8be8c48b2c9 --- /dev/null +++ b/shuffle_control_ro_RO_randinit_seed53.log @@ -0,0 +1,121 @@ +|=>> 12/11 [23:35:59] - mistral - INFO :: Starting Run: shuffle_control_ro_RO_randinit_seed53... +|=>> 12/11 [23:35:59] - mistral - INFO :: Setting Random Seed to 53! +|=>> 12/11 [23:35:59] - mistral - INFO :: Building Tokenize and Initializing `gpt2-small` via AutoModel/AutoConfig... +|=>> 12/11 [23:35:59] - mistral - INFO :: Using Configs For Model From: /local/xiulyang/mission-impossible-language-models/mistral/conf/models/gpt2-small-64000-RO.json ... +|=>> 12/11 [23:35:59] - mistral.models.auto - INFO :: Building Hugging Face GPT2Config from provided configs: {'activation_function': 'gelu_new', 'architectures': ['GPT2LMHeadModel'], 'attn_pdrop': 0.1, 'embd_pdrop': 0.1, 'eos_token_id': 0, 'bos_token_id': 0, 'initializer_range': 0.02, 'layer_norm_epsilon': 1e-05, 'model_type': 'gpt2', 'n_ctx': 1024, 'n_embd': 768, 'n_head': 12, 'n_inner': None, 'n_layer': 12, 'n_positions': 1024, 'reorder_and_upcast_attn': True, 'resid_pdrop': 0.1, 'scale_attn_by_inverse_layer_idx': True, 'scale_attn_weights': True, 'summary_activation': None, 'summary_first_dropout': 0.2, 'summary_proj_to_labels': True, 'summary_type': 'cls_index', 'summary_use_proj': True, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 1024}}, 'torch_dtype': 'float32', 'transformers_version': '4.35.2', 'use_cache': False, 'vocab_size': 64000} ... +|=>> 12/11 [23:35:59] - mistral.models.auto - INFO :: Fetching Hugging Face [Fast] AutoTokenizer for Model: `gpt2`... +|=>> 12/11 [23:35:59] - mistral.models.auto - INFO :: Using a Pretokenized Dataset +|=>> 12/11 [23:35:59] - mistral.models.auto - INFO :: Initializing Custom GPT-2 Model from Configuration: `gpt2`... +|=>> 12/11 [23:36:04] - mistral - INFO :: Setting Training Arguments from Quinfig... +|=>> 12/11 [23:36:04] - mistral.args.training - INFO :: Setting Gradient Accumulation Steps = `64` [BSZ: 512 World Size: 1 Device BSZ: 8] +|=>> 12/11 [23:36:04] - mistral - INFO :: Downloading and Preprocessing Dataset `/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py`... +|=>> 12/11 [23:36:04] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Generating examples from = /local/xiulyang/mission-impossible-language-models/data/multilingual/multilingual_data_perturbed/shuffle_control_ro/train +|=>> 12/11 [23:36:05] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Total sentences: 962141 +|=>> 12/11 [23:36:05] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Loading pre-tokenized data +|=>> 12/11 [23:36:10] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 12/11 [23:36:10] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 12/11 [23:36:11] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 12/11 [23:36:15] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Generating examples from = /local/xiulyang/mission-impossible-language-models/data/multilingual/multilingual_data_perturbed/shuffle_control_ro/dev +|=>> 12/11 [23:36:15] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Total sentences: 5151 +|=>> 12/11 [23:36:15] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Loading pre-tokenized data +|=>> 12/11 [23:36:15] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 12/11 [23:36:15] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 12/11 [23:36:15] - datasets_modules.datasets.babylm_dataset.43b5d6f57ff7566a9e5fca41a7eeab46164e1c1b30ed1dcb135a5808f8c10159.babylm_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 12/11 [23:36:15] - mistral.corpora.auto - INFO :: Building Tokenized Indexed Dataset for {dataset_id}/{dataset_name}... +|=>> 12/11 [23:36:15] - mistral.corpora.auto - INFO :: Building Indexed Dataset for train +|=>> 12/11 [23:36:48] - mistral.corpora.auto - INFO :: Building Indexed Dataset for validation +|=>> 12/11 [23:36:48] - mistral - INFO :: Initializing Model Trainer... +|=>> 12/11 [23:36:48] - mistral - INFO :: Training Arguments: TrainingArguments( +_n_gpu=1, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +bf16=False, +bf16_full_eval=False, +data_seed=53, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=IntervalStrategy.STEPS, +fp16=True, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +gradient_accumulation_steps=64, +gradient_checkpointing=False, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_model_id=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0006, +length_column_name=length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=logs, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=50, +logging_strategy=IntervalStrategy.STEPS, +lr_scheduler_type=SchedulerType.LINEAR, +max_grad_norm=1.0, +max_steps=1200, +metric_for_best_model=None, +mp_parameters=, +no_cuda=False, +num_train_epochs=3.0, +optim=OptimizerNames.ADAMW_HF, +output_dir=//local/xiulyang/babylm_models/shuffle_control_ro_RO_randinit/babylm_shuffle_control_ro_RO_randinit_seed53/runs/shuffle_control_ro_RO_randinit_seed53, +overwrite_output_dir=False, +past_index=-1, +per_device_eval_batch_size=16, +per_device_train_batch_size=8, +prediction_loss_only=True, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +remove_unused_columns=True, +report_to=[], +resume_from_checkpoint=None, +run_name=shuffle_control_ro_RO_randinit_seed53, +save_on_each_node=False, +save_steps=1000, +save_strategy=IntervalStrategy.STEPS, +save_total_limit=None, +seed=53, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=120, +weight_decay=0.1, +xpu_backend=None, +) +|=>> 12/11 [23:36:48] - mistral.core.callbacks - INFO :: Setting W&B Project: xiulin-yang-compling +|=>> 12/11 [23:36:55] - mistral - INFO :: Training... +|=>> 12/11 [23:36:55] - mistral.core.callbacks - INFO :: Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" +|=>> 12/12 [09:34:08] - mistral - INFO :: ...and that's all folks! +|=>> 12/12 [09:34:08] - mistral - INFO :: Running final evaluation... diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..938bd31b67d4eb91525b5f5b946519126a3331c7 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5ee64ec673c5915c725d59ab09d5567c896e983bbaa61ee563d03df6350d2f +size 3183