diff --git a/checkpoint-0/config.json b/checkpoint-0/config.json new file mode 100755 index 0000000000000000000000000000000000000000..8c8c89d1ef92210e20560d15855b17533ee84d76 --- /dev/null +++ b/checkpoint-0/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-0/pytorch_model.bin b/checkpoint-0/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..e62f1c2a73e204dd71fd38dbd4de66ee58a3ae92 --- /dev/null +++ b/checkpoint-0/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4794ea0b9a495e87fc34e316efe1b585527d6e3d15e4d093362ab24ee74ab3f +size 510396521 diff --git a/checkpoint-0/special_tokens_map.json b/checkpoint-0/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-0/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-0/tokenizer_config.json b/checkpoint-0/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-0/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-0/training_args.bin b/checkpoint-0/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..549697e20517d083291acd771c7e4f73a0a43ccb --- /dev/null +++ b/checkpoint-0/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1246cf5e18070da7948fefcc7820aec8b0a60d030d56ab5ce5ddfdd1b0e451a +size 3183 diff --git a/checkpoint-100/config.json b/checkpoint-100/config.json new file mode 100755 index 0000000000000000000000000000000000000000..8c8c89d1ef92210e20560d15855b17533ee84d76 --- /dev/null +++ b/checkpoint-100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..aed0417b39a37983593e88d0d213bb745eaf6642 --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1dbd0db23c353965422608c28de08ce378f2cef69a5d2c8dd44a1e1950cbae5 +size 995603825 diff --git a/checkpoint-100/pytorch_model.bin b/checkpoint-100/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..f2f1507854cb0c023e98f62ed5f8f93397f88808 --- /dev/null +++ b/checkpoint-100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f652d4c966235fbcf326dc2df84034784094aea84ad413900782d1ae0e7bbcb3 +size 510396521 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..2e602b2e2ef3761f9ec13c9d9e9652b27f163a79 --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:409451256ec723db0162c3421c60776939bf008a4ee720ddafd5c2cf0797e714 +size 14567 diff --git a/checkpoint-100/scaler.pt b/checkpoint-100/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..45cc4a33e17645cb0ed4a911b11c77cb2e7ce7f3 --- /dev/null +++ b/checkpoint-100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a3423b2fe42f204bc8fe2c666ff379f9fd753a0f13613064a5e71e86b519e8 +size 559 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..595eec9094e91b3eb24c2de88c461df2c22026ab --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e9b9d31d11c624d89b0c04ad496adf4b5addd3e703848d2583972c703e8da6 +size 623 diff --git a/checkpoint-100/special_tokens_map.json b/checkpoint-100/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-100/tokenizer_config.json b/checkpoint-100/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..0f2786fe5bd3b1b003803b3c3115c2d1fd930d68 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,34 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0108333333333333, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8955, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1689, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7154, + "step": 100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.746963132416e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..549697e20517d083291acd771c7e4f73a0a43ccb --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1246cf5e18070da7948fefcc7820aec8b0a60d030d56ab5ce5ddfdd1b0e451a +size 3183 diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100755 index 0000000000000000000000000000000000000000..8c8c89d1ef92210e20560d15855b17533ee84d76 --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..1f26c0bc8d8f0b053cb2e9396989d5a79b8e27eb --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37f332f4643aa0aded0948b19b605751e238f55a7be16ed8bb3bbda9cef8dbc5 +size 995604017 diff --git a/checkpoint-1000/pytorch_model.bin b/checkpoint-1000/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..bd55dfdd4101a6f2fcb8aa8b3078b6cfd41ae8a0 --- /dev/null +++ b/checkpoint-1000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38853167867e184456386c940191a63deeaf83e93416dbd1420621a54b8d464d +size 510396521 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..1844c5da2b612ece18b1f097cbc88eec16179984 --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20028d847d6f0ebc0564aa5b109f633042826016cd967a87e400b6096386e3d6 +size 14567 diff --git a/checkpoint-1000/scaler.pt b/checkpoint-1000/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..b04695d3a30e4bab2b78883d9c849c25c37ef7d7 --- /dev/null +++ b/checkpoint-1000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f810fc7b695697c440d8985f6042b4ba23a9e1027604c265718b518ca29f1b2b +size 559 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..2a616899591a36b66ac0bd1ceeb324087c181a2a --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0691206f4bd9ca409d6e7104087a4e0eb05df8f8f555a400f6ecc532edba52d8 +size 623 diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..80744be49e12ed280b7155c2457d5c55588e9d61 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,158 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 34.01166666666666, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8955, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1689, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7154, + "step": 100 + }, + { + "epoch": 5.0, + "learning_rate": 0.0005833333333333333, + "loss": 6.5645, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.3217, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 6.2449, + "step": 250 + }, + { + "epoch": 10.01, + "learning_rate": 0.0005, + "loss": 6.006, + "step": 300 + }, + { + "epoch": 12.0, + "learning_rate": 0.00047222222222222224, + "loss": 5.7678, + "step": 350 + }, + { + "epoch": 13.02, + "learning_rate": 0.00044444444444444436, + "loss": 5.2976, + "step": 400 + }, + { + "epoch": 15.01, + "learning_rate": 0.00041666666666666664, + "loss": 5.0428, + "step": 450 + }, + { + "epoch": 17.01, + "learning_rate": 0.00038888888888888887, + "loss": 4.7926, + "step": 500 + }, + { + "epoch": 18.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.5128, + "step": 550 + }, + { + "epoch": 20.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.4343, + "step": 600 + }, + { + "epoch": 22.01, + "learning_rate": 0.00030555555555555555, + "loss": 4.2938, + "step": 650 + }, + { + "epoch": 24.0, + "learning_rate": 0.0002777777777777778, + "loss": 4.1808, + "step": 700 + }, + { + "epoch": 25.02, + "learning_rate": 0.00025, + "loss": 4.0149, + "step": 750 + }, + { + "epoch": 27.01, + "learning_rate": 0.00022222222222222218, + "loss": 4.0023, + "step": 800 + }, + { + "epoch": 29.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.9298, + "step": 850 + }, + { + "epoch": 31.0, + "learning_rate": 0.00016666666666666666, + "loss": 3.8642, + "step": 900 + }, + { + "epoch": 32.02, + "learning_rate": 0.0001388888888888889, + "loss": 3.747, + "step": 950 + }, + { + "epoch": 34.01, + "learning_rate": 0.00011111111111111109, + "loss": 3.7634, + "step": 1000 + }, + { + "epoch": 34.01, + "eval_loss": 4.5402936935424805, + "eval_runtime": 5.0343, + "eval_samples_per_second": 20.857, + "eval_steps_per_second": 1.39, + "step": 1000 + }, + { + "epoch": 34.01, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_loss": 4.5402936935424805, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_ppl": 93.7183205435465, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_runtime": 5.0343, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 20.857, + "step": 1000 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.7564741623808e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..549697e20517d083291acd771c7e4f73a0a43ccb --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1246cf5e18070da7948fefcc7820aec8b0a60d030d56ab5ce5ddfdd1b0e451a +size 3183 diff --git a/checkpoint-1100/config.json b/checkpoint-1100/config.json new file mode 100755 index 0000000000000000000000000000000000000000..8c8c89d1ef92210e20560d15855b17533ee84d76 --- /dev/null +++ b/checkpoint-1100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-1100/optimizer.pt b/checkpoint-1100/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..fa8acd94b2ee60e005ebfe792c63d3f76b37c38d --- /dev/null +++ b/checkpoint-1100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c492193868409d299e6d113aa630c0134099a57a8b5f1cb1640c32418d9ec08 +size 995604017 diff --git a/checkpoint-1100/pytorch_model.bin b/checkpoint-1100/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..14d11e306065692e75d995c879cc8cfa3cb0cfc5 --- /dev/null +++ b/checkpoint-1100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:597872f13fd69a66d1cd70a8da4593d2438403477bf8aed64c4fa36fe2cb1988 +size 510396521 diff --git a/checkpoint-1100/rng_state.pth b/checkpoint-1100/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..f014cb7936739b0376ccb9fe939d75bcd0f3b84f --- /dev/null +++ b/checkpoint-1100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b498c3d24f764ff04eb2d7145a328566fc0f4ee260a3e57a14e3411bb5addc06 +size 14567 diff --git a/checkpoint-1100/scaler.pt b/checkpoint-1100/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..37bf049fbd5fd721203bf0238edc8ff67dbd8f94 --- /dev/null +++ b/checkpoint-1100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb16c30b686aa43e110b0d33f9d46bf3127b7124542ca8dc34831233d4675a0 +size 559 diff --git a/checkpoint-1100/scheduler.pt b/checkpoint-1100/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..c158169ec0bb09620952d544fd4b4edea0cc9cf4 --- /dev/null +++ b/checkpoint-1100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1b7713e4bb40428f29080b7d08d4a52f779ac863737861e4724292b2cf6c59 +size 623 diff --git a/checkpoint-1100/special_tokens_map.json b/checkpoint-1100/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1100/tokenizer_config.json b/checkpoint-1100/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1100/trainer_state.json b/checkpoint-1100/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..11a45118d03bcdebcb8cdce6e3ff989b15133baf --- /dev/null +++ b/checkpoint-1100/trainer_state.json @@ -0,0 +1,170 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 37.0225, + "global_step": 1100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8955, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1689, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7154, + "step": 100 + }, + { + "epoch": 5.0, + "learning_rate": 0.0005833333333333333, + "loss": 6.5645, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.3217, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 6.2449, + "step": 250 + }, + { + "epoch": 10.01, + "learning_rate": 0.0005, + "loss": 6.006, + "step": 300 + }, + { + "epoch": 12.0, + "learning_rate": 0.00047222222222222224, + "loss": 5.7678, + "step": 350 + }, + { + "epoch": 13.02, + "learning_rate": 0.00044444444444444436, + "loss": 5.2976, + "step": 400 + }, + { + "epoch": 15.01, + "learning_rate": 0.00041666666666666664, + "loss": 5.0428, + "step": 450 + }, + { + "epoch": 17.01, + "learning_rate": 0.00038888888888888887, + "loss": 4.7926, + "step": 500 + }, + { + "epoch": 18.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.5128, + "step": 550 + }, + { + "epoch": 20.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.4343, + "step": 600 + }, + { + "epoch": 22.01, + "learning_rate": 0.00030555555555555555, + "loss": 4.2938, + "step": 650 + }, + { + "epoch": 24.0, + "learning_rate": 0.0002777777777777778, + "loss": 4.1808, + "step": 700 + }, + { + "epoch": 25.02, + "learning_rate": 0.00025, + "loss": 4.0149, + "step": 750 + }, + { + "epoch": 27.01, + "learning_rate": 0.00022222222222222218, + "loss": 4.0023, + "step": 800 + }, + { + "epoch": 29.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.9298, + "step": 850 + }, + { + "epoch": 31.0, + "learning_rate": 0.00016666666666666666, + "loss": 3.8642, + "step": 900 + }, + { + "epoch": 32.02, + "learning_rate": 0.0001388888888888889, + "loss": 3.747, + "step": 950 + }, + { + "epoch": 34.01, + "learning_rate": 0.00011111111111111109, + "loss": 3.7634, + "step": 1000 + }, + { + "epoch": 34.01, + "eval_loss": 4.5402936935424805, + "eval_runtime": 5.0343, + "eval_samples_per_second": 20.857, + "eval_steps_per_second": 1.39, + "step": 1000 + }, + { + "epoch": 34.01, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_loss": 4.5402936935424805, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_ppl": 93.7183205435465, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_runtime": 5.0343, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 20.857, + "step": 1000 + }, + { + "epoch": 36.01, + "learning_rate": 8.333333333333333e-05, + "loss": 3.7216, + "step": 1050 + }, + { + "epoch": 37.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 3.6231, + "step": 1100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 3.0311704756224e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1100/training_args.bin b/checkpoint-1100/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..549697e20517d083291acd771c7e4f73a0a43ccb --- /dev/null +++ b/checkpoint-1100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1246cf5e18070da7948fefcc7820aec8b0a60d030d56ab5ce5ddfdd1b0e451a +size 3183 diff --git a/checkpoint-1200/config.json b/checkpoint-1200/config.json new file mode 100755 index 0000000000000000000000000000000000000000..8c8c89d1ef92210e20560d15855b17533ee84d76 --- /dev/null +++ b/checkpoint-1200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-1200/optimizer.pt b/checkpoint-1200/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..6b74c6156d0dbf5bcd916f3b15a07ac48da96bb8 --- /dev/null +++ b/checkpoint-1200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e17a9b524ff90732303b25f10c3e9007baa8c3a7aa6cae6a8ec26ab05bfc394 +size 995604017 diff --git a/checkpoint-1200/pytorch_model.bin b/checkpoint-1200/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..8a4f7a589ad6f44c5c8185fa5d8542aa3993c838 --- /dev/null +++ b/checkpoint-1200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f28431ebc04471305544f66cbe239f2119b4bf1cd08403beea8aea8c2f1edb2 +size 510396521 diff --git a/checkpoint-1200/rng_state.pth b/checkpoint-1200/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..8dd008ba40e7ead3b8e09848d48112eabe51599b --- /dev/null +++ b/checkpoint-1200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b58bd534d85c6970f3f1390e4d8b03ac58be3946abc5575abb8d5ea2fffd26a +size 14567 diff --git a/checkpoint-1200/scaler.pt b/checkpoint-1200/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..8953dddccbefc4703c09dcda27d83c15add2bade --- /dev/null +++ b/checkpoint-1200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c7277eaca0850ae3e9b6790b3d002d820169cce0671185e672c28c8ae8e056 +size 559 diff --git a/checkpoint-1200/scheduler.pt b/checkpoint-1200/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..310d39c17fc616a9c83286ed00f0f4cefba9f5df --- /dev/null +++ b/checkpoint-1200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:935a8fb09a6e9698d9894853b05e181b3f56098deaaecddde08e55f06bf000c4 +size 623 diff --git a/checkpoint-1200/special_tokens_map.json b/checkpoint-1200/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1200/tokenizer_config.json b/checkpoint-1200/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1200/trainer_state.json b/checkpoint-1200/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..febb7c7009f723dafc53a7eb19c48ea4f3a96e76 --- /dev/null +++ b/checkpoint-1200/trainer_state.json @@ -0,0 +1,182 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 41.009166666666665, + "global_step": 1200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8955, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1689, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7154, + "step": 100 + }, + { + "epoch": 5.0, + "learning_rate": 0.0005833333333333333, + "loss": 6.5645, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.3217, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 6.2449, + "step": 250 + }, + { + "epoch": 10.01, + "learning_rate": 0.0005, + "loss": 6.006, + "step": 300 + }, + { + "epoch": 12.0, + "learning_rate": 0.00047222222222222224, + "loss": 5.7678, + "step": 350 + }, + { + "epoch": 13.02, + "learning_rate": 0.00044444444444444436, + "loss": 5.2976, + "step": 400 + }, + { + "epoch": 15.01, + "learning_rate": 0.00041666666666666664, + "loss": 5.0428, + "step": 450 + }, + { + "epoch": 17.01, + "learning_rate": 0.00038888888888888887, + "loss": 4.7926, + "step": 500 + }, + { + "epoch": 18.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.5128, + "step": 550 + }, + { + "epoch": 20.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.4343, + "step": 600 + }, + { + "epoch": 22.01, + "learning_rate": 0.00030555555555555555, + "loss": 4.2938, + "step": 650 + }, + { + "epoch": 24.0, + "learning_rate": 0.0002777777777777778, + "loss": 4.1808, + "step": 700 + }, + { + "epoch": 25.02, + "learning_rate": 0.00025, + "loss": 4.0149, + "step": 750 + }, + { + "epoch": 27.01, + "learning_rate": 0.00022222222222222218, + "loss": 4.0023, + "step": 800 + }, + { + "epoch": 29.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.9298, + "step": 850 + }, + { + "epoch": 31.0, + "learning_rate": 0.00016666666666666666, + "loss": 3.8642, + "step": 900 + }, + { + "epoch": 32.02, + "learning_rate": 0.0001388888888888889, + "loss": 3.747, + "step": 950 + }, + { + "epoch": 34.01, + "learning_rate": 0.00011111111111111109, + "loss": 3.7634, + "step": 1000 + }, + { + "epoch": 34.01, + "eval_loss": 4.5402936935424805, + "eval_runtime": 5.0343, + "eval_samples_per_second": 20.857, + "eval_steps_per_second": 1.39, + "step": 1000 + }, + { + "epoch": 34.01, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_loss": 4.5402936935424805, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_ppl": 93.7183205435465, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_runtime": 5.0343, + "eval_/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 20.857, + "step": 1000 + }, + { + "epoch": 36.01, + "learning_rate": 8.333333333333333e-05, + "loss": 3.7216, + "step": 1050 + }, + { + "epoch": 37.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 3.6231, + "step": 1100 + }, + { + "epoch": 39.02, + "learning_rate": 2.7777777777777772e-05, + "loss": 3.6623, + "step": 1150 + }, + { + "epoch": 41.01, + "learning_rate": 0.0, + "loss": 3.6417, + "step": 1200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 3.3082445463552e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1200/training_args.bin b/checkpoint-1200/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..549697e20517d083291acd771c7e4f73a0a43ccb --- /dev/null +++ b/checkpoint-1200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1246cf5e18070da7948fefcc7820aec8b0a60d030d56ab5ce5ddfdd1b0e451a +size 3183 diff --git a/checkpoint-200/config.json b/checkpoint-200/config.json new file mode 100755 index 0000000000000000000000000000000000000000..8c8c89d1ef92210e20560d15855b17533ee84d76 --- /dev/null +++ b/checkpoint-200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..ff3c09defedb6aefa48bc8489e6aaf8b62d22fab --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b432c338f6a1115fe69d3a999182ab326a7f06d0d35ae86cc57c3e4a3e63489 +size 995603825 diff --git a/checkpoint-200/pytorch_model.bin b/checkpoint-200/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..c3b016d546484ab1f2d749d3ccb6f2e133c557b4 --- /dev/null +++ b/checkpoint-200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8764036a1e37c991da585d7ca792fce892eb2008cb6b83d2d09b9e7c0949eb7f +size 510396521 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..83fa17ed3ea6137d7ca4b458550b1040cf1a8954 --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0659a702435c2471de444fe0ad5befa4e8e6fc9a0289d8950a5ea19abd7131f +size 14567 diff --git a/checkpoint-200/scaler.pt b/checkpoint-200/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..365b52ebf376498237a843f6d7332e5a49b14902 --- /dev/null +++ b/checkpoint-200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb6982c29cd162f49aeb531674acf574eccd46a8f556bec596040d7c3b95200a +size 559 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..36cb8da739c80a63971a62f06f781c40ac0fceb2 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a992625ada6d884e508ff9392d16738b4a4163147f8fcbf9f46be82ecae9888 +size 623 diff --git a/checkpoint-200/special_tokens_map.json b/checkpoint-200/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-200/tokenizer_config.json b/checkpoint-200/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..27e65258e38b3749a4a9e4daf6f816c01c4ddb38 --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,46 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.0216666666666665, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8955, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1689, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7154, + "step": 100 + }, + { + "epoch": 5.0, + "learning_rate": 0.0005833333333333333, + "loss": 6.5645, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.3217, + "step": 200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 5.493926264832e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..549697e20517d083291acd771c7e4f73a0a43ccb --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1246cf5e18070da7948fefcc7820aec8b0a60d030d56ab5ce5ddfdd1b0e451a +size 3183 diff --git a/checkpoint-300/config.json b/checkpoint-300/config.json new file mode 100755 index 0000000000000000000000000000000000000000..8c8c89d1ef92210e20560d15855b17533ee84d76 --- /dev/null +++ b/checkpoint-300/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..99c96017d1f0febdcd144fb45177dc5e101632b6 --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebe6063539132f55a47ada80c615f986d7b1b65ff3314ade1193a446317052a4 +size 995604017 diff --git a/checkpoint-300/pytorch_model.bin b/checkpoint-300/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..496a3abf2a5faff09739bcf6364d72fd14b76065 --- /dev/null +++ b/checkpoint-300/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a74e08e5c3ac725cec4d10605d2762f048fa88ecf89c9251dfa18f7f23d60565 +size 510396521 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..1a3f644c37afa7ba12dc1d9a945b80b7d6b26a13 --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0d2762e59d8ed47f3e8660abc8e1602b7c22d0efdf5dfe43eb5107a8ca1439 +size 14567 diff --git a/checkpoint-300/scaler.pt b/checkpoint-300/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..01066cf4761ea9d2f7962f5181762f7b08690b79 --- /dev/null +++ b/checkpoint-300/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0967b9f865f16344c55f5ccc3cf7d6e8e97ca61dda304e931ca6bad130f48dd1 +size 559 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..681987f21e79cd4685aac32a5e6b74341e25d936 --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1db899b266916f792a0898ceb27a87eaf76647f10c29cc0c13ce22f12a12efd +size 623 diff --git a/checkpoint-300/special_tokens_map.json b/checkpoint-300/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-300/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-300/tokenizer_config.json b/checkpoint-300/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-300/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..0f179047693e04938507a9af970b53d1c9ee9763 --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,58 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.008333333333333, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8955, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1689, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7154, + "step": 100 + }, + { + "epoch": 5.0, + "learning_rate": 0.0005833333333333333, + "loss": 6.5645, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.3217, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 6.2449, + "step": 250 + }, + { + "epoch": 10.01, + "learning_rate": 0.0005, + "loss": 6.006, + "step": 300 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 8.26466697216e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..549697e20517d083291acd771c7e4f73a0a43ccb --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1246cf5e18070da7948fefcc7820aec8b0a60d030d56ab5ce5ddfdd1b0e451a +size 3183 diff --git a/checkpoint-400/config.json b/checkpoint-400/config.json new file mode 100755 index 0000000000000000000000000000000000000000..8c8c89d1ef92210e20560d15855b17533ee84d76 --- /dev/null +++ b/checkpoint-400/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..a46b2c66fccd99b789859b6c1fa0911cdedc02f0 --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d538747e3920d5d14d0453279b97247eeab4a64b5078c24141d18a945078b7ac +size 995604017 diff --git a/checkpoint-400/pytorch_model.bin b/checkpoint-400/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..69b9d19625fdb76820c6a55563aa0929e106e85a --- /dev/null +++ b/checkpoint-400/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c58050d0c1786aae445aac210f5efc4c45ab2ba94bf18d1950c282471ea79e7 +size 510396521 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..92b2d82e6b3a516529dd733a023a89f58ae2788d --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04d38ba89a88bd478c33588609a5f833c29f08fd8d3394d3277c55f13c6296dc +size 14567 diff --git a/checkpoint-400/scaler.pt b/checkpoint-400/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..9c7aef4199e98d81152810b661dbaffc01963383 --- /dev/null +++ b/checkpoint-400/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476e510c8ea7edbd2b51d1e76a4e037820a5639381c0d8b5d32dafa492795a1e +size 559 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..967673cfc91836d239beb5a7ede06992232ba309 --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db087d678047b5c346bbb8511936612c1fdf223c6fd70321e97369bc31ed76a8 +size 623 diff --git a/checkpoint-400/special_tokens_map.json b/checkpoint-400/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-400/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-400/tokenizer_config.json b/checkpoint-400/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-400/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..e3378c04399244d587d63f15072f06041dad19cf --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,70 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 13.019166666666667, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8955, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1689, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7154, + "step": 100 + }, + { + "epoch": 5.0, + "learning_rate": 0.0005833333333333333, + "loss": 6.5645, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.3217, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 6.2449, + "step": 250 + }, + { + "epoch": 10.01, + "learning_rate": 0.0005, + "loss": 6.006, + "step": 300 + }, + { + "epoch": 12.0, + "learning_rate": 0.00047222222222222224, + "loss": 5.7678, + "step": 350 + }, + { + "epoch": 13.02, + "learning_rate": 0.00044444444444444436, + "loss": 5.2976, + "step": 400 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.1011630104576e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..549697e20517d083291acd771c7e4f73a0a43ccb --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1246cf5e18070da7948fefcc7820aec8b0a60d030d56ab5ce5ddfdd1b0e451a +size 3183 diff --git a/checkpoint-500/config.json b/checkpoint-500/config.json new file mode 100755 index 0000000000000000000000000000000000000000..8c8c89d1ef92210e20560d15855b17533ee84d76 --- /dev/null +++ b/checkpoint-500/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..778d730e02b91586bae851e6022fa81144df8fca --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19fe224e3e7429afc117a85fc4999c120259fb03085484abf96f624b26795799 +size 995604017 diff --git a/checkpoint-500/pytorch_model.bin b/checkpoint-500/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..99296c48d4f40dc43779553c425f50ca43d6d5a3 --- /dev/null +++ b/checkpoint-500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdb7f4fcd7344da5bf6b88daa777671d36993a011d318a2d7dfebdfe2e857cfd +size 510396521 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..850424b84e129cad2d4393c4faafdbe342aebc45 --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9defccd97f261d889fb9f958fda24b416cce9daf86201d0383155736430060fb +size 14567 diff --git a/checkpoint-500/scaler.pt b/checkpoint-500/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..e8b96c9c2837f2c95b1d07d4fc3f245f9ad1ef62 --- /dev/null +++ b/checkpoint-500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa4c7be44c959599b8b43bb9bc3371e9e4e5bbc5758b3ab5afcccfda3e72e67 +size 559 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..77b958181a5b47b022b96b38a6207f274a1b6604 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026fae4a90d56c24de94b10dfa7a75b6ba4e43bd5c1a3fdb2d77356b81cd6f8a +size 623 diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..ce3562316d7ffc52a8e13634b715f68dea62ef79 --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,82 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 17.00583333333333, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8955, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1689, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7154, + "step": 100 + }, + { + "epoch": 5.0, + "learning_rate": 0.0005833333333333333, + "loss": 6.5645, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.3217, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 6.2449, + "step": 250 + }, + { + "epoch": 10.01, + "learning_rate": 0.0005, + "loss": 6.006, + "step": 300 + }, + { + "epoch": 12.0, + "learning_rate": 0.00047222222222222224, + "loss": 5.7678, + "step": 350 + }, + { + "epoch": 13.02, + "learning_rate": 0.00044444444444444436, + "loss": 5.2976, + "step": 400 + }, + { + "epoch": 15.01, + "learning_rate": 0.00041666666666666664, + "loss": 5.0428, + "step": 450 + }, + { + "epoch": 17.01, + "learning_rate": 0.00038888888888888887, + "loss": 4.7926, + "step": 500 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.3782370811904e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..549697e20517d083291acd771c7e4f73a0a43ccb --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1246cf5e18070da7948fefcc7820aec8b0a60d030d56ab5ce5ddfdd1b0e451a +size 3183 diff --git a/checkpoint-600/config.json b/checkpoint-600/config.json new file mode 100755 index 0000000000000000000000000000000000000000..8c8c89d1ef92210e20560d15855b17533ee84d76 --- /dev/null +++ b/checkpoint-600/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..a55b374883805f20ee87be44676fdfd290407ac8 --- /dev/null +++ b/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4624dc8f2d4b2107423e54419586982b146b4e65c309686408e61c48242617f +size 995604017 diff --git a/checkpoint-600/pytorch_model.bin b/checkpoint-600/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..2d47b328b1456e1514d2ea6a8158a1aeacf32f21 --- /dev/null +++ b/checkpoint-600/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ad37978178d083c06c730d2e6cecd78b7dff33df8d8366104979d93a21a9e67 +size 510396521 diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..2c257cc4e15e5361edd3fc7c2b25bf5b629a4832 --- /dev/null +++ b/checkpoint-600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2286672ce40e94e80ee1b5231ee5622093ae4b2a0015e3d3059a95777d3b334e +size 14567 diff --git a/checkpoint-600/scaler.pt b/checkpoint-600/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..f95b3e36da01561ec333a83ee8419ad225633e06 --- /dev/null +++ b/checkpoint-600/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8415b86bbce347c0df306b84a695add049c2a3b2d0b6f4dda3bf036d341150 +size 559 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..62e5359badd619e7dad2c51d98fa8043d9948f0b --- /dev/null +++ b/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700940432b1c2117248896e2ce5a58d93c051d92ea97707f74d76bf1ef24deee +size 623 diff --git a/checkpoint-600/special_tokens_map.json b/checkpoint-600/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-600/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-600/tokenizer_config.json b/checkpoint-600/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-600/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..0ac1a438636d8b7c67e136fe7924439e6b488a88 --- /dev/null +++ b/checkpoint-600/trainer_state.json @@ -0,0 +1,94 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 20.016666666666666, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8955, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1689, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7154, + "step": 100 + }, + { + "epoch": 5.0, + "learning_rate": 0.0005833333333333333, + "loss": 6.5645, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.3217, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 6.2449, + "step": 250 + }, + { + "epoch": 10.01, + "learning_rate": 0.0005, + "loss": 6.006, + "step": 300 + }, + { + "epoch": 12.0, + "learning_rate": 0.00047222222222222224, + "loss": 5.7678, + "step": 350 + }, + { + "epoch": 13.02, + "learning_rate": 0.00044444444444444436, + "loss": 5.2976, + "step": 400 + }, + { + "epoch": 15.01, + "learning_rate": 0.00041666666666666664, + "loss": 5.0428, + "step": 450 + }, + { + "epoch": 17.01, + "learning_rate": 0.00038888888888888887, + "loss": 4.7926, + "step": 500 + }, + { + "epoch": 18.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.5128, + "step": 550 + }, + { + "epoch": 20.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.4343, + "step": 600 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.652933394432e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..549697e20517d083291acd771c7e4f73a0a43ccb --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1246cf5e18070da7948fefcc7820aec8b0a60d030d56ab5ce5ddfdd1b0e451a +size 3183 diff --git a/checkpoint-700/config.json b/checkpoint-700/config.json new file mode 100755 index 0000000000000000000000000000000000000000..8c8c89d1ef92210e20560d15855b17533ee84d76 --- /dev/null +++ b/checkpoint-700/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..06a36a98b88e4cad2b3d8ce817aa2aa3d4428cab --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97792c9fe78e945c4598ce54f37691154467660457d65178c378e12ab1d7f53a +size 995604017 diff --git a/checkpoint-700/pytorch_model.bin b/checkpoint-700/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..6f910362ecb06e482fa96d2d38faac9d3d0ecba3 --- /dev/null +++ b/checkpoint-700/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ff3cdab58bd3667913a1157ac5413082e441d8f762c65fdbed81e394705a63 +size 510396521 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..dc641f836255c07d36d90dab3b3a172578edb374 --- /dev/null +++ b/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:291b564dc55a1c18a9b8b0c7f524d9c908b09c98e8874a53caa017a7bb646a35 +size 14567 diff --git a/checkpoint-700/scaler.pt b/checkpoint-700/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..12f2b8ec834e54a2bd7cfdd0e07b0c6e125b6490 --- /dev/null +++ b/checkpoint-700/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb213daf5cce18a5f92167ca14da9df084d907f2b9796efc4666630f312b58c +size 559 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..42408b6253265af34ae78746144fbba9316e0d7e --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d4910fd408e002ebeff50d62bfb043dcae5ef658777d0c3ee4a3bbb515ec15 +size 623 diff --git a/checkpoint-700/special_tokens_map.json b/checkpoint-700/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-700/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-700/tokenizer_config.json b/checkpoint-700/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-700/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..8bffaaf10e1bded034c876096d1221b8d2251132 --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,106 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 24.003333333333334, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8955, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1689, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7154, + "step": 100 + }, + { + "epoch": 5.0, + "learning_rate": 0.0005833333333333333, + "loss": 6.5645, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.3217, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 6.2449, + "step": 250 + }, + { + "epoch": 10.01, + "learning_rate": 0.0005, + "loss": 6.006, + "step": 300 + }, + { + "epoch": 12.0, + "learning_rate": 0.00047222222222222224, + "loss": 5.7678, + "step": 350 + }, + { + "epoch": 13.02, + "learning_rate": 0.00044444444444444436, + "loss": 5.2976, + "step": 400 + }, + { + "epoch": 15.01, + "learning_rate": 0.00041666666666666664, + "loss": 5.0428, + "step": 450 + }, + { + "epoch": 17.01, + "learning_rate": 0.00038888888888888887, + "loss": 4.7926, + "step": 500 + }, + { + "epoch": 18.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.5128, + "step": 550 + }, + { + "epoch": 20.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.4343, + "step": 600 + }, + { + "epoch": 22.01, + "learning_rate": 0.00030555555555555555, + "loss": 4.2938, + "step": 650 + }, + { + "epoch": 24.0, + "learning_rate": 0.0002777777777777778, + "loss": 4.1808, + "step": 700 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.9300074651648e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..549697e20517d083291acd771c7e4f73a0a43ccb --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1246cf5e18070da7948fefcc7820aec8b0a60d030d56ab5ce5ddfdd1b0e451a +size 3183 diff --git a/checkpoint-800/config.json b/checkpoint-800/config.json new file mode 100755 index 0000000000000000000000000000000000000000..8c8c89d1ef92210e20560d15855b17533ee84d76 --- /dev/null +++ b/checkpoint-800/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..91daff964c66cd8a180c745ff5184bc71d330656 --- /dev/null +++ b/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29b1f34faee3d71a6d159105326cceae0897cb49d26dabf945503048f1745e8e +size 995604017 diff --git a/checkpoint-800/pytorch_model.bin b/checkpoint-800/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..2837aa2fad674300e05e033806030f2eca8e8593 --- /dev/null +++ b/checkpoint-800/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1acb74f9ccd178deb43615cd5f6832f1ffb57a1c83ca151a50e4e375b1c5f645 +size 510396521 diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..44db0d60911a9cec4becbae74b167ab9be67b6ec --- /dev/null +++ b/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:811b5ecc8c50bdf7b9fce31fa5a08cb9299182dd2e6c67eea5d337f727826868 +size 14567 diff --git a/checkpoint-800/scaler.pt b/checkpoint-800/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..b3c73372264156b02df8dada2192ee3c96dd5fc4 --- /dev/null +++ b/checkpoint-800/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2074cdcefbaa0a39f736d6b0f7bf018c350d49e85648bc8accc4f756ad816e +size 559 diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..75713dbce3771306ca00343ecc497c4f19a01d03 --- /dev/null +++ b/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27fb255c84833fb6ab5d93679cb236a569de9a1c4f805f72a2f60a2bc7c7499 +size 623 diff --git a/checkpoint-800/special_tokens_map.json b/checkpoint-800/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-800/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-800/tokenizer_config.json b/checkpoint-800/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-800/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..9dc7a53f631e4b98ee3095a374adcb10d518b783 --- /dev/null +++ b/checkpoint-800/trainer_state.json @@ -0,0 +1,118 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 27.014166666666668, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8955, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1689, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7154, + "step": 100 + }, + { + "epoch": 5.0, + "learning_rate": 0.0005833333333333333, + "loss": 6.5645, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.3217, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 6.2449, + "step": 250 + }, + { + "epoch": 10.01, + "learning_rate": 0.0005, + "loss": 6.006, + "step": 300 + }, + { + "epoch": 12.0, + "learning_rate": 0.00047222222222222224, + "loss": 5.7678, + "step": 350 + }, + { + "epoch": 13.02, + "learning_rate": 0.00044444444444444436, + "loss": 5.2976, + "step": 400 + }, + { + "epoch": 15.01, + "learning_rate": 0.00041666666666666664, + "loss": 5.0428, + "step": 450 + }, + { + "epoch": 17.01, + "learning_rate": 0.00038888888888888887, + "loss": 4.7926, + "step": 500 + }, + { + "epoch": 18.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.5128, + "step": 550 + }, + { + "epoch": 20.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.4343, + "step": 600 + }, + { + "epoch": 22.01, + "learning_rate": 0.00030555555555555555, + "loss": 4.2938, + "step": 650 + }, + { + "epoch": 24.0, + "learning_rate": 0.0002777777777777778, + "loss": 4.1808, + "step": 700 + }, + { + "epoch": 25.02, + "learning_rate": 0.00025, + "loss": 4.0149, + "step": 750 + }, + { + "epoch": 27.01, + "learning_rate": 0.00022222222222222218, + "loss": 4.0023, + "step": 800 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.2047037784064e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..549697e20517d083291acd771c7e4f73a0a43ccb --- /dev/null +++ b/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1246cf5e18070da7948fefcc7820aec8b0a60d030d56ab5ce5ddfdd1b0e451a +size 3183 diff --git a/checkpoint-900/config.json b/checkpoint-900/config.json new file mode 100755 index 0000000000000000000000000000000000000000..8c8c89d1ef92210e20560d15855b17533ee84d76 --- /dev/null +++ b/checkpoint-900/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt new file mode 100755 index 0000000000000000000000000000000000000000..3b9369acd126ebae89024786e567db0e40e6976c --- /dev/null +++ b/checkpoint-900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03956f0d3d3d80ee16a12535d4862f6472a4f6a10e9777b70dd9e62b023109df +size 995604017 diff --git a/checkpoint-900/pytorch_model.bin b/checkpoint-900/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..f428cf4cffb5664b172b2acb952f66658ccfc1a5 --- /dev/null +++ b/checkpoint-900/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b7172144fb6262269c880699c87a28c3fe8a8492d43e10eeaaa0f340c0ad9f2 +size 510396521 diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth new file mode 100755 index 0000000000000000000000000000000000000000..af04df5917e77bdfea22ab3222a7ccf04a4f8396 --- /dev/null +++ b/checkpoint-900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60101699297b5effe4452ff807929c17e39a4044f33d64adbd3151f9fb748de9 +size 14567 diff --git a/checkpoint-900/scaler.pt b/checkpoint-900/scaler.pt new file mode 100755 index 0000000000000000000000000000000000000000..be54cb13c777bc6feccb478ff218e7e21fad482a --- /dev/null +++ b/checkpoint-900/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8695f57df923e22b943b0b0f2b9cc7007008e80b53ccee275b3a35963fe67e9 +size 559 diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt new file mode 100755 index 0000000000000000000000000000000000000000..539d7c83ea252818dc9cbffac08cf340bb05a454 --- /dev/null +++ b/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cb09e5db72772a15094286e93cbb61d745d9b63863703cf53da0bcb9827821 +size 623 diff --git a/checkpoint-900/special_tokens_map.json b/checkpoint-900/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-900/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-900/tokenizer_config.json b/checkpoint-900/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-900/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json new file mode 100755 index 0000000000000000000000000000000000000000..eaf5075ebfc44e75bbcc529c8188b0b43a2dd578 --- /dev/null +++ b/checkpoint-900/trainer_state.json @@ -0,0 +1,130 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 31.000833333333333, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 10.8955, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1689, + "step": 50 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005, + "loss": 6.7154, + "step": 100 + }, + { + "epoch": 5.0, + "learning_rate": 0.0005833333333333333, + "loss": 6.5645, + "step": 150 + }, + { + "epoch": 6.02, + "learning_rate": 0.0005555555555555556, + "loss": 6.3217, + "step": 200 + }, + { + "epoch": 8.02, + "learning_rate": 0.0005277777777777777, + "loss": 6.2449, + "step": 250 + }, + { + "epoch": 10.01, + "learning_rate": 0.0005, + "loss": 6.006, + "step": 300 + }, + { + "epoch": 12.0, + "learning_rate": 0.00047222222222222224, + "loss": 5.7678, + "step": 350 + }, + { + "epoch": 13.02, + "learning_rate": 0.00044444444444444436, + "loss": 5.2976, + "step": 400 + }, + { + "epoch": 15.01, + "learning_rate": 0.00041666666666666664, + "loss": 5.0428, + "step": 450 + }, + { + "epoch": 17.01, + "learning_rate": 0.00038888888888888887, + "loss": 4.7926, + "step": 500 + }, + { + "epoch": 18.02, + "learning_rate": 0.0003611111111111111, + "loss": 4.5128, + "step": 550 + }, + { + "epoch": 20.02, + "learning_rate": 0.0003333333333333333, + "loss": 4.4343, + "step": 600 + }, + { + "epoch": 22.01, + "learning_rate": 0.00030555555555555555, + "loss": 4.2938, + "step": 650 + }, + { + "epoch": 24.0, + "learning_rate": 0.0002777777777777778, + "loss": 4.1808, + "step": 700 + }, + { + "epoch": 25.02, + "learning_rate": 0.00025, + "loss": 4.0149, + "step": 750 + }, + { + "epoch": 27.01, + "learning_rate": 0.00022222222222222218, + "loss": 4.0023, + "step": 800 + }, + { + "epoch": 29.01, + "learning_rate": 0.00019444444444444443, + "loss": 3.9298, + "step": 850 + }, + { + "epoch": 31.0, + "learning_rate": 0.00016666666666666666, + "loss": 3.8642, + "step": 900 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.4817778491392e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..549697e20517d083291acd771c7e4f73a0a43ccb --- /dev/null +++ b/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1246cf5e18070da7948fefcc7820aec8b0a60d030d56ab5ce5ddfdd1b0e451a +size 3183 diff --git a/config.json b/config.json new file mode 100755 index 0000000000000000000000000000000000000000..8c8c89d1ef92210e20560d15855b17533ee84d76 --- /dev/null +++ b/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 0, + "embd_pdrop": 0.1, + "eos_token_id": 0, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/metrics.json b/metrics.json new file mode 100755 index 0000000000000000000000000000000000000000..ca3cbc7c15622e913b58996ecfd8dabef97d1cb4 --- /dev/null +++ b/metrics.json @@ -0,0 +1,2502 @@ +{"num_parameters": 124439808, "trainable_parameters": 124439808, "step": 0} +{"train_info/time_between_train_steps": 4.292348861694336, "step": 0} +{"info/global_step": 1, "train_info/time_within_train_step": 28.554610013961792, "step": 1} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 19761.71484375, "train_info/memory_reserved": 22624.0, "train_info/memory_max_reserved": 22624.0, "_timestamp": 1736102926, "_runtime": 53}, "step": 1} +{"logs": {"train/loss": 10.8955, "train/learning_rate": 4.9999999999999996e-06, "train/epoch": 0.0, "_timestamp": 1736102926, "_runtime": 53}, "step": 1} +{"train_info/time_between_train_steps": 0.026253938674926758, "step": 1} +{"info/global_step": 2, "train_info/time_within_train_step": 27.991029739379883, "step": 2} +{"train_info/time_between_train_steps": 0.009650707244873047, "step": 2} +{"info/global_step": 3, "train_info/time_within_train_step": 27.745694637298584, "step": 3} +{"train_info/time_between_train_steps": 0.005402565002441406, "step": 3} +{"info/global_step": 4, "train_info/time_within_train_step": 27.899869203567505, "step": 4} +{"train_info/time_between_train_steps": 0.0054285526275634766, "step": 4} +{"info/global_step": 5, "train_info/time_within_train_step": 27.813738346099854, "step": 5} +{"train_info/time_between_train_steps": 0.005475044250488281, "step": 5} +{"info/global_step": 6, "train_info/time_within_train_step": 27.882979154586792, "step": 6} +{"train_info/time_between_train_steps": 0.0054891109466552734, "step": 6} +{"info/global_step": 7, "train_info/time_within_train_step": 27.762709856033325, "step": 7} +{"train_info/time_between_train_steps": 0.005458354949951172, "step": 7} +{"info/global_step": 8, "train_info/time_within_train_step": 27.862273454666138, "step": 8} +{"train_info/time_between_train_steps": 0.005442380905151367, "step": 8} +{"info/global_step": 9, "train_info/time_within_train_step": 27.74773597717285, "step": 9} +{"train_info/time_between_train_steps": 0.0055654048919677734, "step": 9} +{"info/global_step": 10, "train_info/time_within_train_step": 27.81714105606079, "step": 10} +{"train_info/time_between_train_steps": 0.00519108772277832, "step": 10} +{"info/global_step": 11, "train_info/time_within_train_step": 27.745790243148804, "step": 11} +{"train_info/time_between_train_steps": 0.0052487850189208984, "step": 11} +{"info/global_step": 12, "train_info/time_within_train_step": 27.722702503204346, "step": 12} +{"train_info/time_between_train_steps": 0.0050885677337646484, "step": 12} +{"info/global_step": 13, "train_info/time_within_train_step": 27.714336156845093, "step": 13} +{"train_info/time_between_train_steps": 0.005147457122802734, "step": 13} +{"info/global_step": 14, "train_info/time_within_train_step": 27.78301763534546, "step": 14} +{"train_info/time_between_train_steps": 0.0051457881927490234, "step": 14} +{"info/global_step": 15, "train_info/time_within_train_step": 27.92108678817749, "step": 15} +{"train_info/time_between_train_steps": 0.009785652160644531, "step": 15} +{"info/global_step": 16, "train_info/time_within_train_step": 27.94456195831299, "step": 16} +{"train_info/time_between_train_steps": 0.009927988052368164, "step": 16} +{"info/global_step": 17, "train_info/time_within_train_step": 27.719585418701172, "step": 17} +{"train_info/time_between_train_steps": 0.005074262619018555, "step": 17} +{"info/global_step": 18, "train_info/time_within_train_step": 27.720096111297607, "step": 18} +{"train_info/time_between_train_steps": 0.005132198333740234, "step": 18} +{"info/global_step": 19, "train_info/time_within_train_step": 27.759334802627563, "step": 19} +{"train_info/time_between_train_steps": 0.005214691162109375, "step": 19} +{"info/global_step": 20, "train_info/time_within_train_step": 27.732022762298584, "step": 20} +{"train_info/time_between_train_steps": 0.005163669586181641, "step": 20} +{"info/global_step": 21, "train_info/time_within_train_step": 27.733471870422363, "step": 21} +{"train_info/time_between_train_steps": 0.005465984344482422, "step": 21} +{"info/global_step": 22, "train_info/time_within_train_step": 27.72275733947754, "step": 22} +{"train_info/time_between_train_steps": 0.009939432144165039, "step": 22} +{"info/global_step": 23, "train_info/time_within_train_step": 27.720118761062622, "step": 23} +{"train_info/time_between_train_steps": 0.00516200065612793, "step": 23} +{"info/global_step": 24, "train_info/time_within_train_step": 27.75191879272461, "step": 24} +{"train_info/time_between_train_steps": 0.01003885269165039, "step": 24} +{"info/global_step": 25, "train_info/time_within_train_step": 27.726535320281982, "step": 25} +{"train_info/time_between_train_steps": 0.00529789924621582, "step": 25} +{"info/global_step": 26, "train_info/time_within_train_step": 28.063698530197144, "step": 26} +{"train_info/time_between_train_steps": 0.005324840545654297, "step": 26} +{"info/global_step": 27, "train_info/time_within_train_step": 30.421839952468872, "step": 27} +{"train_info/time_between_train_steps": 0.005215167999267578, "step": 27} +{"info/global_step": 28, "train_info/time_within_train_step": 27.860331296920776, "step": 28} +{"train_info/time_between_train_steps": 0.00554656982421875, "step": 28} +{"info/global_step": 29, "train_info/time_within_train_step": 27.783212661743164, "step": 29} +{"train_info/time_between_train_steps": 0.0058498382568359375, "step": 29} +{"train_info/time_between_train_steps": 28.053426504135132, "step": 29} +{"info/global_step": 30, "train_info/time_within_train_step": 27.784778356552124, "step": 30} +{"train_info/time_between_train_steps": 0.005707740783691406, "step": 30} +{"info/global_step": 31, "train_info/time_within_train_step": 28.08927321434021, "step": 31} +{"train_info/time_between_train_steps": 0.0055921077728271484, "step": 31} +{"info/global_step": 32, "train_info/time_within_train_step": 27.74448251724243, "step": 32} +{"train_info/time_between_train_steps": 0.005652189254760742, "step": 32} +{"info/global_step": 33, "train_info/time_within_train_step": 27.908212900161743, "step": 33} +{"train_info/time_between_train_steps": 0.005631208419799805, "step": 33} +{"info/global_step": 34, "train_info/time_within_train_step": 27.741475820541382, "step": 34} +{"train_info/time_between_train_steps": 0.005792379379272461, "step": 34} +{"info/global_step": 35, "train_info/time_within_train_step": 27.932087182998657, "step": 35} +{"train_info/time_between_train_steps": 0.005433797836303711, "step": 35} +{"info/global_step": 36, "train_info/time_within_train_step": 27.83171558380127, "step": 36} +{"train_info/time_between_train_steps": 0.010277271270751953, "step": 36} +{"info/global_step": 37, "train_info/time_within_train_step": 27.93983244895935, "step": 37} +{"train_info/time_between_train_steps": 0.00529789924621582, "step": 37} +{"info/global_step": 38, "train_info/time_within_train_step": 27.76951766014099, "step": 38} +{"train_info/time_between_train_steps": 0.0059871673583984375, "step": 38} +{"info/global_step": 39, "train_info/time_within_train_step": 27.81393051147461, "step": 39} +{"train_info/time_between_train_steps": 0.005247354507446289, "step": 39} +{"info/global_step": 40, "train_info/time_within_train_step": 27.766051769256592, "step": 40} +{"train_info/time_between_train_steps": 0.0051937103271484375, "step": 40} +{"info/global_step": 41, "train_info/time_within_train_step": 28.452638387680054, "step": 41} +{"train_info/time_between_train_steps": 0.009634733200073242, "step": 41} +{"info/global_step": 42, "train_info/time_within_train_step": 28.819684267044067, "step": 42} +{"train_info/time_between_train_steps": 0.005264997482299805, "step": 42} +{"info/global_step": 43, "train_info/time_within_train_step": 29.354187726974487, "step": 43} +{"train_info/time_between_train_steps": 0.005579948425292969, "step": 43} +{"info/global_step": 44, "train_info/time_within_train_step": 30.516908407211304, "step": 44} +{"train_info/time_between_train_steps": 0.005427837371826172, "step": 44} +{"info/global_step": 45, "train_info/time_within_train_step": 28.155130624771118, "step": 45} +{"train_info/time_between_train_steps": 0.005769491195678711, "step": 45} +{"info/global_step": 46, "train_info/time_within_train_step": 28.054415702819824, "step": 46} +{"train_info/time_between_train_steps": 0.019214391708374023, "step": 46} +{"info/global_step": 47, "train_info/time_within_train_step": 27.773606061935425, "step": 47} +{"train_info/time_between_train_steps": 0.005396127700805664, "step": 47} +{"info/global_step": 48, "train_info/time_within_train_step": 27.837465524673462, "step": 48} +{"train_info/time_between_train_steps": 0.010444879531860352, "step": 48} +{"info/global_step": 49, "train_info/time_within_train_step": 27.863500118255615, "step": 49} +{"train_info/time_between_train_steps": 0.005441904067993164, "step": 49} +{"info/global_step": 50, "train_info/time_within_train_step": 27.75362515449524, "step": 50} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736104337, "_runtime": 1464}, "step": 50} +{"logs": {"train/loss": 8.1689, "train/learning_rate": 0.00025, "train/epoch": 1.02, "_timestamp": 1736104337, "_runtime": 1464}, "step": 50} +{"train_info/time_between_train_steps": 0.03341031074523926, "step": 50} +{"info/global_step": 51, "train_info/time_within_train_step": 27.867873907089233, "step": 51} +{"train_info/time_between_train_steps": 0.0052835941314697266, "step": 51} +{"info/global_step": 52, "train_info/time_within_train_step": 27.749942302703857, "step": 52} +{"train_info/time_between_train_steps": 0.005439281463623047, "step": 52} +{"info/global_step": 53, "train_info/time_within_train_step": 27.76623797416687, "step": 53} +{"train_info/time_between_train_steps": 0.005480527877807617, "step": 53} +{"info/global_step": 54, "train_info/time_within_train_step": 27.959596872329712, "step": 54} +{"train_info/time_between_train_steps": 0.010314702987670898, "step": 54} +{"info/global_step": 55, "train_info/time_within_train_step": 27.902862548828125, "step": 55} +{"train_info/time_between_train_steps": 0.010084152221679688, "step": 55} +{"info/global_step": 56, "train_info/time_within_train_step": 27.949700355529785, "step": 56} +{"train_info/time_between_train_steps": 0.010593652725219727, "step": 56} +{"info/global_step": 57, "train_info/time_within_train_step": 27.957653760910034, "step": 57} +{"train_info/time_between_train_steps": 0.006032228469848633, "step": 57} +{"info/global_step": 58, "train_info/time_within_train_step": 27.883302688598633, "step": 58} +{"train_info/time_between_train_steps": 0.006518125534057617, "step": 58} +{"train_info/time_between_train_steps": 28.583098888397217, "step": 58} +{"info/global_step": 59, "train_info/time_within_train_step": 27.763591289520264, "step": 59} +{"train_info/time_between_train_steps": 0.006171464920043945, "step": 59} +{"info/global_step": 60, "train_info/time_within_train_step": 27.932090044021606, "step": 60} +{"train_info/time_between_train_steps": 0.006047248840332031, "step": 60} +{"info/global_step": 61, "train_info/time_within_train_step": 27.848353147506714, "step": 61} +{"train_info/time_between_train_steps": 0.010370254516601562, "step": 61} +{"info/global_step": 62, "train_info/time_within_train_step": 27.91787052154541, "step": 62} +{"train_info/time_between_train_steps": 0.007394075393676758, "step": 62} +{"info/global_step": 63, "train_info/time_within_train_step": 27.79482340812683, "step": 63} +{"train_info/time_between_train_steps": 0.00561976432800293, "step": 63} +{"info/global_step": 64, "train_info/time_within_train_step": 27.86776089668274, "step": 64} +{"train_info/time_between_train_steps": 0.005394697189331055, "step": 64} +{"info/global_step": 65, "train_info/time_within_train_step": 27.74150776863098, "step": 65} +{"train_info/time_between_train_steps": 0.006419181823730469, "step": 65} +{"info/global_step": 66, "train_info/time_within_train_step": 27.891186237335205, "step": 66} +{"train_info/time_between_train_steps": 0.005555152893066406, "step": 66} +{"info/global_step": 67, "train_info/time_within_train_step": 27.743926286697388, "step": 67} +{"train_info/time_between_train_steps": 0.005396842956542969, "step": 67} +{"info/global_step": 68, "train_info/time_within_train_step": 27.894370794296265, "step": 68} +{"train_info/time_between_train_steps": 0.005622386932373047, "step": 68} +{"info/global_step": 69, "train_info/time_within_train_step": 27.740456342697144, "step": 69} +{"train_info/time_between_train_steps": 0.005275726318359375, "step": 69} +{"info/global_step": 70, "train_info/time_within_train_step": 27.729052543640137, "step": 70} +{"train_info/time_between_train_steps": 0.005235433578491211, "step": 70} +{"info/global_step": 71, "train_info/time_within_train_step": 27.760767936706543, "step": 71} +{"train_info/time_between_train_steps": 0.0053713321685791016, "step": 71} +{"info/global_step": 72, "train_info/time_within_train_step": 27.73664355278015, "step": 72} +{"train_info/time_between_train_steps": 0.0051190853118896484, "step": 72} +{"info/global_step": 73, "train_info/time_within_train_step": 27.73826265335083, "step": 73} +{"train_info/time_between_train_steps": 0.006188392639160156, "step": 73} +{"info/global_step": 74, "train_info/time_within_train_step": 27.7374324798584, "step": 74} +{"train_info/time_between_train_steps": 0.0051746368408203125, "step": 74} +{"info/global_step": 75, "train_info/time_within_train_step": 27.780307054519653, "step": 75} +{"train_info/time_between_train_steps": 0.005582332611083984, "step": 75} +{"info/global_step": 76, "train_info/time_within_train_step": 27.765741109848022, "step": 76} +{"train_info/time_between_train_steps": 0.005120038986206055, "step": 76} +{"info/global_step": 77, "train_info/time_within_train_step": 27.842517137527466, "step": 77} +{"train_info/time_between_train_steps": 0.005292415618896484, "step": 77} +{"info/global_step": 78, "train_info/time_within_train_step": 27.74804663658142, "step": 78} +{"train_info/time_between_train_steps": 0.005303382873535156, "step": 78} +{"info/global_step": 79, "train_info/time_within_train_step": 27.74695134162903, "step": 79} +{"train_info/time_between_train_steps": 0.0051801204681396484, "step": 79} +{"info/global_step": 80, "train_info/time_within_train_step": 27.734277486801147, "step": 80} +{"train_info/time_between_train_steps": 0.005166292190551758, "step": 80} +{"info/global_step": 81, "train_info/time_within_train_step": 27.73421812057495, "step": 81} +{"train_info/time_between_train_steps": 0.005282878875732422, "step": 81} +{"info/global_step": 82, "train_info/time_within_train_step": 27.72254228591919, "step": 82} +{"train_info/time_between_train_steps": 0.005378246307373047, "step": 82} +{"info/global_step": 83, "train_info/time_within_train_step": 27.72160243988037, "step": 83} +{"train_info/time_between_train_steps": 0.005222797393798828, "step": 83} +{"info/global_step": 84, "train_info/time_within_train_step": 27.722479820251465, "step": 84} +{"train_info/time_between_train_steps": 0.0053005218505859375, "step": 84} +{"info/global_step": 85, "train_info/time_within_train_step": 27.767858743667603, "step": 85} +{"train_info/time_between_train_steps": 0.005214214324951172, "step": 85} +{"info/global_step": 86, "train_info/time_within_train_step": 27.764195919036865, "step": 86} +{"train_info/time_between_train_steps": 0.005716562271118164, "step": 86} +{"info/global_step": 87, "train_info/time_within_train_step": 27.74385666847229, "step": 87} +{"train_info/time_between_train_steps": 0.006046772003173828, "step": 87} +{"train_info/time_between_train_steps": 28.428340673446655, "step": 87} +{"info/global_step": 88, "train_info/time_within_train_step": 27.723411321640015, "step": 88} +{"train_info/time_between_train_steps": 0.004894256591796875, "step": 88} +{"info/global_step": 89, "train_info/time_within_train_step": 27.911831617355347, "step": 89} +{"train_info/time_between_train_steps": 0.005771636962890625, "step": 89} +{"info/global_step": 90, "train_info/time_within_train_step": 27.776283264160156, "step": 90} +{"train_info/time_between_train_steps": 0.006305694580078125, "step": 90} +{"info/global_step": 91, "train_info/time_within_train_step": 28.17097234725952, "step": 91} +{"train_info/time_between_train_steps": 0.005532264709472656, "step": 91} +{"info/global_step": 92, "train_info/time_within_train_step": 27.874319314956665, "step": 92} +{"train_info/time_between_train_steps": 0.00569462776184082, "step": 92} +{"info/global_step": 93, "train_info/time_within_train_step": 27.936763286590576, "step": 93} +{"train_info/time_between_train_steps": 0.005503654479980469, "step": 93} +{"info/global_step": 94, "train_info/time_within_train_step": 27.75070357322693, "step": 94} +{"train_info/time_between_train_steps": 0.006264925003051758, "step": 94} +{"info/global_step": 95, "train_info/time_within_train_step": 27.93393349647522, "step": 95} +{"train_info/time_between_train_steps": 0.005316495895385742, "step": 95} +{"info/global_step": 96, "train_info/time_within_train_step": 27.762370824813843, "step": 96} +{"train_info/time_between_train_steps": 0.0058019161224365234, "step": 96} +{"info/global_step": 97, "train_info/time_within_train_step": 27.806018590927124, "step": 97} +{"train_info/time_between_train_steps": 0.005433320999145508, "step": 97} +{"info/global_step": 98, "train_info/time_within_train_step": 27.75100874900818, "step": 98} +{"train_info/time_between_train_steps": 0.005246162414550781, "step": 98} +{"info/global_step": 99, "train_info/time_within_train_step": 27.735946893692017, "step": 99} +{"train_info/time_between_train_steps": 0.005379676818847656, "step": 99} +{"info/global_step": 100, "train_info/time_within_train_step": 27.71377444267273, "step": 100} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736105786, "_runtime": 2913}, "step": 100} +{"logs": {"train/loss": 6.7154, "train/learning_rate": 0.0005, "train/epoch": 3.01, "_timestamp": 1736105786, "_runtime": 2913}, "step": 100} +{"train_info/time_between_train_steps": 79.62079381942749, "step": 100} +{"info/global_step": 101, "train_info/time_within_train_step": 27.759036540985107, "step": 101} +{"train_info/time_between_train_steps": 0.005654811859130859, "step": 101} +{"info/global_step": 102, "train_info/time_within_train_step": 27.781761646270752, "step": 102} +{"train_info/time_between_train_steps": 0.005449533462524414, "step": 102} +{"info/global_step": 103, "train_info/time_within_train_step": 27.75059485435486, "step": 103} +{"train_info/time_between_train_steps": 0.005269289016723633, "step": 103} +{"info/global_step": 104, "train_info/time_within_train_step": 27.75866675376892, "step": 104} +{"train_info/time_between_train_steps": 0.0059354305267333984, "step": 104} +{"info/global_step": 105, "train_info/time_within_train_step": 27.725574254989624, "step": 105} +{"train_info/time_between_train_steps": 0.0056116580963134766, "step": 105} +{"info/global_step": 106, "train_info/time_within_train_step": 27.738564252853394, "step": 106} +{"train_info/time_between_train_steps": 0.005216836929321289, "step": 106} +{"info/global_step": 107, "train_info/time_within_train_step": 27.825838565826416, "step": 107} +{"train_info/time_between_train_steps": 0.005284786224365234, "step": 107} +{"info/global_step": 108, "train_info/time_within_train_step": 27.728872776031494, "step": 108} +{"train_info/time_between_train_steps": 0.005298137664794922, "step": 108} +{"info/global_step": 109, "train_info/time_within_train_step": 27.71882200241089, "step": 109} +{"train_info/time_between_train_steps": 0.005236625671386719, "step": 109} +{"info/global_step": 110, "train_info/time_within_train_step": 27.72839117050171, "step": 110} +{"train_info/time_between_train_steps": 0.005348920822143555, "step": 110} +{"info/global_step": 111, "train_info/time_within_train_step": 27.72769832611084, "step": 111} +{"train_info/time_between_train_steps": 0.0053174495697021484, "step": 111} +{"info/global_step": 112, "train_info/time_within_train_step": 27.772429943084717, "step": 112} +{"train_info/time_between_train_steps": 0.005191326141357422, "step": 112} +{"info/global_step": 113, "train_info/time_within_train_step": 27.725492477416992, "step": 113} +{"train_info/time_between_train_steps": 0.005441188812255859, "step": 113} +{"info/global_step": 114, "train_info/time_within_train_step": 27.730209589004517, "step": 114} +{"train_info/time_between_train_steps": 0.005284309387207031, "step": 114} +{"info/global_step": 115, "train_info/time_within_train_step": 27.741047382354736, "step": 115} +{"train_info/time_between_train_steps": 0.006103038787841797, "step": 115} +{"info/global_step": 116, "train_info/time_within_train_step": 27.741374731063843, "step": 116} +{"train_info/time_between_train_steps": 0.006026744842529297, "step": 116} +{"train_info/time_between_train_steps": 28.094560861587524, "step": 116} +{"info/global_step": 117, "train_info/time_within_train_step": 27.73029136657715, "step": 117} +{"train_info/time_between_train_steps": 0.0050165653228759766, "step": 117} +{"info/global_step": 118, "train_info/time_within_train_step": 27.834237813949585, "step": 118} +{"train_info/time_between_train_steps": 0.005022764205932617, "step": 118} +{"info/global_step": 119, "train_info/time_within_train_step": 27.738675355911255, "step": 119} +{"train_info/time_between_train_steps": 0.005459308624267578, "step": 119} +{"info/global_step": 120, "train_info/time_within_train_step": 28.124236822128296, "step": 120} +{"train_info/time_between_train_steps": 0.005507707595825195, "step": 120} +{"info/global_step": 121, "train_info/time_within_train_step": 27.761492252349854, "step": 121} +{"train_info/time_between_train_steps": 0.0054857730865478516, "step": 121} +{"info/global_step": 122, "train_info/time_within_train_step": 28.061983346939087, "step": 122} +{"train_info/time_between_train_steps": 0.005474090576171875, "step": 122} +{"info/global_step": 123, "train_info/time_within_train_step": 27.73088574409485, "step": 123} +{"train_info/time_between_train_steps": 0.005324840545654297, "step": 123} +{"info/global_step": 124, "train_info/time_within_train_step": 27.99854016304016, "step": 124} +{"train_info/time_between_train_steps": 0.005518198013305664, "step": 124} +{"info/global_step": 125, "train_info/time_within_train_step": 27.757014513015747, "step": 125} +{"train_info/time_between_train_steps": 0.005232334136962891, "step": 125} +{"info/global_step": 126, "train_info/time_within_train_step": 27.808355569839478, "step": 126} +{"train_info/time_between_train_steps": 0.005500078201293945, "step": 126} +{"info/global_step": 127, "train_info/time_within_train_step": 27.74475646018982, "step": 127} +{"train_info/time_between_train_steps": 0.005266666412353516, "step": 127} +{"info/global_step": 128, "train_info/time_within_train_step": 28.25192904472351, "step": 128} +{"train_info/time_between_train_steps": 0.0055735111236572266, "step": 128} +{"info/global_step": 129, "train_info/time_within_train_step": 27.796892166137695, "step": 129} +{"train_info/time_between_train_steps": 0.010201215744018555, "step": 129} +{"info/global_step": 130, "train_info/time_within_train_step": 27.835719108581543, "step": 130} +{"train_info/time_between_train_steps": 0.005122661590576172, "step": 130} +{"info/global_step": 131, "train_info/time_within_train_step": 27.75009274482727, "step": 131} +{"train_info/time_between_train_steps": 0.005097150802612305, "step": 131} +{"info/global_step": 132, "train_info/time_within_train_step": 27.771708965301514, "step": 132} +{"train_info/time_between_train_steps": 0.005223989486694336, "step": 132} +{"info/global_step": 133, "train_info/time_within_train_step": 27.762232780456543, "step": 133} +{"train_info/time_between_train_steps": 0.005188941955566406, "step": 133} +{"info/global_step": 134, "train_info/time_within_train_step": 27.719998836517334, "step": 134} +{"train_info/time_between_train_steps": 0.005258083343505859, "step": 134} +{"info/global_step": 135, "train_info/time_within_train_step": 27.72526216506958, "step": 135} +{"train_info/time_between_train_steps": 0.00516819953918457, "step": 135} +{"info/global_step": 136, "train_info/time_within_train_step": 27.72535991668701, "step": 136} +{"train_info/time_between_train_steps": 0.0055675506591796875, "step": 136} +{"info/global_step": 137, "train_info/time_within_train_step": 27.75227189064026, "step": 137} +{"train_info/time_between_train_steps": 0.0051805973052978516, "step": 137} +{"info/global_step": 138, "train_info/time_within_train_step": 28.033772706985474, "step": 138} +{"train_info/time_between_train_steps": 0.005335330963134766, "step": 138} +{"info/global_step": 139, "train_info/time_within_train_step": 28.342337131500244, "step": 139} +{"train_info/time_between_train_steps": 0.00539398193359375, "step": 139} +{"info/global_step": 140, "train_info/time_within_train_step": 27.74720048904419, "step": 140} +{"train_info/time_between_train_steps": 0.00811147689819336, "step": 140} +{"info/global_step": 141, "train_info/time_within_train_step": 27.874452352523804, "step": 141} +{"train_info/time_between_train_steps": 0.006790637969970703, "step": 141} +{"info/global_step": 142, "train_info/time_within_train_step": 27.823111534118652, "step": 142} +{"train_info/time_between_train_steps": 0.006436824798583984, "step": 142} +{"info/global_step": 143, "train_info/time_within_train_step": 27.83260202407837, "step": 143} +{"train_info/time_between_train_steps": 0.0057909488677978516, "step": 143} +{"info/global_step": 144, "train_info/time_within_train_step": 27.78461194038391, "step": 144} +{"train_info/time_between_train_steps": 0.00628662109375, "step": 144} +{"info/global_step": 145, "train_info/time_within_train_step": 27.848263263702393, "step": 145} +{"train_info/time_between_train_steps": 0.007349491119384766, "step": 145} +{"train_info/time_between_train_steps": 28.399585008621216, "step": 145} +{"info/global_step": 146, "train_info/time_within_train_step": 27.797995567321777, "step": 146} +{"train_info/time_between_train_steps": 0.0059239864349365234, "step": 146} +{"info/global_step": 147, "train_info/time_within_train_step": 27.93769931793213, "step": 147} +{"train_info/time_between_train_steps": 0.00553584098815918, "step": 147} +{"info/global_step": 148, "train_info/time_within_train_step": 27.748742818832397, "step": 148} +{"train_info/time_between_train_steps": 0.005318403244018555, "step": 148} +{"info/global_step": 149, "train_info/time_within_train_step": 27.966681718826294, "step": 149} +{"train_info/time_between_train_steps": 0.0067980289459228516, "step": 149} +{"info/global_step": 150, "train_info/time_within_train_step": 27.734045267105103, "step": 150} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736107315, "_runtime": 4442}, "step": 150} +{"logs": {"train/loss": 6.5645, "train/learning_rate": 0.0005833333333333333, "train/epoch": 5.0, "_timestamp": 1736107315, "_runtime": 4442}, "step": 150} +{"train_info/time_between_train_steps": 0.07338523864746094, "step": 150} +{"info/global_step": 151, "train_info/time_within_train_step": 27.86878228187561, "step": 151} +{"train_info/time_between_train_steps": 0.0052607059478759766, "step": 151} +{"info/global_step": 152, "train_info/time_within_train_step": 27.85098123550415, "step": 152} +{"train_info/time_between_train_steps": 0.005735158920288086, "step": 152} +{"info/global_step": 153, "train_info/time_within_train_step": 27.915982246398926, "step": 153} +{"train_info/time_between_train_steps": 0.005349159240722656, "step": 153} +{"info/global_step": 154, "train_info/time_within_train_step": 27.724554777145386, "step": 154} +{"train_info/time_between_train_steps": 0.005312442779541016, "step": 154} +{"info/global_step": 155, "train_info/time_within_train_step": 27.80138397216797, "step": 155} +{"train_info/time_between_train_steps": 0.005536794662475586, "step": 155} +{"info/global_step": 156, "train_info/time_within_train_step": 27.770992755889893, "step": 156} +{"train_info/time_between_train_steps": 0.005159854888916016, "step": 156} +{"info/global_step": 157, "train_info/time_within_train_step": 27.744436740875244, "step": 157} +{"train_info/time_between_train_steps": 0.00508570671081543, "step": 157} +{"info/global_step": 158, "train_info/time_within_train_step": 27.711244344711304, "step": 158} +{"train_info/time_between_train_steps": 0.005217790603637695, "step": 158} +{"info/global_step": 159, "train_info/time_within_train_step": 27.726192951202393, "step": 159} +{"train_info/time_between_train_steps": 0.005194664001464844, "step": 159} +{"info/global_step": 160, "train_info/time_within_train_step": 27.72129225730896, "step": 160} +{"train_info/time_between_train_steps": 0.005166530609130859, "step": 160} +{"info/global_step": 161, "train_info/time_within_train_step": 27.73843765258789, "step": 161} +{"train_info/time_between_train_steps": 0.005304098129272461, "step": 161} +{"info/global_step": 162, "train_info/time_within_train_step": 27.758819341659546, "step": 162} +{"train_info/time_between_train_steps": 0.005319356918334961, "step": 162} +{"info/global_step": 163, "train_info/time_within_train_step": 27.719062089920044, "step": 163} +{"train_info/time_between_train_steps": 0.0052373409271240234, "step": 163} +{"info/global_step": 164, "train_info/time_within_train_step": 27.750454664230347, "step": 164} +{"train_info/time_between_train_steps": 0.005303859710693359, "step": 164} +{"info/global_step": 165, "train_info/time_within_train_step": 27.76353120803833, "step": 165} +{"train_info/time_between_train_steps": 0.005539894104003906, "step": 165} +{"info/global_step": 166, "train_info/time_within_train_step": 27.77706289291382, "step": 166} +{"train_info/time_between_train_steps": 0.0051937103271484375, "step": 166} +{"info/global_step": 167, "train_info/time_within_train_step": 27.736335039138794, "step": 167} +{"train_info/time_between_train_steps": 0.0050890445709228516, "step": 167} +{"info/global_step": 168, "train_info/time_within_train_step": 27.87794518470764, "step": 168} +{"train_info/time_between_train_steps": 0.0052814483642578125, "step": 168} +{"info/global_step": 169, "train_info/time_within_train_step": 27.743541955947876, "step": 169} +{"train_info/time_between_train_steps": 0.00522613525390625, "step": 169} +{"info/global_step": 170, "train_info/time_within_train_step": 27.73881459236145, "step": 170} +{"train_info/time_between_train_steps": 0.005157470703125, "step": 170} +{"info/global_step": 171, "train_info/time_within_train_step": 27.777475595474243, "step": 171} +{"train_info/time_between_train_steps": 0.0055408477783203125, "step": 171} +{"info/global_step": 172, "train_info/time_within_train_step": 27.780166149139404, "step": 172} +{"train_info/time_between_train_steps": 0.0055925846099853516, "step": 172} +{"info/global_step": 173, "train_info/time_within_train_step": 27.786925077438354, "step": 173} +{"train_info/time_between_train_steps": 0.0060732364654541016, "step": 173} +{"info/global_step": 174, "train_info/time_within_train_step": 27.75854992866516, "step": 174} +{"train_info/time_between_train_steps": 0.0061261653900146484, "step": 174} +{"train_info/time_between_train_steps": 28.319183349609375, "step": 174} +{"info/global_step": 175, "train_info/time_within_train_step": 27.77672290802002, "step": 175} +{"train_info/time_between_train_steps": 0.005820512771606445, "step": 175} +{"info/global_step": 176, "train_info/time_within_train_step": 27.962001085281372, "step": 176} +{"train_info/time_between_train_steps": 0.005552768707275391, "step": 176} +{"info/global_step": 177, "train_info/time_within_train_step": 27.738496780395508, "step": 177} +{"train_info/time_between_train_steps": 0.00567173957824707, "step": 177} +{"info/global_step": 178, "train_info/time_within_train_step": 27.932852268218994, "step": 178} +{"train_info/time_between_train_steps": 0.0053784847259521484, "step": 178} +{"info/global_step": 179, "train_info/time_within_train_step": 27.752604246139526, "step": 179} +{"train_info/time_between_train_steps": 0.005394697189331055, "step": 179} +{"info/global_step": 180, "train_info/time_within_train_step": 27.9468891620636, "step": 180} +{"train_info/time_between_train_steps": 0.005414009094238281, "step": 180} +{"info/global_step": 181, "train_info/time_within_train_step": 27.745302200317383, "step": 181} +{"train_info/time_between_train_steps": 0.005718231201171875, "step": 181} +{"info/global_step": 182, "train_info/time_within_train_step": 27.92209792137146, "step": 182} +{"train_info/time_between_train_steps": 0.0052602291107177734, "step": 182} +{"info/global_step": 183, "train_info/time_within_train_step": 27.845903873443604, "step": 183} +{"train_info/time_between_train_steps": 0.009853124618530273, "step": 183} +{"info/global_step": 184, "train_info/time_within_train_step": 27.817695140838623, "step": 184} +{"train_info/time_between_train_steps": 0.005445718765258789, "step": 184} +{"info/global_step": 185, "train_info/time_within_train_step": 27.739441871643066, "step": 185} +{"train_info/time_between_train_steps": 0.0050618648529052734, "step": 185} +{"info/global_step": 186, "train_info/time_within_train_step": 27.811342477798462, "step": 186} +{"train_info/time_between_train_steps": 0.005156040191650391, "step": 186} +{"info/global_step": 187, "train_info/time_within_train_step": 27.72360372543335, "step": 187} +{"train_info/time_between_train_steps": 0.005184650421142578, "step": 187} +{"info/global_step": 188, "train_info/time_within_train_step": 27.72288227081299, "step": 188} +{"train_info/time_between_train_steps": 0.0051348209381103516, "step": 188} +{"info/global_step": 189, "train_info/time_within_train_step": 27.730265140533447, "step": 189} +{"train_info/time_between_train_steps": 0.005137443542480469, "step": 189} +{"info/global_step": 190, "train_info/time_within_train_step": 27.72019100189209, "step": 190} +{"train_info/time_between_train_steps": 0.0052089691162109375, "step": 190} +{"info/global_step": 191, "train_info/time_within_train_step": 27.729403018951416, "step": 191} +{"train_info/time_between_train_steps": 0.0051081180572509766, "step": 191} +{"info/global_step": 192, "train_info/time_within_train_step": 27.72902750968933, "step": 192} +{"train_info/time_between_train_steps": 0.005213022232055664, "step": 192} +{"info/global_step": 193, "train_info/time_within_train_step": 27.860586404800415, "step": 193} +{"train_info/time_between_train_steps": 0.005122661590576172, "step": 193} +{"info/global_step": 194, "train_info/time_within_train_step": 27.710824251174927, "step": 194} +{"train_info/time_between_train_steps": 0.005263328552246094, "step": 194} +{"info/global_step": 195, "train_info/time_within_train_step": 27.743626594543457, "step": 195} +{"train_info/time_between_train_steps": 0.005143165588378906, "step": 195} +{"info/global_step": 196, "train_info/time_within_train_step": 27.73236584663391, "step": 196} +{"train_info/time_between_train_steps": 0.005980014801025391, "step": 196} +{"info/global_step": 197, "train_info/time_within_train_step": 27.732630968093872, "step": 197} +{"train_info/time_between_train_steps": 0.005179405212402344, "step": 197} +{"info/global_step": 198, "train_info/time_within_train_step": 27.81360363960266, "step": 198} +{"train_info/time_between_train_steps": 0.009272098541259766, "step": 198} +{"info/global_step": 199, "train_info/time_within_train_step": 27.72666883468628, "step": 199} +{"train_info/time_between_train_steps": 0.0052509307861328125, "step": 199} +{"info/global_step": 200, "train_info/time_within_train_step": 27.805405616760254, "step": 200} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736108760, "_runtime": 5887}, "step": 200} +{"logs": {"train/loss": 6.3217, "train/learning_rate": 0.0005555555555555556, "train/epoch": 6.02, "_timestamp": 1736108760, "_runtime": 5887}, "step": 200} +{"train_info/time_between_train_steps": 206.34052920341492, "step": 200} +{"info/global_step": 201, "train_info/time_within_train_step": 27.92276883125305, "step": 201} +{"train_info/time_between_train_steps": 0.005643129348754883, "step": 201} +{"info/global_step": 202, "train_info/time_within_train_step": 27.82884454727173, "step": 202} +{"train_info/time_between_train_steps": 0.006145954132080078, "step": 202} +{"info/global_step": 203, "train_info/time_within_train_step": 27.78890585899353, "step": 203} +{"train_info/time_between_train_steps": 0.007402896881103516, "step": 203} +{"train_info/time_between_train_steps": 28.21078896522522, "step": 203} +{"info/global_step": 204, "train_info/time_within_train_step": 27.92270302772522, "step": 204} +{"train_info/time_between_train_steps": 0.0056934356689453125, "step": 204} +{"info/global_step": 205, "train_info/time_within_train_step": 27.91774082183838, "step": 205} +{"train_info/time_between_train_steps": 0.00545811653137207, "step": 205} +{"info/global_step": 206, "train_info/time_within_train_step": 27.763643264770508, "step": 206} +{"train_info/time_between_train_steps": 0.005919456481933594, "step": 206} +{"info/global_step": 207, "train_info/time_within_train_step": 28.007978677749634, "step": 207} +{"train_info/time_between_train_steps": 0.00553131103515625, "step": 207} +{"info/global_step": 208, "train_info/time_within_train_step": 27.78055763244629, "step": 208} +{"train_info/time_between_train_steps": 0.005676746368408203, "step": 208} +{"info/global_step": 209, "train_info/time_within_train_step": 27.92908024787903, "step": 209} +{"train_info/time_between_train_steps": 0.009978055953979492, "step": 209} +{"info/global_step": 210, "train_info/time_within_train_step": 27.758219003677368, "step": 210} +{"train_info/time_between_train_steps": 0.0056498050689697266, "step": 210} +{"info/global_step": 211, "train_info/time_within_train_step": 27.960832595825195, "step": 211} +{"train_info/time_between_train_steps": 0.005637407302856445, "step": 211} +{"info/global_step": 212, "train_info/time_within_train_step": 27.771265506744385, "step": 212} +{"train_info/time_between_train_steps": 0.005550384521484375, "step": 212} +{"info/global_step": 213, "train_info/time_within_train_step": 27.972331047058105, "step": 213} +{"train_info/time_between_train_steps": 0.0054399967193603516, "step": 213} +{"info/global_step": 214, "train_info/time_within_train_step": 27.761388063430786, "step": 214} +{"train_info/time_between_train_steps": 0.0050601959228515625, "step": 214} +{"info/global_step": 215, "train_info/time_within_train_step": 27.74384355545044, "step": 215} +{"train_info/time_between_train_steps": 0.005112409591674805, "step": 215} +{"info/global_step": 216, "train_info/time_within_train_step": 27.72466206550598, "step": 216} +{"train_info/time_between_train_steps": 0.0052032470703125, "step": 216} +{"info/global_step": 217, "train_info/time_within_train_step": 27.735950231552124, "step": 217} +{"train_info/time_between_train_steps": 0.005407571792602539, "step": 217} +{"info/global_step": 218, "train_info/time_within_train_step": 27.726083278656006, "step": 218} +{"train_info/time_between_train_steps": 0.005055904388427734, "step": 218} +{"info/global_step": 219, "train_info/time_within_train_step": 27.715848207473755, "step": 219} +{"train_info/time_between_train_steps": 0.005186557769775391, "step": 219} +{"info/global_step": 220, "train_info/time_within_train_step": 27.710731267929077, "step": 220} +{"train_info/time_between_train_steps": 0.0051348209381103516, "step": 220} +{"info/global_step": 221, "train_info/time_within_train_step": 27.725656747817993, "step": 221} +{"train_info/time_between_train_steps": 0.005222320556640625, "step": 221} +{"info/global_step": 222, "train_info/time_within_train_step": 27.71819233894348, "step": 222} +{"train_info/time_between_train_steps": 0.005190610885620117, "step": 222} +{"info/global_step": 223, "train_info/time_within_train_step": 27.744506359100342, "step": 223} +{"train_info/time_between_train_steps": 0.00565338134765625, "step": 223} +{"info/global_step": 224, "train_info/time_within_train_step": 36.08468437194824, "step": 224} +{"train_info/time_between_train_steps": 0.006378889083862305, "step": 224} +{"info/global_step": 225, "train_info/time_within_train_step": 27.772982835769653, "step": 225} +{"train_info/time_between_train_steps": 0.005341053009033203, "step": 225} +{"info/global_step": 226, "train_info/time_within_train_step": 27.772937297821045, "step": 226} +{"train_info/time_between_train_steps": 0.006145477294921875, "step": 226} +{"info/global_step": 227, "train_info/time_within_train_step": 27.78510046005249, "step": 227} +{"train_info/time_between_train_steps": 0.005426883697509766, "step": 227} +{"info/global_step": 228, "train_info/time_within_train_step": 27.769232034683228, "step": 228} +{"train_info/time_between_train_steps": 0.005529642105102539, "step": 228} +{"info/global_step": 229, "train_info/time_within_train_step": 27.893393993377686, "step": 229} +{"train_info/time_between_train_steps": 0.005896091461181641, "step": 229} +{"info/global_step": 230, "train_info/time_within_train_step": 28.354841232299805, "step": 230} +{"train_info/time_between_train_steps": 0.005635738372802734, "step": 230} +{"info/global_step": 231, "train_info/time_within_train_step": 28.25398850440979, "step": 231} +{"train_info/time_between_train_steps": 0.00662994384765625, "step": 231} +{"info/global_step": 232, "train_info/time_within_train_step": 38.66360807418823, "step": 232} +{"train_info/time_between_train_steps": 0.006052970886230469, "step": 232} +{"train_info/time_between_train_steps": 28.70667338371277, "step": 232} +{"info/global_step": 233, "train_info/time_within_train_step": 27.73429775238037, "step": 233} +{"train_info/time_between_train_steps": 0.005736827850341797, "step": 233} +{"info/global_step": 234, "train_info/time_within_train_step": 27.913795709609985, "step": 234} +{"train_info/time_between_train_steps": 0.00546717643737793, "step": 234} +{"info/global_step": 235, "train_info/time_within_train_step": 27.781367301940918, "step": 235} +{"train_info/time_between_train_steps": 0.005733489990234375, "step": 235} +{"info/global_step": 236, "train_info/time_within_train_step": 27.93490195274353, "step": 236} +{"train_info/time_between_train_steps": 0.00563502311706543, "step": 236} +{"info/global_step": 237, "train_info/time_within_train_step": 27.74292516708374, "step": 237} +{"train_info/time_between_train_steps": 0.005715131759643555, "step": 237} +{"info/global_step": 238, "train_info/time_within_train_step": 27.9000141620636, "step": 238} +{"train_info/time_between_train_steps": 0.005484342575073242, "step": 238} +{"info/global_step": 239, "train_info/time_within_train_step": 27.7567138671875, "step": 239} +{"train_info/time_between_train_steps": 0.005501508712768555, "step": 239} +{"info/global_step": 240, "train_info/time_within_train_step": 27.932506561279297, "step": 240} +{"train_info/time_between_train_steps": 0.005479335784912109, "step": 240} +{"info/global_step": 241, "train_info/time_within_train_step": 27.756436586380005, "step": 241} +{"train_info/time_between_train_steps": 0.005633831024169922, "step": 241} +{"info/global_step": 242, "train_info/time_within_train_step": 27.832621335983276, "step": 242} +{"train_info/time_between_train_steps": 0.006344795227050781, "step": 242} +{"info/global_step": 243, "train_info/time_within_train_step": 27.89927911758423, "step": 243} +{"train_info/time_between_train_steps": 0.005445003509521484, "step": 243} +{"info/global_step": 244, "train_info/time_within_train_step": 27.738786458969116, "step": 244} +{"train_info/time_between_train_steps": 0.005144834518432617, "step": 244} +{"info/global_step": 245, "train_info/time_within_train_step": 27.7232608795166, "step": 245} +{"train_info/time_between_train_steps": 0.01000523567199707, "step": 245} +{"info/global_step": 246, "train_info/time_within_train_step": 27.71970272064209, "step": 246} +{"train_info/time_between_train_steps": 0.005407094955444336, "step": 246} +{"info/global_step": 247, "train_info/time_within_train_step": 27.711431741714478, "step": 247} +{"train_info/time_between_train_steps": 0.00503230094909668, "step": 247} +{"info/global_step": 248, "train_info/time_within_train_step": 27.7350652217865, "step": 248} +{"train_info/time_between_train_steps": 0.005288124084472656, "step": 248} +{"info/global_step": 249, "train_info/time_within_train_step": 27.729700565338135, "step": 249} +{"train_info/time_between_train_steps": 0.0050241947174072266, "step": 249} +{"info/global_step": 250, "train_info/time_within_train_step": 27.730565071105957, "step": 250} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736110422, "_runtime": 7549}, "step": 250} +{"logs": {"train/loss": 6.2449, "train/learning_rate": 0.0005277777777777777, "train/epoch": 8.02, "_timestamp": 1736110422, "_runtime": 7549}, "step": 250} +{"train_info/time_between_train_steps": 0.026482820510864258, "step": 250} +{"info/global_step": 251, "train_info/time_within_train_step": 27.729074239730835, "step": 251} +{"train_info/time_between_train_steps": 0.005951881408691406, "step": 251} +{"info/global_step": 252, "train_info/time_within_train_step": 27.718767642974854, "step": 252} +{"train_info/time_between_train_steps": 0.0054607391357421875, "step": 252} +{"info/global_step": 253, "train_info/time_within_train_step": 27.71106266975403, "step": 253} +{"train_info/time_between_train_steps": 0.0070378780364990234, "step": 253} +{"info/global_step": 254, "train_info/time_within_train_step": 27.717467784881592, "step": 254} +{"train_info/time_between_train_steps": 0.005154132843017578, "step": 254} +{"info/global_step": 255, "train_info/time_within_train_step": 27.759243488311768, "step": 255} +{"train_info/time_between_train_steps": 0.005095243453979492, "step": 255} +{"info/global_step": 256, "train_info/time_within_train_step": 27.801512718200684, "step": 256} +{"train_info/time_between_train_steps": 0.014261245727539062, "step": 256} +{"info/global_step": 257, "train_info/time_within_train_step": 28.137711763381958, "step": 257} +{"train_info/time_between_train_steps": 0.009609699249267578, "step": 257} +{"info/global_step": 258, "train_info/time_within_train_step": 27.86030101776123, "step": 258} +{"train_info/time_between_train_steps": 0.005121946334838867, "step": 258} +{"info/global_step": 259, "train_info/time_within_train_step": 27.855117082595825, "step": 259} +{"train_info/time_between_train_steps": 0.005084037780761719, "step": 259} +{"info/global_step": 260, "train_info/time_within_train_step": 27.74164581298828, "step": 260} +{"train_info/time_between_train_steps": 0.005628824234008789, "step": 260} +{"info/global_step": 261, "train_info/time_within_train_step": 27.749576807022095, "step": 261} +{"train_info/time_between_train_steps": 0.006845712661743164, "step": 261} +{"train_info/time_between_train_steps": 28.21044921875, "step": 261} +{"info/global_step": 262, "train_info/time_within_train_step": 27.735482692718506, "step": 262} +{"train_info/time_between_train_steps": 0.005371570587158203, "step": 262} +{"info/global_step": 263, "train_info/time_within_train_step": 27.879475116729736, "step": 263} +{"train_info/time_between_train_steps": 0.0053670406341552734, "step": 263} +{"info/global_step": 264, "train_info/time_within_train_step": 27.723334312438965, "step": 264} +{"train_info/time_between_train_steps": 0.005312919616699219, "step": 264} +{"info/global_step": 265, "train_info/time_within_train_step": 27.826374053955078, "step": 265} +{"train_info/time_between_train_steps": 0.005191326141357422, "step": 265} +{"info/global_step": 266, "train_info/time_within_train_step": 27.73114514350891, "step": 266} +{"train_info/time_between_train_steps": 0.006291627883911133, "step": 266} +{"info/global_step": 267, "train_info/time_within_train_step": 27.889260292053223, "step": 267} +{"train_info/time_between_train_steps": 0.005429506301879883, "step": 267} +{"info/global_step": 268, "train_info/time_within_train_step": 27.81155824661255, "step": 268} +{"train_info/time_between_train_steps": 0.005522727966308594, "step": 268} +{"info/global_step": 269, "train_info/time_within_train_step": 27.864636659622192, "step": 269} +{"train_info/time_between_train_steps": 0.0051403045654296875, "step": 269} +{"info/global_step": 270, "train_info/time_within_train_step": 27.72202777862549, "step": 270} +{"train_info/time_between_train_steps": 0.00510406494140625, "step": 270} +{"info/global_step": 271, "train_info/time_within_train_step": 27.885268926620483, "step": 271} +{"train_info/time_between_train_steps": 0.009245157241821289, "step": 271} +{"info/global_step": 272, "train_info/time_within_train_step": 27.73064160346985, "step": 272} +{"train_info/time_between_train_steps": 0.005076169967651367, "step": 272} +{"info/global_step": 273, "train_info/time_within_train_step": 27.760785579681396, "step": 273} +{"train_info/time_between_train_steps": 0.005150556564331055, "step": 273} +{"info/global_step": 274, "train_info/time_within_train_step": 27.82705855369568, "step": 274} +{"train_info/time_between_train_steps": 0.005051374435424805, "step": 274} +{"info/global_step": 275, "train_info/time_within_train_step": 27.708194494247437, "step": 275} +{"train_info/time_between_train_steps": 0.005002498626708984, "step": 275} +{"info/global_step": 276, "train_info/time_within_train_step": 27.729896068572998, "step": 276} +{"train_info/time_between_train_steps": 0.0050470829010009766, "step": 276} +{"info/global_step": 277, "train_info/time_within_train_step": 27.743865728378296, "step": 277} +{"train_info/time_between_train_steps": 0.005025386810302734, "step": 277} +{"info/global_step": 278, "train_info/time_within_train_step": 27.719937562942505, "step": 278} +{"train_info/time_between_train_steps": 0.009388208389282227, "step": 278} +{"info/global_step": 279, "train_info/time_within_train_step": 27.77027916908264, "step": 279} +{"train_info/time_between_train_steps": 0.005911588668823242, "step": 279} +{"info/global_step": 280, "train_info/time_within_train_step": 27.72557830810547, "step": 280} +{"train_info/time_between_train_steps": 0.005033969879150391, "step": 280} +{"info/global_step": 281, "train_info/time_within_train_step": 27.731183290481567, "step": 281} +{"train_info/time_between_train_steps": 0.0059659481048583984, "step": 281} +{"info/global_step": 282, "train_info/time_within_train_step": 27.72579789161682, "step": 282} +{"train_info/time_between_train_steps": 0.005095958709716797, "step": 282} +{"info/global_step": 283, "train_info/time_within_train_step": 27.718193769454956, "step": 283} +{"train_info/time_between_train_steps": 0.0051097869873046875, "step": 283} +{"info/global_step": 284, "train_info/time_within_train_step": 27.73736572265625, "step": 284} +{"train_info/time_between_train_steps": 0.005154848098754883, "step": 284} +{"info/global_step": 285, "train_info/time_within_train_step": 27.711690664291382, "step": 285} +{"train_info/time_between_train_steps": 0.005110263824462891, "step": 285} +{"info/global_step": 286, "train_info/time_within_train_step": 27.726120471954346, "step": 286} +{"train_info/time_between_train_steps": 0.005250215530395508, "step": 286} +{"info/global_step": 287, "train_info/time_within_train_step": 27.735027551651, "step": 287} +{"train_info/time_between_train_steps": 0.0056610107421875, "step": 287} +{"info/global_step": 288, "train_info/time_within_train_step": 27.744548559188843, "step": 288} +{"train_info/time_between_train_steps": 0.005120754241943359, "step": 288} +{"info/global_step": 289, "train_info/time_within_train_step": 27.81924295425415, "step": 289} +{"train_info/time_between_train_steps": 0.0054399967193603516, "step": 289} +{"info/global_step": 290, "train_info/time_within_train_step": 27.737828969955444, "step": 290} +{"train_info/time_between_train_steps": 0.005799770355224609, "step": 290} +{"train_info/time_between_train_steps": 28.412473440170288, "step": 290} +{"info/global_step": 291, "train_info/time_within_train_step": 27.736586332321167, "step": 291} +{"train_info/time_between_train_steps": 0.005226612091064453, "step": 291} +{"info/global_step": 292, "train_info/time_within_train_step": 27.880980968475342, "step": 292} +{"train_info/time_between_train_steps": 0.0053997039794921875, "step": 292} +{"info/global_step": 293, "train_info/time_within_train_step": 27.918469190597534, "step": 293} +{"train_info/time_between_train_steps": 0.007436037063598633, "step": 293} +{"info/global_step": 294, "train_info/time_within_train_step": 27.842824459075928, "step": 294} +{"train_info/time_between_train_steps": 0.005202054977416992, "step": 294} +{"info/global_step": 295, "train_info/time_within_train_step": 27.722617864608765, "step": 295} +{"train_info/time_between_train_steps": 0.0053844451904296875, "step": 295} +{"info/global_step": 296, "train_info/time_within_train_step": 27.91714835166931, "step": 296} +{"train_info/time_between_train_steps": 0.0055768489837646484, "step": 296} +{"info/global_step": 297, "train_info/time_within_train_step": 27.753161668777466, "step": 297} +{"train_info/time_between_train_steps": 0.005297422409057617, "step": 297} +{"info/global_step": 298, "train_info/time_within_train_step": 27.872628927230835, "step": 298} +{"train_info/time_between_train_steps": 0.005202293395996094, "step": 298} +{"info/global_step": 299, "train_info/time_within_train_step": 27.77114486694336, "step": 299} +{"train_info/time_between_train_steps": 0.0059261322021484375, "step": 299} +{"info/global_step": 300, "train_info/time_within_train_step": 27.85358428955078, "step": 300} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736111869, "_runtime": 8996}, "step": 300} +{"logs": {"train/loss": 6.006, "train/learning_rate": 0.0005, "train/epoch": 10.01, "_timestamp": 1736111869, "_runtime": 8996}, "step": 300} +{"train_info/time_between_train_steps": 102.3541464805603, "step": 300} +{"info/global_step": 301, "train_info/time_within_train_step": 27.780226707458496, "step": 301} +{"train_info/time_between_train_steps": 0.005690813064575195, "step": 301} +{"info/global_step": 302, "train_info/time_within_train_step": 27.722623109817505, "step": 302} +{"train_info/time_between_train_steps": 0.005221366882324219, "step": 302} +{"info/global_step": 303, "train_info/time_within_train_step": 27.727168321609497, "step": 303} +{"train_info/time_between_train_steps": 0.00543665885925293, "step": 303} +{"info/global_step": 304, "train_info/time_within_train_step": 27.823383569717407, "step": 304} +{"train_info/time_between_train_steps": 0.005202531814575195, "step": 304} +{"info/global_step": 305, "train_info/time_within_train_step": 27.75781750679016, "step": 305} +{"train_info/time_between_train_steps": 0.005343198776245117, "step": 305} +{"info/global_step": 306, "train_info/time_within_train_step": 27.747329711914062, "step": 306} +{"train_info/time_between_train_steps": 0.005278587341308594, "step": 306} +{"info/global_step": 307, "train_info/time_within_train_step": 27.760271310806274, "step": 307} +{"train_info/time_between_train_steps": 0.005322456359863281, "step": 307} +{"info/global_step": 308, "train_info/time_within_train_step": 27.739792108535767, "step": 308} +{"train_info/time_between_train_steps": 0.006289243698120117, "step": 308} +{"info/global_step": 309, "train_info/time_within_train_step": 27.743412017822266, "step": 309} +{"train_info/time_between_train_steps": 0.006337642669677734, "step": 309} +{"info/global_step": 310, "train_info/time_within_train_step": 27.740897178649902, "step": 310} +{"train_info/time_between_train_steps": 0.0053386688232421875, "step": 310} +{"info/global_step": 311, "train_info/time_within_train_step": 27.73580551147461, "step": 311} +{"train_info/time_between_train_steps": 0.005391836166381836, "step": 311} +{"info/global_step": 312, "train_info/time_within_train_step": 27.739020824432373, "step": 312} +{"train_info/time_between_train_steps": 0.00518798828125, "step": 312} +{"info/global_step": 313, "train_info/time_within_train_step": 27.723660707473755, "step": 313} +{"train_info/time_between_train_steps": 0.00520014762878418, "step": 313} +{"info/global_step": 314, "train_info/time_within_train_step": 27.719231128692627, "step": 314} +{"train_info/time_between_train_steps": 0.005212306976318359, "step": 314} +{"info/global_step": 315, "train_info/time_within_train_step": 27.70827031135559, "step": 315} +{"train_info/time_between_train_steps": 0.005383729934692383, "step": 315} +{"info/global_step": 316, "train_info/time_within_train_step": 27.725041389465332, "step": 316} +{"train_info/time_between_train_steps": 0.005240440368652344, "step": 316} +{"info/global_step": 317, "train_info/time_within_train_step": 27.741502046585083, "step": 317} +{"train_info/time_between_train_steps": 0.005340099334716797, "step": 317} +{"info/global_step": 318, "train_info/time_within_train_step": 29.144726037979126, "step": 318} +{"train_info/time_between_train_steps": 0.005445003509521484, "step": 318} +{"info/global_step": 319, "train_info/time_within_train_step": 28.95736598968506, "step": 319} +{"train_info/time_between_train_steps": 0.006117582321166992, "step": 319} +{"train_info/time_between_train_steps": 28.693572282791138, "step": 319} +{"info/global_step": 320, "train_info/time_within_train_step": 27.874958753585815, "step": 320} +{"train_info/time_between_train_steps": 0.0061190128326416016, "step": 320} +{"info/global_step": 321, "train_info/time_within_train_step": 28.04882550239563, "step": 321} +{"train_info/time_between_train_steps": 0.006096363067626953, "step": 321} +{"info/global_step": 322, "train_info/time_within_train_step": 27.799416303634644, "step": 322} +{"train_info/time_between_train_steps": 0.006063222885131836, "step": 322} +{"info/global_step": 323, "train_info/time_within_train_step": 27.99616289138794, "step": 323} +{"train_info/time_between_train_steps": 0.00568079948425293, "step": 323} +{"info/global_step": 324, "train_info/time_within_train_step": 27.799454927444458, "step": 324} +{"train_info/time_between_train_steps": 0.006193876266479492, "step": 324} +{"info/global_step": 325, "train_info/time_within_train_step": 29.568837642669678, "step": 325} +{"train_info/time_between_train_steps": 0.0059299468994140625, "step": 325} +{"info/global_step": 326, "train_info/time_within_train_step": 28.374645233154297, "step": 326} +{"train_info/time_between_train_steps": 0.006862163543701172, "step": 326} +{"info/global_step": 327, "train_info/time_within_train_step": 29.538015365600586, "step": 327} +{"train_info/time_between_train_steps": 0.006886959075927734, "step": 327} +{"info/global_step": 328, "train_info/time_within_train_step": 27.81042456626892, "step": 328} +{"train_info/time_between_train_steps": 0.006562471389770508, "step": 328} +{"info/global_step": 329, "train_info/time_within_train_step": 28.045231103897095, "step": 329} +{"train_info/time_between_train_steps": 0.006044149398803711, "step": 329} +{"info/global_step": 330, "train_info/time_within_train_step": 27.826660871505737, "step": 330} +{"train_info/time_between_train_steps": 0.005465030670166016, "step": 330} +{"info/global_step": 331, "train_info/time_within_train_step": 27.75992202758789, "step": 331} +{"train_info/time_between_train_steps": 0.005593299865722656, "step": 331} +{"info/global_step": 332, "train_info/time_within_train_step": 27.7623074054718, "step": 332} +{"train_info/time_between_train_steps": 0.005626201629638672, "step": 332} +{"info/global_step": 333, "train_info/time_within_train_step": 27.812530279159546, "step": 333} +{"train_info/time_between_train_steps": 0.007380962371826172, "step": 333} +{"info/global_step": 334, "train_info/time_within_train_step": 27.959190607070923, "step": 334} +{"train_info/time_between_train_steps": 0.005909442901611328, "step": 334} +{"info/global_step": 335, "train_info/time_within_train_step": 27.84936785697937, "step": 335} +{"train_info/time_between_train_steps": 0.00554347038269043, "step": 335} +{"info/global_step": 336, "train_info/time_within_train_step": 27.79937195777893, "step": 336} +{"train_info/time_between_train_steps": 0.005369663238525391, "step": 336} +{"info/global_step": 337, "train_info/time_within_train_step": 27.793566703796387, "step": 337} +{"train_info/time_between_train_steps": 0.014996528625488281, "step": 337} +{"info/global_step": 338, "train_info/time_within_train_step": 27.778894901275635, "step": 338} +{"train_info/time_between_train_steps": 0.005671977996826172, "step": 338} +{"info/global_step": 339, "train_info/time_within_train_step": 27.7534601688385, "step": 339} +{"train_info/time_between_train_steps": 0.005659580230712891, "step": 339} +{"info/global_step": 340, "train_info/time_within_train_step": 27.79952359199524, "step": 340} +{"train_info/time_between_train_steps": 0.007328510284423828, "step": 340} +{"info/global_step": 341, "train_info/time_within_train_step": 27.752216339111328, "step": 341} +{"train_info/time_between_train_steps": 0.007224082946777344, "step": 341} +{"info/global_step": 342, "train_info/time_within_train_step": 27.78761887550354, "step": 342} +{"train_info/time_between_train_steps": 0.008060932159423828, "step": 342} +{"info/global_step": 343, "train_info/time_within_train_step": 27.796578645706177, "step": 343} +{"train_info/time_between_train_steps": 0.006016731262207031, "step": 343} +{"info/global_step": 344, "train_info/time_within_train_step": 27.825434684753418, "step": 344} +{"train_info/time_between_train_steps": 0.006368398666381836, "step": 344} +{"info/global_step": 345, "train_info/time_within_train_step": 27.818132162094116, "step": 345} +{"train_info/time_between_train_steps": 0.006265878677368164, "step": 345} +{"info/global_step": 346, "train_info/time_within_train_step": 27.796895742416382, "step": 346} +{"train_info/time_between_train_steps": 0.006337404251098633, "step": 346} +{"info/global_step": 347, "train_info/time_within_train_step": 27.839130878448486, "step": 347} +{"train_info/time_between_train_steps": 0.0067327022552490234, "step": 347} +{"info/global_step": 348, "train_info/time_within_train_step": 27.82418394088745, "step": 348} +{"train_info/time_between_train_steps": 0.0062825679779052734, "step": 348} +{"train_info/time_between_train_steps": 28.415706634521484, "step": 348} +{"info/global_step": 349, "train_info/time_within_train_step": 27.852606773376465, "step": 349} +{"train_info/time_between_train_steps": 0.005385160446166992, "step": 349} +{"info/global_step": 350, "train_info/time_within_train_step": 27.84436273574829, "step": 350} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736113429, "_runtime": 10556}, "step": 350} +{"logs": {"train/loss": 5.7678, "train/learning_rate": 0.00047222222222222224, "train/epoch": 12.0, "_timestamp": 1736113429, "_runtime": 10556}, "step": 350} +{"train_info/time_between_train_steps": 0.031180620193481445, "step": 350} +{"info/global_step": 351, "train_info/time_within_train_step": 27.74829077720642, "step": 351} +{"train_info/time_between_train_steps": 0.0055544376373291016, "step": 351} +{"info/global_step": 352, "train_info/time_within_train_step": 27.911396265029907, "step": 352} +{"train_info/time_between_train_steps": 0.005425453186035156, "step": 352} +{"info/global_step": 353, "train_info/time_within_train_step": 27.72785997390747, "step": 353} +{"train_info/time_between_train_steps": 0.00525975227355957, "step": 353} +{"info/global_step": 354, "train_info/time_within_train_step": 27.886963367462158, "step": 354} +{"train_info/time_between_train_steps": 0.005402088165283203, "step": 354} +{"info/global_step": 355, "train_info/time_within_train_step": 27.76098108291626, "step": 355} +{"train_info/time_between_train_steps": 0.0054471492767333984, "step": 355} +{"info/global_step": 356, "train_info/time_within_train_step": 27.869651556015015, "step": 356} +{"train_info/time_between_train_steps": 0.005261659622192383, "step": 356} +{"info/global_step": 357, "train_info/time_within_train_step": 27.74770951271057, "step": 357} +{"train_info/time_between_train_steps": 0.005153656005859375, "step": 357} +{"info/global_step": 358, "train_info/time_within_train_step": 27.796512603759766, "step": 358} +{"train_info/time_between_train_steps": 0.005288362503051758, "step": 358} +{"info/global_step": 359, "train_info/time_within_train_step": 27.7258198261261, "step": 359} +{"train_info/time_between_train_steps": 0.005615234375, "step": 359} +{"info/global_step": 360, "train_info/time_within_train_step": 27.704244136810303, "step": 360} +{"train_info/time_between_train_steps": 0.004969120025634766, "step": 360} +{"info/global_step": 361, "train_info/time_within_train_step": 27.730162620544434, "step": 361} +{"train_info/time_between_train_steps": 0.005060672760009766, "step": 361} +{"info/global_step": 362, "train_info/time_within_train_step": 27.71820378303528, "step": 362} +{"train_info/time_between_train_steps": 0.005044460296630859, "step": 362} +{"info/global_step": 363, "train_info/time_within_train_step": 27.70815348625183, "step": 363} +{"train_info/time_between_train_steps": 0.00506591796875, "step": 363} +{"info/global_step": 364, "train_info/time_within_train_step": 27.715155124664307, "step": 364} +{"train_info/time_between_train_steps": 0.005144834518432617, "step": 364} +{"info/global_step": 365, "train_info/time_within_train_step": 27.827803373336792, "step": 365} +{"train_info/time_between_train_steps": 0.005155086517333984, "step": 365} +{"info/global_step": 366, "train_info/time_within_train_step": 27.71249485015869, "step": 366} +{"train_info/time_between_train_steps": 0.0050640106201171875, "step": 366} +{"info/global_step": 367, "train_info/time_within_train_step": 27.74108338356018, "step": 367} +{"train_info/time_between_train_steps": 0.005059242248535156, "step": 367} +{"info/global_step": 368, "train_info/time_within_train_step": 27.724030017852783, "step": 368} +{"train_info/time_between_train_steps": 0.005458831787109375, "step": 368} +{"info/global_step": 369, "train_info/time_within_train_step": 27.731540203094482, "step": 369} +{"train_info/time_between_train_steps": 0.005102634429931641, "step": 369} +{"info/global_step": 370, "train_info/time_within_train_step": 27.715794563293457, "step": 370} +{"train_info/time_between_train_steps": 0.005025148391723633, "step": 370} +{"info/global_step": 371, "train_info/time_within_train_step": 27.71910524368286, "step": 371} +{"train_info/time_between_train_steps": 0.0051348209381103516, "step": 371} +{"info/global_step": 372, "train_info/time_within_train_step": 27.72474193572998, "step": 372} +{"train_info/time_between_train_steps": 0.005122661590576172, "step": 372} +{"info/global_step": 373, "train_info/time_within_train_step": 27.737311601638794, "step": 373} +{"train_info/time_between_train_steps": 0.0052089691162109375, "step": 373} +{"info/global_step": 374, "train_info/time_within_train_step": 27.756798028945923, "step": 374} +{"train_info/time_between_train_steps": 0.0062160491943359375, "step": 374} +{"info/global_step": 375, "train_info/time_within_train_step": 27.740784883499146, "step": 375} +{"train_info/time_between_train_steps": 0.005471944808959961, "step": 375} +{"info/global_step": 376, "train_info/time_within_train_step": 27.742762804031372, "step": 376} +{"train_info/time_between_train_steps": 0.00562596321105957, "step": 376} +{"info/global_step": 377, "train_info/time_within_train_step": 27.74854874610901, "step": 377} +{"train_info/time_between_train_steps": 0.005982875823974609, "step": 377} +{"train_info/time_between_train_steps": 28.43128752708435, "step": 377} +{"info/global_step": 378, "train_info/time_within_train_step": 27.74929165840149, "step": 378} +{"train_info/time_between_train_steps": 0.005270719528198242, "step": 378} +{"info/global_step": 379, "train_info/time_within_train_step": 27.877185106277466, "step": 379} +{"train_info/time_between_train_steps": 0.005345582962036133, "step": 379} +{"info/global_step": 380, "train_info/time_within_train_step": 27.81783890724182, "step": 380} +{"train_info/time_between_train_steps": 0.007439374923706055, "step": 380} +{"info/global_step": 381, "train_info/time_within_train_step": 27.90093421936035, "step": 381} +{"train_info/time_between_train_steps": 0.007706403732299805, "step": 381} +{"info/global_step": 382, "train_info/time_within_train_step": 27.759474277496338, "step": 382} +{"train_info/time_between_train_steps": 0.00577092170715332, "step": 382} +{"info/global_step": 383, "train_info/time_within_train_step": 27.922983169555664, "step": 383} +{"train_info/time_between_train_steps": 0.005871295928955078, "step": 383} +{"info/global_step": 384, "train_info/time_within_train_step": 27.7674560546875, "step": 384} +{"train_info/time_between_train_steps": 0.005835533142089844, "step": 384} +{"info/global_step": 385, "train_info/time_within_train_step": 27.95083999633789, "step": 385} +{"train_info/time_between_train_steps": 0.005630970001220703, "step": 385} +{"info/global_step": 386, "train_info/time_within_train_step": 27.73434829711914, "step": 386} +{"train_info/time_between_train_steps": 0.005697965621948242, "step": 386} +{"info/global_step": 387, "train_info/time_within_train_step": 27.814706802368164, "step": 387} +{"train_info/time_between_train_steps": 0.00534820556640625, "step": 387} +{"info/global_step": 388, "train_info/time_within_train_step": 27.735024452209473, "step": 388} +{"train_info/time_between_train_steps": 0.006029844284057617, "step": 388} +{"info/global_step": 389, "train_info/time_within_train_step": 27.736433506011963, "step": 389} +{"train_info/time_between_train_steps": 0.005240678787231445, "step": 389} +{"info/global_step": 390, "train_info/time_within_train_step": 27.727480173110962, "step": 390} +{"train_info/time_between_train_steps": 0.005303621292114258, "step": 390} +{"info/global_step": 391, "train_info/time_within_train_step": 27.72565007209778, "step": 391} +{"train_info/time_between_train_steps": 0.005165576934814453, "step": 391} +{"info/global_step": 392, "train_info/time_within_train_step": 27.71850848197937, "step": 392} +{"train_info/time_between_train_steps": 0.005257368087768555, "step": 392} +{"info/global_step": 393, "train_info/time_within_train_step": 27.72860097885132, "step": 393} +{"train_info/time_between_train_steps": 0.005089759826660156, "step": 393} +{"info/global_step": 394, "train_info/time_within_train_step": 27.751819610595703, "step": 394} +{"train_info/time_between_train_steps": 0.0051937103271484375, "step": 394} +{"info/global_step": 395, "train_info/time_within_train_step": 27.840775728225708, "step": 395} +{"train_info/time_between_train_steps": 0.0052700042724609375, "step": 395} +{"info/global_step": 396, "train_info/time_within_train_step": 27.72861099243164, "step": 396} +{"train_info/time_between_train_steps": 0.00504755973815918, "step": 396} +{"info/global_step": 397, "train_info/time_within_train_step": 27.720431089401245, "step": 397} +{"train_info/time_between_train_steps": 0.005475044250488281, "step": 397} +{"info/global_step": 398, "train_info/time_within_train_step": 27.72881293296814, "step": 398} +{"train_info/time_between_train_steps": 0.0051136016845703125, "step": 398} +{"info/global_step": 399, "train_info/time_within_train_step": 27.786668300628662, "step": 399} +{"train_info/time_between_train_steps": 0.0050656795501708984, "step": 399} +{"info/global_step": 400, "train_info/time_within_train_step": 27.991379737854004, "step": 400} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736114848, "_runtime": 11975}, "step": 400} +{"logs": {"train/loss": 5.2976, "train/learning_rate": 0.00044444444444444436, "train/epoch": 13.02, "_timestamp": 1736114848, "_runtime": 11975}, "step": 400} +{"train_info/time_between_train_steps": 104.43470764160156, "step": 400} +{"info/global_step": 401, "train_info/time_within_train_step": 27.742829084396362, "step": 401} +{"train_info/time_between_train_steps": 0.005302906036376953, "step": 401} +{"info/global_step": 402, "train_info/time_within_train_step": 27.744525909423828, "step": 402} +{"train_info/time_between_train_steps": 0.005707263946533203, "step": 402} +{"info/global_step": 403, "train_info/time_within_train_step": 27.73121213912964, "step": 403} +{"train_info/time_between_train_steps": 0.0054302215576171875, "step": 403} +{"info/global_step": 404, "train_info/time_within_train_step": 27.74853491783142, "step": 404} +{"train_info/time_between_train_steps": 0.006678104400634766, "step": 404} +{"info/global_step": 405, "train_info/time_within_train_step": 27.738510370254517, "step": 405} +{"train_info/time_between_train_steps": 0.0061931610107421875, "step": 405} +{"info/global_step": 406, "train_info/time_within_train_step": 27.76262068748474, "step": 406} +{"train_info/time_between_train_steps": 0.00614166259765625, "step": 406} +{"train_info/time_between_train_steps": 27.976065158843994, "step": 406} +{"info/global_step": 407, "train_info/time_within_train_step": 27.76115894317627, "step": 407} +{"train_info/time_between_train_steps": 0.005620002746582031, "step": 407} +{"info/global_step": 408, "train_info/time_within_train_step": 27.922311544418335, "step": 408} +{"train_info/time_between_train_steps": 0.005378007888793945, "step": 408} +{"info/global_step": 409, "train_info/time_within_train_step": 27.730096101760864, "step": 409} +{"train_info/time_between_train_steps": 0.005669593811035156, "step": 409} +{"info/global_step": 410, "train_info/time_within_train_step": 28.01870846748352, "step": 410} +{"train_info/time_between_train_steps": 0.005557537078857422, "step": 410} +{"info/global_step": 411, "train_info/time_within_train_step": 27.861884832382202, "step": 411} +{"train_info/time_between_train_steps": 0.0053253173828125, "step": 411} +{"info/global_step": 412, "train_info/time_within_train_step": 27.849634170532227, "step": 412} +{"train_info/time_between_train_steps": 0.005583286285400391, "step": 412} +{"info/global_step": 413, "train_info/time_within_train_step": 27.74424123764038, "step": 413} +{"train_info/time_between_train_steps": 0.005561351776123047, "step": 413} +{"info/global_step": 414, "train_info/time_within_train_step": 28.490196466445923, "step": 414} +{"train_info/time_between_train_steps": 0.005761861801147461, "step": 414} +{"info/global_step": 415, "train_info/time_within_train_step": 27.78615665435791, "step": 415} +{"train_info/time_between_train_steps": 0.005998134613037109, "step": 415} +{"info/global_step": 416, "train_info/time_within_train_step": 27.855063676834106, "step": 416} +{"train_info/time_between_train_steps": 0.005529165267944336, "step": 416} +{"info/global_step": 417, "train_info/time_within_train_step": 27.76516366004944, "step": 417} +{"train_info/time_between_train_steps": 0.005321502685546875, "step": 417} +{"info/global_step": 418, "train_info/time_within_train_step": 27.73392391204834, "step": 418} +{"train_info/time_between_train_steps": 0.0052509307861328125, "step": 418} +{"info/global_step": 419, "train_info/time_within_train_step": 27.8544499874115, "step": 419} +{"train_info/time_between_train_steps": 0.00601959228515625, "step": 419} +{"info/global_step": 420, "train_info/time_within_train_step": 29.233723163604736, "step": 420} +{"train_info/time_between_train_steps": 0.005852699279785156, "step": 420} +{"info/global_step": 421, "train_info/time_within_train_step": 28.1355562210083, "step": 421} +{"train_info/time_between_train_steps": 0.0059053897857666016, "step": 421} +{"info/global_step": 422, "train_info/time_within_train_step": 28.25674867630005, "step": 422} +{"train_info/time_between_train_steps": 0.0052983760833740234, "step": 422} +{"info/global_step": 423, "train_info/time_within_train_step": 27.75494623184204, "step": 423} +{"train_info/time_between_train_steps": 0.0053136348724365234, "step": 423} +{"info/global_step": 424, "train_info/time_within_train_step": 27.751149892807007, "step": 424} +{"train_info/time_between_train_steps": 0.005427122116088867, "step": 424} +{"info/global_step": 425, "train_info/time_within_train_step": 27.736330270767212, "step": 425} +{"train_info/time_between_train_steps": 0.005321025848388672, "step": 425} +{"info/global_step": 426, "train_info/time_within_train_step": 27.83108353614807, "step": 426} +{"train_info/time_between_train_steps": 0.005364418029785156, "step": 426} +{"info/global_step": 427, "train_info/time_within_train_step": 27.757363319396973, "step": 427} +{"train_info/time_between_train_steps": 0.005324125289916992, "step": 427} +{"info/global_step": 428, "train_info/time_within_train_step": 27.735028505325317, "step": 428} +{"train_info/time_between_train_steps": 0.005083322525024414, "step": 428} +{"info/global_step": 429, "train_info/time_within_train_step": 27.7623291015625, "step": 429} +{"train_info/time_between_train_steps": 0.00528264045715332, "step": 429} +{"info/global_step": 430, "train_info/time_within_train_step": 27.727858781814575, "step": 430} +{"train_info/time_between_train_steps": 0.0051615238189697266, "step": 430} +{"info/global_step": 431, "train_info/time_within_train_step": 27.727722644805908, "step": 431} +{"train_info/time_between_train_steps": 0.005370140075683594, "step": 431} +{"info/global_step": 432, "train_info/time_within_train_step": 27.72981309890747, "step": 432} +{"train_info/time_between_train_steps": 0.005366325378417969, "step": 432} +{"info/global_step": 433, "train_info/time_within_train_step": 27.731850624084473, "step": 433} +{"train_info/time_between_train_steps": 0.005485057830810547, "step": 433} +{"info/global_step": 434, "train_info/time_within_train_step": 27.729937076568604, "step": 434} +{"train_info/time_between_train_steps": 0.0058040618896484375, "step": 434} +{"info/global_step": 435, "train_info/time_within_train_step": 27.765608549118042, "step": 435} +{"train_info/time_between_train_steps": 0.0057489871978759766, "step": 435} +{"train_info/time_between_train_steps": 28.485926628112793, "step": 435} +{"info/global_step": 436, "train_info/time_within_train_step": 27.780901193618774, "step": 436} +{"train_info/time_between_train_steps": 0.005780935287475586, "step": 436} +{"info/global_step": 437, "train_info/time_within_train_step": 27.917474031448364, "step": 437} +{"train_info/time_between_train_steps": 0.005698204040527344, "step": 437} +{"info/global_step": 438, "train_info/time_within_train_step": 27.742352724075317, "step": 438} +{"train_info/time_between_train_steps": 0.005764961242675781, "step": 438} +{"info/global_step": 439, "train_info/time_within_train_step": 27.939053773880005, "step": 439} +{"train_info/time_between_train_steps": 0.0053997039794921875, "step": 439} +{"info/global_step": 440, "train_info/time_within_train_step": 27.850269079208374, "step": 440} +{"train_info/time_between_train_steps": 0.005692005157470703, "step": 440} +{"info/global_step": 441, "train_info/time_within_train_step": 27.936115503311157, "step": 441} +{"train_info/time_between_train_steps": 0.005463123321533203, "step": 441} +{"info/global_step": 442, "train_info/time_within_train_step": 27.757587432861328, "step": 442} +{"train_info/time_between_train_steps": 0.01035308837890625, "step": 442} +{"info/global_step": 443, "train_info/time_within_train_step": 27.938100337982178, "step": 443} +{"train_info/time_between_train_steps": 0.005287647247314453, "step": 443} +{"info/global_step": 444, "train_info/time_within_train_step": 27.74896764755249, "step": 444} +{"train_info/time_between_train_steps": 0.005823612213134766, "step": 444} +{"info/global_step": 445, "train_info/time_within_train_step": 27.807800769805908, "step": 445} +{"train_info/time_between_train_steps": 0.008193492889404297, "step": 445} +{"info/global_step": 446, "train_info/time_within_train_step": 27.77563786506653, "step": 446} +{"train_info/time_between_train_steps": 0.0051805973052978516, "step": 446} +{"info/global_step": 447, "train_info/time_within_train_step": 27.72808814048767, "step": 447} +{"train_info/time_between_train_steps": 0.005153179168701172, "step": 447} +{"info/global_step": 448, "train_info/time_within_train_step": 27.732197761535645, "step": 448} +{"train_info/time_between_train_steps": 0.005301475524902344, "step": 448} +{"info/global_step": 449, "train_info/time_within_train_step": 27.72157120704651, "step": 449} +{"train_info/time_between_train_steps": 0.005022764205932617, "step": 449} +{"info/global_step": 450, "train_info/time_within_train_step": 27.715328454971313, "step": 450} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736116405, "_runtime": 13532}, "step": 450} +{"logs": {"train/loss": 5.0428, "train/learning_rate": 0.00041666666666666664, "train/epoch": 15.01, "_timestamp": 1736116405, "_runtime": 13532}, "step": 450} +{"train_info/time_between_train_steps": 0.02630758285522461, "step": 450} +{"info/global_step": 451, "train_info/time_within_train_step": 27.741604566574097, "step": 451} +{"train_info/time_between_train_steps": 0.005106687545776367, "step": 451} +{"info/global_step": 452, "train_info/time_within_train_step": 27.736271858215332, "step": 452} +{"train_info/time_between_train_steps": 0.00581812858581543, "step": 452} +{"info/global_step": 453, "train_info/time_within_train_step": 27.74180245399475, "step": 453} +{"train_info/time_between_train_steps": 0.0051996707916259766, "step": 453} +{"info/global_step": 454, "train_info/time_within_train_step": 27.73264741897583, "step": 454} +{"train_info/time_between_train_steps": 0.010438919067382812, "step": 454} +{"info/global_step": 455, "train_info/time_within_train_step": 27.75480842590332, "step": 455} +{"train_info/time_between_train_steps": 0.007393836975097656, "step": 455} +{"info/global_step": 456, "train_info/time_within_train_step": 27.824880361557007, "step": 456} +{"train_info/time_between_train_steps": 0.0057218074798583984, "step": 456} +{"info/global_step": 457, "train_info/time_within_train_step": 27.730045795440674, "step": 457} +{"train_info/time_between_train_steps": 0.0070493221282958984, "step": 457} +{"info/global_step": 458, "train_info/time_within_train_step": 27.77932906150818, "step": 458} +{"train_info/time_between_train_steps": 0.005278348922729492, "step": 458} +{"info/global_step": 459, "train_info/time_within_train_step": 27.752583742141724, "step": 459} +{"train_info/time_between_train_steps": 0.005658149719238281, "step": 459} +{"info/global_step": 460, "train_info/time_within_train_step": 27.7443790435791, "step": 460} +{"train_info/time_between_train_steps": 0.005616188049316406, "step": 460} +{"info/global_step": 461, "train_info/time_within_train_step": 27.71746277809143, "step": 461} +{"train_info/time_between_train_steps": 0.0053119659423828125, "step": 461} +{"info/global_step": 462, "train_info/time_within_train_step": 27.749714374542236, "step": 462} +{"train_info/time_between_train_steps": 0.006136417388916016, "step": 462} +{"info/global_step": 463, "train_info/time_within_train_step": 27.78159523010254, "step": 463} +{"train_info/time_between_train_steps": 0.005546092987060547, "step": 463} +{"info/global_step": 464, "train_info/time_within_train_step": 27.78865647315979, "step": 464} +{"train_info/time_between_train_steps": 0.006173610687255859, "step": 464} +{"train_info/time_between_train_steps": 28.474802494049072, "step": 464} +{"info/global_step": 465, "train_info/time_within_train_step": 27.724297761917114, "step": 465} +{"train_info/time_between_train_steps": 0.004899501800537109, "step": 465} +{"info/global_step": 466, "train_info/time_within_train_step": 27.900359630584717, "step": 466} +{"train_info/time_between_train_steps": 0.005798816680908203, "step": 466} +{"info/global_step": 467, "train_info/time_within_train_step": 27.82788372039795, "step": 467} +{"train_info/time_between_train_steps": 0.008547544479370117, "step": 467} +{"info/global_step": 468, "train_info/time_within_train_step": 27.866665363311768, "step": 468} +{"train_info/time_between_train_steps": 0.006298542022705078, "step": 468} +{"info/global_step": 469, "train_info/time_within_train_step": 27.725152254104614, "step": 469} +{"train_info/time_between_train_steps": 0.0053539276123046875, "step": 469} +{"info/global_step": 470, "train_info/time_within_train_step": 27.88173532485962, "step": 470} +{"train_info/time_between_train_steps": 0.006035804748535156, "step": 470} +{"info/global_step": 471, "train_info/time_within_train_step": 27.957380771636963, "step": 471} +{"train_info/time_between_train_steps": 0.007040977478027344, "step": 471} +{"info/global_step": 472, "train_info/time_within_train_step": 27.83140540122986, "step": 472} +{"train_info/time_between_train_steps": 0.005271434783935547, "step": 472} +{"info/global_step": 473, "train_info/time_within_train_step": 27.73973846435547, "step": 473} +{"train_info/time_between_train_steps": 0.0054357051849365234, "step": 473} +{"info/global_step": 474, "train_info/time_within_train_step": 27.813884735107422, "step": 474} +{"train_info/time_between_train_steps": 0.0053555965423583984, "step": 474} +{"info/global_step": 475, "train_info/time_within_train_step": 27.814845323562622, "step": 475} +{"train_info/time_between_train_steps": 0.005118131637573242, "step": 475} +{"info/global_step": 476, "train_info/time_within_train_step": 27.71215009689331, "step": 476} +{"train_info/time_between_train_steps": 0.005003929138183594, "step": 476} +{"info/global_step": 477, "train_info/time_within_train_step": 27.717044830322266, "step": 477} +{"train_info/time_between_train_steps": 0.005293130874633789, "step": 477} +{"info/global_step": 478, "train_info/time_within_train_step": 27.707486152648926, "step": 478} +{"train_info/time_between_train_steps": 0.00510859489440918, "step": 478} +{"info/global_step": 479, "train_info/time_within_train_step": 27.716894388198853, "step": 479} +{"train_info/time_between_train_steps": 0.005150318145751953, "step": 479} +{"info/global_step": 480, "train_info/time_within_train_step": 27.739220142364502, "step": 480} +{"train_info/time_between_train_steps": 0.005156993865966797, "step": 480} +{"info/global_step": 481, "train_info/time_within_train_step": 27.72716784477234, "step": 481} +{"train_info/time_between_train_steps": 0.00513148307800293, "step": 481} +{"info/global_step": 482, "train_info/time_within_train_step": 27.725928783416748, "step": 482} +{"train_info/time_between_train_steps": 0.005111217498779297, "step": 482} +{"info/global_step": 483, "train_info/time_within_train_step": 27.747100114822388, "step": 483} +{"train_info/time_between_train_steps": 0.005112886428833008, "step": 483} +{"info/global_step": 484, "train_info/time_within_train_step": 27.740217924118042, "step": 484} +{"train_info/time_between_train_steps": 0.005182743072509766, "step": 484} +{"info/global_step": 485, "train_info/time_within_train_step": 27.754188299179077, "step": 485} +{"train_info/time_between_train_steps": 0.005987882614135742, "step": 485} +{"info/global_step": 486, "train_info/time_within_train_step": 27.826626539230347, "step": 486} +{"train_info/time_between_train_steps": 0.005038738250732422, "step": 486} +{"info/global_step": 487, "train_info/time_within_train_step": 27.73464584350586, "step": 487} +{"train_info/time_between_train_steps": 0.005103111267089844, "step": 487} +{"info/global_step": 488, "train_info/time_within_train_step": 27.72778344154358, "step": 488} +{"train_info/time_between_train_steps": 0.005246162414550781, "step": 488} +{"info/global_step": 489, "train_info/time_within_train_step": 27.73047161102295, "step": 489} +{"train_info/time_between_train_steps": 0.0051422119140625, "step": 489} +{"info/global_step": 490, "train_info/time_within_train_step": 27.72092580795288, "step": 490} +{"train_info/time_between_train_steps": 0.005171298980712891, "step": 490} +{"info/global_step": 491, "train_info/time_within_train_step": 27.715967178344727, "step": 491} +{"train_info/time_between_train_steps": 0.005087614059448242, "step": 491} +{"info/global_step": 492, "train_info/time_within_train_step": 27.73338007926941, "step": 492} +{"train_info/time_between_train_steps": 0.0055196285247802734, "step": 492} +{"info/global_step": 493, "train_info/time_within_train_step": 27.755999326705933, "step": 493} +{"train_info/time_between_train_steps": 0.005782365798950195, "step": 493} +{"train_info/time_between_train_steps": 28.087055444717407, "step": 493} +{"info/global_step": 494, "train_info/time_within_train_step": 27.748627424240112, "step": 494} +{"train_info/time_between_train_steps": 0.005393266677856445, "step": 494} +{"info/global_step": 495, "train_info/time_within_train_step": 27.891380071640015, "step": 495} +{"train_info/time_between_train_steps": 0.0053064823150634766, "step": 495} +{"info/global_step": 496, "train_info/time_within_train_step": 27.75207233428955, "step": 496} +{"train_info/time_between_train_steps": 0.0052165985107421875, "step": 496} +{"info/global_step": 497, "train_info/time_within_train_step": 27.873074531555176, "step": 497} +{"train_info/time_between_train_steps": 0.005316495895385742, "step": 497} +{"info/global_step": 498, "train_info/time_within_train_step": 27.777745962142944, "step": 498} +{"train_info/time_between_train_steps": 0.005645275115966797, "step": 498} +{"info/global_step": 499, "train_info/time_within_train_step": 27.908764839172363, "step": 499} +{"train_info/time_between_train_steps": 0.0053560733795166016, "step": 499} +{"info/global_step": 500, "train_info/time_within_train_step": 28.0279643535614, "step": 500} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736117852, "_runtime": 14979}, "step": 500} +{"logs": {"train/loss": 4.7926, "train/learning_rate": 0.00038888888888888887, "train/epoch": 17.01, "_timestamp": 1736117852, "_runtime": 14979}, "step": 500} +{"train_info/time_between_train_steps": 112.54808235168457, "step": 500} +{"info/global_step": 501, "train_info/time_within_train_step": 28.0745267868042, "step": 501} +{"train_info/time_between_train_steps": 0.005778789520263672, "step": 501} +{"info/global_step": 502, "train_info/time_within_train_step": 27.765201091766357, "step": 502} +{"train_info/time_between_train_steps": 0.005772113800048828, "step": 502} +{"info/global_step": 503, "train_info/time_within_train_step": 27.833097219467163, "step": 503} +{"train_info/time_between_train_steps": 0.0054645538330078125, "step": 503} +{"info/global_step": 504, "train_info/time_within_train_step": 27.748271465301514, "step": 504} +{"train_info/time_between_train_steps": 0.005187273025512695, "step": 504} +{"info/global_step": 505, "train_info/time_within_train_step": 27.719705820083618, "step": 505} +{"train_info/time_between_train_steps": 0.005479574203491211, "step": 505} +{"info/global_step": 506, "train_info/time_within_train_step": 27.7109591960907, "step": 506} +{"train_info/time_between_train_steps": 0.005372524261474609, "step": 506} +{"info/global_step": 507, "train_info/time_within_train_step": 27.800211906433105, "step": 507} +{"train_info/time_between_train_steps": 0.005113363265991211, "step": 507} +{"info/global_step": 508, "train_info/time_within_train_step": 27.751632690429688, "step": 508} +{"train_info/time_between_train_steps": 0.005202770233154297, "step": 508} +{"info/global_step": 509, "train_info/time_within_train_step": 27.72174644470215, "step": 509} +{"train_info/time_between_train_steps": 0.0051822662353515625, "step": 509} +{"info/global_step": 510, "train_info/time_within_train_step": 28.055914640426636, "step": 510} +{"train_info/time_between_train_steps": 0.005408287048339844, "step": 510} +{"info/global_step": 511, "train_info/time_within_train_step": 27.918863773345947, "step": 511} +{"train_info/time_between_train_steps": 0.00535130500793457, "step": 511} +{"info/global_step": 512, "train_info/time_within_train_step": 27.718160390853882, "step": 512} +{"train_info/time_between_train_steps": 0.0052874088287353516, "step": 512} +{"info/global_step": 513, "train_info/time_within_train_step": 27.725386142730713, "step": 513} +{"train_info/time_between_train_steps": 0.005443572998046875, "step": 513} +{"info/global_step": 514, "train_info/time_within_train_step": 28.811856269836426, "step": 514} +{"train_info/time_between_train_steps": 0.0055522918701171875, "step": 514} +{"info/global_step": 515, "train_info/time_within_train_step": 28.495192289352417, "step": 515} +{"train_info/time_between_train_steps": 0.006041049957275391, "step": 515} +{"info/global_step": 516, "train_info/time_within_train_step": 28.20695734024048, "step": 516} +{"train_info/time_between_train_steps": 0.0066454410552978516, "step": 516} +{"info/global_step": 517, "train_info/time_within_train_step": 27.832042932510376, "step": 517} +{"train_info/time_between_train_steps": 0.0057735443115234375, "step": 517} +{"info/global_step": 518, "train_info/time_within_train_step": 27.741719722747803, "step": 518} +{"train_info/time_between_train_steps": 0.005506753921508789, "step": 518} +{"info/global_step": 519, "train_info/time_within_train_step": 27.73771643638611, "step": 519} +{"train_info/time_between_train_steps": 0.005480766296386719, "step": 519} +{"info/global_step": 520, "train_info/time_within_train_step": 27.749178171157837, "step": 520} +{"train_info/time_between_train_steps": 0.005367279052734375, "step": 520} +{"info/global_step": 521, "train_info/time_within_train_step": 27.738077640533447, "step": 521} +{"train_info/time_between_train_steps": 0.005871772766113281, "step": 521} +{"info/global_step": 522, "train_info/time_within_train_step": 27.752061128616333, "step": 522} +{"train_info/time_between_train_steps": 0.005959749221801758, "step": 522} +{"train_info/time_between_train_steps": 28.2372305393219, "step": 522} +{"info/global_step": 523, "train_info/time_within_train_step": 27.73048162460327, "step": 523} +{"train_info/time_between_train_steps": 0.005544424057006836, "step": 523} +{"info/global_step": 524, "train_info/time_within_train_step": 27.874781847000122, "step": 524} +{"train_info/time_between_train_steps": 0.0053446292877197266, "step": 524} +{"info/global_step": 525, "train_info/time_within_train_step": 27.75431513786316, "step": 525} +{"train_info/time_between_train_steps": 0.005192279815673828, "step": 525} +{"info/global_step": 526, "train_info/time_within_train_step": 27.90339493751526, "step": 526} +{"train_info/time_between_train_steps": 0.005425214767456055, "step": 526} +{"info/global_step": 527, "train_info/time_within_train_step": 27.757429838180542, "step": 527} +{"train_info/time_between_train_steps": 0.005093812942504883, "step": 527} +{"info/global_step": 528, "train_info/time_within_train_step": 27.89386558532715, "step": 528} +{"train_info/time_between_train_steps": 0.005475044250488281, "step": 528} +{"info/global_step": 529, "train_info/time_within_train_step": 27.725172996520996, "step": 529} +{"train_info/time_between_train_steps": 0.005231142044067383, "step": 529} +{"info/global_step": 530, "train_info/time_within_train_step": 27.911537647247314, "step": 530} +{"train_info/time_between_train_steps": 0.005204677581787109, "step": 530} +{"info/global_step": 531, "train_info/time_within_train_step": 27.80572819709778, "step": 531} +{"train_info/time_between_train_steps": 0.0052373409271240234, "step": 531} +{"info/global_step": 532, "train_info/time_within_train_step": 27.76940679550171, "step": 532} +{"train_info/time_between_train_steps": 0.005216121673583984, "step": 532} +{"info/global_step": 533, "train_info/time_within_train_step": 27.724247932434082, "step": 533} +{"train_info/time_between_train_steps": 0.005164384841918945, "step": 533} +{"info/global_step": 534, "train_info/time_within_train_step": 27.73196244239807, "step": 534} +{"train_info/time_between_train_steps": 0.005062580108642578, "step": 534} +{"info/global_step": 535, "train_info/time_within_train_step": 27.716748476028442, "step": 535} +{"train_info/time_between_train_steps": 0.005006074905395508, "step": 535} +{"info/global_step": 536, "train_info/time_within_train_step": 27.71577286720276, "step": 536} +{"train_info/time_between_train_steps": 0.005245685577392578, "step": 536} +{"info/global_step": 537, "train_info/time_within_train_step": 27.732690811157227, "step": 537} +{"train_info/time_between_train_steps": 0.005979776382446289, "step": 537} +{"info/global_step": 538, "train_info/time_within_train_step": 27.727059602737427, "step": 538} +{"train_info/time_between_train_steps": 0.005074501037597656, "step": 538} +{"info/global_step": 539, "train_info/time_within_train_step": 27.742008686065674, "step": 539} +{"train_info/time_between_train_steps": 0.0053522586822509766, "step": 539} +{"info/global_step": 540, "train_info/time_within_train_step": 27.73997664451599, "step": 540} +{"train_info/time_between_train_steps": 0.005177736282348633, "step": 540} +{"info/global_step": 541, "train_info/time_within_train_step": 27.722702503204346, "step": 541} +{"train_info/time_between_train_steps": 0.0073773860931396484, "step": 541} +{"info/global_step": 542, "train_info/time_within_train_step": 27.747459650039673, "step": 542} +{"train_info/time_between_train_steps": 0.0054209232330322266, "step": 542} +{"info/global_step": 543, "train_info/time_within_train_step": 27.747273445129395, "step": 543} +{"train_info/time_between_train_steps": 0.006279468536376953, "step": 543} +{"info/global_step": 544, "train_info/time_within_train_step": 27.7359139919281, "step": 544} +{"train_info/time_between_train_steps": 0.00560307502746582, "step": 544} +{"info/global_step": 545, "train_info/time_within_train_step": 27.713670253753662, "step": 545} +{"train_info/time_between_train_steps": 0.005060434341430664, "step": 545} +{"info/global_step": 546, "train_info/time_within_train_step": 27.722408771514893, "step": 546} +{"train_info/time_between_train_steps": 0.005927324295043945, "step": 546} +{"info/global_step": 547, "train_info/time_within_train_step": 27.818594694137573, "step": 547} +{"train_info/time_between_train_steps": 0.005144357681274414, "step": 547} +{"info/global_step": 548, "train_info/time_within_train_step": 27.72934103012085, "step": 548} +{"train_info/time_between_train_steps": 0.005202293395996094, "step": 548} +{"info/global_step": 549, "train_info/time_within_train_step": 27.737372159957886, "step": 549} +{"train_info/time_between_train_steps": 0.005170106887817383, "step": 549} +{"info/global_step": 550, "train_info/time_within_train_step": 27.752654314041138, "step": 550} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736119388, "_runtime": 16515}, "step": 550} +{"logs": {"train/loss": 4.5128, "train/learning_rate": 0.0003611111111111111, "train/epoch": 18.02, "_timestamp": 1736119388, "_runtime": 16515}, "step": 550} +{"train_info/time_between_train_steps": 0.02933812141418457, "step": 550} +{"info/global_step": 551, "train_info/time_within_train_step": 27.749212980270386, "step": 551} +{"train_info/time_between_train_steps": 0.005939483642578125, "step": 551} +{"train_info/time_between_train_steps": 28.40969491004944, "step": 551} +{"info/global_step": 552, "train_info/time_within_train_step": 27.786473035812378, "step": 552} +{"train_info/time_between_train_steps": 0.00588536262512207, "step": 552} +{"info/global_step": 553, "train_info/time_within_train_step": 27.933167219161987, "step": 553} +{"train_info/time_between_train_steps": 0.00538945198059082, "step": 553} +{"info/global_step": 554, "train_info/time_within_train_step": 27.76450276374817, "step": 554} +{"train_info/time_between_train_steps": 0.006278514862060547, "step": 554} +{"info/global_step": 555, "train_info/time_within_train_step": 27.887146472930908, "step": 555} +{"train_info/time_between_train_steps": 0.00539851188659668, "step": 555} +{"info/global_step": 556, "train_info/time_within_train_step": 27.74937343597412, "step": 556} +{"train_info/time_between_train_steps": 0.005430698394775391, "step": 556} +{"info/global_step": 557, "train_info/time_within_train_step": 27.933852434158325, "step": 557} +{"train_info/time_between_train_steps": 0.005461692810058594, "step": 557} +{"info/global_step": 558, "train_info/time_within_train_step": 27.73021388053894, "step": 558} +{"train_info/time_between_train_steps": 0.005387783050537109, "step": 558} +{"info/global_step": 559, "train_info/time_within_train_step": 27.898510694503784, "step": 559} +{"train_info/time_between_train_steps": 0.00534820556640625, "step": 559} +{"info/global_step": 560, "train_info/time_within_train_step": 27.75010108947754, "step": 560} +{"train_info/time_between_train_steps": 0.005567073822021484, "step": 560} +{"info/global_step": 561, "train_info/time_within_train_step": 27.812175750732422, "step": 561} +{"train_info/time_between_train_steps": 0.0053827762603759766, "step": 561} +{"info/global_step": 562, "train_info/time_within_train_step": 27.83949065208435, "step": 562} +{"train_info/time_between_train_steps": 0.014244318008422852, "step": 562} +{"info/global_step": 563, "train_info/time_within_train_step": 27.786650896072388, "step": 563} +{"train_info/time_between_train_steps": 0.005093574523925781, "step": 563} +{"info/global_step": 564, "train_info/time_within_train_step": 27.712283611297607, "step": 564} +{"train_info/time_between_train_steps": 0.005085468292236328, "step": 564} +{"info/global_step": 565, "train_info/time_within_train_step": 27.715787172317505, "step": 565} +{"train_info/time_between_train_steps": 0.0051081180572509766, "step": 565} +{"info/global_step": 566, "train_info/time_within_train_step": 27.709468126296997, "step": 566} +{"train_info/time_between_train_steps": 0.005005598068237305, "step": 566} +{"info/global_step": 567, "train_info/time_within_train_step": 27.729517459869385, "step": 567} +{"train_info/time_between_train_steps": 0.005186796188354492, "step": 567} +{"info/global_step": 568, "train_info/time_within_train_step": 27.720805168151855, "step": 568} +{"train_info/time_between_train_steps": 0.005054950714111328, "step": 568} +{"info/global_step": 569, "train_info/time_within_train_step": 27.712117671966553, "step": 569} +{"train_info/time_between_train_steps": 0.005056858062744141, "step": 569} +{"info/global_step": 570, "train_info/time_within_train_step": 27.716901779174805, "step": 570} +{"train_info/time_between_train_steps": 0.005858659744262695, "step": 570} +{"info/global_step": 571, "train_info/time_within_train_step": 27.714181661605835, "step": 571} +{"train_info/time_between_train_steps": 0.005194664001464844, "step": 571} +{"info/global_step": 572, "train_info/time_within_train_step": 27.715977430343628, "step": 572} +{"train_info/time_between_train_steps": 0.0051805973052978516, "step": 572} +{"info/global_step": 573, "train_info/time_within_train_step": 27.71845555305481, "step": 573} +{"train_info/time_between_train_steps": 0.005213737487792969, "step": 573} +{"info/global_step": 574, "train_info/time_within_train_step": 27.711536645889282, "step": 574} +{"train_info/time_between_train_steps": 0.005141019821166992, "step": 574} +{"info/global_step": 575, "train_info/time_within_train_step": 27.743488788604736, "step": 575} +{"train_info/time_between_train_steps": 0.005263090133666992, "step": 575} +{"info/global_step": 576, "train_info/time_within_train_step": 27.741001844406128, "step": 576} +{"train_info/time_between_train_steps": 0.006291389465332031, "step": 576} +{"info/global_step": 577, "train_info/time_within_train_step": 27.828858375549316, "step": 577} +{"train_info/time_between_train_steps": 0.005079746246337891, "step": 577} +{"info/global_step": 578, "train_info/time_within_train_step": 27.754822492599487, "step": 578} +{"train_info/time_between_train_steps": 0.005438327789306641, "step": 578} +{"info/global_step": 579, "train_info/time_within_train_step": 27.73582124710083, "step": 579} +{"train_info/time_between_train_steps": 0.0053768157958984375, "step": 579} +{"info/global_step": 580, "train_info/time_within_train_step": 27.72495698928833, "step": 580} +{"train_info/time_between_train_steps": 0.0057637691497802734, "step": 580} +{"train_info/time_between_train_steps": 27.94375252723694, "step": 580} +{"info/global_step": 581, "train_info/time_within_train_step": 27.719273805618286, "step": 581} +{"train_info/time_between_train_steps": 0.005018472671508789, "step": 581} +{"info/global_step": 582, "train_info/time_within_train_step": 27.862693548202515, "step": 582} +{"train_info/time_between_train_steps": 0.005279064178466797, "step": 582} +{"info/global_step": 583, "train_info/time_within_train_step": 27.724621534347534, "step": 583} +{"train_info/time_between_train_steps": 0.005468606948852539, "step": 583} +{"info/global_step": 584, "train_info/time_within_train_step": 27.845540523529053, "step": 584} +{"train_info/time_between_train_steps": 0.005416393280029297, "step": 584} +{"info/global_step": 585, "train_info/time_within_train_step": 27.738194465637207, "step": 585} +{"train_info/time_between_train_steps": 0.005914449691772461, "step": 585} +{"info/global_step": 586, "train_info/time_within_train_step": 27.944038152694702, "step": 586} +{"train_info/time_between_train_steps": 0.0051839351654052734, "step": 586} +{"info/global_step": 587, "train_info/time_within_train_step": 27.725898265838623, "step": 587} +{"train_info/time_between_train_steps": 0.005415916442871094, "step": 587} +{"info/global_step": 588, "train_info/time_within_train_step": 27.91740655899048, "step": 588} +{"train_info/time_between_train_steps": 0.00531315803527832, "step": 588} +{"info/global_step": 589, "train_info/time_within_train_step": 27.741572380065918, "step": 589} +{"train_info/time_between_train_steps": 0.005265712738037109, "step": 589} +{"info/global_step": 590, "train_info/time_within_train_step": 27.800381422042847, "step": 590} +{"train_info/time_between_train_steps": 0.010723590850830078, "step": 590} +{"info/global_step": 591, "train_info/time_within_train_step": 27.816794395446777, "step": 591} +{"train_info/time_between_train_steps": 0.005476474761962891, "step": 591} +{"info/global_step": 592, "train_info/time_within_train_step": 27.85547924041748, "step": 592} +{"train_info/time_between_train_steps": 0.007097721099853516, "step": 592} +{"info/global_step": 593, "train_info/time_within_train_step": 27.731393575668335, "step": 593} +{"train_info/time_between_train_steps": 0.005326271057128906, "step": 593} +{"info/global_step": 594, "train_info/time_within_train_step": 27.723424434661865, "step": 594} +{"train_info/time_between_train_steps": 0.0051381587982177734, "step": 594} +{"info/global_step": 595, "train_info/time_within_train_step": 27.72516107559204, "step": 595} +{"train_info/time_between_train_steps": 0.005187273025512695, "step": 595} +{"info/global_step": 596, "train_info/time_within_train_step": 27.728723764419556, "step": 596} +{"train_info/time_between_train_steps": 0.0052602291107177734, "step": 596} +{"info/global_step": 597, "train_info/time_within_train_step": 27.754172325134277, "step": 597} +{"train_info/time_between_train_steps": 0.005148172378540039, "step": 597} +{"info/global_step": 598, "train_info/time_within_train_step": 27.71892285346985, "step": 598} +{"train_info/time_between_train_steps": 0.005170583724975586, "step": 598} +{"info/global_step": 599, "train_info/time_within_train_step": 27.731056451797485, "step": 599} +{"train_info/time_between_train_steps": 0.0053365230560302734, "step": 599} +{"info/global_step": 600, "train_info/time_within_train_step": 28.112848043441772, "step": 600} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736120836, "_runtime": 17963}, "step": 600} +{"logs": {"train/loss": 4.4343, "train/learning_rate": 0.0003333333333333333, "train/epoch": 20.02, "_timestamp": 1736120836, "_runtime": 17963}, "step": 600} +{"train_info/time_between_train_steps": 142.6777684688568, "step": 600} +{"info/global_step": 601, "train_info/time_within_train_step": 27.85255742073059, "step": 601} +{"train_info/time_between_train_steps": 0.006209850311279297, "step": 601} +{"info/global_step": 602, "train_info/time_within_train_step": 27.861101150512695, "step": 602} +{"train_info/time_between_train_steps": 0.006055593490600586, "step": 602} +{"info/global_step": 603, "train_info/time_within_train_step": 27.82768440246582, "step": 603} +{"train_info/time_between_train_steps": 0.006422758102416992, "step": 603} +{"info/global_step": 604, "train_info/time_within_train_step": 27.850786209106445, "step": 604} +{"train_info/time_between_train_steps": 0.00603795051574707, "step": 604} +{"info/global_step": 605, "train_info/time_within_train_step": 27.984695196151733, "step": 605} +{"train_info/time_between_train_steps": 0.0064809322357177734, "step": 605} +{"info/global_step": 606, "train_info/time_within_train_step": 27.85109233856201, "step": 606} +{"train_info/time_between_train_steps": 0.006359577178955078, "step": 606} +{"info/global_step": 607, "train_info/time_within_train_step": 28.143773317337036, "step": 607} +{"train_info/time_between_train_steps": 0.006737947463989258, "step": 607} +{"info/global_step": 608, "train_info/time_within_train_step": 28.804208517074585, "step": 608} +{"train_info/time_between_train_steps": 0.0067522525787353516, "step": 608} +{"info/global_step": 609, "train_info/time_within_train_step": 32.85030651092529, "step": 609} +{"train_info/time_between_train_steps": 0.006979942321777344, "step": 609} +{"train_info/time_between_train_steps": 38.21834683418274, "step": 609} +{"info/global_step": 610, "train_info/time_within_train_step": 29.195664167404175, "step": 610} +{"train_info/time_between_train_steps": 0.005919456481933594, "step": 610} +{"info/global_step": 611, "train_info/time_within_train_step": 29.264215230941772, "step": 611} +{"train_info/time_between_train_steps": 0.0055201053619384766, "step": 611} +{"info/global_step": 612, "train_info/time_within_train_step": 27.787980794906616, "step": 612} +{"train_info/time_between_train_steps": 0.005965709686279297, "step": 612} +{"info/global_step": 613, "train_info/time_within_train_step": 27.94835877418518, "step": 613} +{"train_info/time_between_train_steps": 0.00573420524597168, "step": 613} +{"info/global_step": 614, "train_info/time_within_train_step": 27.77298903465271, "step": 614} +{"train_info/time_between_train_steps": 0.005986690521240234, "step": 614} +{"info/global_step": 615, "train_info/time_within_train_step": 27.959126710891724, "step": 615} +{"train_info/time_between_train_steps": 0.005472898483276367, "step": 615} +{"info/global_step": 616, "train_info/time_within_train_step": 27.8256573677063, "step": 616} +{"train_info/time_between_train_steps": 0.005622148513793945, "step": 616} +{"info/global_step": 617, "train_info/time_within_train_step": 28.004091024398804, "step": 617} +{"train_info/time_between_train_steps": 0.00936436653137207, "step": 617} +{"info/global_step": 618, "train_info/time_within_train_step": 27.81272530555725, "step": 618} +{"train_info/time_between_train_steps": 0.005671977996826172, "step": 618} +{"info/global_step": 619, "train_info/time_within_train_step": 27.835759162902832, "step": 619} +{"train_info/time_between_train_steps": 0.005513906478881836, "step": 619} +{"info/global_step": 620, "train_info/time_within_train_step": 27.767208099365234, "step": 620} +{"train_info/time_between_train_steps": 0.005176544189453125, "step": 620} +{"info/global_step": 621, "train_info/time_within_train_step": 27.73206353187561, "step": 621} +{"train_info/time_between_train_steps": 0.0050356388092041016, "step": 621} +{"info/global_step": 622, "train_info/time_within_train_step": 27.82620406150818, "step": 622} +{"train_info/time_between_train_steps": 0.005317211151123047, "step": 622} +{"info/global_step": 623, "train_info/time_within_train_step": 27.72441554069519, "step": 623} +{"train_info/time_between_train_steps": 0.005084037780761719, "step": 623} +{"info/global_step": 624, "train_info/time_within_train_step": 27.73674964904785, "step": 624} +{"train_info/time_between_train_steps": 0.0051729679107666016, "step": 624} +{"info/global_step": 625, "train_info/time_within_train_step": 27.722872734069824, "step": 625} +{"train_info/time_between_train_steps": 0.0050563812255859375, "step": 625} +{"info/global_step": 626, "train_info/time_within_train_step": 27.732398986816406, "step": 626} +{"train_info/time_between_train_steps": 0.0053272247314453125, "step": 626} +{"info/global_step": 627, "train_info/time_within_train_step": 27.736034870147705, "step": 627} +{"train_info/time_between_train_steps": 0.0053310394287109375, "step": 627} +{"info/global_step": 628, "train_info/time_within_train_step": 27.764740705490112, "step": 628} +{"train_info/time_between_train_steps": 0.005121946334838867, "step": 628} +{"info/global_step": 629, "train_info/time_within_train_step": 27.759228229522705, "step": 629} +{"train_info/time_between_train_steps": 0.005459785461425781, "step": 629} +{"info/global_step": 630, "train_info/time_within_train_step": 27.75415325164795, "step": 630} +{"train_info/time_between_train_steps": 0.005202531814575195, "step": 630} +{"info/global_step": 631, "train_info/time_within_train_step": 27.739470720291138, "step": 631} +{"train_info/time_between_train_steps": 0.005250215530395508, "step": 631} +{"info/global_step": 632, "train_info/time_within_train_step": 27.73867678642273, "step": 632} +{"train_info/time_between_train_steps": 0.005132913589477539, "step": 632} +{"info/global_step": 633, "train_info/time_within_train_step": 27.969133138656616, "step": 633} +{"train_info/time_between_train_steps": 0.00518035888671875, "step": 633} +{"info/global_step": 634, "train_info/time_within_train_step": 27.748859643936157, "step": 634} +{"train_info/time_between_train_steps": 0.00536346435546875, "step": 634} +{"info/global_step": 635, "train_info/time_within_train_step": 27.814394235610962, "step": 635} +{"train_info/time_between_train_steps": 0.0052225589752197266, "step": 635} +{"info/global_step": 636, "train_info/time_within_train_step": 27.764273405075073, "step": 636} +{"train_info/time_between_train_steps": 0.010768890380859375, "step": 636} +{"info/global_step": 637, "train_info/time_within_train_step": 28.006014347076416, "step": 637} +{"train_info/time_between_train_steps": 0.0056574344635009766, "step": 637} +{"info/global_step": 638, "train_info/time_within_train_step": 27.85815191268921, "step": 638} +{"train_info/time_between_train_steps": 0.011345148086547852, "step": 638} +{"train_info/time_between_train_steps": 28.27283549308777, "step": 638} +{"info/global_step": 639, "train_info/time_within_train_step": 27.74469518661499, "step": 639} +{"train_info/time_between_train_steps": 0.00493168830871582, "step": 639} +{"info/global_step": 640, "train_info/time_within_train_step": 27.888309717178345, "step": 640} +{"train_info/time_between_train_steps": 0.005257844924926758, "step": 640} +{"info/global_step": 641, "train_info/time_within_train_step": 27.742627143859863, "step": 641} +{"train_info/time_between_train_steps": 0.005463361740112305, "step": 641} +{"info/global_step": 642, "train_info/time_within_train_step": 27.85700488090515, "step": 642} +{"train_info/time_between_train_steps": 0.005524396896362305, "step": 642} +{"info/global_step": 643, "train_info/time_within_train_step": 27.745611906051636, "step": 643} +{"train_info/time_between_train_steps": 0.0053479671478271484, "step": 643} +{"info/global_step": 644, "train_info/time_within_train_step": 27.885743618011475, "step": 644} +{"train_info/time_between_train_steps": 0.0053558349609375, "step": 644} +{"info/global_step": 645, "train_info/time_within_train_step": 27.75915837287903, "step": 645} +{"train_info/time_between_train_steps": 0.0053751468658447266, "step": 645} +{"info/global_step": 646, "train_info/time_within_train_step": 27.93995976448059, "step": 646} +{"train_info/time_between_train_steps": 0.005351066589355469, "step": 646} +{"info/global_step": 647, "train_info/time_within_train_step": 27.824480056762695, "step": 647} +{"train_info/time_between_train_steps": 0.005238056182861328, "step": 647} +{"info/global_step": 648, "train_info/time_within_train_step": 27.811227798461914, "step": 648} +{"train_info/time_between_train_steps": 0.005394935607910156, "step": 648} +{"info/global_step": 649, "train_info/time_within_train_step": 27.739330291748047, "step": 649} +{"train_info/time_between_train_steps": 0.005154132843017578, "step": 649} +{"info/global_step": 650, "train_info/time_within_train_step": 27.718706607818604, "step": 650} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736122449, "_runtime": 19576}, "step": 650} +{"logs": {"train/loss": 4.2938, "train/learning_rate": 0.00030555555555555555, "train/epoch": 22.01, "_timestamp": 1736122449, "_runtime": 19576}, "step": 650} +{"train_info/time_between_train_steps": 0.02640843391418457, "step": 650} +{"info/global_step": 651, "train_info/time_within_train_step": 27.722339868545532, "step": 651} +{"train_info/time_between_train_steps": 0.005350589752197266, "step": 651} +{"info/global_step": 652, "train_info/time_within_train_step": 27.717827320098877, "step": 652} +{"train_info/time_between_train_steps": 0.005117654800415039, "step": 652} +{"info/global_step": 653, "train_info/time_within_train_step": 27.82704782485962, "step": 653} +{"train_info/time_between_train_steps": 0.005197286605834961, "step": 653} +{"info/global_step": 654, "train_info/time_within_train_step": 27.73149871826172, "step": 654} +{"train_info/time_between_train_steps": 0.005026578903198242, "step": 654} +{"info/global_step": 655, "train_info/time_within_train_step": 27.73880934715271, "step": 655} +{"train_info/time_between_train_steps": 0.005066394805908203, "step": 655} +{"info/global_step": 656, "train_info/time_within_train_step": 27.76718235015869, "step": 656} +{"train_info/time_between_train_steps": 0.00519251823425293, "step": 656} +{"info/global_step": 657, "train_info/time_within_train_step": 27.736141204833984, "step": 657} +{"train_info/time_between_train_steps": 0.005210399627685547, "step": 657} +{"info/global_step": 658, "train_info/time_within_train_step": 27.742504596710205, "step": 658} +{"train_info/time_between_train_steps": 0.0051419734954833984, "step": 658} +{"info/global_step": 659, "train_info/time_within_train_step": 27.81054449081421, "step": 659} +{"train_info/time_between_train_steps": 0.00520777702331543, "step": 659} +{"info/global_step": 660, "train_info/time_within_train_step": 27.77346634864807, "step": 660} +{"train_info/time_between_train_steps": 0.009726762771606445, "step": 660} +{"info/global_step": 661, "train_info/time_within_train_step": 27.748283863067627, "step": 661} +{"train_info/time_between_train_steps": 0.005101203918457031, "step": 661} +{"info/global_step": 662, "train_info/time_within_train_step": 27.765948057174683, "step": 662} +{"train_info/time_between_train_steps": 0.009798526763916016, "step": 662} +{"info/global_step": 663, "train_info/time_within_train_step": 27.743321895599365, "step": 663} +{"train_info/time_between_train_steps": 0.0052449703216552734, "step": 663} +{"info/global_step": 664, "train_info/time_within_train_step": 27.75096082687378, "step": 664} +{"train_info/time_between_train_steps": 0.009660005569458008, "step": 664} +{"info/global_step": 665, "train_info/time_within_train_step": 27.76267409324646, "step": 665} +{"train_info/time_between_train_steps": 0.005394458770751953, "step": 665} +{"info/global_step": 666, "train_info/time_within_train_step": 27.749244451522827, "step": 666} +{"train_info/time_between_train_steps": 0.0056972503662109375, "step": 666} +{"info/global_step": 667, "train_info/time_within_train_step": 28.094141006469727, "step": 667} +{"train_info/time_between_train_steps": 0.005741596221923828, "step": 667} +{"train_info/time_between_train_steps": 27.88044762611389, "step": 667} +{"info/global_step": 668, "train_info/time_within_train_step": 27.919629335403442, "step": 668} +{"train_info/time_between_train_steps": 0.005414485931396484, "step": 668} +{"info/global_step": 669, "train_info/time_within_train_step": 27.89409637451172, "step": 669} +{"train_info/time_between_train_steps": 0.014590978622436523, "step": 669} +{"info/global_step": 670, "train_info/time_within_train_step": 27.762561798095703, "step": 670} +{"train_info/time_between_train_steps": 0.005349874496459961, "step": 670} +{"info/global_step": 671, "train_info/time_within_train_step": 28.010265827178955, "step": 671} +{"train_info/time_between_train_steps": 0.005465984344482422, "step": 671} +{"info/global_step": 672, "train_info/time_within_train_step": 27.741317987442017, "step": 672} +{"train_info/time_between_train_steps": 0.005445957183837891, "step": 672} +{"info/global_step": 673, "train_info/time_within_train_step": 27.914186239242554, "step": 673} +{"train_info/time_between_train_steps": 0.010167360305786133, "step": 673} +{"info/global_step": 674, "train_info/time_within_train_step": 27.752678155899048, "step": 674} +{"train_info/time_between_train_steps": 0.005814790725708008, "step": 674} +{"info/global_step": 675, "train_info/time_within_train_step": 27.862817525863647, "step": 675} +{"train_info/time_between_train_steps": 0.005547285079956055, "step": 675} +{"info/global_step": 676, "train_info/time_within_train_step": 27.827616453170776, "step": 676} +{"train_info/time_between_train_steps": 0.01597428321838379, "step": 676} +{"info/global_step": 677, "train_info/time_within_train_step": 27.944106578826904, "step": 677} +{"train_info/time_between_train_steps": 0.006154298782348633, "step": 677} +{"info/global_step": 678, "train_info/time_within_train_step": 27.874382734298706, "step": 678} +{"train_info/time_between_train_steps": 0.014982938766479492, "step": 678} +{"info/global_step": 679, "train_info/time_within_train_step": 27.80135488510132, "step": 679} +{"train_info/time_between_train_steps": 0.005231618881225586, "step": 679} +{"info/global_step": 680, "train_info/time_within_train_step": 27.753684043884277, "step": 680} +{"train_info/time_between_train_steps": 0.005434274673461914, "step": 680} +{"info/global_step": 681, "train_info/time_within_train_step": 27.749427795410156, "step": 681} +{"train_info/time_between_train_steps": 0.015141010284423828, "step": 681} +{"info/global_step": 682, "train_info/time_within_train_step": 27.916401386260986, "step": 682} +{"train_info/time_between_train_steps": 0.009945392608642578, "step": 682} +{"info/global_step": 683, "train_info/time_within_train_step": 27.93022847175598, "step": 683} +{"train_info/time_between_train_steps": 0.0051958560943603516, "step": 683} +{"info/global_step": 684, "train_info/time_within_train_step": 27.777112245559692, "step": 684} +{"train_info/time_between_train_steps": 0.005283355712890625, "step": 684} +{"info/global_step": 685, "train_info/time_within_train_step": 27.762930870056152, "step": 685} +{"train_info/time_between_train_steps": 0.009886026382446289, "step": 685} +{"info/global_step": 686, "train_info/time_within_train_step": 27.927576780319214, "step": 686} +{"train_info/time_between_train_steps": 0.009663820266723633, "step": 686} +{"info/global_step": 687, "train_info/time_within_train_step": 27.900919198989868, "step": 687} +{"train_info/time_between_train_steps": 0.005406379699707031, "step": 687} +{"info/global_step": 688, "train_info/time_within_train_step": 27.84220790863037, "step": 688} +{"train_info/time_between_train_steps": 0.005205631256103516, "step": 688} +{"info/global_step": 689, "train_info/time_within_train_step": 27.732922792434692, "step": 689} +{"train_info/time_between_train_steps": 0.0052509307861328125, "step": 689} +{"info/global_step": 690, "train_info/time_within_train_step": 27.74621295928955, "step": 690} +{"train_info/time_between_train_steps": 0.005252361297607422, "step": 690} +{"info/global_step": 691, "train_info/time_within_train_step": 27.72623085975647, "step": 691} +{"train_info/time_between_train_steps": 0.005315303802490234, "step": 691} +{"info/global_step": 692, "train_info/time_within_train_step": 27.733689308166504, "step": 692} +{"train_info/time_between_train_steps": 0.005388498306274414, "step": 692} +{"info/global_step": 693, "train_info/time_within_train_step": 27.766822814941406, "step": 693} +{"train_info/time_between_train_steps": 0.005337715148925781, "step": 693} +{"info/global_step": 694, "train_info/time_within_train_step": 27.7941837310791, "step": 694} +{"train_info/time_between_train_steps": 0.005216360092163086, "step": 694} +{"info/global_step": 695, "train_info/time_within_train_step": 27.769726276397705, "step": 695} +{"train_info/time_between_train_steps": 0.005624055862426758, "step": 695} +{"info/global_step": 696, "train_info/time_within_train_step": 27.749473333358765, "step": 696} +{"train_info/time_between_train_steps": 0.005768299102783203, "step": 696} +{"train_info/time_between_train_steps": 28.117247104644775, "step": 696} +{"info/global_step": 697, "train_info/time_within_train_step": 27.731985569000244, "step": 697} +{"train_info/time_between_train_steps": 0.005416393280029297, "step": 697} +{"info/global_step": 698, "train_info/time_within_train_step": 27.976939916610718, "step": 698} +{"train_info/time_between_train_steps": 0.0053386688232421875, "step": 698} +{"info/global_step": 699, "train_info/time_within_train_step": 27.722643852233887, "step": 699} +{"train_info/time_between_train_steps": 0.006315708160400391, "step": 699} +{"info/global_step": 700, "train_info/time_within_train_step": 28.31178569793701, "step": 700} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736123898, "_runtime": 21025}, "step": 700} +{"logs": {"train/loss": 4.1808, "train/learning_rate": 0.0002777777777777778, "train/epoch": 24.0, "_timestamp": 1736123898, "_runtime": 21025}, "step": 700} +{"train_info/time_between_train_steps": 68.73479437828064, "step": 700} +{"info/global_step": 701, "train_info/time_within_train_step": 27.845584630966187, "step": 701} +{"train_info/time_between_train_steps": 0.0068972110748291016, "step": 701} +{"info/global_step": 702, "train_info/time_within_train_step": 28.033198356628418, "step": 702} +{"train_info/time_between_train_steps": 0.00689244270324707, "step": 702} +{"info/global_step": 703, "train_info/time_within_train_step": 27.85380220413208, "step": 703} +{"train_info/time_between_train_steps": 0.007061958312988281, "step": 703} +{"info/global_step": 704, "train_info/time_within_train_step": 28.53270173072815, "step": 704} +{"train_info/time_between_train_steps": 0.007687807083129883, "step": 704} +{"info/global_step": 705, "train_info/time_within_train_step": 28.004806756973267, "step": 705} +{"train_info/time_between_train_steps": 0.006070613861083984, "step": 705} +{"info/global_step": 706, "train_info/time_within_train_step": 28.72077202796936, "step": 706} +{"train_info/time_between_train_steps": 0.017410993576049805, "step": 706} +{"info/global_step": 707, "train_info/time_within_train_step": 28.030346155166626, "step": 707} +{"train_info/time_between_train_steps": 0.005824089050292969, "step": 707} +{"info/global_step": 708, "train_info/time_within_train_step": 27.82640242576599, "step": 708} +{"train_info/time_between_train_steps": 0.005624055862426758, "step": 708} +{"info/global_step": 709, "train_info/time_within_train_step": 27.86792540550232, "step": 709} +{"train_info/time_between_train_steps": 0.005745649337768555, "step": 709} +{"info/global_step": 710, "train_info/time_within_train_step": 27.80595374107361, "step": 710} +{"train_info/time_between_train_steps": 0.005454540252685547, "step": 710} +{"info/global_step": 711, "train_info/time_within_train_step": 27.824957370758057, "step": 711} +{"train_info/time_between_train_steps": 0.010544776916503906, "step": 711} +{"info/global_step": 712, "train_info/time_within_train_step": 27.920849084854126, "step": 712} +{"train_info/time_between_train_steps": 0.005536556243896484, "step": 712} +{"info/global_step": 713, "train_info/time_within_train_step": 27.90238642692566, "step": 713} +{"train_info/time_between_train_steps": 0.005556344985961914, "step": 713} +{"info/global_step": 714, "train_info/time_within_train_step": 27.805557250976562, "step": 714} +{"train_info/time_between_train_steps": 0.0055773258209228516, "step": 714} +{"info/global_step": 715, "train_info/time_within_train_step": 27.76563858985901, "step": 715} +{"train_info/time_between_train_steps": 0.005684375762939453, "step": 715} +{"info/global_step": 716, "train_info/time_within_train_step": 27.797173500061035, "step": 716} +{"train_info/time_between_train_steps": 0.005888938903808594, "step": 716} +{"info/global_step": 717, "train_info/time_within_train_step": 27.79113245010376, "step": 717} +{"train_info/time_between_train_steps": 0.006009340286254883, "step": 717} +{"info/global_step": 718, "train_info/time_within_train_step": 27.834242582321167, "step": 718} +{"train_info/time_between_train_steps": 0.00551605224609375, "step": 718} +{"info/global_step": 719, "train_info/time_within_train_step": 27.810641050338745, "step": 719} +{"train_info/time_between_train_steps": 0.006258964538574219, "step": 719} +{"info/global_step": 720, "train_info/time_within_train_step": 27.775606870651245, "step": 720} +{"train_info/time_between_train_steps": 0.006377220153808594, "step": 720} +{"info/global_step": 721, "train_info/time_within_train_step": 27.829561948776245, "step": 721} +{"train_info/time_between_train_steps": 0.0064029693603515625, "step": 721} +{"info/global_step": 722, "train_info/time_within_train_step": 27.81445026397705, "step": 722} +{"train_info/time_between_train_steps": 0.006234884262084961, "step": 722} +{"info/global_step": 723, "train_info/time_within_train_step": 27.80909252166748, "step": 723} +{"train_info/time_between_train_steps": 0.006424427032470703, "step": 723} +{"info/global_step": 724, "train_info/time_within_train_step": 27.824000120162964, "step": 724} +{"train_info/time_between_train_steps": 0.006171464920043945, "step": 724} +{"info/global_step": 725, "train_info/time_within_train_step": 27.81815457344055, "step": 725} +{"train_info/time_between_train_steps": 0.00668787956237793, "step": 725} +{"train_info/time_between_train_steps": 28.476121187210083, "step": 725} +{"info/global_step": 726, "train_info/time_within_train_step": 27.784157514572144, "step": 726} +{"train_info/time_between_train_steps": 0.00604557991027832, "step": 726} +{"info/global_step": 727, "train_info/time_within_train_step": 27.960570335388184, "step": 727} +{"train_info/time_between_train_steps": 0.00782465934753418, "step": 727} +{"info/global_step": 728, "train_info/time_within_train_step": 27.886006593704224, "step": 728} +{"train_info/time_between_train_steps": 0.005671977996826172, "step": 728} +{"info/global_step": 729, "train_info/time_within_train_step": 27.971362113952637, "step": 729} +{"train_info/time_between_train_steps": 0.006026744842529297, "step": 729} +{"info/global_step": 730, "train_info/time_within_train_step": 27.966532468795776, "step": 730} +{"train_info/time_between_train_steps": 0.00619196891784668, "step": 730} +{"info/global_step": 731, "train_info/time_within_train_step": 27.94818353652954, "step": 731} +{"train_info/time_between_train_steps": 0.00591731071472168, "step": 731} +{"info/global_step": 732, "train_info/time_within_train_step": 27.770686149597168, "step": 732} +{"train_info/time_between_train_steps": 0.006337881088256836, "step": 732} +{"info/global_step": 733, "train_info/time_within_train_step": 27.958037853240967, "step": 733} +{"train_info/time_between_train_steps": 0.005804777145385742, "step": 733} +{"info/global_step": 734, "train_info/time_within_train_step": 27.899795293807983, "step": 734} +{"train_info/time_between_train_steps": 0.0060274600982666016, "step": 734} +{"info/global_step": 735, "train_info/time_within_train_step": 27.85439157485962, "step": 735} +{"train_info/time_between_train_steps": 0.0054073333740234375, "step": 735} +{"info/global_step": 736, "train_info/time_within_train_step": 27.7592933177948, "step": 736} +{"train_info/time_between_train_steps": 0.005285978317260742, "step": 736} +{"info/global_step": 737, "train_info/time_within_train_step": 27.741132497787476, "step": 737} +{"train_info/time_between_train_steps": 0.005156755447387695, "step": 737} +{"info/global_step": 738, "train_info/time_within_train_step": 27.72325897216797, "step": 738} +{"train_info/time_between_train_steps": 0.005403995513916016, "step": 738} +{"info/global_step": 739, "train_info/time_within_train_step": 27.726786136627197, "step": 739} +{"train_info/time_between_train_steps": 0.005252361297607422, "step": 739} +{"info/global_step": 740, "train_info/time_within_train_step": 27.73082447052002, "step": 740} +{"train_info/time_between_train_steps": 0.005381107330322266, "step": 740} +{"info/global_step": 741, "train_info/time_within_train_step": 27.726539611816406, "step": 741} +{"train_info/time_between_train_steps": 0.0053369998931884766, "step": 741} +{"info/global_step": 742, "train_info/time_within_train_step": 27.72769594192505, "step": 742} +{"train_info/time_between_train_steps": 0.005219697952270508, "step": 742} +{"info/global_step": 743, "train_info/time_within_train_step": 27.731126308441162, "step": 743} +{"train_info/time_between_train_steps": 0.005364894866943359, "step": 743} +{"info/global_step": 744, "train_info/time_within_train_step": 27.83449912071228, "step": 744} +{"train_info/time_between_train_steps": 0.005207061767578125, "step": 744} +{"info/global_step": 745, "train_info/time_within_train_step": 27.716912508010864, "step": 745} +{"train_info/time_between_train_steps": 0.005289793014526367, "step": 745} +{"info/global_step": 746, "train_info/time_within_train_step": 27.72610306739807, "step": 746} +{"train_info/time_between_train_steps": 0.005425214767456055, "step": 746} +{"info/global_step": 747, "train_info/time_within_train_step": 27.727310180664062, "step": 747} +{"train_info/time_between_train_steps": 0.005225181579589844, "step": 747} +{"info/global_step": 748, "train_info/time_within_train_step": 27.729180574417114, "step": 748} +{"train_info/time_between_train_steps": 0.005263328552246094, "step": 748} +{"info/global_step": 749, "train_info/time_within_train_step": 27.723405361175537, "step": 749} +{"train_info/time_between_train_steps": 0.00511932373046875, "step": 749} +{"info/global_step": 750, "train_info/time_within_train_step": 27.73189949989319, "step": 750} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736125391, "_runtime": 22518}, "step": 750} +{"logs": {"train/loss": 4.0149, "train/learning_rate": 0.00025, "train/epoch": 25.02, "_timestamp": 1736125391, "_runtime": 22518}, "step": 750} +{"train_info/time_between_train_steps": 0.026511192321777344, "step": 750} +{"info/global_step": 751, "train_info/time_within_train_step": 27.736812353134155, "step": 751} +{"train_info/time_between_train_steps": 0.005465269088745117, "step": 751} +{"info/global_step": 752, "train_info/time_within_train_step": 27.914124488830566, "step": 752} +{"train_info/time_between_train_steps": 0.005376577377319336, "step": 752} +{"info/global_step": 753, "train_info/time_within_train_step": 27.745426654815674, "step": 753} +{"train_info/time_between_train_steps": 0.006898403167724609, "step": 753} +{"info/global_step": 754, "train_info/time_within_train_step": 27.751660108566284, "step": 754} +{"train_info/time_between_train_steps": 0.005768537521362305, "step": 754} +{"train_info/time_between_train_steps": 28.347161293029785, "step": 754} +{"info/global_step": 755, "train_info/time_within_train_step": 27.776426076889038, "step": 755} +{"train_info/time_between_train_steps": 0.005795717239379883, "step": 755} +{"info/global_step": 756, "train_info/time_within_train_step": 27.94241213798523, "step": 756} +{"train_info/time_between_train_steps": 0.005465269088745117, "step": 756} +{"info/global_step": 757, "train_info/time_within_train_step": 27.76442050933838, "step": 757} +{"train_info/time_between_train_steps": 0.0058209896087646484, "step": 757} +{"info/global_step": 758, "train_info/time_within_train_step": 27.93516731262207, "step": 758} +{"train_info/time_between_train_steps": 0.0055582523345947266, "step": 758} +{"info/global_step": 759, "train_info/time_within_train_step": 27.854877948760986, "step": 759} +{"train_info/time_between_train_steps": 0.00552821159362793, "step": 759} +{"info/global_step": 760, "train_info/time_within_train_step": 27.95793318748474, "step": 760} +{"train_info/time_between_train_steps": 0.005631923675537109, "step": 760} +{"info/global_step": 761, "train_info/time_within_train_step": 27.753573417663574, "step": 761} +{"train_info/time_between_train_steps": 0.005682706832885742, "step": 761} +{"info/global_step": 762, "train_info/time_within_train_step": 27.91869807243347, "step": 762} +{"train_info/time_between_train_steps": 0.005279064178466797, "step": 762} +{"info/global_step": 763, "train_info/time_within_train_step": 27.74924087524414, "step": 763} +{"train_info/time_between_train_steps": 0.005758047103881836, "step": 763} +{"info/global_step": 764, "train_info/time_within_train_step": 27.808910846710205, "step": 764} +{"train_info/time_between_train_steps": 0.005817890167236328, "step": 764} +{"info/global_step": 765, "train_info/time_within_train_step": 27.736297607421875, "step": 765} +{"train_info/time_between_train_steps": 0.005167961120605469, "step": 765} +{"info/global_step": 766, "train_info/time_within_train_step": 27.723625898361206, "step": 766} +{"train_info/time_between_train_steps": 0.0051496028900146484, "step": 766} +{"info/global_step": 767, "train_info/time_within_train_step": 27.718809843063354, "step": 767} +{"train_info/time_between_train_steps": 0.005294322967529297, "step": 767} +{"info/global_step": 768, "train_info/time_within_train_step": 27.722039222717285, "step": 768} +{"train_info/time_between_train_steps": 0.005211591720581055, "step": 768} +{"info/global_step": 769, "train_info/time_within_train_step": 27.722039461135864, "step": 769} +{"train_info/time_between_train_steps": 0.0052032470703125, "step": 769} +{"info/global_step": 770, "train_info/time_within_train_step": 27.722405672073364, "step": 770} +{"train_info/time_between_train_steps": 0.005194187164306641, "step": 770} +{"info/global_step": 771, "train_info/time_within_train_step": 27.72310447692871, "step": 771} +{"train_info/time_between_train_steps": 0.005282163619995117, "step": 771} +{"info/global_step": 772, "train_info/time_within_train_step": 27.728424310684204, "step": 772} +{"train_info/time_between_train_steps": 0.005158424377441406, "step": 772} +{"info/global_step": 773, "train_info/time_within_train_step": 27.751792907714844, "step": 773} +{"train_info/time_between_train_steps": 0.005392789840698242, "step": 773} +{"info/global_step": 774, "train_info/time_within_train_step": 27.83238196372986, "step": 774} +{"train_info/time_between_train_steps": 0.0051631927490234375, "step": 774} +{"info/global_step": 775, "train_info/time_within_train_step": 27.74286198616028, "step": 775} +{"train_info/time_between_train_steps": 0.006189107894897461, "step": 775} +{"info/global_step": 776, "train_info/time_within_train_step": 27.7366783618927, "step": 776} +{"train_info/time_between_train_steps": 0.005152225494384766, "step": 776} +{"info/global_step": 777, "train_info/time_within_train_step": 27.75180149078369, "step": 777} +{"train_info/time_between_train_steps": 0.0056650638580322266, "step": 777} +{"info/global_step": 778, "train_info/time_within_train_step": 27.732601165771484, "step": 778} +{"train_info/time_between_train_steps": 0.0053136348724365234, "step": 778} +{"info/global_step": 779, "train_info/time_within_train_step": 27.723783254623413, "step": 779} +{"train_info/time_between_train_steps": 0.00561213493347168, "step": 779} +{"info/global_step": 780, "train_info/time_within_train_step": 27.795050859451294, "step": 780} +{"train_info/time_between_train_steps": 0.0051021575927734375, "step": 780} +{"info/global_step": 781, "train_info/time_within_train_step": 27.73794984817505, "step": 781} +{"train_info/time_between_train_steps": 0.005213022232055664, "step": 781} +{"info/global_step": 782, "train_info/time_within_train_step": 27.743887901306152, "step": 782} +{"train_info/time_between_train_steps": 0.005682468414306641, "step": 782} +{"info/global_step": 783, "train_info/time_within_train_step": 27.734119653701782, "step": 783} +{"train_info/time_between_train_steps": 0.005616903305053711, "step": 783} +{"train_info/time_between_train_steps": 27.928677320480347, "step": 783} +{"info/global_step": 784, "train_info/time_within_train_step": 27.715904712677002, "step": 784} +{"train_info/time_between_train_steps": 0.005564689636230469, "step": 784} +{"info/global_step": 785, "train_info/time_within_train_step": 27.82736611366272, "step": 785} +{"train_info/time_between_train_steps": 0.005070209503173828, "step": 785} +{"info/global_step": 786, "train_info/time_within_train_step": 27.72585129737854, "step": 786} +{"train_info/time_between_train_steps": 0.005472421646118164, "step": 786} +{"info/global_step": 787, "train_info/time_within_train_step": 27.87717843055725, "step": 787} +{"train_info/time_between_train_steps": 0.005353212356567383, "step": 787} +{"info/global_step": 788, "train_info/time_within_train_step": 27.75053381919861, "step": 788} +{"train_info/time_between_train_steps": 0.005304098129272461, "step": 788} +{"info/global_step": 789, "train_info/time_within_train_step": 28.001919984817505, "step": 789} +{"train_info/time_between_train_steps": 0.005426645278930664, "step": 789} +{"info/global_step": 790, "train_info/time_within_train_step": 27.905392169952393, "step": 790} +{"train_info/time_between_train_steps": 0.005222320556640625, "step": 790} +{"info/global_step": 791, "train_info/time_within_train_step": 27.892054557800293, "step": 791} +{"train_info/time_between_train_steps": 0.005271434783935547, "step": 791} +{"info/global_step": 792, "train_info/time_within_train_step": 27.75130581855774, "step": 792} +{"train_info/time_between_train_steps": 0.005381107330322266, "step": 792} +{"info/global_step": 793, "train_info/time_within_train_step": 27.766241788864136, "step": 793} +{"train_info/time_between_train_steps": 0.007403373718261719, "step": 793} +{"info/global_step": 794, "train_info/time_within_train_step": 27.727052927017212, "step": 794} +{"train_info/time_between_train_steps": 0.005098104476928711, "step": 794} +{"info/global_step": 795, "train_info/time_within_train_step": 27.72290825843811, "step": 795} +{"train_info/time_between_train_steps": 0.005099058151245117, "step": 795} +{"info/global_step": 796, "train_info/time_within_train_step": 27.726658821105957, "step": 796} +{"train_info/time_between_train_steps": 0.0053234100341796875, "step": 796} +{"info/global_step": 797, "train_info/time_within_train_step": 27.724015951156616, "step": 797} +{"train_info/time_between_train_steps": 0.005824565887451172, "step": 797} +{"info/global_step": 798, "train_info/time_within_train_step": 27.726475715637207, "step": 798} +{"train_info/time_between_train_steps": 0.005078792572021484, "step": 798} +{"info/global_step": 799, "train_info/time_within_train_step": 27.729161262512207, "step": 799} +{"train_info/time_between_train_steps": 0.005078792572021484, "step": 799} +{"info/global_step": 800, "train_info/time_within_train_step": 27.815271139144897, "step": 800} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736126838, "_runtime": 23965}, "step": 800} +{"logs": {"train/loss": 4.0023, "train/learning_rate": 0.00022222222222222218, "train/epoch": 27.01, "_timestamp": 1736126838, "_runtime": 23965}, "step": 800} +{"train_info/time_between_train_steps": 164.04722619056702, "step": 800} +{"info/global_step": 801, "train_info/time_within_train_step": 28.546869039535522, "step": 801} +{"train_info/time_between_train_steps": 0.005223989486694336, "step": 801} +{"info/global_step": 802, "train_info/time_within_train_step": 28.48243546485901, "step": 802} +{"train_info/time_between_train_steps": 0.0052661895751953125, "step": 802} +{"info/global_step": 803, "train_info/time_within_train_step": 27.717870712280273, "step": 803} +{"train_info/time_between_train_steps": 0.005225658416748047, "step": 803} +{"info/global_step": 804, "train_info/time_within_train_step": 27.82937479019165, "step": 804} +{"train_info/time_between_train_steps": 0.005139350891113281, "step": 804} +{"info/global_step": 805, "train_info/time_within_train_step": 27.86553645133972, "step": 805} +{"train_info/time_between_train_steps": 0.010075807571411133, "step": 805} +{"info/global_step": 806, "train_info/time_within_train_step": 27.724897146224976, "step": 806} +{"train_info/time_between_train_steps": 0.0052416324615478516, "step": 806} +{"info/global_step": 807, "train_info/time_within_train_step": 27.71951413154602, "step": 807} +{"train_info/time_between_train_steps": 0.0049686431884765625, "step": 807} +{"info/global_step": 808, "train_info/time_within_train_step": 27.728168964385986, "step": 808} +{"train_info/time_between_train_steps": 0.005343914031982422, "step": 808} +{"info/global_step": 809, "train_info/time_within_train_step": 27.743237733840942, "step": 809} +{"train_info/time_between_train_steps": 0.00504612922668457, "step": 809} +{"info/global_step": 810, "train_info/time_within_train_step": 27.73880672454834, "step": 810} +{"train_info/time_between_train_steps": 0.005042076110839844, "step": 810} +{"info/global_step": 811, "train_info/time_within_train_step": 27.737317085266113, "step": 811} +{"train_info/time_between_train_steps": 0.00546717643737793, "step": 811} +{"info/global_step": 812, "train_info/time_within_train_step": 27.73478627204895, "step": 812} +{"train_info/time_between_train_steps": 0.005724906921386719, "step": 812} +{"train_info/time_between_train_steps": 28.150935649871826, "step": 812} +{"info/global_step": 813, "train_info/time_within_train_step": 27.72412419319153, "step": 813} +{"train_info/time_between_train_steps": 0.0049016475677490234, "step": 813} +{"info/global_step": 814, "train_info/time_within_train_step": 27.855002641677856, "step": 814} +{"train_info/time_between_train_steps": 0.005129098892211914, "step": 814} +{"info/global_step": 815, "train_info/time_within_train_step": 27.760403633117676, "step": 815} +{"train_info/time_between_train_steps": 0.005409955978393555, "step": 815} +{"info/global_step": 816, "train_info/time_within_train_step": 27.85553503036499, "step": 816} +{"train_info/time_between_train_steps": 0.005338191986083984, "step": 816} +{"info/global_step": 817, "train_info/time_within_train_step": 27.758752822875977, "step": 817} +{"train_info/time_between_train_steps": 0.010178327560424805, "step": 817} +{"info/global_step": 818, "train_info/time_within_train_step": 28.08501887321472, "step": 818} +{"train_info/time_between_train_steps": 0.005588054656982422, "step": 818} +{"info/global_step": 819, "train_info/time_within_train_step": 27.81386160850525, "step": 819} +{"train_info/time_between_train_steps": 0.005208015441894531, "step": 819} +{"info/global_step": 820, "train_info/time_within_train_step": 27.88863778114319, "step": 820} +{"train_info/time_between_train_steps": 0.005220174789428711, "step": 820} +{"info/global_step": 821, "train_info/time_within_train_step": 27.74039578437805, "step": 821} +{"train_info/time_between_train_steps": 0.005089282989501953, "step": 821} +{"info/global_step": 822, "train_info/time_within_train_step": 27.82820987701416, "step": 822} +{"train_info/time_between_train_steps": 0.00543522834777832, "step": 822} +{"info/global_step": 823, "train_info/time_within_train_step": 27.735331535339355, "step": 823} +{"train_info/time_between_train_steps": 0.005126237869262695, "step": 823} +{"info/global_step": 824, "train_info/time_within_train_step": 27.717377185821533, "step": 824} +{"train_info/time_between_train_steps": 0.005030155181884766, "step": 824} +{"info/global_step": 825, "train_info/time_within_train_step": 27.72901725769043, "step": 825} +{"train_info/time_between_train_steps": 0.005190134048461914, "step": 825} +{"info/global_step": 826, "train_info/time_within_train_step": 27.709188222885132, "step": 826} +{"train_info/time_between_train_steps": 0.00500178337097168, "step": 826} +{"info/global_step": 827, "train_info/time_within_train_step": 27.71910333633423, "step": 827} +{"train_info/time_between_train_steps": 0.0051136016845703125, "step": 827} +{"info/global_step": 828, "train_info/time_within_train_step": 27.716537714004517, "step": 828} +{"train_info/time_between_train_steps": 0.0052127838134765625, "step": 828} +{"info/global_step": 829, "train_info/time_within_train_step": 27.72495436668396, "step": 829} +{"train_info/time_between_train_steps": 0.005083560943603516, "step": 829} +{"info/global_step": 830, "train_info/time_within_train_step": 27.71476912498474, "step": 830} +{"train_info/time_between_train_steps": 0.005265235900878906, "step": 830} +{"info/global_step": 831, "train_info/time_within_train_step": 27.71377444267273, "step": 831} +{"train_info/time_between_train_steps": 0.005093812942504883, "step": 831} +{"info/global_step": 832, "train_info/time_within_train_step": 27.71559166908264, "step": 832} +{"train_info/time_between_train_steps": 0.005079507827758789, "step": 832} +{"info/global_step": 833, "train_info/time_within_train_step": 27.72071385383606, "step": 833} +{"train_info/time_between_train_steps": 0.005425453186035156, "step": 833} +{"info/global_step": 834, "train_info/time_within_train_step": 27.735682487487793, "step": 834} +{"train_info/time_between_train_steps": 0.005083322525024414, "step": 834} +{"info/global_step": 835, "train_info/time_within_train_step": 27.851974487304688, "step": 835} +{"train_info/time_between_train_steps": 0.005152702331542969, "step": 835} +{"info/global_step": 836, "train_info/time_within_train_step": 27.711071491241455, "step": 836} +{"train_info/time_between_train_steps": 0.005069732666015625, "step": 836} +{"info/global_step": 837, "train_info/time_within_train_step": 27.72368097305298, "step": 837} +{"train_info/time_between_train_steps": 0.00531768798828125, "step": 837} +{"info/global_step": 838, "train_info/time_within_train_step": 27.73309826850891, "step": 838} +{"train_info/time_between_train_steps": 0.006025552749633789, "step": 838} +{"info/global_step": 839, "train_info/time_within_train_step": 27.722257614135742, "step": 839} +{"train_info/time_between_train_steps": 0.0051691532135009766, "step": 839} +{"info/global_step": 840, "train_info/time_within_train_step": 27.736445426940918, "step": 840} +{"train_info/time_between_train_steps": 0.005663156509399414, "step": 840} +{"info/global_step": 841, "train_info/time_within_train_step": 27.733734846115112, "step": 841} +{"train_info/time_between_train_steps": 0.0066814422607421875, "step": 841} +{"train_info/time_between_train_steps": 28.39486598968506, "step": 841} +{"info/global_step": 842, "train_info/time_within_train_step": 27.759705305099487, "step": 842} +{"train_info/time_between_train_steps": 0.005133390426635742, "step": 842} +{"info/global_step": 843, "train_info/time_within_train_step": 27.88000178337097, "step": 843} +{"train_info/time_between_train_steps": 0.00534510612487793, "step": 843} +{"info/global_step": 844, "train_info/time_within_train_step": 27.75232744216919, "step": 844} +{"train_info/time_between_train_steps": 0.0053822994232177734, "step": 844} +{"info/global_step": 845, "train_info/time_within_train_step": 27.86904287338257, "step": 845} +{"train_info/time_between_train_steps": 0.005369901657104492, "step": 845} +{"info/global_step": 846, "train_info/time_within_train_step": 27.726075410842896, "step": 846} +{"train_info/time_between_train_steps": 0.005235195159912109, "step": 846} +{"info/global_step": 847, "train_info/time_within_train_step": 27.89066171646118, "step": 847} +{"train_info/time_between_train_steps": 0.006093502044677734, "step": 847} +{"info/global_step": 848, "train_info/time_within_train_step": 27.744527339935303, "step": 848} +{"train_info/time_between_train_steps": 0.005282402038574219, "step": 848} +{"info/global_step": 849, "train_info/time_within_train_step": 27.862494945526123, "step": 849} +{"train_info/time_between_train_steps": 0.006361484527587891, "step": 849} +{"info/global_step": 850, "train_info/time_within_train_step": 27.849733114242554, "step": 850} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736128451, "_runtime": 25578}, "step": 850} +{"logs": {"train/loss": 3.9298, "train/learning_rate": 0.00019444444444444443, "train/epoch": 29.01, "_timestamp": 1736128451, "_runtime": 25578}, "step": 850} +{"train_info/time_between_train_steps": 0.026654481887817383, "step": 850} +{"info/global_step": 851, "train_info/time_within_train_step": 27.804144859313965, "step": 851} +{"train_info/time_between_train_steps": 0.0053157806396484375, "step": 851} +{"info/global_step": 852, "train_info/time_within_train_step": 27.72882580757141, "step": 852} +{"train_info/time_between_train_steps": 0.00490880012512207, "step": 852} +{"info/global_step": 853, "train_info/time_within_train_step": 27.697625160217285, "step": 853} +{"train_info/time_between_train_steps": 0.004979372024536133, "step": 853} +{"info/global_step": 854, "train_info/time_within_train_step": 27.703519821166992, "step": 854} +{"train_info/time_between_train_steps": 0.0050585269927978516, "step": 854} +{"info/global_step": 855, "train_info/time_within_train_step": 27.700361490249634, "step": 855} +{"train_info/time_between_train_steps": 0.005860090255737305, "step": 855} +{"info/global_step": 856, "train_info/time_within_train_step": 27.73575520515442, "step": 856} +{"train_info/time_between_train_steps": 0.005188465118408203, "step": 856} +{"info/global_step": 857, "train_info/time_within_train_step": 27.719719648361206, "step": 857} +{"train_info/time_between_train_steps": 0.005164146423339844, "step": 857} +{"info/global_step": 858, "train_info/time_within_train_step": 27.713871479034424, "step": 858} +{"train_info/time_between_train_steps": 0.005378246307373047, "step": 858} +{"info/global_step": 859, "train_info/time_within_train_step": 27.715083122253418, "step": 859} +{"train_info/time_between_train_steps": 0.005328178405761719, "step": 859} +{"info/global_step": 860, "train_info/time_within_train_step": 27.72371768951416, "step": 860} +{"train_info/time_between_train_steps": 0.005044221878051758, "step": 860} +{"info/global_step": 861, "train_info/time_within_train_step": 27.706989526748657, "step": 861} +{"train_info/time_between_train_steps": 0.005180835723876953, "step": 861} +{"info/global_step": 862, "train_info/time_within_train_step": 27.72822332382202, "step": 862} +{"train_info/time_between_train_steps": 0.005177974700927734, "step": 862} +{"info/global_step": 863, "train_info/time_within_train_step": 27.72795009613037, "step": 863} +{"train_info/time_between_train_steps": 0.005471229553222656, "step": 863} +{"info/global_step": 864, "train_info/time_within_train_step": 27.734023809432983, "step": 864} +{"train_info/time_between_train_steps": 0.0051190853118896484, "step": 864} +{"info/global_step": 865, "train_info/time_within_train_step": 27.840927839279175, "step": 865} +{"train_info/time_between_train_steps": 0.005231380462646484, "step": 865} +{"info/global_step": 866, "train_info/time_within_train_step": 27.711944341659546, "step": 866} +{"train_info/time_between_train_steps": 0.005175113677978516, "step": 866} +{"info/global_step": 867, "train_info/time_within_train_step": 27.736652851104736, "step": 867} +{"train_info/time_between_train_steps": 0.005215167999267578, "step": 867} +{"info/global_step": 868, "train_info/time_within_train_step": 27.728875160217285, "step": 868} +{"train_info/time_between_train_steps": 0.005221843719482422, "step": 868} +{"info/global_step": 869, "train_info/time_within_train_step": 27.744427919387817, "step": 869} +{"train_info/time_between_train_steps": 0.005271196365356445, "step": 869} +{"info/global_step": 870, "train_info/time_within_train_step": 27.753859996795654, "step": 870} +{"train_info/time_between_train_steps": 0.005913257598876953, "step": 870} +{"train_info/time_between_train_steps": 28.445135831832886, "step": 870} +{"info/global_step": 871, "train_info/time_within_train_step": 27.781171083450317, "step": 871} +{"train_info/time_between_train_steps": 0.005314826965332031, "step": 871} +{"info/global_step": 872, "train_info/time_within_train_step": 27.88734769821167, "step": 872} +{"train_info/time_between_train_steps": 0.005584239959716797, "step": 872} +{"info/global_step": 873, "train_info/time_within_train_step": 27.776673793792725, "step": 873} +{"train_info/time_between_train_steps": 0.005560159683227539, "step": 873} +{"info/global_step": 874, "train_info/time_within_train_step": 27.936222553253174, "step": 874} +{"train_info/time_between_train_steps": 0.0059528350830078125, "step": 874} +{"info/global_step": 875, "train_info/time_within_train_step": 27.77925682067871, "step": 875} +{"train_info/time_between_train_steps": 0.008386373519897461, "step": 875} +{"info/global_step": 876, "train_info/time_within_train_step": 28.037261962890625, "step": 876} +{"train_info/time_between_train_steps": 0.0056972503662109375, "step": 876} +{"info/global_step": 877, "train_info/time_within_train_step": 27.749170064926147, "step": 877} +{"train_info/time_between_train_steps": 0.0056955814361572266, "step": 877} +{"info/global_step": 878, "train_info/time_within_train_step": 27.891170263290405, "step": 878} +{"train_info/time_between_train_steps": 0.005648612976074219, "step": 878} +{"info/global_step": 879, "train_info/time_within_train_step": 27.757108211517334, "step": 879} +{"train_info/time_between_train_steps": 0.005782604217529297, "step": 879} +{"info/global_step": 880, "train_info/time_within_train_step": 27.93164610862732, "step": 880} +{"train_info/time_between_train_steps": 0.00560450553894043, "step": 880} +{"info/global_step": 881, "train_info/time_within_train_step": 27.750410318374634, "step": 881} +{"train_info/time_between_train_steps": 0.005335807800292969, "step": 881} +{"info/global_step": 882, "train_info/time_within_train_step": 27.7277569770813, "step": 882} +{"train_info/time_between_train_steps": 0.005287647247314453, "step": 882} +{"info/global_step": 883, "train_info/time_within_train_step": 27.72386336326599, "step": 883} +{"train_info/time_between_train_steps": 0.005224704742431641, "step": 883} +{"info/global_step": 884, "train_info/time_within_train_step": 27.72679877281189, "step": 884} +{"train_info/time_between_train_steps": 0.0054569244384765625, "step": 884} +{"info/global_step": 885, "train_info/time_within_train_step": 27.727331161499023, "step": 885} +{"train_info/time_between_train_steps": 0.005527973175048828, "step": 885} +{"info/global_step": 886, "train_info/time_within_train_step": 27.737894773483276, "step": 886} +{"train_info/time_between_train_steps": 0.005403757095336914, "step": 886} +{"info/global_step": 887, "train_info/time_within_train_step": 27.726115942001343, "step": 887} +{"train_info/time_between_train_steps": 0.005604743957519531, "step": 887} +{"info/global_step": 888, "train_info/time_within_train_step": 27.725905895233154, "step": 888} +{"train_info/time_between_train_steps": 0.00540471076965332, "step": 888} +{"info/global_step": 889, "train_info/time_within_train_step": 27.72593116760254, "step": 889} +{"train_info/time_between_train_steps": 0.005301237106323242, "step": 889} +{"info/global_step": 890, "train_info/time_within_train_step": 27.79539942741394, "step": 890} +{"train_info/time_between_train_steps": 0.0055162906646728516, "step": 890} +{"info/global_step": 891, "train_info/time_within_train_step": 27.72958016395569, "step": 891} +{"train_info/time_between_train_steps": 0.0055158138275146484, "step": 891} +{"info/global_step": 892, "train_info/time_within_train_step": 27.72242569923401, "step": 892} +{"train_info/time_between_train_steps": 0.00542759895324707, "step": 892} +{"info/global_step": 893, "train_info/time_within_train_step": 27.73476815223694, "step": 893} +{"train_info/time_between_train_steps": 0.0054569244384765625, "step": 893} +{"info/global_step": 894, "train_info/time_within_train_step": 27.76372742652893, "step": 894} +{"train_info/time_between_train_steps": 0.0055065155029296875, "step": 894} +{"info/global_step": 895, "train_info/time_within_train_step": 27.737096786499023, "step": 895} +{"train_info/time_between_train_steps": 0.005555629730224609, "step": 895} +{"info/global_step": 896, "train_info/time_within_train_step": 27.86184549331665, "step": 896} +{"train_info/time_between_train_steps": 0.005308866500854492, "step": 896} +{"info/global_step": 897, "train_info/time_within_train_step": 27.79720902442932, "step": 897} +{"train_info/time_between_train_steps": 0.005652427673339844, "step": 897} +{"info/global_step": 898, "train_info/time_within_train_step": 28.582746744155884, "step": 898} +{"train_info/time_between_train_steps": 0.005713939666748047, "step": 898} +{"info/global_step": 899, "train_info/time_within_train_step": 28.918983697891235, "step": 899} +{"train_info/time_between_train_steps": 0.006573915481567383, "step": 899} +{"train_info/time_between_train_steps": 31.84884786605835, "step": 899} +{"info/global_step": 900, "train_info/time_within_train_step": 29.05559754371643, "step": 900} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736129906, "_runtime": 27033}, "step": 900} +{"logs": {"train/loss": 3.8642, "train/learning_rate": 0.00016666666666666666, "train/epoch": 31.0, "_timestamp": 1736129906, "_runtime": 27033}, "step": 900} +{"train_info/time_between_train_steps": 91.18283152580261, "step": 900} +{"info/global_step": 901, "train_info/time_within_train_step": 27.912604093551636, "step": 901} +{"train_info/time_between_train_steps": 0.005368471145629883, "step": 901} +{"info/global_step": 902, "train_info/time_within_train_step": 27.738902807235718, "step": 902} +{"train_info/time_between_train_steps": 0.005713224411010742, "step": 902} +{"info/global_step": 903, "train_info/time_within_train_step": 27.918638944625854, "step": 903} +{"train_info/time_between_train_steps": 0.005266666412353516, "step": 903} +{"info/global_step": 904, "train_info/time_within_train_step": 27.771395206451416, "step": 904} +{"train_info/time_between_train_steps": 0.005429983139038086, "step": 904} +{"info/global_step": 905, "train_info/time_within_train_step": 27.89513111114502, "step": 905} +{"train_info/time_between_train_steps": 0.005262136459350586, "step": 905} +{"info/global_step": 906, "train_info/time_within_train_step": 27.751255989074707, "step": 906} +{"train_info/time_between_train_steps": 0.007662534713745117, "step": 906} +{"info/global_step": 907, "train_info/time_within_train_step": 28.02739953994751, "step": 907} +{"train_info/time_between_train_steps": 0.00565338134765625, "step": 907} +{"info/global_step": 908, "train_info/time_within_train_step": 27.749682664871216, "step": 908} +{"train_info/time_between_train_steps": 0.005609273910522461, "step": 908} +{"info/global_step": 909, "train_info/time_within_train_step": 27.814855098724365, "step": 909} +{"train_info/time_between_train_steps": 0.005419015884399414, "step": 909} +{"info/global_step": 910, "train_info/time_within_train_step": 28.205881595611572, "step": 910} +{"train_info/time_between_train_steps": 0.004987239837646484, "step": 910} +{"info/global_step": 911, "train_info/time_within_train_step": 27.734834909439087, "step": 911} +{"train_info/time_between_train_steps": 0.005053043365478516, "step": 911} +{"info/global_step": 912, "train_info/time_within_train_step": 27.866926908493042, "step": 912} +{"train_info/time_between_train_steps": 0.005063533782958984, "step": 912} +{"info/global_step": 913, "train_info/time_within_train_step": 27.78654956817627, "step": 913} +{"train_info/time_between_train_steps": 0.0058705806732177734, "step": 913} +{"info/global_step": 914, "train_info/time_within_train_step": 27.72735571861267, "step": 914} +{"train_info/time_between_train_steps": 0.005104780197143555, "step": 914} +{"info/global_step": 915, "train_info/time_within_train_step": 27.704120874404907, "step": 915} +{"train_info/time_between_train_steps": 0.005273103713989258, "step": 915} +{"info/global_step": 916, "train_info/time_within_train_step": 27.717499256134033, "step": 916} +{"train_info/time_between_train_steps": 0.005063056945800781, "step": 916} +{"info/global_step": 917, "train_info/time_within_train_step": 27.727712631225586, "step": 917} +{"train_info/time_between_train_steps": 0.005139589309692383, "step": 917} +{"info/global_step": 918, "train_info/time_within_train_step": 27.74481439590454, "step": 918} +{"train_info/time_between_train_steps": 0.005197763442993164, "step": 918} +{"info/global_step": 919, "train_info/time_within_train_step": 27.7007315158844, "step": 919} +{"train_info/time_between_train_steps": 0.0050928592681884766, "step": 919} +{"info/global_step": 920, "train_info/time_within_train_step": 27.734130859375, "step": 920} +{"train_info/time_between_train_steps": 0.0051195621490478516, "step": 920} +{"info/global_step": 921, "train_info/time_within_train_step": 27.71886396408081, "step": 921} +{"train_info/time_between_train_steps": 0.0050656795501708984, "step": 921} +{"info/global_step": 922, "train_info/time_within_train_step": 27.705145835876465, "step": 922} +{"train_info/time_between_train_steps": 0.0050427913665771484, "step": 922} +{"info/global_step": 923, "train_info/time_within_train_step": 27.741603136062622, "step": 923} +{"train_info/time_between_train_steps": 0.004986286163330078, "step": 923} +{"info/global_step": 924, "train_info/time_within_train_step": 27.702200174331665, "step": 924} +{"train_info/time_between_train_steps": 0.005079746246337891, "step": 924} +{"info/global_step": 925, "train_info/time_within_train_step": 27.74872875213623, "step": 925} +{"train_info/time_between_train_steps": 0.005052328109741211, "step": 925} +{"info/global_step": 926, "train_info/time_within_train_step": 27.819527864456177, "step": 926} +{"train_info/time_between_train_steps": 0.0051822662353515625, "step": 926} +{"info/global_step": 927, "train_info/time_within_train_step": 27.71822452545166, "step": 927} +{"train_info/time_between_train_steps": 0.005635738372802734, "step": 927} +{"info/global_step": 928, "train_info/time_within_train_step": 27.73624277114868, "step": 928} +{"train_info/time_between_train_steps": 0.00554966926574707, "step": 928} +{"train_info/time_between_train_steps": 28.160440683364868, "step": 928} +{"info/global_step": 929, "train_info/time_within_train_step": 27.715572118759155, "step": 929} +{"train_info/time_between_train_steps": 0.004984617233276367, "step": 929} +{"info/global_step": 930, "train_info/time_within_train_step": 27.878559350967407, "step": 930} +{"train_info/time_between_train_steps": 0.005093097686767578, "step": 930} +{"info/global_step": 931, "train_info/time_within_train_step": 27.711472749710083, "step": 931} +{"train_info/time_between_train_steps": 0.0051920413970947266, "step": 931} +{"info/global_step": 932, "train_info/time_within_train_step": 27.846097707748413, "step": 932} +{"train_info/time_between_train_steps": 0.0051043033599853516, "step": 932} +{"info/global_step": 933, "train_info/time_within_train_step": 27.731575965881348, "step": 933} +{"train_info/time_between_train_steps": 0.0052394866943359375, "step": 933} +{"info/global_step": 934, "train_info/time_within_train_step": 27.881275177001953, "step": 934} +{"train_info/time_between_train_steps": 0.005164623260498047, "step": 934} +{"info/global_step": 935, "train_info/time_within_train_step": 27.7553870677948, "step": 935} +{"train_info/time_between_train_steps": 0.00518798828125, "step": 935} +{"info/global_step": 936, "train_info/time_within_train_step": 27.845930099487305, "step": 936} +{"train_info/time_between_train_steps": 0.0051631927490234375, "step": 936} +{"info/global_step": 937, "train_info/time_within_train_step": 27.797234535217285, "step": 937} +{"train_info/time_between_train_steps": 0.00510859489440918, "step": 937} +{"info/global_step": 938, "train_info/time_within_train_step": 27.765122175216675, "step": 938} +{"train_info/time_between_train_steps": 0.004980802536010742, "step": 938} +{"info/global_step": 939, "train_info/time_within_train_step": 27.754519939422607, "step": 939} +{"train_info/time_between_train_steps": 0.004894256591796875, "step": 939} +{"info/global_step": 940, "train_info/time_within_train_step": 27.68687152862549, "step": 940} +{"train_info/time_between_train_steps": 0.00500035285949707, "step": 940} +{"info/global_step": 941, "train_info/time_within_train_step": 27.833609104156494, "step": 941} +{"train_info/time_between_train_steps": 0.004952430725097656, "step": 941} +{"info/global_step": 942, "train_info/time_within_train_step": 27.69730281829834, "step": 942} +{"train_info/time_between_train_steps": 0.004850625991821289, "step": 942} +{"info/global_step": 943, "train_info/time_within_train_step": 27.729206800460815, "step": 943} +{"train_info/time_between_train_steps": 0.004883527755737305, "step": 943} +{"info/global_step": 944, "train_info/time_within_train_step": 27.696123600006104, "step": 944} +{"train_info/time_between_train_steps": 0.004979610443115234, "step": 944} +{"info/global_step": 945, "train_info/time_within_train_step": 27.730642080307007, "step": 945} +{"train_info/time_between_train_steps": 0.005063295364379883, "step": 945} +{"info/global_step": 946, "train_info/time_within_train_step": 27.691502332687378, "step": 946} +{"train_info/time_between_train_steps": 0.004952430725097656, "step": 946} +{"info/global_step": 947, "train_info/time_within_train_step": 27.70210075378418, "step": 947} +{"train_info/time_between_train_steps": 0.004886150360107422, "step": 947} +{"info/global_step": 948, "train_info/time_within_train_step": 27.738041162490845, "step": 948} +{"train_info/time_between_train_steps": 0.0049211978912353516, "step": 948} +{"info/global_step": 949, "train_info/time_within_train_step": 27.695039749145508, "step": 949} +{"train_info/time_between_train_steps": 0.005011796951293945, "step": 949} +{"info/global_step": 950, "train_info/time_within_train_step": 27.746883392333984, "step": 950} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736131416, "_runtime": 28543}, "step": 950} +{"logs": {"train/loss": 3.747, "train/learning_rate": 0.0001388888888888889, "train/epoch": 32.02, "_timestamp": 1736131416, "_runtime": 28543}, "step": 950} +{"train_info/time_between_train_steps": 0.026076316833496094, "step": 950} +{"info/global_step": 951, "train_info/time_within_train_step": 27.695655345916748, "step": 951} +{"train_info/time_between_train_steps": 0.005036354064941406, "step": 951} +{"info/global_step": 952, "train_info/time_within_train_step": 27.733635902404785, "step": 952} +{"train_info/time_between_train_steps": 0.004882097244262695, "step": 952} +{"info/global_step": 953, "train_info/time_within_train_step": 27.700870037078857, "step": 953} +{"train_info/time_between_train_steps": 0.008993864059448242, "step": 953} +{"info/global_step": 954, "train_info/time_within_train_step": 27.74347472190857, "step": 954} +{"train_info/time_between_train_steps": 0.0051326751708984375, "step": 954} +{"info/global_step": 955, "train_info/time_within_train_step": 27.7313973903656, "step": 955} +{"train_info/time_between_train_steps": 0.006170511245727539, "step": 955} +{"info/global_step": 956, "train_info/time_within_train_step": 27.844289302825928, "step": 956} +{"train_info/time_between_train_steps": 0.0052950382232666016, "step": 956} +{"info/global_step": 957, "train_info/time_within_train_step": 27.73881220817566, "step": 957} +{"train_info/time_between_train_steps": 0.0053861141204833984, "step": 957} +{"train_info/time_between_train_steps": 27.819703340530396, "step": 957} +{"info/global_step": 958, "train_info/time_within_train_step": 27.690579652786255, "step": 958} +{"train_info/time_between_train_steps": 0.004899501800537109, "step": 958} +{"info/global_step": 959, "train_info/time_within_train_step": 27.850926876068115, "step": 959} +{"train_info/time_between_train_steps": 0.005253314971923828, "step": 959} +{"info/global_step": 960, "train_info/time_within_train_step": 27.699447631835938, "step": 960} +{"train_info/time_between_train_steps": 0.005251884460449219, "step": 960} +{"info/global_step": 961, "train_info/time_within_train_step": 27.833648681640625, "step": 961} +{"train_info/time_between_train_steps": 0.010066986083984375, "step": 961} +{"info/global_step": 962, "train_info/time_within_train_step": 27.750997304916382, "step": 962} +{"train_info/time_between_train_steps": 0.005173921585083008, "step": 962} +{"info/global_step": 963, "train_info/time_within_train_step": 27.83824372291565, "step": 963} +{"train_info/time_between_train_steps": 0.005076408386230469, "step": 963} +{"info/global_step": 964, "train_info/time_within_train_step": 27.71992254257202, "step": 964} +{"train_info/time_between_train_steps": 0.005277395248413086, "step": 964} +{"info/global_step": 965, "train_info/time_within_train_step": 27.86046600341797, "step": 965} +{"train_info/time_between_train_steps": 0.005193233489990234, "step": 965} +{"info/global_step": 966, "train_info/time_within_train_step": 27.71577548980713, "step": 966} +{"train_info/time_between_train_steps": 0.0054433345794677734, "step": 966} +{"info/global_step": 967, "train_info/time_within_train_step": 27.755383014678955, "step": 967} +{"train_info/time_between_train_steps": 0.005106925964355469, "step": 967} +{"info/global_step": 968, "train_info/time_within_train_step": 27.722456216812134, "step": 968} +{"train_info/time_between_train_steps": 0.004888296127319336, "step": 968} +{"info/global_step": 969, "train_info/time_within_train_step": 27.686246395111084, "step": 969} +{"train_info/time_between_train_steps": 0.00487208366394043, "step": 969} +{"info/global_step": 970, "train_info/time_within_train_step": 27.730847597122192, "step": 970} +{"train_info/time_between_train_steps": 0.0049419403076171875, "step": 970} +{"info/global_step": 971, "train_info/time_within_train_step": 27.805574655532837, "step": 971} +{"train_info/time_between_train_steps": 0.005013465881347656, "step": 971} +{"info/global_step": 972, "train_info/time_within_train_step": 27.697978496551514, "step": 972} +{"train_info/time_between_train_steps": 0.005009174346923828, "step": 972} +{"info/global_step": 973, "train_info/time_within_train_step": 27.68879795074463, "step": 973} +{"train_info/time_between_train_steps": 0.004975080490112305, "step": 973} +{"info/global_step": 974, "train_info/time_within_train_step": 27.703014612197876, "step": 974} +{"train_info/time_between_train_steps": 0.004961252212524414, "step": 974} +{"info/global_step": 975, "train_info/time_within_train_step": 27.712605237960815, "step": 975} +{"train_info/time_between_train_steps": 0.0050811767578125, "step": 975} +{"info/global_step": 976, "train_info/time_within_train_step": 27.70385241508484, "step": 976} +{"train_info/time_between_train_steps": 0.005468606948852539, "step": 976} +{"info/global_step": 977, "train_info/time_within_train_step": 27.766276836395264, "step": 977} +{"train_info/time_between_train_steps": 0.004975318908691406, "step": 977} +{"info/global_step": 978, "train_info/time_within_train_step": 27.691380977630615, "step": 978} +{"train_info/time_between_train_steps": 0.004960060119628906, "step": 978} +{"info/global_step": 979, "train_info/time_within_train_step": 27.75498867034912, "step": 979} +{"train_info/time_between_train_steps": 0.0049266815185546875, "step": 979} +{"info/global_step": 980, "train_info/time_within_train_step": 27.710286617279053, "step": 980} +{"train_info/time_between_train_steps": 0.005127906799316406, "step": 980} +{"info/global_step": 981, "train_info/time_within_train_step": 27.746265172958374, "step": 981} +{"train_info/time_between_train_steps": 0.005131959915161133, "step": 981} +{"info/global_step": 982, "train_info/time_within_train_step": 27.705806255340576, "step": 982} +{"train_info/time_between_train_steps": 0.0050814151763916016, "step": 982} +{"info/global_step": 983, "train_info/time_within_train_step": 27.693408966064453, "step": 983} +{"train_info/time_between_train_steps": 0.004964590072631836, "step": 983} +{"info/global_step": 984, "train_info/time_within_train_step": 27.71525812149048, "step": 984} +{"train_info/time_between_train_steps": 0.005017757415771484, "step": 984} +{"info/global_step": 985, "train_info/time_within_train_step": 27.70702075958252, "step": 985} +{"train_info/time_between_train_steps": 0.00533604621887207, "step": 985} +{"info/global_step": 986, "train_info/time_within_train_step": 27.729597806930542, "step": 986} +{"train_info/time_between_train_steps": 0.005892276763916016, "step": 986} +{"train_info/time_between_train_steps": 28.113627195358276, "step": 986} +{"info/global_step": 987, "train_info/time_within_train_step": 27.702398777008057, "step": 987} +{"train_info/time_between_train_steps": 0.004932880401611328, "step": 987} +{"info/global_step": 988, "train_info/time_within_train_step": 27.79010796546936, "step": 988} +{"train_info/time_between_train_steps": 0.0048427581787109375, "step": 988} +{"info/global_step": 989, "train_info/time_within_train_step": 27.67968249320984, "step": 989} +{"train_info/time_between_train_steps": 0.0047914981842041016, "step": 989} +{"info/global_step": 990, "train_info/time_within_train_step": 27.81687831878662, "step": 990} +{"train_info/time_between_train_steps": 0.004993438720703125, "step": 990} +{"info/global_step": 991, "train_info/time_within_train_step": 27.75133991241455, "step": 991} +{"train_info/time_between_train_steps": 0.005222797393798828, "step": 991} +{"info/global_step": 992, "train_info/time_within_train_step": 27.821345567703247, "step": 992} +{"train_info/time_between_train_steps": 0.00522160530090332, "step": 992} +{"info/global_step": 993, "train_info/time_within_train_step": 27.703830003738403, "step": 993} +{"train_info/time_between_train_steps": 0.005070209503173828, "step": 993} +{"info/global_step": 994, "train_info/time_within_train_step": 27.804951429367065, "step": 994} +{"train_info/time_between_train_steps": 0.005097150802612305, "step": 994} +{"info/global_step": 995, "train_info/time_within_train_step": 27.737962007522583, "step": 995} +{"train_info/time_between_train_steps": 0.00513148307800293, "step": 995} +{"info/global_step": 996, "train_info/time_within_train_step": 27.782716274261475, "step": 996} +{"train_info/time_between_train_steps": 0.005063295364379883, "step": 996} +{"info/global_step": 997, "train_info/time_within_train_step": 27.71102237701416, "step": 997} +{"train_info/time_between_train_steps": 0.0049555301666259766, "step": 997} +{"info/global_step": 998, "train_info/time_within_train_step": 27.749486207962036, "step": 998} +{"train_info/time_between_train_steps": 0.00499725341796875, "step": 998} +{"info/global_step": 999, "train_info/time_within_train_step": 27.94791340827942, "step": 999} +{"train_info/time_between_train_steps": 0.0052340030670166016, "step": 999} +{"info/global_step": 1000, "train_info/time_within_train_step": 30.315719842910767, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1736132864, "_runtime": 29991}, "step": 1000} +{"logs": {"train/loss": 3.7634, "train/learning_rate": 0.00011111111111111109, "train/epoch": 34.01, "_timestamp": 1736132864, "_runtime": 29991}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1736132870, "_runtime": 29997}, "step": 1000} +{"logs": {"eval/loss": 4.5402936935424805, "eval/runtime": 5.0343, "eval/samples_per_second": 20.857, "eval/steps_per_second": 1.39, "train/epoch": 34.01, "_timestamp": 1736132870, "_runtime": 29997}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1736132871, "_runtime": 29998}, "step": 1000} +{"logs": {"eval//scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_loss": 4.5402936935424805, "eval//scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_ppl": 93.7183205435465, "eval//scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_runtime": 5.0343, "eval//scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 20.857, "train/epoch": 34.01, "_timestamp": 1736132871, "_runtime": 29998}, "step": 1000} +{"train_info/time_between_train_steps": 59.73071074485779, "step": 1000} +{"info/global_step": 1001, "train_info/time_within_train_step": 27.780195474624634, "step": 1001} +{"train_info/time_between_train_steps": 0.004726886749267578, "step": 1001} +{"info/global_step": 1002, "train_info/time_within_train_step": 27.691222190856934, "step": 1002} +{"train_info/time_between_train_steps": 0.004767656326293945, "step": 1002} +{"info/global_step": 1003, "train_info/time_within_train_step": 27.68722176551819, "step": 1003} +{"train_info/time_between_train_steps": 0.004895448684692383, "step": 1003} +{"info/global_step": 1004, "train_info/time_within_train_step": 27.744560718536377, "step": 1004} +{"train_info/time_between_train_steps": 0.005211353302001953, "step": 1004} +{"info/global_step": 1005, "train_info/time_within_train_step": 27.709877967834473, "step": 1005} +{"train_info/time_between_train_steps": 0.0048580169677734375, "step": 1005} +{"info/global_step": 1006, "train_info/time_within_train_step": 27.791057348251343, "step": 1006} +{"train_info/time_between_train_steps": 0.0049419403076171875, "step": 1006} +{"info/global_step": 1007, "train_info/time_within_train_step": 27.6834454536438, "step": 1007} +{"train_info/time_between_train_steps": 0.004915714263916016, "step": 1007} +{"info/global_step": 1008, "train_info/time_within_train_step": 27.722854137420654, "step": 1008} +{"train_info/time_between_train_steps": 0.00484466552734375, "step": 1008} +{"info/global_step": 1009, "train_info/time_within_train_step": 27.697633504867554, "step": 1009} +{"train_info/time_between_train_steps": 0.0050199031829833984, "step": 1009} +{"info/global_step": 1010, "train_info/time_within_train_step": 27.700918436050415, "step": 1010} +{"train_info/time_between_train_steps": 0.004967689514160156, "step": 1010} +{"info/global_step": 1011, "train_info/time_within_train_step": 27.704754114151, "step": 1011} +{"train_info/time_between_train_steps": 0.005090951919555664, "step": 1011} +{"info/global_step": 1012, "train_info/time_within_train_step": 27.688910961151123, "step": 1012} +{"train_info/time_between_train_steps": 0.005100727081298828, "step": 1012} +{"info/global_step": 1013, "train_info/time_within_train_step": 27.8318452835083, "step": 1013} +{"train_info/time_between_train_steps": 0.006165981292724609, "step": 1013} +{"info/global_step": 1014, "train_info/time_within_train_step": 27.76443576812744, "step": 1014} +{"train_info/time_between_train_steps": 0.01235651969909668, "step": 1014} +{"info/global_step": 1015, "train_info/time_within_train_step": 27.844534397125244, "step": 1015} +{"train_info/time_between_train_steps": 0.006066799163818359, "step": 1015} +{"train_info/time_between_train_steps": 28.436143398284912, "step": 1015} +{"info/global_step": 1016, "train_info/time_within_train_step": 27.872860431671143, "step": 1016} +{"train_info/time_between_train_steps": 0.005105018615722656, "step": 1016} +{"info/global_step": 1017, "train_info/time_within_train_step": 27.841798305511475, "step": 1017} +{"train_info/time_between_train_steps": 0.005105495452880859, "step": 1017} +{"info/global_step": 1018, "train_info/time_within_train_step": 27.760215520858765, "step": 1018} +{"train_info/time_between_train_steps": 0.00522923469543457, "step": 1018} +{"info/global_step": 1019, "train_info/time_within_train_step": 27.917391538619995, "step": 1019} +{"train_info/time_between_train_steps": 0.00534820556640625, "step": 1019} +{"info/global_step": 1020, "train_info/time_within_train_step": 27.75432300567627, "step": 1020} +{"train_info/time_between_train_steps": 0.010345458984375, "step": 1020} +{"info/global_step": 1021, "train_info/time_within_train_step": 27.887906789779663, "step": 1021} +{"train_info/time_between_train_steps": 0.01000213623046875, "step": 1021} +{"info/global_step": 1022, "train_info/time_within_train_step": 27.995538473129272, "step": 1022} +{"train_info/time_between_train_steps": 0.0053310394287109375, "step": 1022} +{"info/global_step": 1023, "train_info/time_within_train_step": 27.917550086975098, "step": 1023} +{"train_info/time_between_train_steps": 0.005216360092163086, "step": 1023} +{"info/global_step": 1024, "train_info/time_within_train_step": 27.740589141845703, "step": 1024} +{"train_info/time_between_train_steps": 0.006178379058837891, "step": 1024} +{"info/global_step": 1025, "train_info/time_within_train_step": 27.830143690109253, "step": 1025} +{"train_info/time_between_train_steps": 0.005525827407836914, "step": 1025} +{"info/global_step": 1026, "train_info/time_within_train_step": 27.72873616218567, "step": 1026} +{"train_info/time_between_train_steps": 0.005084037780761719, "step": 1026} +{"info/global_step": 1027, "train_info/time_within_train_step": 27.729207038879395, "step": 1027} +{"train_info/time_between_train_steps": 0.004877805709838867, "step": 1027} +{"info/global_step": 1028, "train_info/time_within_train_step": 27.689318895339966, "step": 1028} +{"train_info/time_between_train_steps": 0.004826545715332031, "step": 1028} +{"info/global_step": 1029, "train_info/time_within_train_step": 27.745296716690063, "step": 1029} +{"train_info/time_between_train_steps": 0.005071163177490234, "step": 1029} +{"info/global_step": 1030, "train_info/time_within_train_step": 27.734262466430664, "step": 1030} +{"train_info/time_between_train_steps": 0.010543107986450195, "step": 1030} +{"info/global_step": 1031, "train_info/time_within_train_step": 27.763148546218872, "step": 1031} +{"train_info/time_between_train_steps": 0.005869150161743164, "step": 1031} +{"info/global_step": 1032, "train_info/time_within_train_step": 27.83618998527527, "step": 1032} +{"train_info/time_between_train_steps": 0.006716728210449219, "step": 1032} +{"info/global_step": 1033, "train_info/time_within_train_step": 27.789161443710327, "step": 1033} +{"train_info/time_between_train_steps": 0.0059282779693603516, "step": 1033} +{"info/global_step": 1034, "train_info/time_within_train_step": 27.763323068618774, "step": 1034} +{"train_info/time_between_train_steps": 0.005655765533447266, "step": 1034} +{"info/global_step": 1035, "train_info/time_within_train_step": 27.744834184646606, "step": 1035} +{"train_info/time_between_train_steps": 0.005744457244873047, "step": 1035} +{"info/global_step": 1036, "train_info/time_within_train_step": 27.758517503738403, "step": 1036} +{"train_info/time_between_train_steps": 0.005843400955200195, "step": 1036} +{"info/global_step": 1037, "train_info/time_within_train_step": 27.75192904472351, "step": 1037} +{"train_info/time_between_train_steps": 0.005782127380371094, "step": 1037} +{"info/global_step": 1038, "train_info/time_within_train_step": 27.802617073059082, "step": 1038} +{"train_info/time_between_train_steps": 0.0057146549224853516, "step": 1038} +{"info/global_step": 1039, "train_info/time_within_train_step": 27.761069536209106, "step": 1039} +{"train_info/time_between_train_steps": 0.005598306655883789, "step": 1039} +{"info/global_step": 1040, "train_info/time_within_train_step": 27.738460779190063, "step": 1040} +{"train_info/time_between_train_steps": 0.005672931671142578, "step": 1040} +{"info/global_step": 1041, "train_info/time_within_train_step": 27.730446577072144, "step": 1041} +{"train_info/time_between_train_steps": 0.005730152130126953, "step": 1041} +{"info/global_step": 1042, "train_info/time_within_train_step": 27.808751583099365, "step": 1042} +{"train_info/time_between_train_steps": 0.0058405399322509766, "step": 1042} +{"info/global_step": 1043, "train_info/time_within_train_step": 27.766647577285767, "step": 1043} +{"train_info/time_between_train_steps": 0.005988121032714844, "step": 1043} +{"info/global_step": 1044, "train_info/time_within_train_step": 27.83728051185608, "step": 1044} +{"train_info/time_between_train_steps": 0.006375789642333984, "step": 1044} +{"train_info/time_between_train_steps": 28.065595626831055, "step": 1044} +{"info/global_step": 1045, "train_info/time_within_train_step": 27.757710933685303, "step": 1045} +{"train_info/time_between_train_steps": 0.0054776668548583984, "step": 1045} +{"info/global_step": 1046, "train_info/time_within_train_step": 27.834089517593384, "step": 1046} +{"train_info/time_between_train_steps": 0.005075216293334961, "step": 1046} +{"info/global_step": 1047, "train_info/time_within_train_step": 27.849093437194824, "step": 1047} +{"train_info/time_between_train_steps": 0.005276918411254883, "step": 1047} +{"info/global_step": 1048, "train_info/time_within_train_step": 27.882730722427368, "step": 1048} +{"train_info/time_between_train_steps": 0.005311250686645508, "step": 1048} +{"info/global_step": 1049, "train_info/time_within_train_step": 27.716739654541016, "step": 1049} +{"train_info/time_between_train_steps": 0.005264759063720703, "step": 1049} +{"info/global_step": 1050, "train_info/time_within_train_step": 27.91495418548584, "step": 1050} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1736134371, "_runtime": 31498}, "step": 1050} +{"logs": {"train/loss": 3.7216, "train/learning_rate": 8.333333333333333e-05, "train/epoch": 36.01, "_timestamp": 1736134371, "_runtime": 31498}, "step": 1050} +{"train_info/time_between_train_steps": 0.028436660766601562, "step": 1050} +{"info/global_step": 1051, "train_info/time_within_train_step": 27.734606504440308, "step": 1051} +{"train_info/time_between_train_steps": 0.005141735076904297, "step": 1051} +{"info/global_step": 1052, "train_info/time_within_train_step": 27.913605213165283, "step": 1052} +{"train_info/time_between_train_steps": 0.005196571350097656, "step": 1052} +{"info/global_step": 1053, "train_info/time_within_train_step": 27.705066919326782, "step": 1053} +{"train_info/time_between_train_steps": 0.005292415618896484, "step": 1053} +{"info/global_step": 1054, "train_info/time_within_train_step": 27.775771141052246, "step": 1054} +{"train_info/time_between_train_steps": 0.0051975250244140625, "step": 1054} +{"info/global_step": 1055, "train_info/time_within_train_step": 27.705209255218506, "step": 1055} +{"train_info/time_between_train_steps": 0.005952596664428711, "step": 1055} +{"info/global_step": 1056, "train_info/time_within_train_step": 27.705628633499146, "step": 1056} +{"train_info/time_between_train_steps": 0.004945993423461914, "step": 1056} +{"info/global_step": 1057, "train_info/time_within_train_step": 27.707844972610474, "step": 1057} +{"train_info/time_between_train_steps": 0.004890918731689453, "step": 1057} +{"info/global_step": 1058, "train_info/time_within_train_step": 27.732770681381226, "step": 1058} +{"train_info/time_between_train_steps": 0.00944066047668457, "step": 1058} +{"info/global_step": 1059, "train_info/time_within_train_step": 27.7384033203125, "step": 1059} +{"train_info/time_between_train_steps": 0.004976749420166016, "step": 1059} +{"info/global_step": 1060, "train_info/time_within_train_step": 27.73669409751892, "step": 1060} +{"train_info/time_between_train_steps": 0.0048716068267822266, "step": 1060} +{"info/global_step": 1061, "train_info/time_within_train_step": 27.717657804489136, "step": 1061} +{"train_info/time_between_train_steps": 0.005049705505371094, "step": 1061} +{"info/global_step": 1062, "train_info/time_within_train_step": 27.813205242156982, "step": 1062} +{"train_info/time_between_train_steps": 0.005163431167602539, "step": 1062} +{"info/global_step": 1063, "train_info/time_within_train_step": 27.76213002204895, "step": 1063} +{"train_info/time_between_train_steps": 0.004973649978637695, "step": 1063} +{"info/global_step": 1064, "train_info/time_within_train_step": 27.716294527053833, "step": 1064} +{"train_info/time_between_train_steps": 0.004985809326171875, "step": 1064} +{"info/global_step": 1065, "train_info/time_within_train_step": 27.699418783187866, "step": 1065} +{"train_info/time_between_train_steps": 0.0049896240234375, "step": 1065} +{"info/global_step": 1066, "train_info/time_within_train_step": 27.683911085128784, "step": 1066} +{"train_info/time_between_train_steps": 0.004871845245361328, "step": 1066} +{"info/global_step": 1067, "train_info/time_within_train_step": 27.685236930847168, "step": 1067} +{"train_info/time_between_train_steps": 0.004976987838745117, "step": 1067} +{"info/global_step": 1068, "train_info/time_within_train_step": 27.692949056625366, "step": 1068} +{"train_info/time_between_train_steps": 0.005116701126098633, "step": 1068} +{"info/global_step": 1069, "train_info/time_within_train_step": 27.70618224143982, "step": 1069} +{"train_info/time_between_train_steps": 0.00511932373046875, "step": 1069} +{"info/global_step": 1070, "train_info/time_within_train_step": 27.711894512176514, "step": 1070} +{"train_info/time_between_train_steps": 0.0050201416015625, "step": 1070} +{"info/global_step": 1071, "train_info/time_within_train_step": 27.733126163482666, "step": 1071} +{"train_info/time_between_train_steps": 0.005168914794921875, "step": 1071} +{"info/global_step": 1072, "train_info/time_within_train_step": 27.711353540420532, "step": 1072} +{"train_info/time_between_train_steps": 0.005433320999145508, "step": 1072} +{"info/global_step": 1073, "train_info/time_within_train_step": 27.729803562164307, "step": 1073} +{"train_info/time_between_train_steps": 0.005723237991333008, "step": 1073} +{"train_info/time_between_train_steps": 28.044294118881226, "step": 1073} +{"info/global_step": 1074, "train_info/time_within_train_step": 27.69201946258545, "step": 1074} +{"train_info/time_between_train_steps": 0.004835844039916992, "step": 1074} +{"info/global_step": 1075, "train_info/time_within_train_step": 27.859382390975952, "step": 1075} +{"train_info/time_between_train_steps": 0.005007505416870117, "step": 1075} +{"info/global_step": 1076, "train_info/time_within_train_step": 27.715135097503662, "step": 1076} +{"train_info/time_between_train_steps": 0.005640506744384766, "step": 1076} +{"info/global_step": 1077, "train_info/time_within_train_step": 27.92181086540222, "step": 1077} +{"train_info/time_between_train_steps": 0.0049626827239990234, "step": 1077} +{"info/global_step": 1078, "train_info/time_within_train_step": 27.695631504058838, "step": 1078} +{"train_info/time_between_train_steps": 0.005309104919433594, "step": 1078} +{"info/global_step": 1079, "train_info/time_within_train_step": 27.881030797958374, "step": 1079} +{"train_info/time_between_train_steps": 0.005168437957763672, "step": 1079} +{"info/global_step": 1080, "train_info/time_within_train_step": 27.694580554962158, "step": 1080} +{"train_info/time_between_train_steps": 0.005183219909667969, "step": 1080} +{"info/global_step": 1081, "train_info/time_within_train_step": 27.860124588012695, "step": 1081} +{"train_info/time_between_train_steps": 0.005136966705322266, "step": 1081} +{"info/global_step": 1082, "train_info/time_within_train_step": 27.709147453308105, "step": 1082} +{"train_info/time_between_train_steps": 0.005007743835449219, "step": 1082} +{"info/global_step": 1083, "train_info/time_within_train_step": 27.785160541534424, "step": 1083} +{"train_info/time_between_train_steps": 0.004956245422363281, "step": 1083} +{"info/global_step": 1084, "train_info/time_within_train_step": 27.713815927505493, "step": 1084} +{"train_info/time_between_train_steps": 0.00483393669128418, "step": 1084} +{"info/global_step": 1085, "train_info/time_within_train_step": 27.72443699836731, "step": 1085} +{"train_info/time_between_train_steps": 0.0049669742584228516, "step": 1085} +{"info/global_step": 1086, "train_info/time_within_train_step": 27.70279812812805, "step": 1086} +{"train_info/time_between_train_steps": 0.006549835205078125, "step": 1086} +{"info/global_step": 1087, "train_info/time_within_train_step": 27.792162895202637, "step": 1087} +{"train_info/time_between_train_steps": 0.004891633987426758, "step": 1087} +{"info/global_step": 1088, "train_info/time_within_train_step": 27.67627239227295, "step": 1088} +{"train_info/time_between_train_steps": 0.004851579666137695, "step": 1088} +{"info/global_step": 1089, "train_info/time_within_train_step": 27.67721176147461, "step": 1089} +{"train_info/time_between_train_steps": 0.004863739013671875, "step": 1089} +{"info/global_step": 1090, "train_info/time_within_train_step": 27.6808021068573, "step": 1090} +{"train_info/time_between_train_steps": 0.004873037338256836, "step": 1090} +{"info/global_step": 1091, "train_info/time_within_train_step": 27.678874015808105, "step": 1091} +{"train_info/time_between_train_steps": 0.004862308502197266, "step": 1091} +{"info/global_step": 1092, "train_info/time_within_train_step": 27.736708641052246, "step": 1092} +{"train_info/time_between_train_steps": 0.0049779415130615234, "step": 1092} +{"info/global_step": 1093, "train_info/time_within_train_step": 27.775964736938477, "step": 1093} +{"train_info/time_between_train_steps": 0.005123615264892578, "step": 1093} +{"info/global_step": 1094, "train_info/time_within_train_step": 27.735278129577637, "step": 1094} +{"train_info/time_between_train_steps": 0.0049686431884765625, "step": 1094} +{"info/global_step": 1095, "train_info/time_within_train_step": 27.707167148590088, "step": 1095} +{"train_info/time_between_train_steps": 0.009288549423217773, "step": 1095} +{"info/global_step": 1096, "train_info/time_within_train_step": 27.741889715194702, "step": 1096} +{"train_info/time_between_train_steps": 0.006043434143066406, "step": 1096} +{"info/global_step": 1097, "train_info/time_within_train_step": 27.817983388900757, "step": 1097} +{"train_info/time_between_train_steps": 0.0060312747955322266, "step": 1097} +{"info/global_step": 1098, "train_info/time_within_train_step": 27.727185010910034, "step": 1098} +{"train_info/time_between_train_steps": 0.005307912826538086, "step": 1098} +{"info/global_step": 1099, "train_info/time_within_train_step": 27.995826482772827, "step": 1099} +{"train_info/time_between_train_steps": 0.005550384521484375, "step": 1099} +{"info/global_step": 1100, "train_info/time_within_train_step": 28.28774619102478, "step": 1100} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1736135789, "_runtime": 32916}, "step": 1100} +{"logs": {"train/loss": 3.6231, "train/learning_rate": 5.5555555555555545e-05, "train/epoch": 37.02, "_timestamp": 1736135789, "_runtime": 32916}, "step": 1100} +{"train_info/time_between_train_steps": 66.00846004486084, "step": 1100} +{"info/global_step": 1101, "train_info/time_within_train_step": 27.85650634765625, "step": 1101} +{"train_info/time_between_train_steps": 0.006811380386352539, "step": 1101} +{"info/global_step": 1102, "train_info/time_within_train_step": 27.760424613952637, "step": 1102} +{"train_info/time_between_train_steps": 0.005830287933349609, "step": 1102} +{"train_info/time_between_train_steps": 28.327836275100708, "step": 1102} +{"info/global_step": 1103, "train_info/time_within_train_step": 27.726232051849365, "step": 1103} +{"train_info/time_between_train_steps": 0.005079507827758789, "step": 1103} +{"info/global_step": 1104, "train_info/time_within_train_step": 27.84178638458252, "step": 1104} +{"train_info/time_between_train_steps": 0.005246400833129883, "step": 1104} +{"info/global_step": 1105, "train_info/time_within_train_step": 27.70911931991577, "step": 1105} +{"train_info/time_between_train_steps": 0.005180835723876953, "step": 1105} +{"info/global_step": 1106, "train_info/time_within_train_step": 27.809229612350464, "step": 1106} +{"train_info/time_between_train_steps": 0.005017518997192383, "step": 1106} +{"info/global_step": 1107, "train_info/time_within_train_step": 27.807958841323853, "step": 1107} +{"train_info/time_between_train_steps": 0.004999637603759766, "step": 1107} +{"info/global_step": 1108, "train_info/time_within_train_step": 27.846058130264282, "step": 1108} +{"train_info/time_between_train_steps": 0.005223989486694336, "step": 1108} +{"info/global_step": 1109, "train_info/time_within_train_step": 27.71522045135498, "step": 1109} +{"train_info/time_between_train_steps": 0.0053174495697021484, "step": 1109} +{"info/global_step": 1110, "train_info/time_within_train_step": 27.82814383506775, "step": 1110} +{"train_info/time_between_train_steps": 0.0052111148834228516, "step": 1110} +{"info/global_step": 1111, "train_info/time_within_train_step": 27.747639417648315, "step": 1111} +{"train_info/time_between_train_steps": 0.005118370056152344, "step": 1111} +{"info/global_step": 1112, "train_info/time_within_train_step": 27.744760751724243, "step": 1112} +{"train_info/time_between_train_steps": 0.00501561164855957, "step": 1112} +{"info/global_step": 1113, "train_info/time_within_train_step": 27.713324785232544, "step": 1113} +{"train_info/time_between_train_steps": 0.00509953498840332, "step": 1113} +{"info/global_step": 1114, "train_info/time_within_train_step": 27.70578408241272, "step": 1114} +{"train_info/time_between_train_steps": 0.005036592483520508, "step": 1114} +{"info/global_step": 1115, "train_info/time_within_train_step": 27.710663318634033, "step": 1115} +{"train_info/time_between_train_steps": 0.005154848098754883, "step": 1115} +{"info/global_step": 1116, "train_info/time_within_train_step": 27.692561864852905, "step": 1116} +{"train_info/time_between_train_steps": 0.004954338073730469, "step": 1116} +{"info/global_step": 1117, "train_info/time_within_train_step": 27.742871522903442, "step": 1117} +{"train_info/time_between_train_steps": 0.004927635192871094, "step": 1117} +{"info/global_step": 1118, "train_info/time_within_train_step": 27.70541787147522, "step": 1118} +{"train_info/time_between_train_steps": 0.00493168830871582, "step": 1118} +{"info/global_step": 1119, "train_info/time_within_train_step": 27.738579034805298, "step": 1119} +{"train_info/time_between_train_steps": 0.0049550533294677734, "step": 1119} +{"info/global_step": 1120, "train_info/time_within_train_step": 27.770931482315063, "step": 1120} +{"train_info/time_between_train_steps": 0.005048513412475586, "step": 1120} +{"info/global_step": 1121, "train_info/time_within_train_step": 27.744784593582153, "step": 1121} +{"train_info/time_between_train_steps": 0.005089759826660156, "step": 1121} +{"info/global_step": 1122, "train_info/time_within_train_step": 27.72831416130066, "step": 1122} +{"train_info/time_between_train_steps": 0.005038022994995117, "step": 1122} +{"info/global_step": 1123, "train_info/time_within_train_step": 27.827921628952026, "step": 1123} +{"train_info/time_between_train_steps": 0.005095481872558594, "step": 1123} +{"info/global_step": 1124, "train_info/time_within_train_step": 27.94075632095337, "step": 1124} +{"train_info/time_between_train_steps": 0.004886150360107422, "step": 1124} +{"info/global_step": 1125, "train_info/time_within_train_step": 27.72968816757202, "step": 1125} +{"train_info/time_between_train_steps": 0.004996776580810547, "step": 1125} +{"info/global_step": 1126, "train_info/time_within_train_step": 27.75183892250061, "step": 1126} +{"train_info/time_between_train_steps": 0.0075337886810302734, "step": 1126} +{"info/global_step": 1127, "train_info/time_within_train_step": 27.741681814193726, "step": 1127} +{"train_info/time_between_train_steps": 0.006253242492675781, "step": 1127} +{"info/global_step": 1128, "train_info/time_within_train_step": 28.20171093940735, "step": 1128} +{"train_info/time_between_train_steps": 0.00489497184753418, "step": 1128} +{"info/global_step": 1129, "train_info/time_within_train_step": 27.706650018692017, "step": 1129} +{"train_info/time_between_train_steps": 0.004988908767700195, "step": 1129} +{"info/global_step": 1130, "train_info/time_within_train_step": 27.747203826904297, "step": 1130} +{"train_info/time_between_train_steps": 0.006119251251220703, "step": 1130} +{"info/global_step": 1131, "train_info/time_within_train_step": 27.73687219619751, "step": 1131} +{"train_info/time_between_train_steps": 0.00568699836730957, "step": 1131} +{"train_info/time_between_train_steps": 27.809468746185303, "step": 1131} +{"info/global_step": 1132, "train_info/time_within_train_step": 27.718336582183838, "step": 1132} +{"train_info/time_between_train_steps": 0.004957675933837891, "step": 1132} +{"info/global_step": 1133, "train_info/time_within_train_step": 27.814160346984863, "step": 1133} +{"train_info/time_between_train_steps": 0.009678840637207031, "step": 1133} +{"info/global_step": 1134, "train_info/time_within_train_step": 27.98599362373352, "step": 1134} +{"train_info/time_between_train_steps": 0.005844593048095703, "step": 1134} +{"info/global_step": 1135, "train_info/time_within_train_step": 27.871105194091797, "step": 1135} +{"train_info/time_between_train_steps": 0.005482912063598633, "step": 1135} +{"info/global_step": 1136, "train_info/time_within_train_step": 28.070905685424805, "step": 1136} +{"train_info/time_between_train_steps": 0.015036821365356445, "step": 1136} +{"info/global_step": 1137, "train_info/time_within_train_step": 28.508182764053345, "step": 1137} +{"train_info/time_between_train_steps": 0.005167961120605469, "step": 1137} +{"info/global_step": 1138, "train_info/time_within_train_step": 27.809555530548096, "step": 1138} +{"train_info/time_between_train_steps": 0.005196332931518555, "step": 1138} +{"info/global_step": 1139, "train_info/time_within_train_step": 27.859195470809937, "step": 1139} +{"train_info/time_between_train_steps": 0.005357027053833008, "step": 1139} +{"info/global_step": 1140, "train_info/time_within_train_step": 27.734017610549927, "step": 1140} +{"train_info/time_between_train_steps": 0.0054166316986083984, "step": 1140} +{"info/global_step": 1141, "train_info/time_within_train_step": 27.794134616851807, "step": 1141} +{"train_info/time_between_train_steps": 0.005311727523803711, "step": 1141} +{"info/global_step": 1142, "train_info/time_within_train_step": 27.726329803466797, "step": 1142} +{"train_info/time_between_train_steps": 0.004904270172119141, "step": 1142} +{"info/global_step": 1143, "train_info/time_within_train_step": 27.734699964523315, "step": 1143} +{"train_info/time_between_train_steps": 0.00533747673034668, "step": 1143} +{"info/global_step": 1144, "train_info/time_within_train_step": 27.77899146080017, "step": 1144} +{"train_info/time_between_train_steps": 0.0048978328704833984, "step": 1144} +{"info/global_step": 1145, "train_info/time_within_train_step": 27.73080277442932, "step": 1145} +{"train_info/time_between_train_steps": 0.005240678787231445, "step": 1145} +{"info/global_step": 1146, "train_info/time_within_train_step": 27.72934603691101, "step": 1146} +{"train_info/time_between_train_steps": 0.005102872848510742, "step": 1146} +{"info/global_step": 1147, "train_info/time_within_train_step": 27.723572492599487, "step": 1147} +{"train_info/time_between_train_steps": 0.004952669143676758, "step": 1147} +{"info/global_step": 1148, "train_info/time_within_train_step": 27.719685316085815, "step": 1148} +{"train_info/time_between_train_steps": 0.00498509407043457, "step": 1148} +{"info/global_step": 1149, "train_info/time_within_train_step": 27.70410919189453, "step": 1149} +{"train_info/time_between_train_steps": 0.00496673583984375, "step": 1149} +{"info/global_step": 1150, "train_info/time_within_train_step": 27.70747685432434, "step": 1150} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1736137303, "_runtime": 34430}, "step": 1150} +{"logs": {"train/loss": 3.6623, "train/learning_rate": 2.7777777777777772e-05, "train/epoch": 39.02, "_timestamp": 1736137303, "_runtime": 34430}, "step": 1150} +{"train_info/time_between_train_steps": 0.025876760482788086, "step": 1150} +{"info/global_step": 1151, "train_info/time_within_train_step": 27.720163583755493, "step": 1151} +{"train_info/time_between_train_steps": 0.005101919174194336, "step": 1151} +{"info/global_step": 1152, "train_info/time_within_train_step": 27.75978946685791, "step": 1152} +{"train_info/time_between_train_steps": 0.0051152706146240234, "step": 1152} +{"info/global_step": 1153, "train_info/time_within_train_step": 27.8439519405365, "step": 1153} +{"train_info/time_between_train_steps": 0.005011320114135742, "step": 1153} +{"info/global_step": 1154, "train_info/time_within_train_step": 27.709709405899048, "step": 1154} +{"train_info/time_between_train_steps": 0.004910945892333984, "step": 1154} +{"info/global_step": 1155, "train_info/time_within_train_step": 27.706014156341553, "step": 1155} +{"train_info/time_between_train_steps": 0.005057334899902344, "step": 1155} +{"info/global_step": 1156, "train_info/time_within_train_step": 27.767952919006348, "step": 1156} +{"train_info/time_between_train_steps": 0.0050432682037353516, "step": 1156} +{"info/global_step": 1157, "train_info/time_within_train_step": 27.721507787704468, "step": 1157} +{"train_info/time_between_train_steps": 0.004977703094482422, "step": 1157} +{"info/global_step": 1158, "train_info/time_within_train_step": 27.72869086265564, "step": 1158} +{"train_info/time_between_train_steps": 0.005281209945678711, "step": 1158} +{"info/global_step": 1159, "train_info/time_within_train_step": 27.716321229934692, "step": 1159} +{"train_info/time_between_train_steps": 0.005339145660400391, "step": 1159} +{"info/global_step": 1160, "train_info/time_within_train_step": 27.74789261817932, "step": 1160} +{"train_info/time_between_train_steps": 0.0054929256439208984, "step": 1160} +{"train_info/time_between_train_steps": 27.986314058303833, "step": 1160} +{"info/global_step": 1161, "train_info/time_within_train_step": 27.705348253250122, "step": 1161} +{"train_info/time_between_train_steps": 0.004928112030029297, "step": 1161} +{"info/global_step": 1162, "train_info/time_within_train_step": 27.8427951335907, "step": 1162} +{"train_info/time_between_train_steps": 0.005341529846191406, "step": 1162} +{"info/global_step": 1163, "train_info/time_within_train_step": 27.710771083831787, "step": 1163} +{"train_info/time_between_train_steps": 0.005086660385131836, "step": 1163} +{"info/global_step": 1164, "train_info/time_within_train_step": 27.832611560821533, "step": 1164} +{"train_info/time_between_train_steps": 0.005245685577392578, "step": 1164} +{"info/global_step": 1165, "train_info/time_within_train_step": 27.731181621551514, "step": 1165} +{"train_info/time_between_train_steps": 0.005094289779663086, "step": 1165} +{"info/global_step": 1166, "train_info/time_within_train_step": 27.845214128494263, "step": 1166} +{"train_info/time_between_train_steps": 0.005010366439819336, "step": 1166} +{"info/global_step": 1167, "train_info/time_within_train_step": 27.72538161277771, "step": 1167} +{"train_info/time_between_train_steps": 0.00505518913269043, "step": 1167} +{"info/global_step": 1168, "train_info/time_within_train_step": 27.937150716781616, "step": 1168} +{"train_info/time_between_train_steps": 0.005017280578613281, "step": 1168} +{"info/global_step": 1169, "train_info/time_within_train_step": 27.74593210220337, "step": 1169} +{"train_info/time_between_train_steps": 0.005263805389404297, "step": 1169} +{"info/global_step": 1170, "train_info/time_within_train_step": 27.800718545913696, "step": 1170} +{"train_info/time_between_train_steps": 0.005128622055053711, "step": 1170} +{"info/global_step": 1171, "train_info/time_within_train_step": 27.707172393798828, "step": 1171} +{"train_info/time_between_train_steps": 0.0048732757568359375, "step": 1171} +{"info/global_step": 1172, "train_info/time_within_train_step": 27.736826419830322, "step": 1172} +{"train_info/time_between_train_steps": 0.005887269973754883, "step": 1172} +{"info/global_step": 1173, "train_info/time_within_train_step": 27.707690477371216, "step": 1173} +{"train_info/time_between_train_steps": 0.004811525344848633, "step": 1173} +{"info/global_step": 1174, "train_info/time_within_train_step": 27.711668491363525, "step": 1174} +{"train_info/time_between_train_steps": 0.004889249801635742, "step": 1174} +{"info/global_step": 1175, "train_info/time_within_train_step": 27.738938331604004, "step": 1175} +{"train_info/time_between_train_steps": 0.005013227462768555, "step": 1175} +{"info/global_step": 1176, "train_info/time_within_train_step": 27.729329347610474, "step": 1176} +{"train_info/time_between_train_steps": 0.005213260650634766, "step": 1176} +{"info/global_step": 1177, "train_info/time_within_train_step": 27.751056432724, "step": 1177} +{"train_info/time_between_train_steps": 0.004904747009277344, "step": 1177} +{"info/global_step": 1178, "train_info/time_within_train_step": 27.698702335357666, "step": 1178} +{"train_info/time_between_train_steps": 0.004915714263916016, "step": 1178} +{"info/global_step": 1179, "train_info/time_within_train_step": 27.702040910720825, "step": 1179} +{"train_info/time_between_train_steps": 0.004903316497802734, "step": 1179} +{"info/global_step": 1180, "train_info/time_within_train_step": 27.734394550323486, "step": 1180} +{"train_info/time_between_train_steps": 0.005075216293334961, "step": 1180} +{"info/global_step": 1181, "train_info/time_within_train_step": 27.733855485916138, "step": 1181} +{"train_info/time_between_train_steps": 0.0055217742919921875, "step": 1181} +{"info/global_step": 1182, "train_info/time_within_train_step": 27.74893617630005, "step": 1182} +{"train_info/time_between_train_steps": 0.00511932373046875, "step": 1182} +{"info/global_step": 1183, "train_info/time_within_train_step": 27.740796327590942, "step": 1183} +{"train_info/time_between_train_steps": 0.004971027374267578, "step": 1183} +{"info/global_step": 1184, "train_info/time_within_train_step": 27.800111770629883, "step": 1184} +{"train_info/time_between_train_steps": 0.005019187927246094, "step": 1184} +{"info/global_step": 1185, "train_info/time_within_train_step": 27.713293313980103, "step": 1185} +{"train_info/time_between_train_steps": 0.005179643630981445, "step": 1185} +{"info/global_step": 1186, "train_info/time_within_train_step": 27.71847367286682, "step": 1186} +{"train_info/time_between_train_steps": 0.004882097244262695, "step": 1186} +{"info/global_step": 1187, "train_info/time_within_train_step": 27.71031951904297, "step": 1187} +{"train_info/time_between_train_steps": 0.0050852298736572266, "step": 1187} +{"info/global_step": 1188, "train_info/time_within_train_step": 27.781174659729004, "step": 1188} +{"train_info/time_between_train_steps": 0.0055561065673828125, "step": 1188} +{"info/global_step": 1189, "train_info/time_within_train_step": 27.87650728225708, "step": 1189} +{"train_info/time_between_train_steps": 0.005548000335693359, "step": 1189} +{"train_info/time_between_train_steps": 28.176780939102173, "step": 1189} +{"info/global_step": 1190, "train_info/time_within_train_step": 27.70364761352539, "step": 1190} +{"train_info/time_between_train_steps": 0.004830360412597656, "step": 1190} +{"info/global_step": 1191, "train_info/time_within_train_step": 27.80752420425415, "step": 1191} +{"train_info/time_between_train_steps": 0.0048732757568359375, "step": 1191} +{"info/global_step": 1192, "train_info/time_within_train_step": 27.725708484649658, "step": 1192} +{"train_info/time_between_train_steps": 0.005166530609130859, "step": 1192} +{"info/global_step": 1193, "train_info/time_within_train_step": 27.84302282333374, "step": 1193} +{"train_info/time_between_train_steps": 0.005191802978515625, "step": 1193} +{"info/global_step": 1194, "train_info/time_within_train_step": 27.73277711868286, "step": 1194} +{"train_info/time_between_train_steps": 0.00525355339050293, "step": 1194} +{"info/global_step": 1195, "train_info/time_within_train_step": 27.8150851726532, "step": 1195} +{"train_info/time_between_train_steps": 0.0050525665283203125, "step": 1195} +{"info/global_step": 1196, "train_info/time_within_train_step": 27.70620608329773, "step": 1196} +{"train_info/time_between_train_steps": 0.01433873176574707, "step": 1196} +{"info/global_step": 1197, "train_info/time_within_train_step": 28.215913772583008, "step": 1197} +{"train_info/time_between_train_steps": 0.007143974304199219, "step": 1197} +{"info/global_step": 1198, "train_info/time_within_train_step": 27.836700916290283, "step": 1198} +{"train_info/time_between_train_steps": 0.005277395248413086, "step": 1198} +{"info/global_step": 1199, "train_info/time_within_train_step": 29.092109203338623, "step": 1199} +{"train_info/time_between_train_steps": 0.005331754684448242, "step": 1199} +{"info/global_step": 1200, "train_info/time_within_train_step": 34.811211824417114, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1736138759, "_runtime": 35886}, "step": 1200} +{"logs": {"train/loss": 3.6417, "train/learning_rate": 0.0, "train/epoch": 41.01, "_timestamp": 1736138759, "_runtime": 35886}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1736138838, "_runtime": 35965}, "step": 1200} +{"logs": {"train/train_runtime": 35965.7278, "train/train_samples_per_second": 17.083, "train/train_steps_per_second": 0.033, "train/total_flos": 3.3082445463552e+17, "train/train_loss": 4.848692913850148, "train/epoch": 41.01, "_timestamp": 1736138838, "_runtime": 35965}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4453125, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1736138855, "_runtime": 35982}, "step": 1200} +{"logs": {"eval/loss": 4.528079509735107, "eval/runtime": 2.1451, "eval/samples_per_second": 48.95, "eval/steps_per_second": 3.263, "train/epoch": 41.01, "_timestamp": 1736138855, "_runtime": 35982}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4453125, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1736138855, "_runtime": 35982}, "step": 1200} +{"logs": {"eval//scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_loss": 4.528079509735107, "eval//scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_ppl": 92.58059011901973, "eval//scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_runtime": 2.1451, "eval//scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py_samples_per_second": 48.95, "train/epoch": 41.01, "_timestamp": 1736138855, "_runtime": 35982}, "step": 1200} diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100755 index 0000000000000000000000000000000000000000..8a4f7a589ad6f44c5c8185fa5d8542aa3993c838 --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f28431ebc04471305544f66cbe239f2119b4bf1cd08403beea8aea8c2f1edb2 +size 510396521 diff --git a/shuffle_local3_it_IT_randinit_seed53.log b/shuffle_local3_it_IT_randinit_seed53.log new file mode 100755 index 0000000000000000000000000000000000000000..a92c95e2b69071e3f02d1b1591a4afc8756c9368 --- /dev/null +++ b/shuffle_local3_it_IT_randinit_seed53.log @@ -0,0 +1,121 @@ +|=>> 01/05 [19:46:18] - mistral - INFO :: Starting Run: shuffle_local3_it_IT_randinit_seed53... +|=>> 01/05 [19:46:18] - mistral - INFO :: Setting Random Seed to 53! +|=>> 01/05 [19:46:18] - mistral - INFO :: Building Tokenize and Initializing `gpt2-small` via AutoModel/AutoConfig... +|=>> 01/05 [19:46:18] - mistral - INFO :: Using Configs For Model From: /scratch/ykyao/projects/multilingual-LM/mistral/conf/models/gpt2-small-IT.json ... +|=>> 01/05 [19:46:18] - mistral.models.auto - INFO :: Building Hugging Face GPT2Config from provided configs: {'activation_function': 'gelu_new', 'architectures': ['GPT2LMHeadModel'], 'attn_pdrop': 0.1, 'bos_token_id': 0, 'embd_pdrop': 0.1, 'eos_token_id': 0, 'initializer_range': 0.02, 'layer_norm_epsilon': 1e-05, 'model_type': 'gpt2', 'n_ctx': 1024, 'n_embd': 768, 'n_head': 12, 'n_inner': None, 'n_layer': 12, 'n_positions': 1024, 'reorder_and_upcast_attn': True, 'resid_pdrop': 0.1, 'scale_attn_by_inverse_layer_idx': True, 'scale_attn_weights': True, 'summary_activation': None, 'summary_first_dropout': 0.2, 'summary_proj_to_labels': True, 'summary_type': 'cls_index', 'summary_use_proj': True, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 1024}}, 'torch_dtype': 'float32', 'transformers_version': '4.35.2', 'use_cache': False, 'vocab_size': 50257} ... +|=>> 01/05 [19:46:18] - mistral.models.auto - INFO :: Fetching Hugging Face [Fast] AutoTokenizer for Model: `gpt2`... +|=>> 01/05 [19:46:18] - mistral.models.auto - INFO :: Using a Pretokenized Dataset +|=>> 01/05 [19:46:18] - mistral.models.auto - INFO :: Initializing Custom GPT-2 Model from Configuration: `gpt2`... +|=>> 01/05 [19:46:22] - mistral - INFO :: Setting Training Arguments from Quinfig... +|=>> 01/05 [19:46:22] - mistral.args.training - INFO :: Setting Gradient Accumulation Steps = `64` [BSZ: 512 World Size: 1 Device BSZ: 8] +|=>> 01/05 [19:46:22] - mistral - INFO :: Downloading and Preprocessing Dataset `/scratch/ykyao/projects/multilingual-LM/training/multilingual_dataset.py`... +|=>> 01/05 [19:46:23] - datasets_modules.datasets.multilingual_dataset.da8dcd73a8173dca448c0c7479346f3e10871cc0982c18147f7a331aa5ed98df.multilingual_dataset - INFO :: Generating examples from = /scratch/ykyao/projects//multilingual-LM/data/multilingual/multilingual_data_perturbed/shuffle_local3_it/train +|=>> 01/05 [19:46:25] - datasets_modules.datasets.multilingual_dataset.da8dcd73a8173dca448c0c7479346f3e10871cc0982c18147f7a331aa5ed98df.multilingual_dataset - INFO :: Total sentences: 1120349 +|=>> 01/05 [19:46:25] - datasets_modules.datasets.multilingual_dataset.da8dcd73a8173dca448c0c7479346f3e10871cc0982c18147f7a331aa5ed98df.multilingual_dataset - INFO :: Loading pre-tokenized data +|=>> 01/05 [19:46:30] - datasets_modules.datasets.multilingual_dataset.da8dcd73a8173dca448c0c7479346f3e10871cc0982c18147f7a331aa5ed98df.multilingual_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 01/05 [19:46:31] - datasets_modules.datasets.multilingual_dataset.da8dcd73a8173dca448c0c7479346f3e10871cc0982c18147f7a331aa5ed98df.multilingual_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 01/05 [19:46:32] - datasets_modules.datasets.multilingual_dataset.da8dcd73a8173dca448c0c7479346f3e10871cc0982c18147f7a331aa5ed98df.multilingual_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 01/05 [19:46:38] - datasets_modules.datasets.multilingual_dataset.da8dcd73a8173dca448c0c7479346f3e10871cc0982c18147f7a331aa5ed98df.multilingual_dataset - INFO :: Generating examples from = /scratch/ykyao/projects//multilingual-LM/data/multilingual/multilingual_data_perturbed/shuffle_local3_it/dev +|=>> 01/05 [19:46:38] - datasets_modules.datasets.multilingual_dataset.da8dcd73a8173dca448c0c7479346f3e10871cc0982c18147f7a331aa5ed98df.multilingual_dataset - INFO :: Total sentences: 5829 +|=>> 01/05 [19:46:38] - datasets_modules.datasets.multilingual_dataset.da8dcd73a8173dca448c0c7479346f3e10871cc0982c18147f7a331aa5ed98df.multilingual_dataset - INFO :: Loading pre-tokenized data +|=>> 01/05 [19:46:38] - datasets_modules.datasets.multilingual_dataset.da8dcd73a8173dca448c0c7479346f3e10871cc0982c18147f7a331aa5ed98df.multilingual_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 01/05 [19:46:38] - datasets_modules.datasets.multilingual_dataset.da8dcd73a8173dca448c0c7479346f3e10871cc0982c18147f7a331aa5ed98df.multilingual_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 01/05 [19:46:38] - datasets_modules.datasets.multilingual_dataset.da8dcd73a8173dca448c0c7479346f3e10871cc0982c18147f7a331aa5ed98df.multilingual_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 01/05 [19:46:39] - mistral.corpora.auto - INFO :: Building Tokenized Indexed Dataset for {dataset_id}/{dataset_name}... +|=>> 01/05 [19:46:39] - mistral.corpora.auto - INFO :: Building Indexed Dataset for train +|=>> 01/05 [19:47:12] - mistral.corpora.auto - INFO :: Building Indexed Dataset for validation +|=>> 01/05 [19:47:13] - mistral - INFO :: Initializing Model Trainer... +|=>> 01/05 [19:47:13] - mistral - INFO :: Training Arguments: TrainingArguments( +_n_gpu=1, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +bf16=False, +bf16_full_eval=False, +data_seed=53, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=IntervalStrategy.STEPS, +fp16=True, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +gradient_accumulation_steps=64, +gradient_checkpointing=False, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_model_id=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0006, +length_column_name=length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=logs, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=50, +logging_strategy=IntervalStrategy.STEPS, +lr_scheduler_type=SchedulerType.LINEAR, +max_grad_norm=1.0, +max_steps=1200, +metric_for_best_model=None, +mp_parameters=, +no_cuda=False, +num_train_epochs=3.0, +optim=OptimizerNames.ADAMW_HF, +output_dir=//scratch/ykyao/projects/multilingual_models/shuffle_local3_it_IT_randinit/babylm_shuffle_local3_it_IT_randinit_seed53/runs/shuffle_local3_it_IT_randinit_seed53, +overwrite_output_dir=False, +past_index=-1, +per_device_eval_batch_size=16, +per_device_train_batch_size=8, +prediction_loss_only=True, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +remove_unused_columns=True, +report_to=[], +resume_from_checkpoint=None, +run_name=shuffle_local3_it_IT_randinit_seed53, +save_on_each_node=False, +save_steps=1000, +save_strategy=IntervalStrategy.STEPS, +save_total_limit=None, +seed=53, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=120, +weight_decay=0.1, +xpu_backend=None, +) +|=>> 01/05 [19:47:21] - mistral.core.callbacks - INFO :: Setting W&B Project: ykyao +|=>> 01/05 [19:47:52] - mistral - INFO :: Training... +|=>> 01/05 [19:47:52] - mistral.core.callbacks - INFO :: Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" +|=>> 01/06 [05:47:32] - mistral - INFO :: ...and that's all folks! +|=>> 01/06 [05:47:32] - mistral - INFO :: Running final evaluation... diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100755 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/training_args.bin b/training_args.bin new file mode 100755 index 0000000000000000000000000000000000000000..549697e20517d083291acd771c7e4f73a0a43ccb --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1246cf5e18070da7948fefcc7820aec8b0a60d030d56ab5ce5ddfdd1b0e451a +size 3183