diff --git a/checkpoint-0/config.json b/checkpoint-0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-0/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-0/pytorch_model.bin b/checkpoint-0/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e62f1c2a73e204dd71fd38dbd4de66ee58a3ae92 --- /dev/null +++ b/checkpoint-0/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4794ea0b9a495e87fc34e316efe1b585527d6e3d15e4d093362ab24ee74ab3f +size 510396521 diff --git a/checkpoint-0/special_tokens_map.json b/checkpoint-0/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-0/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-0/tokenizer_config.json b/checkpoint-0/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-0/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-0/training_args.bin b/checkpoint-0/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eaccfd8316af7736489690ab9f6e133305fda6a5 --- /dev/null +++ b/checkpoint-0/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293690fbed5a45d87250b9feccf0dd9aa14b6f83c629ffe357c2693865f5f1cd +size 3183 diff --git a/checkpoint-100/config.json b/checkpoint-100/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..298a53be638c4c089f257de23024a1e9e891bddb --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90daf2d308885ed88c88c6e6fbbe7d818427a129be92b48de4db7f7148af7fee +size 995603825 diff --git a/checkpoint-100/pytorch_model.bin b/checkpoint-100/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..376290278cd5d7eaefa1084821587ddf982fbf33 --- /dev/null +++ b/checkpoint-100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9705cff83f8be5b22149e504e77acae4752f7f4c964afbdbde4c22e05667f611 +size 510396521 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8ab6a7a8237cc6b11a1194922e1f5c7e2f5978fc --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c076c4d61881a04acf541cedd358bc26e0170e0e5d4906326dc0250212ef73de +size 14567 diff --git a/checkpoint-100/scaler.pt b/checkpoint-100/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..45cc4a33e17645cb0ed4a911b11c77cb2e7ce7f3 --- /dev/null +++ b/checkpoint-100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a3423b2fe42f204bc8fe2c666ff379f9fd753a0f13613064a5e71e86b519e8 +size 559 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..595eec9094e91b3eb24c2de88c461df2c22026ab --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e9b9d31d11c624d89b0c04ad496adf4b5addd3e703848d2583972c703e8da6 +size 623 diff --git a/checkpoint-100/special_tokens_map.json b/checkpoint-100/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-100/tokenizer_config.json b/checkpoint-100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..87ca22200e5cadd0b435cb3fd420d626a34a36fb --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,34 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.015833333333333, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.019, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1576, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8013, + "step": 100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.725798477824e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eaccfd8316af7736489690ab9f6e133305fda6a5 --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293690fbed5a45d87250b9feccf0dd9aa14b6f83c629ffe357c2693865f5f1cd +size 3183 diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4696fcf2716de334c31ed7581463830a2e8fdd11 --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb4170465743a13e3459363e6d93299c6d8fabe833476844164ecdc59d598468 +size 995604017 diff --git a/checkpoint-1000/pytorch_model.bin b/checkpoint-1000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..c437a4bc31456b8d3d9c8bd87ae7888880eb7238 --- /dev/null +++ b/checkpoint-1000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30dbbe8e1e839b7f653d358ec50f0af406b8c2ae4686161e93445d2da1e9dc83 +size 510396521 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..435da7804be300623e3b86a46e99b648140b3756 --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a77b8dad9792e314303401a6c4dcea2ad9b3d1529bdbcf2e884f288804d60af +size 14567 diff --git a/checkpoint-1000/scaler.pt b/checkpoint-1000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b04695d3a30e4bab2b78883d9c849c25c37ef7d7 --- /dev/null +++ b/checkpoint-1000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f810fc7b695697c440d8985f6042b4ba23a9e1027604c265718b518ca29f1b2b +size 559 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a616899591a36b66ac0bd1ceeb324087c181a2a --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0691206f4bd9ca409d6e7104087a4e0eb05df8f8f555a400f6ecc532edba52d8 +size 623 diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c0b0976a0eb7dd5ddd721c018e4979b39afefb08 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,158 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 37.00083333333333, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.019, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1576, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8013, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.7167, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 6.6474, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.6081, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 6.5764, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 6.4283, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 6.4218, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 6.2987, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 6.1713, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 6.0415, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 5.9176, + "step": 600 + }, + { + "epoch": 24.0, + "learning_rate": 0.00030555555555555555, + "loss": 5.7948, + "step": 650 + }, + { + "epoch": 25.02, + "learning_rate": 0.0002777777777777778, + "loss": 5.6087, + "step": 700 + }, + { + "epoch": 27.02, + "learning_rate": 0.00025, + "loss": 5.5749, + "step": 750 + }, + { + "epoch": 29.01, + "learning_rate": 0.00022222222222222218, + "loss": 5.4786, + "step": 800 + }, + { + "epoch": 31.01, + "learning_rate": 0.00019444444444444443, + "loss": 5.3877, + "step": 850 + }, + { + "epoch": 33.01, + "learning_rate": 0.00016666666666666666, + "loss": 5.3031, + "step": 900 + }, + { + "epoch": 35.0, + "learning_rate": 0.0001388888888888889, + "loss": 5.2301, + "step": 950 + }, + { + "epoch": 37.0, + "learning_rate": 0.00011111111111111109, + "loss": 5.1653, + "step": 1000 + }, + { + "epoch": 37.0, + "eval_loss": 5.702059268951416, + "eval_runtime": 1.9567, + "eval_samples_per_second": 51.105, + "eval_steps_per_second": 3.577, + "step": 1000 + }, + { + "epoch": 37.0, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 5.702059268951416, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 299.4834834485122, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 1.9567, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 51.105, + "step": 1000 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.7375043608576e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eaccfd8316af7736489690ab9f6e133305fda6a5 --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293690fbed5a45d87250b9feccf0dd9aa14b6f83c629ffe357c2693865f5f1cd +size 3183 diff --git a/checkpoint-1100/config.json b/checkpoint-1100/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-1100/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-1100/optimizer.pt b/checkpoint-1100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4347a7f3acef44631a6a5eed0d3bfc95ac474265 --- /dev/null +++ b/checkpoint-1100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e95129a7ff677e9284ff9b6db63b714146c78dae783bf26a1e1813d7bbb85f07 +size 995604017 diff --git a/checkpoint-1100/pytorch_model.bin b/checkpoint-1100/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..27b95b0e0955bdb1b74a3b3d1fc8e0148a786678 --- /dev/null +++ b/checkpoint-1100/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c82ea965a53e6757374e8aea6f7b3347c951909091b2cb47fa7c88a2d772ab6 +size 510396521 diff --git a/checkpoint-1100/rng_state.pth b/checkpoint-1100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dafd91f85bd9152edc48c160bf612691f3e3c996 --- /dev/null +++ b/checkpoint-1100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae4b12213de4ea52e168a54df711a6f39cce0b06a1dd2eda33676de447c4e96b +size 14567 diff --git a/checkpoint-1100/scaler.pt b/checkpoint-1100/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..37bf049fbd5fd721203bf0238edc8ff67dbd8f94 --- /dev/null +++ b/checkpoint-1100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb16c30b686aa43e110b0d33f9d46bf3127b7124542ca8dc34831233d4675a0 +size 559 diff --git a/checkpoint-1100/scheduler.pt b/checkpoint-1100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c158169ec0bb09620952d544fd4b4edea0cc9cf4 --- /dev/null +++ b/checkpoint-1100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1b7713e4bb40428f29080b7d08d4a52f779ac863737861e4724292b2cf6c59 +size 623 diff --git a/checkpoint-1100/special_tokens_map.json b/checkpoint-1100/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1100/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1100/tokenizer_config.json b/checkpoint-1100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1100/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1100/trainer_state.json b/checkpoint-1100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8b758faf2fe24b18902856577d7f8dda7d6630f6 --- /dev/null +++ b/checkpoint-1100/trainer_state.json @@ -0,0 +1,170 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 40.016666666666666, + "global_step": 1100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.019, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1576, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8013, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.7167, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 6.6474, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.6081, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 6.5764, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 6.4283, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 6.4218, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 6.2987, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 6.1713, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 6.0415, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 5.9176, + "step": 600 + }, + { + "epoch": 24.0, + "learning_rate": 0.00030555555555555555, + "loss": 5.7948, + "step": 650 + }, + { + "epoch": 25.02, + "learning_rate": 0.0002777777777777778, + "loss": 5.6087, + "step": 700 + }, + { + "epoch": 27.02, + "learning_rate": 0.00025, + "loss": 5.5749, + "step": 750 + }, + { + "epoch": 29.01, + "learning_rate": 0.00022222222222222218, + "loss": 5.4786, + "step": 800 + }, + { + "epoch": 31.01, + "learning_rate": 0.00019444444444444443, + "loss": 5.3877, + "step": 850 + }, + { + "epoch": 33.01, + "learning_rate": 0.00016666666666666666, + "loss": 5.3031, + "step": 900 + }, + { + "epoch": 35.0, + "learning_rate": 0.0001388888888888889, + "loss": 5.2301, + "step": 950 + }, + { + "epoch": 37.0, + "learning_rate": 0.00011111111111111109, + "loss": 5.1653, + "step": 1000 + }, + { + "epoch": 37.0, + "eval_loss": 5.702059268951416, + "eval_runtime": 1.9567, + "eval_samples_per_second": 51.105, + "eval_steps_per_second": 3.577, + "step": 1000 + }, + { + "epoch": 37.0, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 5.702059268951416, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 299.4834834485122, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 1.9567, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 51.105, + "step": 1000 + }, + { + "epoch": 38.02, + "learning_rate": 8.333333333333333e-05, + "loss": 5.0501, + "step": 1050 + }, + { + "epoch": 40.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 5.0694, + "step": 1100 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 3.01008420864e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1100/training_args.bin b/checkpoint-1100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eaccfd8316af7736489690ab9f6e133305fda6a5 --- /dev/null +++ b/checkpoint-1100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293690fbed5a45d87250b9feccf0dd9aa14b6f83c629ffe357c2693865f5f1cd +size 3183 diff --git a/checkpoint-1200/config.json b/checkpoint-1200/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-1200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-1200/optimizer.pt b/checkpoint-1200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..46724f6a42f16a7b6756e02e32ca0de7dfd9edb3 --- /dev/null +++ b/checkpoint-1200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc7047c98a8faa312090ebec5e5315fdfdffb411ce66f8516ee3ff910029fcd2 +size 995604017 diff --git a/checkpoint-1200/pytorch_model.bin b/checkpoint-1200/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..15d72e26860b84ce66d0557941ba7380ec63d4a8 --- /dev/null +++ b/checkpoint-1200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:264f42ae84028911e12cb342c2258bd2ee82c6eae74d62c32226255df708095a +size 510396521 diff --git a/checkpoint-1200/rng_state.pth b/checkpoint-1200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ceaeaf9abba415fd9d45caa25ea720936c0fd381 --- /dev/null +++ b/checkpoint-1200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21fd8e642f1fc407a858989aac44ac333d7bd92a7c3aeee227fbbeaef82fd28c +size 14567 diff --git a/checkpoint-1200/scaler.pt b/checkpoint-1200/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8953dddccbefc4703c09dcda27d83c15add2bade --- /dev/null +++ b/checkpoint-1200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c7277eaca0850ae3e9b6790b3d002d820169cce0671185e672c28c8ae8e056 +size 559 diff --git a/checkpoint-1200/scheduler.pt b/checkpoint-1200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..310d39c17fc616a9c83286ed00f0f4cefba9f5df --- /dev/null +++ b/checkpoint-1200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:935a8fb09a6e9698d9894853b05e181b3f56098deaaecddde08e55f06bf000c4 +size 623 diff --git a/checkpoint-1200/special_tokens_map.json b/checkpoint-1200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-1200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-1200/tokenizer_config.json b/checkpoint-1200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-1200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-1200/trainer_state.json b/checkpoint-1200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..45c83e6b4eef0c7c4a2b3614ce3c6c7796df15e3 --- /dev/null +++ b/checkpoint-1200/trainer_state.json @@ -0,0 +1,182 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 44.01, + "global_step": 1200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.019, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1576, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8013, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.7167, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 6.6474, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.6081, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 6.5764, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 6.4283, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 6.4218, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 6.2987, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 6.1713, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 6.0415, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 5.9176, + "step": 600 + }, + { + "epoch": 24.0, + "learning_rate": 0.00030555555555555555, + "loss": 5.7948, + "step": 650 + }, + { + "epoch": 25.02, + "learning_rate": 0.0002777777777777778, + "loss": 5.6087, + "step": 700 + }, + { + "epoch": 27.02, + "learning_rate": 0.00025, + "loss": 5.5749, + "step": 750 + }, + { + "epoch": 29.01, + "learning_rate": 0.00022222222222222218, + "loss": 5.4786, + "step": 800 + }, + { + "epoch": 31.01, + "learning_rate": 0.00019444444444444443, + "loss": 5.3877, + "step": 850 + }, + { + "epoch": 33.01, + "learning_rate": 0.00016666666666666666, + "loss": 5.3031, + "step": 900 + }, + { + "epoch": 35.0, + "learning_rate": 0.0001388888888888889, + "loss": 5.2301, + "step": 950 + }, + { + "epoch": 37.0, + "learning_rate": 0.00011111111111111109, + "loss": 5.1653, + "step": 1000 + }, + { + "epoch": 37.0, + "eval_loss": 5.702059268951416, + "eval_runtime": 1.9567, + "eval_samples_per_second": 51.105, + "eval_steps_per_second": 3.577, + "step": 1000 + }, + { + "epoch": 37.0, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 5.702059268951416, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 299.4834834485122, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 1.9567, + "eval_/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 51.105, + "step": 1000 + }, + { + "epoch": 38.02, + "learning_rate": 8.333333333333333e-05, + "loss": 5.0501, + "step": 1050 + }, + { + "epoch": 40.02, + "learning_rate": 5.5555555555555545e-05, + "loss": 5.0694, + "step": 1100 + }, + { + "epoch": 42.01, + "learning_rate": 2.7777777777777772e-05, + "loss": 5.0386, + "step": 1150 + }, + { + "epoch": 44.01, + "learning_rate": 0.0, + "loss": 5.019, + "step": 1200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 3.2843363254272e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1200/training_args.bin b/checkpoint-1200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eaccfd8316af7736489690ab9f6e133305fda6a5 --- /dev/null +++ b/checkpoint-1200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293690fbed5a45d87250b9feccf0dd9aa14b6f83c629ffe357c2693865f5f1cd +size 3183 diff --git a/checkpoint-200/config.json b/checkpoint-200/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-200/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a66f814aaa0f34bf7e849dcf6df477dfa4cf0fa9 --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cdc63cdb2e00ba4a26fc0921a0fca6ad0d1f3687e946d9d91ec8aa70a8d20cb +size 995603825 diff --git a/checkpoint-200/pytorch_model.bin b/checkpoint-200/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..51efdd5085136592bff2021117a885c2a9de9dfa --- /dev/null +++ b/checkpoint-200/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:621fcce1e7fa3afc7d2702e78e0fcb6cbea8d68f3193c0fbe380dfc36a316340 +size 510396521 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..70975838e9e7e7207df5175bca61a37a6936e633 --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9c7771ac7411eded4938df4b2a23053d4169b652df0a6d9618397bd596cd7ca +size 14567 diff --git a/checkpoint-200/scaler.pt b/checkpoint-200/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..365b52ebf376498237a843f6d7332e5a49b14902 --- /dev/null +++ b/checkpoint-200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb6982c29cd162f49aeb531674acf574eccd46a8f556bec596040d7c3b95200a +size 559 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..36cb8da739c80a63971a62f06f781c40ac0fceb2 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a992625ada6d884e508ff9392d16738b4a4163147f8fcbf9f46be82ecae9888 +size 623 diff --git a/checkpoint-200/special_tokens_map.json b/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-200/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-200/tokenizer_config.json b/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-200/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..755e18c63f45ab6338803d3e70d2cb9edfe1090c --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,46 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.009166666666666, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.019, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1576, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8013, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.7167, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 6.6474, + "step": 200 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 5.468319645696e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eaccfd8316af7736489690ab9f6e133305fda6a5 --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293690fbed5a45d87250b9feccf0dd9aa14b6f83c629ffe357c2693865f5f1cd +size 3183 diff --git a/checkpoint-300/config.json b/checkpoint-300/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-300/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..57735885c7c84e72ff66487b14c197d25ec327d4 --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07c445ae2e4f17acf1c99eab070dcea28a3c9e5baf106222ea3a192740667d55 +size 995604017 diff --git a/checkpoint-300/pytorch_model.bin b/checkpoint-300/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..d5c7fa94a1b14f2e57c8614190fe0a69216b3ad1 --- /dev/null +++ b/checkpoint-300/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78af2fbbcab0170a0211d1a48514384c8639d6969095e6330096300b40ce13a9 +size 510396521 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fd54e27f16342b13d9835e9d3b3c0ce69a2c4791 --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25be6403baad896dcf7e2fc85c3c7cff3b0d479404c6f7c8332980a3aeea9483 +size 14567 diff --git a/checkpoint-300/scaler.pt b/checkpoint-300/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..01066cf4761ea9d2f7962f5181762f7b08690b79 --- /dev/null +++ b/checkpoint-300/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0967b9f865f16344c55f5ccc3cf7d6e8e97ca61dda304e931ca6bad130f48dd1 +size 559 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..681987f21e79cd4685aac32a5e6b74341e25d936 --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1db899b266916f792a0898ceb27a87eaf76647f10c29cc0c13ce22f12a12efd +size 623 diff --git a/checkpoint-300/special_tokens_map.json b/checkpoint-300/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-300/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-300/tokenizer_config.json b/checkpoint-300/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-300/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..15a02ba98166f79bd993536d3537abb0cc85f63b --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,58 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 11.0025, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.019, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1576, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8013, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.7167, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 6.6474, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.6081, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 6.5764, + "step": 300 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 8.210840813568e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eaccfd8316af7736489690ab9f6e133305fda6a5 --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293690fbed5a45d87250b9feccf0dd9aa14b6f83c629ffe357c2693865f5f1cd +size 3183 diff --git a/checkpoint-400/config.json b/checkpoint-400/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-400/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d0a633cb5d06db9d9022bb00b7c38dc65054fb6 --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99a0e02010dfd00da76eb7cb93197c9c39fc5701aa8d8802491430f2ab6ee3fa +size 995604017 diff --git a/checkpoint-400/pytorch_model.bin b/checkpoint-400/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..d1556e17c9974f077b93e662af84302bb38f379f --- /dev/null +++ b/checkpoint-400/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b95b86c9dde105299250a1dd3ad5e71beeef35a10709e9c387c627ea0e2bb0e7 +size 510396521 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d7cf6e164ded815c9fcb0b54d91ce4a86048b792 --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35e1e79953c78f76c268c965a38a8332c73cdf485b454ba60ac2aaf12ca4f0be +size 14567 diff --git a/checkpoint-400/scaler.pt b/checkpoint-400/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c7aef4199e98d81152810b661dbaffc01963383 --- /dev/null +++ b/checkpoint-400/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476e510c8ea7edbd2b51d1e76a4e037820a5639381c0d8b5d32dafa492795a1e +size 559 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..967673cfc91836d239beb5a7ede06992232ba309 --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db087d678047b5c346bbb8511936612c1fdf223c6fd70321e97369bc31ed76a8 +size 623 diff --git a/checkpoint-400/special_tokens_map.json b/checkpoint-400/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-400/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-400/tokenizer_config.json b/checkpoint-400/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-400/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f3b11773e4fa04ca16a64df7bd41cb4562dbd6d6 --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,70 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 14.018333333333333, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.019, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1576, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8013, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.7167, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 6.6474, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.6081, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 6.5764, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 6.4283, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 6.4218, + "step": 400 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.0936639291392e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eaccfd8316af7736489690ab9f6e133305fda6a5 --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293690fbed5a45d87250b9feccf0dd9aa14b6f83c629ffe357c2693865f5f1cd +size 3183 diff --git a/checkpoint-500/config.json b/checkpoint-500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-500/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a0237939f3b53d4fe42f23d788fd2a2f55011a0 --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81696d8d3adaf698787708ddcf9bf8e66c4570f5e19dd5e131d5b47a645e88a3 +size 995604017 diff --git a/checkpoint-500/pytorch_model.bin b/checkpoint-500/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..fd6b17ba775ea38551ca476c4fcad008a4af059e --- /dev/null +++ b/checkpoint-500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:949fe32dea47c3686da0cac1794db1bc1365656ef8e4ff49f765733711e57084 +size 510396521 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bb3ce9d9efd9aa5b058081f50360de9f9a24fcb7 --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f84e18bfbb392505a5516aa51e71d91d87fdf2b09ce00b11d0b269f2c92c5cea +size 14567 diff --git a/checkpoint-500/scaler.pt b/checkpoint-500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8b96c9c2837f2c95b1d07d4fc3f245f9ad1ef62 --- /dev/null +++ b/checkpoint-500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa4c7be44c959599b8b43bb9bc3371e9e4e5bbc5758b3ab5afcccfda3e72e67 +size 559 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..77b958181a5b47b022b96b38a6207f274a1b6604 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026fae4a90d56c24de94b10dfa7a75b6ba4e43bd5c1a3fdb2d77356b81cd6f8a +size 623 diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a1ff8d9bcffc2256cf892f86a26fc39ec37c1c9c --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,82 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 18.011666666666667, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.019, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1576, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8013, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.7167, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 6.6474, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.6081, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 6.5764, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 6.4283, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 6.4218, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 6.2987, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 6.1713, + "step": 500 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.3679160459264e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eaccfd8316af7736489690ab9f6e133305fda6a5 --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293690fbed5a45d87250b9feccf0dd9aa14b6f83c629ffe357c2693865f5f1cd +size 3183 diff --git a/checkpoint-600/config.json b/checkpoint-600/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-600/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7e3af7cc7f136afb9b3c9592e4119c616f271b9 --- /dev/null +++ b/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d4c18da9e80c6eff60d0e28bf7f20f4b2ddde6324cf8ebe4296f5fb9fa99d48 +size 995604017 diff --git a/checkpoint-600/pytorch_model.bin b/checkpoint-600/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..1d61766b694944bf9846b8b7e36aef2bfb24017b --- /dev/null +++ b/checkpoint-600/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7412438a009720851b71d41e6799e159ed50ed566efcff0ce53b913cd21338e3 +size 510396521 diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6b4a0277c61e13f8785e50ef6698c794e378b18b --- /dev/null +++ b/checkpoint-600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84b5041c5a3a3d6be3d439229ab16f92e0b526b23d897e94e8d36433dc338189 +size 14567 diff --git a/checkpoint-600/scaler.pt b/checkpoint-600/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f95b3e36da01561ec333a83ee8419ad225633e06 --- /dev/null +++ b/checkpoint-600/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8415b86bbce347c0df306b84a695add049c2a3b2d0b6f4dda3bf036d341150 +size 559 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..62e5359badd619e7dad2c51d98fa8043d9948f0b --- /dev/null +++ b/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700940432b1c2117248896e2ce5a58d93c051d92ea97707f74d76bf1ef24deee +size 623 diff --git a/checkpoint-600/special_tokens_map.json b/checkpoint-600/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-600/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-600/tokenizer_config.json b/checkpoint-600/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-600/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..60330a9e9a542a43ac30b5ed2aaf444a3237b61f --- /dev/null +++ b/checkpoint-600/trainer_state.json @@ -0,0 +1,94 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 22.005, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.019, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1576, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8013, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.7167, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 6.6474, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.6081, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 6.5764, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 6.4283, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 6.4218, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 6.2987, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 6.1713, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 6.0415, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 5.9176, + "step": 600 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.6421681627136e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eaccfd8316af7736489690ab9f6e133305fda6a5 --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293690fbed5a45d87250b9feccf0dd9aa14b6f83c629ffe357c2693865f5f1cd +size 3183 diff --git a/checkpoint-700/config.json b/checkpoint-700/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-700/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ca48365eada861edfc23279b699df44f1a519e0 --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c03ab18fcf2ee653f9c54565516c0b50fd09ed9fd056ba9b823a70a8993f8672 +size 995604017 diff --git a/checkpoint-700/pytorch_model.bin b/checkpoint-700/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..977878620f520793b4e954f2c683f699bfde1175 --- /dev/null +++ b/checkpoint-700/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5859114ab95907650cfb596dc29addf64ca24dc14ae7928709de3e56b5e2844d +size 510396521 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ee10689b1cf71bb0ced1da08c47b1725fc3a240c --- /dev/null +++ b/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e4775319740a38263bf231efef5599b84e10605be41be0620fcddeec2a960ce +size 14567 diff --git a/checkpoint-700/scaler.pt b/checkpoint-700/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..12f2b8ec834e54a2bd7cfdd0e07b0c6e125b6490 --- /dev/null +++ b/checkpoint-700/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb213daf5cce18a5f92167ca14da9df084d907f2b9796efc4666630f312b58c +size 559 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..42408b6253265af34ae78746144fbba9316e0d7e --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d4910fd408e002ebeff50d62bfb043dcae5ef658777d0c3ee4a3bbb515ec15 +size 623 diff --git a/checkpoint-700/special_tokens_map.json b/checkpoint-700/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-700/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-700/tokenizer_config.json b/checkpoint-700/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-700/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a767f307c37af50de3f2db3ebdeb11fe6c3f79a7 --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,106 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 25.020833333333332, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.019, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1576, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8013, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.7167, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 6.6474, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.6081, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 6.5764, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 6.4283, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 6.4218, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 6.2987, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 6.1713, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 6.0415, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 5.9176, + "step": 600 + }, + { + "epoch": 24.0, + "learning_rate": 0.00030555555555555555, + "loss": 5.7948, + "step": 650 + }, + { + "epoch": 25.02, + "learning_rate": 0.0002777777777777778, + "loss": 5.6087, + "step": 700 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.914748010496e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eaccfd8316af7736489690ab9f6e133305fda6a5 --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293690fbed5a45d87250b9feccf0dd9aa14b6f83c629ffe357c2693865f5f1cd +size 3183 diff --git a/checkpoint-800/config.json b/checkpoint-800/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-800/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5a1fb7e21ea7f8bd3da62b18691ae68ac9b90b9 --- /dev/null +++ b/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c95b21e39bb2e8e9377f2abe4201c1123df9f020fd552fcabe7b0609c510a8ac +size 995604017 diff --git a/checkpoint-800/pytorch_model.bin b/checkpoint-800/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..fb953b5d3ac002d5ab881f25cf6119de2ef95cb0 --- /dev/null +++ b/checkpoint-800/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a813321711cd1871b9c22b624d9b9f14b9eb85cb2c661a799ee1991b9191a11 +size 510396521 diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b03c8507c8629db4dd634caf194618f84169ee90 --- /dev/null +++ b/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2578348da2c49bbe42cb4dc6723cbc400ec1f387d6ae54940db710e96d9160c +size 14567 diff --git a/checkpoint-800/scaler.pt b/checkpoint-800/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3c73372264156b02df8dada2192ee3c96dd5fc4 --- /dev/null +++ b/checkpoint-800/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2074cdcefbaa0a39f736d6b0f7bf018c350d49e85648bc8accc4f756ad816e +size 559 diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..75713dbce3771306ca00343ecc497c4f19a01d03 --- /dev/null +++ b/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27fb255c84833fb6ab5d93679cb236a569de9a1c4f805f72a2f60a2bc7c7499 +size 623 diff --git a/checkpoint-800/special_tokens_map.json b/checkpoint-800/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-800/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-800/tokenizer_config.json b/checkpoint-800/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-800/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1eb20eb289799d63bcd8187e162dbad97a1d0698 --- /dev/null +++ b/checkpoint-800/trainer_state.json @@ -0,0 +1,118 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 29.014166666666668, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.019, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1576, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8013, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.7167, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 6.6474, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.6081, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 6.5764, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 6.4283, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 6.4218, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 6.2987, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 6.1713, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 6.0415, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 5.9176, + "step": 600 + }, + { + "epoch": 24.0, + "learning_rate": 0.00030555555555555555, + "loss": 5.7948, + "step": 650 + }, + { + "epoch": 25.02, + "learning_rate": 0.0002777777777777778, + "loss": 5.6087, + "step": 700 + }, + { + "epoch": 27.02, + "learning_rate": 0.00025, + "loss": 5.5749, + "step": 750 + }, + { + "epoch": 29.01, + "learning_rate": 0.00022222222222222218, + "loss": 5.4786, + "step": 800 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.1890001272832e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eaccfd8316af7736489690ab9f6e133305fda6a5 --- /dev/null +++ b/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293690fbed5a45d87250b9feccf0dd9aa14b6f83c629ffe357c2693865f5f1cd +size 3183 diff --git a/checkpoint-900/config.json b/checkpoint-900/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/checkpoint-900/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..da3362b929fa506730ee9c283bd675035d9a05b6 --- /dev/null +++ b/checkpoint-900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb884f564eafcd30ee0778ea2b17293a9ad296df56b6f9b525f89bc70a4413be +size 995604017 diff --git a/checkpoint-900/pytorch_model.bin b/checkpoint-900/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..d1a9680e9f3430b11aa9c696c24a23a90169f494 --- /dev/null +++ b/checkpoint-900/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ea298d5a3c4e2eeb3304bf2d06523e9be31b7cccb66946dafd99110bb30e855 +size 510396521 diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..00e346c9f0290c7afbfe423e94edbc71f65488e3 --- /dev/null +++ b/checkpoint-900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d5bcb8bbe1dd7b33bca82cdae630facce0f0087b984d10ffa6992d4b26559de +size 14567 diff --git a/checkpoint-900/scaler.pt b/checkpoint-900/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..be54cb13c777bc6feccb478ff218e7e21fad482a --- /dev/null +++ b/checkpoint-900/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8695f57df923e22b943b0b0f2b9cc7007008e80b53ccee275b3a35963fe67e9 +size 559 diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..539d7c83ea252818dc9cbffac08cf340bb05a454 --- /dev/null +++ b/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cb09e5db72772a15094286e93cbb61d745d9b63863703cf53da0bcb9827821 +size 623 diff --git a/checkpoint-900/special_tokens_map.json b/checkpoint-900/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/checkpoint-900/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/checkpoint-900/tokenizer_config.json b/checkpoint-900/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/checkpoint-900/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..642e4de0d32d784b1e7bb21aea832eecde6e0761 --- /dev/null +++ b/checkpoint-900/trainer_state.json @@ -0,0 +1,130 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 33.0075, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999999999999996e-06, + "loss": 11.019, + "step": 1 + }, + { + "epoch": 1.02, + "learning_rate": 0.00025, + "loss": 8.1576, + "step": 50 + }, + { + "epoch": 3.02, + "learning_rate": 0.0005, + "loss": 6.8013, + "step": 100 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005833333333333333, + "loss": 6.7167, + "step": 150 + }, + { + "epoch": 7.01, + "learning_rate": 0.0005555555555555556, + "loss": 6.6474, + "step": 200 + }, + { + "epoch": 9.01, + "learning_rate": 0.0005277777777777777, + "loss": 6.6081, + "step": 250 + }, + { + "epoch": 11.0, + "learning_rate": 0.0005, + "loss": 6.5764, + "step": 300 + }, + { + "epoch": 12.02, + "learning_rate": 0.00047222222222222224, + "loss": 6.4283, + "step": 350 + }, + { + "epoch": 14.02, + "learning_rate": 0.00044444444444444436, + "loss": 6.4218, + "step": 400 + }, + { + "epoch": 16.02, + "learning_rate": 0.00041666666666666664, + "loss": 6.2987, + "step": 450 + }, + { + "epoch": 18.01, + "learning_rate": 0.00038888888888888887, + "loss": 6.1713, + "step": 500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0003611111111111111, + "loss": 6.0415, + "step": 550 + }, + { + "epoch": 22.0, + "learning_rate": 0.0003333333333333333, + "loss": 5.9176, + "step": 600 + }, + { + "epoch": 24.0, + "learning_rate": 0.00030555555555555555, + "loss": 5.7948, + "step": 650 + }, + { + "epoch": 25.02, + "learning_rate": 0.0002777777777777778, + "loss": 5.6087, + "step": 700 + }, + { + "epoch": 27.02, + "learning_rate": 0.00025, + "loss": 5.5749, + "step": 750 + }, + { + "epoch": 29.01, + "learning_rate": 0.00022222222222222218, + "loss": 5.4786, + "step": 800 + }, + { + "epoch": 31.01, + "learning_rate": 0.00019444444444444443, + "loss": 5.3877, + "step": 850 + }, + { + "epoch": 33.01, + "learning_rate": 0.00016666666666666666, + "loss": 5.3031, + "step": 900 + } + ], + "max_steps": 1200, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.4632522440704e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eaccfd8316af7736489690ab9f6e133305fda6a5 --- /dev/null +++ b/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293690fbed5a45d87250b9feccf0dd9aa14b6f83c629ffe357c2693865f5f1cd +size 3183 diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bede11cc27223028006ed5b54091164954bbd41 --- /dev/null +++ b/config.json @@ -0,0 +1,38 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1024, + "reorder_and_upcast_attn": true, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": true, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.2, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 1024 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_cache": false, + "vocab_size": 50257 +} diff --git a/metrics.json b/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..c835b0df4c4fc6fd2dec1e8e8558c0afdfdf7ca4 --- /dev/null +++ b/metrics.json @@ -0,0 +1,2505 @@ +{"num_parameters": 124439808, "trainable_parameters": 124439808, "step": 0} +{"train_info/time_between_train_steps": 3.4527134895324707, "step": 0} +{"info/global_step": 1, "train_info/time_within_train_step": 38.93634653091431, "step": 1} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 19761.71484375, "train_info/memory_reserved": 22624.0, "train_info/memory_max_reserved": 22624.0, "_timestamp": 1733766664, "_runtime": 45}, "step": 1} +{"logs": {"train/loss": 11.019, "train/learning_rate": 4.9999999999999996e-06, "train/epoch": 0.0, "_timestamp": 1733766664, "_runtime": 45}, "step": 1} +{"train_info/time_between_train_steps": 0.13353919982910156, "step": 1} +{"info/global_step": 2, "train_info/time_within_train_step": 37.828609466552734, "step": 2} +{"train_info/time_between_train_steps": 0.007358074188232422, "step": 2} +{"info/global_step": 3, "train_info/time_within_train_step": 38.37249255180359, "step": 3} +{"train_info/time_between_train_steps": 0.006016969680786133, "step": 3} +{"info/global_step": 4, "train_info/time_within_train_step": 37.79282832145691, "step": 4} +{"train_info/time_between_train_steps": 0.005666971206665039, "step": 4} +{"info/global_step": 5, "train_info/time_within_train_step": 38.381454944610596, "step": 5} +{"train_info/time_between_train_steps": 0.006276369094848633, "step": 5} +{"info/global_step": 6, "train_info/time_within_train_step": 38.01987862586975, "step": 6} +{"train_info/time_between_train_steps": 0.0065212249755859375, "step": 6} +{"info/global_step": 7, "train_info/time_within_train_step": 37.91178274154663, "step": 7} +{"train_info/time_between_train_steps": 0.005956888198852539, "step": 7} +{"info/global_step": 8, "train_info/time_within_train_step": 38.33696484565735, "step": 8} +{"train_info/time_between_train_steps": 0.006041049957275391, "step": 8} +{"info/global_step": 9, "train_info/time_within_train_step": 37.43039059638977, "step": 9} +{"train_info/time_between_train_steps": 0.005522489547729492, "step": 9} +{"info/global_step": 10, "train_info/time_within_train_step": 38.37068462371826, "step": 10} +{"train_info/time_between_train_steps": 0.005555868148803711, "step": 10} +{"info/global_step": 11, "train_info/time_within_train_step": 37.47371816635132, "step": 11} +{"train_info/time_between_train_steps": 0.0054759979248046875, "step": 11} +{"info/global_step": 12, "train_info/time_within_train_step": 38.34899950027466, "step": 12} +{"train_info/time_between_train_steps": 0.00549769401550293, "step": 12} +{"info/global_step": 13, "train_info/time_within_train_step": 37.54090976715088, "step": 13} +{"train_info/time_between_train_steps": 0.0055675506591796875, "step": 13} +{"info/global_step": 14, "train_info/time_within_train_step": 38.35472083091736, "step": 14} +{"train_info/time_between_train_steps": 0.005711555480957031, "step": 14} +{"info/global_step": 15, "train_info/time_within_train_step": 37.52582263946533, "step": 15} +{"train_info/time_between_train_steps": 0.005377292633056641, "step": 15} +{"info/global_step": 16, "train_info/time_within_train_step": 38.601133584976196, "step": 16} +{"train_info/time_between_train_steps": 0.011034250259399414, "step": 16} +{"info/global_step": 17, "train_info/time_within_train_step": 38.33004879951477, "step": 17} +{"train_info/time_between_train_steps": 0.0055294036865234375, "step": 17} +{"info/global_step": 18, "train_info/time_within_train_step": 37.54171347618103, "step": 18} +{"train_info/time_between_train_steps": 0.005390644073486328, "step": 18} +{"info/global_step": 19, "train_info/time_within_train_step": 38.36578297615051, "step": 19} +{"train_info/time_between_train_steps": 0.005427122116088867, "step": 19} +{"info/global_step": 20, "train_info/time_within_train_step": 37.426538705825806, "step": 20} +{"train_info/time_between_train_steps": 0.0058133602142333984, "step": 20} +{"info/global_step": 21, "train_info/time_within_train_step": 38.54641008377075, "step": 21} +{"train_info/time_between_train_steps": 0.005700588226318359, "step": 21} +{"info/global_step": 22, "train_info/time_within_train_step": 37.426862955093384, "step": 22} +{"train_info/time_between_train_steps": 0.0054168701171875, "step": 22} +{"info/global_step": 23, "train_info/time_within_train_step": 38.36196517944336, "step": 23} +{"train_info/time_between_train_steps": 0.011044740676879883, "step": 23} +{"info/global_step": 24, "train_info/time_within_train_step": 38.39323425292969, "step": 24} +{"train_info/time_between_train_steps": 0.005528450012207031, "step": 24} +{"info/global_step": 25, "train_info/time_within_train_step": 37.36389398574829, "step": 25} +{"train_info/time_between_train_steps": 0.005548238754272461, "step": 25} +{"info/global_step": 26, "train_info/time_within_train_step": 38.28212070465088, "step": 26} +{"train_info/time_between_train_steps": 0.005828142166137695, "step": 26} +{"info/global_step": 27, "train_info/time_within_train_step": 37.43771290779114, "step": 27} +{"train_info/time_between_train_steps": 0.005921840667724609, "step": 27} +{"train_info/time_between_train_steps": 27.400169610977173, "step": 27} +{"info/global_step": 28, "train_info/time_within_train_step": 37.78276062011719, "step": 28} +{"train_info/time_between_train_steps": 0.0057582855224609375, "step": 28} +{"info/global_step": 29, "train_info/time_within_train_step": 38.169788122177124, "step": 29} +{"train_info/time_between_train_steps": 0.005506753921508789, "step": 29} +{"info/global_step": 30, "train_info/time_within_train_step": 38.32091665267944, "step": 30} +{"train_info/time_between_train_steps": 0.0055255889892578125, "step": 30} +{"info/global_step": 31, "train_info/time_within_train_step": 37.76244139671326, "step": 31} +{"train_info/time_between_train_steps": 0.005526304244995117, "step": 31} +{"info/global_step": 32, "train_info/time_within_train_step": 38.37782073020935, "step": 32} +{"train_info/time_between_train_steps": 0.00556492805480957, "step": 32} +{"info/global_step": 33, "train_info/time_within_train_step": 37.538939237594604, "step": 33} +{"train_info/time_between_train_steps": 0.005644083023071289, "step": 33} +{"info/global_step": 34, "train_info/time_within_train_step": 38.30668377876282, "step": 34} +{"train_info/time_between_train_steps": 0.005555629730224609, "step": 34} +{"info/global_step": 35, "train_info/time_within_train_step": 37.57236576080322, "step": 35} +{"train_info/time_between_train_steps": 0.005543708801269531, "step": 35} +{"info/global_step": 36, "train_info/time_within_train_step": 38.287665605545044, "step": 36} +{"train_info/time_between_train_steps": 0.0054073333740234375, "step": 36} +{"info/global_step": 37, "train_info/time_within_train_step": 38.32649874687195, "step": 37} +{"train_info/time_between_train_steps": 0.005318164825439453, "step": 37} +{"info/global_step": 38, "train_info/time_within_train_step": 37.485737323760986, "step": 38} +{"train_info/time_between_train_steps": 0.00537109375, "step": 38} +{"info/global_step": 39, "train_info/time_within_train_step": 38.27906107902527, "step": 39} +{"train_info/time_between_train_steps": 0.005387783050537109, "step": 39} +{"info/global_step": 40, "train_info/time_within_train_step": 37.40184736251831, "step": 40} +{"train_info/time_between_train_steps": 0.0052509307861328125, "step": 40} +{"info/global_step": 41, "train_info/time_within_train_step": 38.3844940662384, "step": 41} +{"train_info/time_between_train_steps": 0.005338430404663086, "step": 41} +{"info/global_step": 42, "train_info/time_within_train_step": 38.30250883102417, "step": 42} +{"train_info/time_between_train_steps": 0.005514860153198242, "step": 42} +{"info/global_step": 43, "train_info/time_within_train_step": 37.35001277923584, "step": 43} +{"train_info/time_between_train_steps": 0.005301713943481445, "step": 43} +{"info/global_step": 44, "train_info/time_within_train_step": 38.323458194732666, "step": 44} +{"train_info/time_between_train_steps": 0.005484104156494141, "step": 44} +{"info/global_step": 45, "train_info/time_within_train_step": 37.41256785392761, "step": 45} +{"train_info/time_between_train_steps": 0.005436420440673828, "step": 45} +{"info/global_step": 46, "train_info/time_within_train_step": 38.30450201034546, "step": 46} +{"train_info/time_between_train_steps": 0.005823612213134766, "step": 46} +{"info/global_step": 47, "train_info/time_within_train_step": 38.54246520996094, "step": 47} +{"train_info/time_between_train_steps": 0.005646705627441406, "step": 47} +{"info/global_step": 48, "train_info/time_within_train_step": 37.31101751327515, "step": 48} +{"train_info/time_between_train_steps": 0.005758047103881836, "step": 48} +{"info/global_step": 49, "train_info/time_within_train_step": 38.34628415107727, "step": 49} +{"train_info/time_between_train_steps": 0.006108760833740234, "step": 49} +{"info/global_step": 50, "train_info/time_within_train_step": 37.27569651603699, "step": 50} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733768553, "_runtime": 1934}, "step": 50} +{"logs": {"train/loss": 8.1576, "train/learning_rate": 0.00025, "train/epoch": 1.02, "_timestamp": 1733768553, "_runtime": 1934}, "step": 50} +{"train_info/time_between_train_steps": 0.008629560470581055, "step": 50} +{"info/global_step": 51, "train_info/time_within_train_step": 38.165868282318115, "step": 51} +{"train_info/time_between_train_steps": 0.006372690200805664, "step": 51} +{"info/global_step": 52, "train_info/time_within_train_step": 38.17842125892639, "step": 52} +{"train_info/time_between_train_steps": 0.005531787872314453, "step": 52} +{"info/global_step": 53, "train_info/time_within_train_step": 37.41270709037781, "step": 53} +{"train_info/time_between_train_steps": 0.01109623908996582, "step": 53} +{"info/global_step": 54, "train_info/time_within_train_step": 38.29905986785889, "step": 54} +{"train_info/time_between_train_steps": 0.005966663360595703, "step": 54} +{"train_info/time_between_train_steps": 26.099669218063354, "step": 54} +{"info/global_step": 55, "train_info/time_within_train_step": 38.348421573638916, "step": 55} +{"train_info/time_between_train_steps": 0.005237102508544922, "step": 55} +{"info/global_step": 56, "train_info/time_within_train_step": 38.50822997093201, "step": 56} +{"train_info/time_between_train_steps": 0.005484580993652344, "step": 56} +{"info/global_step": 57, "train_info/time_within_train_step": 37.34101366996765, "step": 57} +{"train_info/time_between_train_steps": 0.005539894104003906, "step": 57} +{"info/global_step": 58, "train_info/time_within_train_step": 38.483242988586426, "step": 58} +{"train_info/time_between_train_steps": 0.011818647384643555, "step": 58} +{"info/global_step": 59, "train_info/time_within_train_step": 37.482346296310425, "step": 59} +{"train_info/time_between_train_steps": 0.00616908073425293, "step": 59} +{"info/global_step": 60, "train_info/time_within_train_step": 38.54427933692932, "step": 60} +{"train_info/time_between_train_steps": 0.011230707168579102, "step": 60} +{"info/global_step": 61, "train_info/time_within_train_step": 38.306960105895996, "step": 61} +{"train_info/time_between_train_steps": 0.011532783508300781, "step": 61} +{"info/global_step": 62, "train_info/time_within_train_step": 37.702821016311646, "step": 62} +{"train_info/time_between_train_steps": 0.012128114700317383, "step": 62} +{"info/global_step": 63, "train_info/time_within_train_step": 38.39034104347229, "step": 63} +{"train_info/time_between_train_steps": 0.013483762741088867, "step": 63} +{"info/global_step": 64, "train_info/time_within_train_step": 37.355360984802246, "step": 64} +{"train_info/time_between_train_steps": 0.005443096160888672, "step": 64} +{"info/global_step": 65, "train_info/time_within_train_step": 38.29259943962097, "step": 65} +{"train_info/time_between_train_steps": 0.0054073333740234375, "step": 65} +{"info/global_step": 66, "train_info/time_within_train_step": 38.321916580200195, "step": 66} +{"train_info/time_between_train_steps": 0.005448102951049805, "step": 66} +{"info/global_step": 67, "train_info/time_within_train_step": 37.31573677062988, "step": 67} +{"train_info/time_between_train_steps": 0.005450010299682617, "step": 67} +{"info/global_step": 68, "train_info/time_within_train_step": 38.28688907623291, "step": 68} +{"train_info/time_between_train_steps": 0.0055811405181884766, "step": 68} +{"info/global_step": 69, "train_info/time_within_train_step": 38.47065329551697, "step": 69} +{"train_info/time_between_train_steps": 0.0055255889892578125, "step": 69} +{"info/global_step": 70, "train_info/time_within_train_step": 37.32108545303345, "step": 70} +{"train_info/time_between_train_steps": 0.005429744720458984, "step": 70} +{"info/global_step": 71, "train_info/time_within_train_step": 38.422812700271606, "step": 71} +{"train_info/time_between_train_steps": 0.005635499954223633, "step": 71} +{"info/global_step": 72, "train_info/time_within_train_step": 37.29349422454834, "step": 72} +{"train_info/time_between_train_steps": 0.01041555404663086, "step": 72} +{"info/global_step": 73, "train_info/time_within_train_step": 38.286811113357544, "step": 73} +{"train_info/time_between_train_steps": 0.010725021362304688, "step": 73} +{"info/global_step": 74, "train_info/time_within_train_step": 38.328289270401, "step": 74} +{"train_info/time_between_train_steps": 0.00539851188659668, "step": 74} +{"info/global_step": 75, "train_info/time_within_train_step": 37.26349997520447, "step": 75} +{"train_info/time_between_train_steps": 0.005599021911621094, "step": 75} +{"info/global_step": 76, "train_info/time_within_train_step": 38.21145415306091, "step": 76} +{"train_info/time_between_train_steps": 0.005648612976074219, "step": 76} +{"info/global_step": 77, "train_info/time_within_train_step": 38.56019163131714, "step": 77} +{"train_info/time_between_train_steps": 0.009215116500854492, "step": 77} +{"info/global_step": 78, "train_info/time_within_train_step": 37.25722026824951, "step": 78} +{"train_info/time_between_train_steps": 0.005978584289550781, "step": 78} +{"info/global_step": 79, "train_info/time_within_train_step": 38.40076732635498, "step": 79} +{"train_info/time_between_train_steps": 0.005814552307128906, "step": 79} +{"info/global_step": 80, "train_info/time_within_train_step": 37.097615480422974, "step": 80} +{"train_info/time_between_train_steps": 0.006220817565917969, "step": 80} +{"info/global_step": 81, "train_info/time_within_train_step": 38.46241068840027, "step": 81} +{"train_info/time_between_train_steps": 0.006170034408569336, "step": 81} +{"train_info/time_between_train_steps": 27.392131090164185, "step": 81} +{"info/global_step": 82, "train_info/time_within_train_step": 36.811826944351196, "step": 82} +{"train_info/time_between_train_steps": 0.01116180419921875, "step": 82} +{"info/global_step": 83, "train_info/time_within_train_step": 38.41124701499939, "step": 83} +{"train_info/time_between_train_steps": 0.0056993961334228516, "step": 83} +{"info/global_step": 84, "train_info/time_within_train_step": 38.26133894920349, "step": 84} +{"train_info/time_between_train_steps": 0.005478858947753906, "step": 84} +{"info/global_step": 85, "train_info/time_within_train_step": 36.66755127906799, "step": 85} +{"train_info/time_between_train_steps": 0.00584101676940918, "step": 85} +{"info/global_step": 86, "train_info/time_within_train_step": 38.1735954284668, "step": 86} +{"train_info/time_between_train_steps": 0.005612611770629883, "step": 86} +{"info/global_step": 87, "train_info/time_within_train_step": 38.551355600357056, "step": 87} +{"train_info/time_between_train_steps": 0.011414766311645508, "step": 87} +{"info/global_step": 88, "train_info/time_within_train_step": 36.63382577896118, "step": 88} +{"train_info/time_between_train_steps": 0.005994319915771484, "step": 88} +{"info/global_step": 89, "train_info/time_within_train_step": 38.34947180747986, "step": 89} +{"train_info/time_between_train_steps": 0.00586247444152832, "step": 89} +{"info/global_step": 90, "train_info/time_within_train_step": 36.63717341423035, "step": 90} +{"train_info/time_between_train_steps": 0.005527496337890625, "step": 90} +{"info/global_step": 91, "train_info/time_within_train_step": 38.2908890247345, "step": 91} +{"train_info/time_between_train_steps": 0.010554075241088867, "step": 91} +{"info/global_step": 92, "train_info/time_within_train_step": 38.63645339012146, "step": 92} +{"train_info/time_between_train_steps": 0.015778779983520508, "step": 92} +{"info/global_step": 93, "train_info/time_within_train_step": 36.80811858177185, "step": 93} +{"train_info/time_between_train_steps": 0.009020090103149414, "step": 93} +{"info/global_step": 94, "train_info/time_within_train_step": 38.32431936264038, "step": 94} +{"train_info/time_between_train_steps": 0.0052490234375, "step": 94} +{"info/global_step": 95, "train_info/time_within_train_step": 29.129279613494873, "step": 95} +{"train_info/time_between_train_steps": 0.0055081844329833984, "step": 95} +{"info/global_step": 96, "train_info/time_within_train_step": 27.741363048553467, "step": 96} +{"train_info/time_between_train_steps": 0.005362987518310547, "step": 96} +{"info/global_step": 97, "train_info/time_within_train_step": 27.735130310058594, "step": 97} +{"train_info/time_between_train_steps": 0.0058290958404541016, "step": 97} +{"info/global_step": 98, "train_info/time_within_train_step": 27.742210388183594, "step": 98} +{"train_info/time_between_train_steps": 0.005553722381591797, "step": 98} +{"info/global_step": 99, "train_info/time_within_train_step": 27.77865242958069, "step": 99} +{"train_info/time_between_train_steps": 0.005316257476806641, "step": 99} +{"info/global_step": 100, "train_info/time_within_train_step": 27.730672359466553, "step": 100} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733770443, "_runtime": 3824}, "step": 100} +{"logs": {"train/loss": 6.8013, "train/learning_rate": 0.0005, "train/epoch": 3.02, "_timestamp": 1733770443, "_runtime": 3824}, "step": 100} +{"train_info/time_between_train_steps": 2.425294876098633, "step": 100} +{"info/global_step": 101, "train_info/time_within_train_step": 35.96463990211487, "step": 101} +{"train_info/time_between_train_steps": 0.005402088165283203, "step": 101} +{"info/global_step": 102, "train_info/time_within_train_step": 37.818182706832886, "step": 102} +{"train_info/time_between_train_steps": 0.005635261535644531, "step": 102} +{"info/global_step": 103, "train_info/time_within_train_step": 35.67779779434204, "step": 103} +{"train_info/time_between_train_steps": 0.005498170852661133, "step": 103} +{"info/global_step": 104, "train_info/time_within_train_step": 37.80285024642944, "step": 104} +{"train_info/time_between_train_steps": 0.0054931640625, "step": 104} +{"info/global_step": 105, "train_info/time_within_train_step": 37.82252883911133, "step": 105} +{"train_info/time_between_train_steps": 0.005520343780517578, "step": 105} +{"info/global_step": 106, "train_info/time_within_train_step": 37.81902241706848, "step": 106} +{"train_info/time_between_train_steps": 0.005499124526977539, "step": 106} +{"info/global_step": 107, "train_info/time_within_train_step": 37.789506673812866, "step": 107} +{"train_info/time_between_train_steps": 0.006010770797729492, "step": 107} +{"info/global_step": 108, "train_info/time_within_train_step": 32.916154623031616, "step": 108} +{"train_info/time_between_train_steps": 0.006381988525390625, "step": 108} +{"train_info/time_between_train_steps": 21.194095849990845, "step": 108} +{"info/global_step": 109, "train_info/time_within_train_step": 27.866756677627563, "step": 109} +{"train_info/time_between_train_steps": 0.0059986114501953125, "step": 109} +{"info/global_step": 110, "train_info/time_within_train_step": 27.950493335723877, "step": 110} +{"train_info/time_between_train_steps": 0.0060155391693115234, "step": 110} +{"info/global_step": 111, "train_info/time_within_train_step": 27.77434277534485, "step": 111} +{"train_info/time_between_train_steps": 0.006040811538696289, "step": 111} +{"info/global_step": 112, "train_info/time_within_train_step": 27.97386598587036, "step": 112} +{"train_info/time_between_train_steps": 0.011072397232055664, "step": 112} +{"info/global_step": 113, "train_info/time_within_train_step": 27.770883560180664, "step": 113} +{"train_info/time_between_train_steps": 0.011259078979492188, "step": 113} +{"info/global_step": 114, "train_info/time_within_train_step": 28.028172492980957, "step": 114} +{"train_info/time_between_train_steps": 0.012162923812866211, "step": 114} +{"info/global_step": 115, "train_info/time_within_train_step": 27.777290105819702, "step": 115} +{"train_info/time_between_train_steps": 0.0060007572174072266, "step": 115} +{"info/global_step": 116, "train_info/time_within_train_step": 27.83554744720459, "step": 116} +{"train_info/time_between_train_steps": 0.005597591400146484, "step": 116} +{"info/global_step": 117, "train_info/time_within_train_step": 27.75230646133423, "step": 117} +{"train_info/time_between_train_steps": 0.0054090023040771484, "step": 117} +{"info/global_step": 118, "train_info/time_within_train_step": 27.75041127204895, "step": 118} +{"train_info/time_between_train_steps": 0.005477190017700195, "step": 118} +{"info/global_step": 119, "train_info/time_within_train_step": 27.725661277770996, "step": 119} +{"train_info/time_between_train_steps": 0.005445003509521484, "step": 119} +{"info/global_step": 120, "train_info/time_within_train_step": 27.74455213546753, "step": 120} +{"train_info/time_between_train_steps": 0.005501270294189453, "step": 120} +{"info/global_step": 121, "train_info/time_within_train_step": 27.72917604446411, "step": 121} +{"train_info/time_between_train_steps": 0.005403041839599609, "step": 121} +{"info/global_step": 122, "train_info/time_within_train_step": 27.72615957260132, "step": 122} +{"train_info/time_between_train_steps": 0.005503416061401367, "step": 122} +{"info/global_step": 123, "train_info/time_within_train_step": 27.84001588821411, "step": 123} +{"train_info/time_between_train_steps": 0.0054776668548583984, "step": 123} +{"info/global_step": 124, "train_info/time_within_train_step": 27.73280668258667, "step": 124} +{"train_info/time_between_train_steps": 0.005260944366455078, "step": 124} +{"info/global_step": 125, "train_info/time_within_train_step": 27.73345685005188, "step": 125} +{"train_info/time_between_train_steps": 0.005423784255981445, "step": 125} +{"info/global_step": 126, "train_info/time_within_train_step": 27.745503664016724, "step": 126} +{"train_info/time_between_train_steps": 0.005655050277709961, "step": 126} +{"info/global_step": 127, "train_info/time_within_train_step": 27.73186421394348, "step": 127} +{"train_info/time_between_train_steps": 0.005262613296508789, "step": 127} +{"info/global_step": 128, "train_info/time_within_train_step": 27.75238585472107, "step": 128} +{"train_info/time_between_train_steps": 0.005517482757568359, "step": 128} +{"info/global_step": 129, "train_info/time_within_train_step": 27.757834911346436, "step": 129} +{"train_info/time_between_train_steps": 0.005314826965332031, "step": 129} +{"info/global_step": 130, "train_info/time_within_train_step": 27.756678104400635, "step": 130} +{"train_info/time_between_train_steps": 0.005286455154418945, "step": 130} +{"info/global_step": 131, "train_info/time_within_train_step": 27.73662257194519, "step": 131} +{"train_info/time_between_train_steps": 0.005524396896362305, "step": 131} +{"info/global_step": 132, "train_info/time_within_train_step": 27.731279134750366, "step": 132} +{"train_info/time_between_train_steps": 0.005465269088745117, "step": 132} +{"info/global_step": 133, "train_info/time_within_train_step": 27.73097825050354, "step": 133} +{"train_info/time_between_train_steps": 0.005588531494140625, "step": 133} +{"info/global_step": 134, "train_info/time_within_train_step": 27.742738246917725, "step": 134} +{"train_info/time_between_train_steps": 0.0060863494873046875, "step": 134} +{"info/global_step": 135, "train_info/time_within_train_step": 27.755813360214233, "step": 135} +{"train_info/time_between_train_steps": 0.005850791931152344, "step": 135} +{"train_info/time_between_train_steps": 20.54612684249878, "step": 135} +{"info/global_step": 136, "train_info/time_within_train_step": 27.74640154838562, "step": 136} +{"train_info/time_between_train_steps": 0.005364418029785156, "step": 136} +{"info/global_step": 137, "train_info/time_within_train_step": 27.851665019989014, "step": 137} +{"train_info/time_between_train_steps": 0.005324125289916992, "step": 137} +{"info/global_step": 138, "train_info/time_within_train_step": 27.855072259902954, "step": 138} +{"train_info/time_between_train_steps": 0.005770444869995117, "step": 138} +{"info/global_step": 139, "train_info/time_within_train_step": 27.89194917678833, "step": 139} +{"train_info/time_between_train_steps": 0.005559682846069336, "step": 139} +{"info/global_step": 140, "train_info/time_within_train_step": 27.74132466316223, "step": 140} +{"train_info/time_between_train_steps": 0.005461931228637695, "step": 140} +{"info/global_step": 141, "train_info/time_within_train_step": 27.919901132583618, "step": 141} +{"train_info/time_between_train_steps": 0.00558018684387207, "step": 141} +{"info/global_step": 142, "train_info/time_within_train_step": 27.740297079086304, "step": 142} +{"train_info/time_between_train_steps": 0.011010408401489258, "step": 142} +{"info/global_step": 143, "train_info/time_within_train_step": 27.773708820343018, "step": 143} +{"train_info/time_between_train_steps": 0.010978460311889648, "step": 143} +{"info/global_step": 144, "train_info/time_within_train_step": 27.73321557044983, "step": 144} +{"train_info/time_between_train_steps": 0.010610342025756836, "step": 144} +{"info/global_step": 145, "train_info/time_within_train_step": 27.74796462059021, "step": 145} +{"train_info/time_between_train_steps": 0.010794401168823242, "step": 145} +{"info/global_step": 146, "train_info/time_within_train_step": 27.738525867462158, "step": 146} +{"train_info/time_between_train_steps": 0.005311489105224609, "step": 146} +{"info/global_step": 147, "train_info/time_within_train_step": 27.750019311904907, "step": 147} +{"train_info/time_between_train_steps": 0.005604267120361328, "step": 147} +{"info/global_step": 148, "train_info/time_within_train_step": 27.742377042770386, "step": 148} +{"train_info/time_between_train_steps": 0.0054531097412109375, "step": 148} +{"info/global_step": 149, "train_info/time_within_train_step": 27.752878665924072, "step": 149} +{"train_info/time_between_train_steps": 0.005525112152099609, "step": 149} +{"info/global_step": 150, "train_info/time_within_train_step": 27.744396448135376, "step": 150} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733771948, "_runtime": 5329}, "step": 150} +{"logs": {"train/loss": 6.7167, "train/learning_rate": 0.0005833333333333333, "train/epoch": 5.01, "_timestamp": 1733771948, "_runtime": 5329}, "step": 150} +{"train_info/time_between_train_steps": 0.00739288330078125, "step": 150} +{"info/global_step": 151, "train_info/time_within_train_step": 27.73969292640686, "step": 151} +{"train_info/time_between_train_steps": 0.0054492950439453125, "step": 151} +{"info/global_step": 152, "train_info/time_within_train_step": 27.7532320022583, "step": 152} +{"train_info/time_between_train_steps": 0.005625009536743164, "step": 152} +{"info/global_step": 153, "train_info/time_within_train_step": 27.741329669952393, "step": 153} +{"train_info/time_between_train_steps": 0.0052568912506103516, "step": 153} +{"info/global_step": 154, "train_info/time_within_train_step": 27.84661054611206, "step": 154} +{"train_info/time_between_train_steps": 0.0053195953369140625, "step": 154} +{"info/global_step": 155, "train_info/time_within_train_step": 27.827303171157837, "step": 155} +{"train_info/time_between_train_steps": 0.0054128170013427734, "step": 155} +{"info/global_step": 156, "train_info/time_within_train_step": 27.730929851531982, "step": 156} +{"train_info/time_between_train_steps": 0.005350828170776367, "step": 156} +{"info/global_step": 157, "train_info/time_within_train_step": 27.736297130584717, "step": 157} +{"train_info/time_between_train_steps": 0.00556182861328125, "step": 157} +{"info/global_step": 158, "train_info/time_within_train_step": 27.728670358657837, "step": 158} +{"train_info/time_between_train_steps": 0.005320310592651367, "step": 158} +{"info/global_step": 159, "train_info/time_within_train_step": 27.738446474075317, "step": 159} +{"train_info/time_between_train_steps": 0.005646467208862305, "step": 159} +{"info/global_step": 160, "train_info/time_within_train_step": 27.744645357131958, "step": 160} +{"train_info/time_between_train_steps": 0.005860328674316406, "step": 160} +{"info/global_step": 161, "train_info/time_within_train_step": 27.740398645401, "step": 161} +{"train_info/time_between_train_steps": 0.005801200866699219, "step": 161} +{"info/global_step": 162, "train_info/time_within_train_step": 27.75733995437622, "step": 162} +{"train_info/time_between_train_steps": 0.0060253143310546875, "step": 162} +{"train_info/time_between_train_steps": 20.670987367630005, "step": 162} +{"info/global_step": 163, "train_info/time_within_train_step": 27.73252272605896, "step": 163} +{"train_info/time_between_train_steps": 0.005301952362060547, "step": 163} +{"info/global_step": 164, "train_info/time_within_train_step": 27.94309949874878, "step": 164} +{"train_info/time_between_train_steps": 0.005555152893066406, "step": 164} +{"info/global_step": 165, "train_info/time_within_train_step": 27.828842639923096, "step": 165} +{"train_info/time_between_train_steps": 0.005457162857055664, "step": 165} +{"info/global_step": 166, "train_info/time_within_train_step": 27.947349548339844, "step": 166} +{"train_info/time_between_train_steps": 0.005615711212158203, "step": 166} +{"info/global_step": 167, "train_info/time_within_train_step": 27.75945496559143, "step": 167} +{"train_info/time_between_train_steps": 0.005271434783935547, "step": 167} +{"info/global_step": 168, "train_info/time_within_train_step": 27.91556978225708, "step": 168} +{"train_info/time_between_train_steps": 0.011214733123779297, "step": 168} +{"info/global_step": 169, "train_info/time_within_train_step": 27.831359386444092, "step": 169} +{"train_info/time_between_train_steps": 0.011079072952270508, "step": 169} +{"info/global_step": 170, "train_info/time_within_train_step": 27.851022958755493, "step": 170} +{"train_info/time_between_train_steps": 0.005986213684082031, "step": 170} +{"info/global_step": 171, "train_info/time_within_train_step": 27.78358554840088, "step": 171} +{"train_info/time_between_train_steps": 0.010711669921875, "step": 171} +{"info/global_step": 172, "train_info/time_within_train_step": 27.72316598892212, "step": 172} +{"train_info/time_between_train_steps": 0.0053043365478515625, "step": 172} +{"info/global_step": 173, "train_info/time_within_train_step": 27.794499158859253, "step": 173} +{"train_info/time_between_train_steps": 0.005277872085571289, "step": 173} +{"info/global_step": 174, "train_info/time_within_train_step": 27.72447443008423, "step": 174} +{"train_info/time_between_train_steps": 0.005334377288818359, "step": 174} +{"info/global_step": 175, "train_info/time_within_train_step": 27.808926820755005, "step": 175} +{"train_info/time_between_train_steps": 0.005270481109619141, "step": 175} +{"info/global_step": 176, "train_info/time_within_train_step": 27.721093893051147, "step": 176} +{"train_info/time_between_train_steps": 0.005394935607910156, "step": 176} +{"info/global_step": 177, "train_info/time_within_train_step": 27.725677967071533, "step": 177} +{"train_info/time_between_train_steps": 0.005476951599121094, "step": 177} +{"info/global_step": 178, "train_info/time_within_train_step": 27.721839904785156, "step": 178} +{"train_info/time_between_train_steps": 0.010578632354736328, "step": 178} +{"info/global_step": 179, "train_info/time_within_train_step": 27.72495722770691, "step": 179} +{"train_info/time_between_train_steps": 0.010911941528320312, "step": 179} +{"info/global_step": 180, "train_info/time_within_train_step": 27.759665727615356, "step": 180} +{"train_info/time_between_train_steps": 0.010740995407104492, "step": 180} +{"info/global_step": 181, "train_info/time_within_train_step": 27.726526975631714, "step": 181} +{"train_info/time_between_train_steps": 0.010808706283569336, "step": 181} +{"info/global_step": 182, "train_info/time_within_train_step": 27.737905502319336, "step": 182} +{"train_info/time_between_train_steps": 0.01088714599609375, "step": 182} +{"info/global_step": 183, "train_info/time_within_train_step": 27.72287678718567, "step": 183} +{"train_info/time_between_train_steps": 0.010757684707641602, "step": 183} +{"info/global_step": 184, "train_info/time_within_train_step": 27.827795267105103, "step": 184} +{"train_info/time_between_train_steps": 0.010816335678100586, "step": 184} +{"info/global_step": 185, "train_info/time_within_train_step": 27.725996255874634, "step": 185} +{"train_info/time_between_train_steps": 0.005468130111694336, "step": 185} +{"info/global_step": 186, "train_info/time_within_train_step": 27.727951288223267, "step": 186} +{"train_info/time_between_train_steps": 0.005526304244995117, "step": 186} +{"info/global_step": 187, "train_info/time_within_train_step": 27.732794046401978, "step": 187} +{"train_info/time_between_train_steps": 0.006306886672973633, "step": 187} +{"info/global_step": 188, "train_info/time_within_train_step": 27.741127729415894, "step": 188} +{"train_info/time_between_train_steps": 0.011166095733642578, "step": 188} +{"info/global_step": 189, "train_info/time_within_train_step": 27.754586935043335, "step": 189} +{"train_info/time_between_train_steps": 0.005869626998901367, "step": 189} +{"train_info/time_between_train_steps": 20.761091470718384, "step": 189} +{"info/global_step": 190, "train_info/time_within_train_step": 27.716609477996826, "step": 190} +{"train_info/time_between_train_steps": 0.005172014236450195, "step": 190} +{"info/global_step": 191, "train_info/time_within_train_step": 27.82063913345337, "step": 191} +{"train_info/time_between_train_steps": 0.005161762237548828, "step": 191} +{"info/global_step": 192, "train_info/time_within_train_step": 27.723772525787354, "step": 192} +{"train_info/time_between_train_steps": 0.005477428436279297, "step": 192} +{"info/global_step": 193, "train_info/time_within_train_step": 27.87853693962097, "step": 193} +{"train_info/time_between_train_steps": 0.005499839782714844, "step": 193} +{"info/global_step": 194, "train_info/time_within_train_step": 27.731072425842285, "step": 194} +{"train_info/time_between_train_steps": 0.005501985549926758, "step": 194} +{"info/global_step": 195, "train_info/time_within_train_step": 27.85272455215454, "step": 195} +{"train_info/time_between_train_steps": 0.005387067794799805, "step": 195} +{"info/global_step": 196, "train_info/time_within_train_step": 27.74413514137268, "step": 196} +{"train_info/time_between_train_steps": 0.0063016414642333984, "step": 196} +{"info/global_step": 197, "train_info/time_within_train_step": 27.779496431350708, "step": 197} +{"train_info/time_between_train_steps": 0.005372047424316406, "step": 197} +{"info/global_step": 198, "train_info/time_within_train_step": 27.73417615890503, "step": 198} +{"train_info/time_between_train_steps": 0.0053026676177978516, "step": 198} +{"info/global_step": 199, "train_info/time_within_train_step": 27.821089267730713, "step": 199} +{"train_info/time_between_train_steps": 0.0054624080657958984, "step": 199} +{"info/global_step": 200, "train_info/time_within_train_step": 27.719847917556763, "step": 200} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733773379, "_runtime": 6760}, "step": 200} +{"logs": {"train/loss": 6.6474, "train/learning_rate": 0.0005555555555555556, "train/epoch": 7.01, "_timestamp": 1733773379, "_runtime": 6760}, "step": 200} +{"train_info/time_between_train_steps": 2.481623411178589, "step": 200} +{"info/global_step": 201, "train_info/time_within_train_step": 27.71917200088501, "step": 201} +{"train_info/time_between_train_steps": 0.005189180374145508, "step": 201} +{"info/global_step": 202, "train_info/time_within_train_step": 27.7251296043396, "step": 202} +{"train_info/time_between_train_steps": 0.005398988723754883, "step": 202} +{"info/global_step": 203, "train_info/time_within_train_step": 27.723665952682495, "step": 203} +{"train_info/time_between_train_steps": 0.005291461944580078, "step": 203} +{"info/global_step": 204, "train_info/time_within_train_step": 27.722336530685425, "step": 204} +{"train_info/time_between_train_steps": 0.005326747894287109, "step": 204} +{"info/global_step": 205, "train_info/time_within_train_step": 27.72724485397339, "step": 205} +{"train_info/time_between_train_steps": 0.005307197570800781, "step": 205} +{"info/global_step": 206, "train_info/time_within_train_step": 27.723496675491333, "step": 206} +{"train_info/time_between_train_steps": 0.005341291427612305, "step": 206} +{"info/global_step": 207, "train_info/time_within_train_step": 27.72486925125122, "step": 207} +{"train_info/time_between_train_steps": 0.005416393280029297, "step": 207} +{"info/global_step": 208, "train_info/time_within_train_step": 27.72404384613037, "step": 208} +{"train_info/time_between_train_steps": 0.00528717041015625, "step": 208} +{"info/global_step": 209, "train_info/time_within_train_step": 27.732268571853638, "step": 209} +{"train_info/time_between_train_steps": 0.005324602127075195, "step": 209} +{"info/global_step": 210, "train_info/time_within_train_step": 27.725950241088867, "step": 210} +{"train_info/time_between_train_steps": 0.00553441047668457, "step": 210} +{"info/global_step": 211, "train_info/time_within_train_step": 27.722718477249146, "step": 211} +{"train_info/time_between_train_steps": 0.005480051040649414, "step": 211} +{"info/global_step": 212, "train_info/time_within_train_step": 27.725797653198242, "step": 212} +{"train_info/time_between_train_steps": 0.005458354949951172, "step": 212} +{"info/global_step": 213, "train_info/time_within_train_step": 27.72949743270874, "step": 213} +{"train_info/time_between_train_steps": 0.005727529525756836, "step": 213} +{"info/global_step": 214, "train_info/time_within_train_step": 27.734387397766113, "step": 214} +{"train_info/time_between_train_steps": 0.005708456039428711, "step": 214} +{"info/global_step": 215, "train_info/time_within_train_step": 27.84061312675476, "step": 215} +{"train_info/time_between_train_steps": 0.005956411361694336, "step": 215} +{"info/global_step": 216, "train_info/time_within_train_step": 27.754711866378784, "step": 216} +{"train_info/time_between_train_steps": 0.006112575531005859, "step": 216} +{"train_info/time_between_train_steps": 20.502310752868652, "step": 216} +{"info/global_step": 217, "train_info/time_within_train_step": 27.735637187957764, "step": 217} +{"train_info/time_between_train_steps": 0.00563812255859375, "step": 217} +{"info/global_step": 218, "train_info/time_within_train_step": 27.89487934112549, "step": 218} +{"train_info/time_between_train_steps": 0.005602121353149414, "step": 218} +{"info/global_step": 219, "train_info/time_within_train_step": 27.726799964904785, "step": 219} +{"train_info/time_between_train_steps": 0.005329608917236328, "step": 219} +{"info/global_step": 220, "train_info/time_within_train_step": 27.866040468215942, "step": 220} +{"train_info/time_between_train_steps": 0.005577564239501953, "step": 220} +{"info/global_step": 221, "train_info/time_within_train_step": 27.749855756759644, "step": 221} +{"train_info/time_between_train_steps": 0.005761623382568359, "step": 221} +{"info/global_step": 222, "train_info/time_within_train_step": 27.86705470085144, "step": 222} +{"train_info/time_between_train_steps": 0.005610227584838867, "step": 222} +{"info/global_step": 223, "train_info/time_within_train_step": 27.738126039505005, "step": 223} +{"train_info/time_between_train_steps": 0.005753755569458008, "step": 223} +{"info/global_step": 224, "train_info/time_within_train_step": 27.805463075637817, "step": 224} +{"train_info/time_between_train_steps": 0.0056459903717041016, "step": 224} +{"info/global_step": 225, "train_info/time_within_train_step": 27.72844362258911, "step": 225} +{"train_info/time_between_train_steps": 0.010534286499023438, "step": 225} +{"info/global_step": 226, "train_info/time_within_train_step": 27.72821855545044, "step": 226} +{"train_info/time_between_train_steps": 0.0052149295806884766, "step": 226} +{"info/global_step": 227, "train_info/time_within_train_step": 27.724745273590088, "step": 227} +{"train_info/time_between_train_steps": 0.005323648452758789, "step": 227} +{"info/global_step": 228, "train_info/time_within_train_step": 27.72770643234253, "step": 228} +{"train_info/time_between_train_steps": 0.005286693572998047, "step": 228} +{"info/global_step": 229, "train_info/time_within_train_step": 27.730223894119263, "step": 229} +{"train_info/time_between_train_steps": 0.005532264709472656, "step": 229} +{"info/global_step": 230, "train_info/time_within_train_step": 27.82423996925354, "step": 230} +{"train_info/time_between_train_steps": 0.005339622497558594, "step": 230} +{"info/global_step": 231, "train_info/time_within_train_step": 27.729910373687744, "step": 231} +{"train_info/time_between_train_steps": 0.005345344543457031, "step": 231} +{"info/global_step": 232, "train_info/time_within_train_step": 27.729791402816772, "step": 232} +{"train_info/time_between_train_steps": 0.010810136795043945, "step": 232} +{"info/global_step": 233, "train_info/time_within_train_step": 27.727124214172363, "step": 233} +{"train_info/time_between_train_steps": 0.005393028259277344, "step": 233} +{"info/global_step": 234, "train_info/time_within_train_step": 27.728814363479614, "step": 234} +{"train_info/time_between_train_steps": 0.005557060241699219, "step": 234} +{"info/global_step": 235, "train_info/time_within_train_step": 27.756532192230225, "step": 235} +{"train_info/time_between_train_steps": 0.005439281463623047, "step": 235} +{"info/global_step": 236, "train_info/time_within_train_step": 27.7296781539917, "step": 236} +{"train_info/time_between_train_steps": 0.005338430404663086, "step": 236} +{"info/global_step": 237, "train_info/time_within_train_step": 27.737428426742554, "step": 237} +{"train_info/time_between_train_steps": 0.005569934844970703, "step": 237} +{"info/global_step": 238, "train_info/time_within_train_step": 27.75440502166748, "step": 238} +{"train_info/time_between_train_steps": 0.005452871322631836, "step": 238} +{"info/global_step": 239, "train_info/time_within_train_step": 27.759413719177246, "step": 239} +{"train_info/time_between_train_steps": 0.005430459976196289, "step": 239} +{"info/global_step": 240, "train_info/time_within_train_step": 27.737401247024536, "step": 240} +{"train_info/time_between_train_steps": 0.005735635757446289, "step": 240} +{"info/global_step": 241, "train_info/time_within_train_step": 27.735336303710938, "step": 241} +{"train_info/time_between_train_steps": 0.005580425262451172, "step": 241} +{"info/global_step": 242, "train_info/time_within_train_step": 27.742631673812866, "step": 242} +{"train_info/time_between_train_steps": 0.00612187385559082, "step": 242} +{"info/global_step": 243, "train_info/time_within_train_step": 27.7557954788208, "step": 243} +{"train_info/time_between_train_steps": 0.005826711654663086, "step": 243} +{"train_info/time_between_train_steps": 20.647109270095825, "step": 243} +{"info/global_step": 244, "train_info/time_within_train_step": 27.767329931259155, "step": 244} +{"train_info/time_between_train_steps": 0.0056476593017578125, "step": 244} +{"info/global_step": 245, "train_info/time_within_train_step": 27.967870473861694, "step": 245} +{"train_info/time_between_train_steps": 0.005483388900756836, "step": 245} +{"info/global_step": 246, "train_info/time_within_train_step": 27.72744584083557, "step": 246} +{"train_info/time_between_train_steps": 0.0053517818450927734, "step": 246} +{"info/global_step": 247, "train_info/time_within_train_step": 27.887460708618164, "step": 247} +{"train_info/time_between_train_steps": 0.005533456802368164, "step": 247} +{"info/global_step": 248, "train_info/time_within_train_step": 27.738619089126587, "step": 248} +{"train_info/time_between_train_steps": 0.0055468082427978516, "step": 248} +{"info/global_step": 249, "train_info/time_within_train_step": 27.842833995819092, "step": 249} +{"train_info/time_between_train_steps": 0.0054857730865478516, "step": 249} +{"info/global_step": 250, "train_info/time_within_train_step": 27.735568284988403, "step": 250} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733774810, "_runtime": 8191}, "step": 250} +{"logs": {"train/loss": 6.6081, "train/learning_rate": 0.0005277777777777777, "train/epoch": 9.01, "_timestamp": 1733774810, "_runtime": 8191}, "step": 250} +{"train_info/time_between_train_steps": 0.0075225830078125, "step": 250} +{"info/global_step": 251, "train_info/time_within_train_step": 27.786020517349243, "step": 251} +{"train_info/time_between_train_steps": 0.005533933639526367, "step": 251} +{"info/global_step": 252, "train_info/time_within_train_step": 27.737483024597168, "step": 252} +{"train_info/time_between_train_steps": 0.01050567626953125, "step": 252} +{"info/global_step": 253, "train_info/time_within_train_step": 27.717614889144897, "step": 253} +{"train_info/time_between_train_steps": 0.01048588752746582, "step": 253} +{"info/global_step": 254, "train_info/time_within_train_step": 27.720216751098633, "step": 254} +{"train_info/time_between_train_steps": 0.005409955978393555, "step": 254} +{"info/global_step": 255, "train_info/time_within_train_step": 27.71661877632141, "step": 255} +{"train_info/time_between_train_steps": 0.010493278503417969, "step": 255} +{"info/global_step": 256, "train_info/time_within_train_step": 27.721813917160034, "step": 256} +{"train_info/time_between_train_steps": 0.010782480239868164, "step": 256} +{"info/global_step": 257, "train_info/time_within_train_step": 27.72168254852295, "step": 257} +{"train_info/time_between_train_steps": 0.005338430404663086, "step": 257} +{"info/global_step": 258, "train_info/time_within_train_step": 27.72265887260437, "step": 258} +{"train_info/time_between_train_steps": 0.00531005859375, "step": 258} +{"info/global_step": 259, "train_info/time_within_train_step": 27.723676919937134, "step": 259} +{"train_info/time_between_train_steps": 0.0053942203521728516, "step": 259} +{"info/global_step": 260, "train_info/time_within_train_step": 27.810866117477417, "step": 260} +{"train_info/time_between_train_steps": 0.010859966278076172, "step": 260} +{"info/global_step": 261, "train_info/time_within_train_step": 27.78331971168518, "step": 261} +{"train_info/time_between_train_steps": 0.005354166030883789, "step": 261} +{"info/global_step": 262, "train_info/time_within_train_step": 27.742390871047974, "step": 262} +{"train_info/time_between_train_steps": 0.005357503890991211, "step": 262} +{"info/global_step": 263, "train_info/time_within_train_step": 27.719823837280273, "step": 263} +{"train_info/time_between_train_steps": 0.005259037017822266, "step": 263} +{"info/global_step": 264, "train_info/time_within_train_step": 27.724738836288452, "step": 264} +{"train_info/time_between_train_steps": 0.010827302932739258, "step": 264} +{"info/global_step": 265, "train_info/time_within_train_step": 27.738254070281982, "step": 265} +{"train_info/time_between_train_steps": 0.005237102508544922, "step": 265} +{"info/global_step": 266, "train_info/time_within_train_step": 27.72832465171814, "step": 266} +{"train_info/time_between_train_steps": 0.005275726318359375, "step": 266} +{"info/global_step": 267, "train_info/time_within_train_step": 27.730542421340942, "step": 267} +{"train_info/time_between_train_steps": 0.005645036697387695, "step": 267} +{"info/global_step": 268, "train_info/time_within_train_step": 27.729971647262573, "step": 268} +{"train_info/time_between_train_steps": 0.0053403377532958984, "step": 268} +{"info/global_step": 269, "train_info/time_within_train_step": 27.74011754989624, "step": 269} +{"train_info/time_between_train_steps": 0.006087779998779297, "step": 269} +{"info/global_step": 270, "train_info/time_within_train_step": 27.756770610809326, "step": 270} +{"train_info/time_between_train_steps": 0.005887269973754883, "step": 270} +{"train_info/time_between_train_steps": 20.74927520751953, "step": 270} +{"info/global_step": 271, "train_info/time_within_train_step": 27.722717761993408, "step": 271} +{"train_info/time_between_train_steps": 0.005605220794677734, "step": 271} +{"info/global_step": 272, "train_info/time_within_train_step": 27.86484122276306, "step": 272} +{"train_info/time_between_train_steps": 0.005652666091918945, "step": 272} +{"info/global_step": 273, "train_info/time_within_train_step": 27.726394414901733, "step": 273} +{"train_info/time_between_train_steps": 0.005289316177368164, "step": 273} +{"info/global_step": 274, "train_info/time_within_train_step": 27.872286319732666, "step": 274} +{"train_info/time_between_train_steps": 0.005600929260253906, "step": 274} +{"info/global_step": 275, "train_info/time_within_train_step": 27.831136226654053, "step": 275} +{"train_info/time_between_train_steps": 0.005574464797973633, "step": 275} +{"info/global_step": 276, "train_info/time_within_train_step": 27.851248025894165, "step": 276} +{"train_info/time_between_train_steps": 0.005522012710571289, "step": 276} +{"info/global_step": 277, "train_info/time_within_train_step": 27.735211849212646, "step": 277} +{"train_info/time_between_train_steps": 0.005602598190307617, "step": 277} +{"info/global_step": 278, "train_info/time_within_train_step": 27.7819402217865, "step": 278} +{"train_info/time_between_train_steps": 0.005513668060302734, "step": 278} +{"info/global_step": 279, "train_info/time_within_train_step": 27.720601081848145, "step": 279} +{"train_info/time_between_train_steps": 0.0053234100341796875, "step": 279} +{"info/global_step": 280, "train_info/time_within_train_step": 27.715641021728516, "step": 280} +{"train_info/time_between_train_steps": 0.005265951156616211, "step": 280} +{"info/global_step": 281, "train_info/time_within_train_step": 27.720683813095093, "step": 281} +{"train_info/time_between_train_steps": 0.005406379699707031, "step": 281} +{"info/global_step": 282, "train_info/time_within_train_step": 27.71694779396057, "step": 282} +{"train_info/time_between_train_steps": 0.0052776336669921875, "step": 282} +{"info/global_step": 283, "train_info/time_within_train_step": 27.720551013946533, "step": 283} +{"train_info/time_between_train_steps": 0.005440473556518555, "step": 283} +{"info/global_step": 284, "train_info/time_within_train_step": 27.72073745727539, "step": 284} +{"train_info/time_between_train_steps": 0.010628461837768555, "step": 284} +{"info/global_step": 285, "train_info/time_within_train_step": 27.7214252948761, "step": 285} +{"train_info/time_between_train_steps": 0.005338907241821289, "step": 285} +{"info/global_step": 286, "train_info/time_within_train_step": 27.724498748779297, "step": 286} +{"train_info/time_between_train_steps": 0.005358219146728516, "step": 286} +{"info/global_step": 287, "train_info/time_within_train_step": 27.74513602256775, "step": 287} +{"train_info/time_between_train_steps": 0.005298614501953125, "step": 287} +{"info/global_step": 288, "train_info/time_within_train_step": 27.72710657119751, "step": 288} +{"train_info/time_between_train_steps": 0.0054090023040771484, "step": 288} +{"info/global_step": 289, "train_info/time_within_train_step": 27.72235083580017, "step": 289} +{"train_info/time_between_train_steps": 0.0052585601806640625, "step": 289} +{"info/global_step": 290, "train_info/time_within_train_step": 27.720215320587158, "step": 290} +{"train_info/time_between_train_steps": 0.00531458854675293, "step": 290} +{"info/global_step": 291, "train_info/time_within_train_step": 27.8348548412323, "step": 291} +{"train_info/time_between_train_steps": 0.005489349365234375, "step": 291} +{"info/global_step": 292, "train_info/time_within_train_step": 27.72332787513733, "step": 292} +{"train_info/time_between_train_steps": 0.005216360092163086, "step": 292} +{"info/global_step": 293, "train_info/time_within_train_step": 27.725444555282593, "step": 293} +{"train_info/time_between_train_steps": 0.005391359329223633, "step": 293} +{"info/global_step": 294, "train_info/time_within_train_step": 27.72942566871643, "step": 294} +{"train_info/time_between_train_steps": 0.010912179946899414, "step": 294} +{"info/global_step": 295, "train_info/time_within_train_step": 27.72942304611206, "step": 295} +{"train_info/time_between_train_steps": 0.011067390441894531, "step": 295} +{"info/global_step": 296, "train_info/time_within_train_step": 27.74340033531189, "step": 296} +{"train_info/time_between_train_steps": 0.006013154983520508, "step": 296} +{"info/global_step": 297, "train_info/time_within_train_step": 27.750205278396606, "step": 297} +{"train_info/time_between_train_steps": 0.005804300308227539, "step": 297} +{"train_info/time_between_train_steps": 20.46677041053772, "step": 297} +{"info/global_step": 298, "train_info/time_within_train_step": 27.73127770423889, "step": 298} +{"train_info/time_between_train_steps": 0.00564265251159668, "step": 298} +{"info/global_step": 299, "train_info/time_within_train_step": 27.868708848953247, "step": 299} +{"train_info/time_between_train_steps": 0.00558924674987793, "step": 299} +{"info/global_step": 300, "train_info/time_within_train_step": 27.721440076828003, "step": 300} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733776239, "_runtime": 9620}, "step": 300} +{"logs": {"train/loss": 6.5764, "train/learning_rate": 0.0005, "train/epoch": 11.0, "_timestamp": 1733776239, "_runtime": 9620}, "step": 300} +{"train_info/time_between_train_steps": 2.339794158935547, "step": 300} +{"info/global_step": 301, "train_info/time_within_train_step": 27.892654180526733, "step": 301} +{"train_info/time_between_train_steps": 0.005666017532348633, "step": 301} +{"info/global_step": 302, "train_info/time_within_train_step": 27.73995327949524, "step": 302} +{"train_info/time_between_train_steps": 0.005612373352050781, "step": 302} +{"info/global_step": 303, "train_info/time_within_train_step": 27.859591007232666, "step": 303} +{"train_info/time_between_train_steps": 0.005832672119140625, "step": 303} +{"info/global_step": 304, "train_info/time_within_train_step": 27.74377679824829, "step": 304} +{"train_info/time_between_train_steps": 0.005946636199951172, "step": 304} +{"info/global_step": 305, "train_info/time_within_train_step": 27.79812788963318, "step": 305} +{"train_info/time_between_train_steps": 0.005443572998046875, "step": 305} +{"info/global_step": 306, "train_info/time_within_train_step": 27.829793453216553, "step": 306} +{"train_info/time_between_train_steps": 0.0053424835205078125, "step": 306} +{"info/global_step": 307, "train_info/time_within_train_step": 27.754560232162476, "step": 307} +{"train_info/time_between_train_steps": 0.005803823471069336, "step": 307} +{"info/global_step": 308, "train_info/time_within_train_step": 27.72694420814514, "step": 308} +{"train_info/time_between_train_steps": 0.005381345748901367, "step": 308} +{"info/global_step": 309, "train_info/time_within_train_step": 27.727657079696655, "step": 309} +{"train_info/time_between_train_steps": 0.005343914031982422, "step": 309} +{"info/global_step": 310, "train_info/time_within_train_step": 27.724398374557495, "step": 310} +{"train_info/time_between_train_steps": 0.0054552555084228516, "step": 310} +{"info/global_step": 311, "train_info/time_within_train_step": 27.725050926208496, "step": 311} +{"train_info/time_between_train_steps": 0.00541996955871582, "step": 311} +{"info/global_step": 312, "train_info/time_within_train_step": 27.736623287200928, "step": 312} +{"train_info/time_between_train_steps": 0.005421161651611328, "step": 312} +{"info/global_step": 313, "train_info/time_within_train_step": 27.777621746063232, "step": 313} +{"train_info/time_between_train_steps": 0.005414009094238281, "step": 313} +{"info/global_step": 314, "train_info/time_within_train_step": 27.74818253517151, "step": 314} +{"train_info/time_between_train_steps": 0.00547027587890625, "step": 314} +{"info/global_step": 315, "train_info/time_within_train_step": 27.743916511535645, "step": 315} +{"train_info/time_between_train_steps": 0.0056645870208740234, "step": 315} +{"info/global_step": 316, "train_info/time_within_train_step": 27.72752094268799, "step": 316} +{"train_info/time_between_train_steps": 0.005355358123779297, "step": 316} +{"info/global_step": 317, "train_info/time_within_train_step": 27.728468656539917, "step": 317} +{"train_info/time_between_train_steps": 0.005322694778442383, "step": 317} +{"info/global_step": 318, "train_info/time_within_train_step": 27.727474689483643, "step": 318} +{"train_info/time_between_train_steps": 0.005524158477783203, "step": 318} +{"info/global_step": 319, "train_info/time_within_train_step": 27.726999044418335, "step": 319} +{"train_info/time_between_train_steps": 0.0052568912506103516, "step": 319} +{"info/global_step": 320, "train_info/time_within_train_step": 27.727426052093506, "step": 320} +{"train_info/time_between_train_steps": 0.0053255558013916016, "step": 320} +{"info/global_step": 321, "train_info/time_within_train_step": 27.73282265663147, "step": 321} +{"train_info/time_between_train_steps": 0.005580902099609375, "step": 321} +{"info/global_step": 322, "train_info/time_within_train_step": 27.818734169006348, "step": 322} +{"train_info/time_between_train_steps": 0.005421161651611328, "step": 322} +{"info/global_step": 323, "train_info/time_within_train_step": 27.735759735107422, "step": 323} +{"train_info/time_between_train_steps": 0.0061283111572265625, "step": 323} +{"info/global_step": 324, "train_info/time_within_train_step": 27.74269723892212, "step": 324} +{"train_info/time_between_train_steps": 0.006031036376953125, "step": 324} +{"train_info/time_between_train_steps": 20.48819875717163, "step": 324} +{"info/global_step": 325, "train_info/time_within_train_step": 27.72626256942749, "step": 325} +{"train_info/time_between_train_steps": 0.00561070442199707, "step": 325} +{"info/global_step": 326, "train_info/time_within_train_step": 27.873506784439087, "step": 326} +{"train_info/time_between_train_steps": 0.005646228790283203, "step": 326} +{"info/global_step": 327, "train_info/time_within_train_step": 27.722819089889526, "step": 327} +{"train_info/time_between_train_steps": 0.005290508270263672, "step": 327} +{"info/global_step": 328, "train_info/time_within_train_step": 27.8641357421875, "step": 328} +{"train_info/time_between_train_steps": 0.005352497100830078, "step": 328} +{"info/global_step": 329, "train_info/time_within_train_step": 27.73255467414856, "step": 329} +{"train_info/time_between_train_steps": 0.005465269088745117, "step": 329} +{"info/global_step": 330, "train_info/time_within_train_step": 27.83755588531494, "step": 330} +{"train_info/time_between_train_steps": 0.005367755889892578, "step": 330} +{"info/global_step": 331, "train_info/time_within_train_step": 27.736628770828247, "step": 331} +{"train_info/time_between_train_steps": 0.005750894546508789, "step": 331} +{"info/global_step": 332, "train_info/time_within_train_step": 27.815524101257324, "step": 332} +{"train_info/time_between_train_steps": 0.005445241928100586, "step": 332} +{"info/global_step": 333, "train_info/time_within_train_step": 27.766406536102295, "step": 333} +{"train_info/time_between_train_steps": 0.0051708221435546875, "step": 333} +{"info/global_step": 334, "train_info/time_within_train_step": 27.718495845794678, "step": 334} +{"train_info/time_between_train_steps": 0.005337238311767578, "step": 334} +{"info/global_step": 335, "train_info/time_within_train_step": 27.712730169296265, "step": 335} +{"train_info/time_between_train_steps": 0.005140542984008789, "step": 335} +{"info/global_step": 336, "train_info/time_within_train_step": 27.742344617843628, "step": 336} +{"train_info/time_between_train_steps": 0.0051152706146240234, "step": 336} +{"info/global_step": 337, "train_info/time_within_train_step": 27.872246026992798, "step": 337} +{"train_info/time_between_train_steps": 0.0052907466888427734, "step": 337} +{"info/global_step": 338, "train_info/time_within_train_step": 27.720880270004272, "step": 338} +{"train_info/time_between_train_steps": 0.005187511444091797, "step": 338} +{"info/global_step": 339, "train_info/time_within_train_step": 27.715768814086914, "step": 339} +{"train_info/time_between_train_steps": 0.005216121673583984, "step": 339} +{"info/global_step": 340, "train_info/time_within_train_step": 27.714877367019653, "step": 340} +{"train_info/time_between_train_steps": 0.0052754878997802734, "step": 340} +{"info/global_step": 341, "train_info/time_within_train_step": 27.724637269973755, "step": 341} +{"train_info/time_between_train_steps": 0.005234241485595703, "step": 341} +{"info/global_step": 342, "train_info/time_within_train_step": 27.713029146194458, "step": 342} +{"train_info/time_between_train_steps": 0.005263328552246094, "step": 342} +{"info/global_step": 343, "train_info/time_within_train_step": 27.716185331344604, "step": 343} +{"train_info/time_between_train_steps": 0.005201816558837891, "step": 343} +{"info/global_step": 344, "train_info/time_within_train_step": 27.720937490463257, "step": 344} +{"train_info/time_between_train_steps": 0.005232572555541992, "step": 344} +{"info/global_step": 345, "train_info/time_within_train_step": 27.72150206565857, "step": 345} +{"train_info/time_between_train_steps": 0.00534510612487793, "step": 345} +{"info/global_step": 346, "train_info/time_within_train_step": 27.714545488357544, "step": 346} +{"train_info/time_between_train_steps": 0.005084514617919922, "step": 346} +{"info/global_step": 347, "train_info/time_within_train_step": 27.716583490371704, "step": 347} +{"train_info/time_between_train_steps": 0.005349159240722656, "step": 347} +{"info/global_step": 348, "train_info/time_within_train_step": 27.725308895111084, "step": 348} +{"train_info/time_between_train_steps": 0.00555109977722168, "step": 348} +{"info/global_step": 349, "train_info/time_within_train_step": 27.71594548225403, "step": 349} +{"train_info/time_between_train_steps": 0.005359172821044922, "step": 349} +{"info/global_step": 350, "train_info/time_within_train_step": 27.7335262298584, "step": 350} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733777650, "_runtime": 11031}, "step": 350} +{"logs": {"train/loss": 6.4283, "train/learning_rate": 0.00047222222222222224, "train/epoch": 12.02, "_timestamp": 1733777650, "_runtime": 11031}, "step": 350} +{"train_info/time_between_train_steps": 0.0073413848876953125, "step": 350} +{"info/global_step": 351, "train_info/time_within_train_step": 27.741496086120605, "step": 351} +{"train_info/time_between_train_steps": 0.0056874752044677734, "step": 351} +{"train_info/time_between_train_steps": 20.658155918121338, "step": 351} +{"info/global_step": 352, "train_info/time_within_train_step": 27.831920385360718, "step": 352} +{"train_info/time_between_train_steps": 0.005682468414306641, "step": 352} +{"info/global_step": 353, "train_info/time_within_train_step": 27.874319076538086, "step": 353} +{"train_info/time_between_train_steps": 0.005541563034057617, "step": 353} +{"info/global_step": 354, "train_info/time_within_train_step": 27.719534635543823, "step": 354} +{"train_info/time_between_train_steps": 0.0053255558013916016, "step": 354} +{"info/global_step": 355, "train_info/time_within_train_step": 27.857936143875122, "step": 355} +{"train_info/time_between_train_steps": 0.0054743289947509766, "step": 355} +{"info/global_step": 356, "train_info/time_within_train_step": 27.728989839553833, "step": 356} +{"train_info/time_between_train_steps": 0.005439043045043945, "step": 356} +{"info/global_step": 357, "train_info/time_within_train_step": 27.83651113510132, "step": 357} +{"train_info/time_between_train_steps": 0.005364656448364258, "step": 357} +{"info/global_step": 358, "train_info/time_within_train_step": 27.723691940307617, "step": 358} +{"train_info/time_between_train_steps": 0.005523204803466797, "step": 358} +{"info/global_step": 359, "train_info/time_within_train_step": 27.789486408233643, "step": 359} +{"train_info/time_between_train_steps": 0.005351066589355469, "step": 359} +{"info/global_step": 360, "train_info/time_within_train_step": 27.713162183761597, "step": 360} +{"train_info/time_between_train_steps": 0.005068302154541016, "step": 360} +{"info/global_step": 361, "train_info/time_within_train_step": 27.7244656085968, "step": 361} +{"train_info/time_between_train_steps": 0.005155801773071289, "step": 361} +{"info/global_step": 362, "train_info/time_within_train_step": 27.707757711410522, "step": 362} +{"train_info/time_between_train_steps": 0.005037546157836914, "step": 362} +{"info/global_step": 363, "train_info/time_within_train_step": 27.7416090965271, "step": 363} +{"train_info/time_between_train_steps": 0.005093097686767578, "step": 363} +{"info/global_step": 364, "train_info/time_within_train_step": 27.73816728591919, "step": 364} +{"train_info/time_between_train_steps": 0.00524449348449707, "step": 364} +{"info/global_step": 365, "train_info/time_within_train_step": 27.700913667678833, "step": 365} +{"train_info/time_between_train_steps": 0.005089998245239258, "step": 365} +{"info/global_step": 366, "train_info/time_within_train_step": 27.707506895065308, "step": 366} +{"train_info/time_between_train_steps": 0.0051763057708740234, "step": 366} +{"info/global_step": 367, "train_info/time_within_train_step": 27.799886465072632, "step": 367} +{"train_info/time_between_train_steps": 0.005103111267089844, "step": 367} +{"info/global_step": 368, "train_info/time_within_train_step": 27.703275442123413, "step": 368} +{"train_info/time_between_train_steps": 0.004968881607055664, "step": 368} +{"info/global_step": 369, "train_info/time_within_train_step": 27.756262063980103, "step": 369} +{"train_info/time_between_train_steps": 0.005085945129394531, "step": 369} +{"info/global_step": 370, "train_info/time_within_train_step": 27.718660831451416, "step": 370} +{"train_info/time_between_train_steps": 0.0050046443939208984, "step": 370} +{"info/global_step": 371, "train_info/time_within_train_step": 27.704365730285645, "step": 371} +{"train_info/time_between_train_steps": 0.0049707889556884766, "step": 371} +{"info/global_step": 372, "train_info/time_within_train_step": 27.72797417640686, "step": 372} +{"train_info/time_between_train_steps": 0.005119800567626953, "step": 372} +{"info/global_step": 373, "train_info/time_within_train_step": 27.698678970336914, "step": 373} +{"train_info/time_between_train_steps": 0.005051851272583008, "step": 373} +{"info/global_step": 374, "train_info/time_within_train_step": 27.69915533065796, "step": 374} +{"train_info/time_between_train_steps": 0.0050885677337646484, "step": 374} +{"info/global_step": 375, "train_info/time_within_train_step": 27.70141863822937, "step": 375} +{"train_info/time_between_train_steps": 0.0052297115325927734, "step": 375} +{"info/global_step": 376, "train_info/time_within_train_step": 27.71461057662964, "step": 376} +{"train_info/time_between_train_steps": 0.005059719085693359, "step": 376} +{"info/global_step": 377, "train_info/time_within_train_step": 27.71390986442566, "step": 377} +{"train_info/time_between_train_steps": 0.00530695915222168, "step": 377} +{"info/global_step": 378, "train_info/time_within_train_step": 27.718338012695312, "step": 378} +{"train_info/time_between_train_steps": 0.0055735111236572266, "step": 378} +{"train_info/time_between_train_steps": 20.698636293411255, "step": 378} +{"info/global_step": 379, "train_info/time_within_train_step": 27.720134973526, "step": 379} +{"train_info/time_between_train_steps": 0.005230426788330078, "step": 379} +{"info/global_step": 380, "train_info/time_within_train_step": 27.815088033676147, "step": 380} +{"train_info/time_between_train_steps": 0.005063295364379883, "step": 380} +{"info/global_step": 381, "train_info/time_within_train_step": 27.718114376068115, "step": 381} +{"train_info/time_between_train_steps": 0.005246400833129883, "step": 381} +{"info/global_step": 382, "train_info/time_within_train_step": 27.953912019729614, "step": 382} +{"train_info/time_between_train_steps": 0.005465030670166016, "step": 382} +{"info/global_step": 383, "train_info/time_within_train_step": 27.720717191696167, "step": 383} +{"train_info/time_between_train_steps": 0.005541324615478516, "step": 383} +{"info/global_step": 384, "train_info/time_within_train_step": 27.906747341156006, "step": 384} +{"train_info/time_between_train_steps": 0.0055201053619384766, "step": 384} +{"info/global_step": 385, "train_info/time_within_train_step": 27.721030712127686, "step": 385} +{"train_info/time_between_train_steps": 0.005255460739135742, "step": 385} +{"info/global_step": 386, "train_info/time_within_train_step": 27.76786518096924, "step": 386} +{"train_info/time_between_train_steps": 0.005410909652709961, "step": 386} +{"info/global_step": 387, "train_info/time_within_train_step": 27.72233486175537, "step": 387} +{"train_info/time_between_train_steps": 0.005224943161010742, "step": 387} +{"info/global_step": 388, "train_info/time_within_train_step": 27.706693172454834, "step": 388} +{"train_info/time_between_train_steps": 0.0050144195556640625, "step": 388} +{"info/global_step": 389, "train_info/time_within_train_step": 27.74471163749695, "step": 389} +{"train_info/time_between_train_steps": 0.005140542984008789, "step": 389} +{"info/global_step": 390, "train_info/time_within_train_step": 27.751256465911865, "step": 390} +{"train_info/time_between_train_steps": 0.005240917205810547, "step": 390} +{"info/global_step": 391, "train_info/time_within_train_step": 27.710596561431885, "step": 391} +{"train_info/time_between_train_steps": 0.005132436752319336, "step": 391} +{"info/global_step": 392, "train_info/time_within_train_step": 27.711233854293823, "step": 392} +{"train_info/time_between_train_steps": 0.005227565765380859, "step": 392} +{"info/global_step": 393, "train_info/time_within_train_step": 27.711905241012573, "step": 393} +{"train_info/time_between_train_steps": 0.005302906036376953, "step": 393} +{"info/global_step": 394, "train_info/time_within_train_step": 27.70932936668396, "step": 394} +{"train_info/time_between_train_steps": 0.005176544189453125, "step": 394} +{"info/global_step": 395, "train_info/time_within_train_step": 27.716891050338745, "step": 395} +{"train_info/time_between_train_steps": 0.005381345748901367, "step": 395} +{"info/global_step": 396, "train_info/time_within_train_step": 27.709603309631348, "step": 396} +{"train_info/time_between_train_steps": 0.0051538944244384766, "step": 396} +{"info/global_step": 397, "train_info/time_within_train_step": 27.71305775642395, "step": 397} +{"train_info/time_between_train_steps": 0.0052492618560791016, "step": 397} +{"info/global_step": 398, "train_info/time_within_train_step": 27.80952501296997, "step": 398} +{"train_info/time_between_train_steps": 0.005251884460449219, "step": 398} +{"info/global_step": 399, "train_info/time_within_train_step": 27.713884115219116, "step": 399} +{"train_info/time_between_train_steps": 0.0051422119140625, "step": 399} +{"info/global_step": 400, "train_info/time_within_train_step": 27.720698356628418, "step": 400} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733779079, "_runtime": 12460}, "step": 400} +{"logs": {"train/loss": 6.4218, "train/learning_rate": 0.00044444444444444436, "train/epoch": 14.02, "_timestamp": 1733779079, "_runtime": 12460}, "step": 400} +{"train_info/time_between_train_steps": 2.3121609687805176, "step": 400} +{"info/global_step": 401, "train_info/time_within_train_step": 27.71777367591858, "step": 401} +{"train_info/time_between_train_steps": 0.005169868469238281, "step": 401} +{"info/global_step": 402, "train_info/time_within_train_step": 27.717411756515503, "step": 402} +{"train_info/time_between_train_steps": 0.005550384521484375, "step": 402} +{"info/global_step": 403, "train_info/time_within_train_step": 27.72557783126831, "step": 403} +{"train_info/time_between_train_steps": 0.005434513092041016, "step": 403} +{"info/global_step": 404, "train_info/time_within_train_step": 27.73148226737976, "step": 404} +{"train_info/time_between_train_steps": 0.0056591033935546875, "step": 404} +{"info/global_step": 405, "train_info/time_within_train_step": 27.751105070114136, "step": 405} +{"train_info/time_between_train_steps": 0.005938529968261719, "step": 405} +{"train_info/time_between_train_steps": 20.34439444541931, "step": 405} +{"info/global_step": 406, "train_info/time_within_train_step": 27.76087737083435, "step": 406} +{"train_info/time_between_train_steps": 0.005778074264526367, "step": 406} +{"info/global_step": 407, "train_info/time_within_train_step": 27.95245099067688, "step": 407} +{"train_info/time_between_train_steps": 0.00548863410949707, "step": 407} +{"info/global_step": 408, "train_info/time_within_train_step": 27.733964443206787, "step": 408} +{"train_info/time_between_train_steps": 0.005715847015380859, "step": 408} +{"info/global_step": 409, "train_info/time_within_train_step": 27.926204442977905, "step": 409} +{"train_info/time_between_train_steps": 0.0055942535400390625, "step": 409} +{"info/global_step": 410, "train_info/time_within_train_step": 27.764160871505737, "step": 410} +{"train_info/time_between_train_steps": 0.005834817886352539, "step": 410} +{"info/global_step": 411, "train_info/time_within_train_step": 27.96063995361328, "step": 411} +{"train_info/time_between_train_steps": 0.005525112152099609, "step": 411} +{"info/global_step": 412, "train_info/time_within_train_step": 27.749011516571045, "step": 412} +{"train_info/time_between_train_steps": 0.005651235580444336, "step": 412} +{"info/global_step": 413, "train_info/time_within_train_step": 27.970228910446167, "step": 413} +{"train_info/time_between_train_steps": 0.00542759895324707, "step": 413} +{"info/global_step": 414, "train_info/time_within_train_step": 27.74783682823181, "step": 414} +{"train_info/time_between_train_steps": 0.005191802978515625, "step": 414} +{"info/global_step": 415, "train_info/time_within_train_step": 27.742249488830566, "step": 415} +{"train_info/time_between_train_steps": 0.006340503692626953, "step": 415} +{"info/global_step": 416, "train_info/time_within_train_step": 27.71774697303772, "step": 416} +{"train_info/time_between_train_steps": 0.005357027053833008, "step": 416} +{"info/global_step": 417, "train_info/time_within_train_step": 27.716107845306396, "step": 417} +{"train_info/time_between_train_steps": 0.005426883697509766, "step": 417} +{"info/global_step": 418, "train_info/time_within_train_step": 27.713189363479614, "step": 418} +{"train_info/time_between_train_steps": 0.005187273025512695, "step": 418} +{"info/global_step": 419, "train_info/time_within_train_step": 27.719259023666382, "step": 419} +{"train_info/time_between_train_steps": 0.005379438400268555, "step": 419} +{"info/global_step": 420, "train_info/time_within_train_step": 27.713801622390747, "step": 420} +{"train_info/time_between_train_steps": 0.00518798828125, "step": 420} +{"info/global_step": 421, "train_info/time_within_train_step": 27.73161768913269, "step": 421} +{"train_info/time_between_train_steps": 0.005289316177368164, "step": 421} +{"info/global_step": 422, "train_info/time_within_train_step": 27.716870546340942, "step": 422} +{"train_info/time_between_train_steps": 0.005168914794921875, "step": 422} +{"info/global_step": 423, "train_info/time_within_train_step": 27.76643681526184, "step": 423} +{"train_info/time_between_train_steps": 0.005135774612426758, "step": 423} +{"info/global_step": 424, "train_info/time_within_train_step": 27.73130464553833, "step": 424} +{"train_info/time_between_train_steps": 0.005294084548950195, "step": 424} +{"info/global_step": 425, "train_info/time_within_train_step": 27.71772050857544, "step": 425} +{"train_info/time_between_train_steps": 0.005205631256103516, "step": 425} +{"info/global_step": 426, "train_info/time_within_train_step": 27.715405464172363, "step": 426} +{"train_info/time_between_train_steps": 0.0054476261138916016, "step": 426} +{"info/global_step": 427, "train_info/time_within_train_step": 27.74619746208191, "step": 427} +{"train_info/time_between_train_steps": 0.0052487850189208984, "step": 427} +{"info/global_step": 428, "train_info/time_within_train_step": 27.728160619735718, "step": 428} +{"train_info/time_between_train_steps": 0.005235195159912109, "step": 428} +{"info/global_step": 429, "train_info/time_within_train_step": 27.816872119903564, "step": 429} +{"train_info/time_between_train_steps": 0.0057299137115478516, "step": 429} +{"info/global_step": 430, "train_info/time_within_train_step": 27.726601123809814, "step": 430} +{"train_info/time_between_train_steps": 0.005341529846191406, "step": 430} +{"info/global_step": 431, "train_info/time_within_train_step": 27.73237657546997, "step": 431} +{"train_info/time_between_train_steps": 0.00561070442199707, "step": 431} +{"info/global_step": 432, "train_info/time_within_train_step": 27.74871563911438, "step": 432} +{"train_info/time_between_train_steps": 0.005806922912597656, "step": 432} +{"train_info/time_between_train_steps": 20.56004023551941, "step": 432} +{"info/global_step": 433, "train_info/time_within_train_step": 27.711645364761353, "step": 433} +{"train_info/time_between_train_steps": 0.010632514953613281, "step": 433} +{"info/global_step": 434, "train_info/time_within_train_step": 27.871891736984253, "step": 434} +{"train_info/time_between_train_steps": 0.00545501708984375, "step": 434} +{"info/global_step": 435, "train_info/time_within_train_step": 27.721043825149536, "step": 435} +{"train_info/time_between_train_steps": 0.0054035186767578125, "step": 435} +{"info/global_step": 436, "train_info/time_within_train_step": 27.870978593826294, "step": 436} +{"train_info/time_between_train_steps": 0.0053653717041015625, "step": 436} +{"info/global_step": 437, "train_info/time_within_train_step": 27.726111888885498, "step": 437} +{"train_info/time_between_train_steps": 0.0055694580078125, "step": 437} +{"info/global_step": 438, "train_info/time_within_train_step": 27.892069101333618, "step": 438} +{"train_info/time_between_train_steps": 0.005973339080810547, "step": 438} +{"info/global_step": 439, "train_info/time_within_train_step": 27.79847240447998, "step": 439} +{"train_info/time_between_train_steps": 0.006220579147338867, "step": 439} +{"info/global_step": 440, "train_info/time_within_train_step": 27.842593669891357, "step": 440} +{"train_info/time_between_train_steps": 0.005491495132446289, "step": 440} +{"info/global_step": 441, "train_info/time_within_train_step": 27.745887279510498, "step": 441} +{"train_info/time_between_train_steps": 0.005215644836425781, "step": 441} +{"info/global_step": 442, "train_info/time_within_train_step": 27.764376163482666, "step": 442} +{"train_info/time_between_train_steps": 0.005191802978515625, "step": 442} +{"info/global_step": 443, "train_info/time_within_train_step": 27.772676944732666, "step": 443} +{"train_info/time_between_train_steps": 0.005184650421142578, "step": 443} +{"info/global_step": 444, "train_info/time_within_train_step": 27.886042594909668, "step": 444} +{"train_info/time_between_train_steps": 0.005196571350097656, "step": 444} +{"info/global_step": 445, "train_info/time_within_train_step": 27.75616192817688, "step": 445} +{"train_info/time_between_train_steps": 0.005298614501953125, "step": 445} +{"info/global_step": 446, "train_info/time_within_train_step": 27.80873441696167, "step": 446} +{"train_info/time_between_train_steps": 0.00549769401550293, "step": 446} +{"info/global_step": 447, "train_info/time_within_train_step": 27.748908281326294, "step": 447} +{"train_info/time_between_train_steps": 0.0054340362548828125, "step": 447} +{"info/global_step": 448, "train_info/time_within_train_step": 27.761772394180298, "step": 448} +{"train_info/time_between_train_steps": 0.005414247512817383, "step": 448} +{"info/global_step": 449, "train_info/time_within_train_step": 27.761603116989136, "step": 449} +{"train_info/time_between_train_steps": 0.005461931228637695, "step": 449} +{"info/global_step": 450, "train_info/time_within_train_step": 27.75284242630005, "step": 450} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733780511, "_runtime": 13892}, "step": 450} +{"logs": {"train/loss": 6.2987, "train/learning_rate": 0.00041666666666666664, "train/epoch": 16.02, "_timestamp": 1733780511, "_runtime": 13892}, "step": 450} +{"train_info/time_between_train_steps": 0.007547616958618164, "step": 450} +{"info/global_step": 451, "train_info/time_within_train_step": 27.81057071685791, "step": 451} +{"train_info/time_between_train_steps": 0.005513668060302734, "step": 451} +{"info/global_step": 452, "train_info/time_within_train_step": 27.75145649909973, "step": 452} +{"train_info/time_between_train_steps": 0.005341291427612305, "step": 452} +{"info/global_step": 453, "train_info/time_within_train_step": 27.749901056289673, "step": 453} +{"train_info/time_between_train_steps": 0.019484519958496094, "step": 453} +{"info/global_step": 454, "train_info/time_within_train_step": 27.74705696105957, "step": 454} +{"train_info/time_between_train_steps": 0.005349397659301758, "step": 454} +{"info/global_step": 455, "train_info/time_within_train_step": 27.800230979919434, "step": 455} +{"train_info/time_between_train_steps": 0.005334138870239258, "step": 455} +{"info/global_step": 456, "train_info/time_within_train_step": 27.79487633705139, "step": 456} +{"train_info/time_between_train_steps": 0.005593299865722656, "step": 456} +{"info/global_step": 457, "train_info/time_within_train_step": 27.76380181312561, "step": 457} +{"train_info/time_between_train_steps": 0.005362510681152344, "step": 457} +{"info/global_step": 458, "train_info/time_within_train_step": 27.802431106567383, "step": 458} +{"train_info/time_between_train_steps": 0.0057599544525146484, "step": 458} +{"info/global_step": 459, "train_info/time_within_train_step": 27.902540922164917, "step": 459} +{"train_info/time_between_train_steps": 0.005982637405395508, "step": 459} +{"train_info/time_between_train_steps": 20.948060035705566, "step": 459} +{"info/global_step": 460, "train_info/time_within_train_step": 27.747637033462524, "step": 460} +{"train_info/time_between_train_steps": 0.005705118179321289, "step": 460} +{"info/global_step": 461, "train_info/time_within_train_step": 27.890584230422974, "step": 461} +{"train_info/time_between_train_steps": 0.0055654048919677734, "step": 461} +{"info/global_step": 462, "train_info/time_within_train_step": 27.822877407073975, "step": 462} +{"train_info/time_between_train_steps": 0.005811452865600586, "step": 462} +{"info/global_step": 463, "train_info/time_within_train_step": 27.95158338546753, "step": 463} +{"train_info/time_between_train_steps": 0.005911588668823242, "step": 463} +{"info/global_step": 464, "train_info/time_within_train_step": 27.75122618675232, "step": 464} +{"train_info/time_between_train_steps": 0.005525827407836914, "step": 464} +{"info/global_step": 465, "train_info/time_within_train_step": 27.948463439941406, "step": 465} +{"train_info/time_between_train_steps": 0.0060384273529052734, "step": 465} +{"info/global_step": 466, "train_info/time_within_train_step": 27.74256157875061, "step": 466} +{"train_info/time_between_train_steps": 0.0059833526611328125, "step": 466} +{"info/global_step": 467, "train_info/time_within_train_step": 27.805288314819336, "step": 467} +{"train_info/time_between_train_steps": 0.005606889724731445, "step": 467} +{"info/global_step": 468, "train_info/time_within_train_step": 27.720407962799072, "step": 468} +{"train_info/time_between_train_steps": 0.0051114559173583984, "step": 468} +{"info/global_step": 469, "train_info/time_within_train_step": 27.710062742233276, "step": 469} +{"train_info/time_between_train_steps": 0.00539851188659668, "step": 469} +{"info/global_step": 470, "train_info/time_within_train_step": 27.708866119384766, "step": 470} +{"train_info/time_between_train_steps": 0.0052890777587890625, "step": 470} +{"info/global_step": 471, "train_info/time_within_train_step": 27.710402250289917, "step": 471} +{"train_info/time_between_train_steps": 0.005185842514038086, "step": 471} +{"info/global_step": 472, "train_info/time_within_train_step": 27.72133183479309, "step": 472} +{"train_info/time_between_train_steps": 0.0053942203521728516, "step": 472} +{"info/global_step": 473, "train_info/time_within_train_step": 27.710530519485474, "step": 473} +{"train_info/time_between_train_steps": 0.00524592399597168, "step": 473} +{"info/global_step": 474, "train_info/time_within_train_step": 27.817232131958008, "step": 474} +{"train_info/time_between_train_steps": 0.005448341369628906, "step": 474} +{"info/global_step": 475, "train_info/time_within_train_step": 27.727473735809326, "step": 475} +{"train_info/time_between_train_steps": 0.005663156509399414, "step": 475} +{"info/global_step": 476, "train_info/time_within_train_step": 27.723719835281372, "step": 476} +{"train_info/time_between_train_steps": 0.005388498306274414, "step": 476} +{"info/global_step": 477, "train_info/time_within_train_step": 27.728097200393677, "step": 477} +{"train_info/time_between_train_steps": 0.005383491516113281, "step": 477} +{"info/global_step": 478, "train_info/time_within_train_step": 27.729764938354492, "step": 478} +{"train_info/time_between_train_steps": 0.005254507064819336, "step": 478} +{"info/global_step": 479, "train_info/time_within_train_step": 27.724555730819702, "step": 479} +{"train_info/time_between_train_steps": 0.005271434783935547, "step": 479} +{"info/global_step": 480, "train_info/time_within_train_step": 27.715808153152466, "step": 480} +{"train_info/time_between_train_steps": 0.005319118499755859, "step": 480} +{"info/global_step": 481, "train_info/time_within_train_step": 27.725298166275024, "step": 481} +{"train_info/time_between_train_steps": 0.005860328674316406, "step": 481} +{"info/global_step": 482, "train_info/time_within_train_step": 27.71867847442627, "step": 482} +{"train_info/time_between_train_steps": 0.005178689956665039, "step": 482} +{"info/global_step": 483, "train_info/time_within_train_step": 27.72493267059326, "step": 483} +{"train_info/time_between_train_steps": 0.005697727203369141, "step": 483} +{"info/global_step": 484, "train_info/time_within_train_step": 27.728745222091675, "step": 484} +{"train_info/time_between_train_steps": 0.0052950382232666016, "step": 484} +{"info/global_step": 485, "train_info/time_within_train_step": 27.732770681381226, "step": 485} +{"train_info/time_between_train_steps": 0.005651950836181641, "step": 485} +{"info/global_step": 486, "train_info/time_within_train_step": 27.742435932159424, "step": 486} +{"train_info/time_between_train_steps": 0.0057942867279052734, "step": 486} +{"train_info/time_between_train_steps": 20.371830940246582, "step": 486} +{"info/global_step": 487, "train_info/time_within_train_step": 27.798008680343628, "step": 487} +{"train_info/time_between_train_steps": 0.01051020622253418, "step": 487} +{"info/global_step": 488, "train_info/time_within_train_step": 27.882977962493896, "step": 488} +{"train_info/time_between_train_steps": 0.005373239517211914, "step": 488} +{"info/global_step": 489, "train_info/time_within_train_step": 27.827993869781494, "step": 489} +{"train_info/time_between_train_steps": 0.005540132522583008, "step": 489} +{"info/global_step": 490, "train_info/time_within_train_step": 27.845259189605713, "step": 490} +{"train_info/time_between_train_steps": 0.006108283996582031, "step": 490} +{"info/global_step": 491, "train_info/time_within_train_step": 27.895047187805176, "step": 491} +{"train_info/time_between_train_steps": 0.0058901309967041016, "step": 491} +{"info/global_step": 492, "train_info/time_within_train_step": 28.009572744369507, "step": 492} +{"train_info/time_between_train_steps": 0.005830287933349609, "step": 492} +{"info/global_step": 493, "train_info/time_within_train_step": 27.821832418441772, "step": 493} +{"train_info/time_between_train_steps": 0.005843639373779297, "step": 493} +{"info/global_step": 494, "train_info/time_within_train_step": 27.851112842559814, "step": 494} +{"train_info/time_between_train_steps": 0.0057373046875, "step": 494} +{"info/global_step": 495, "train_info/time_within_train_step": 27.74678111076355, "step": 495} +{"train_info/time_between_train_steps": 0.005377292633056641, "step": 495} +{"info/global_step": 496, "train_info/time_within_train_step": 27.727030515670776, "step": 496} +{"train_info/time_between_train_steps": 0.00533604621887207, "step": 496} +{"info/global_step": 497, "train_info/time_within_train_step": 27.733636379241943, "step": 497} +{"train_info/time_between_train_steps": 0.005550861358642578, "step": 497} +{"info/global_step": 498, "train_info/time_within_train_step": 27.72152543067932, "step": 498} +{"train_info/time_between_train_steps": 0.005181550979614258, "step": 498} +{"info/global_step": 499, "train_info/time_within_train_step": 27.7232186794281, "step": 499} +{"train_info/time_between_train_steps": 0.005350828170776367, "step": 499} +{"info/global_step": 500, "train_info/time_within_train_step": 27.715269088745117, "step": 500} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733781942, "_runtime": 15323}, "step": 500} +{"logs": {"train/loss": 6.1713, "train/learning_rate": 0.00038888888888888887, "train/epoch": 18.01, "_timestamp": 1733781942, "_runtime": 15323}, "step": 500} +{"train_info/time_between_train_steps": 2.4822797775268555, "step": 500} +{"info/global_step": 501, "train_info/time_within_train_step": 27.716432571411133, "step": 501} +{"train_info/time_between_train_steps": 0.005099296569824219, "step": 501} +{"info/global_step": 502, "train_info/time_within_train_step": 27.714923858642578, "step": 502} +{"train_info/time_between_train_steps": 0.0053119659423828125, "step": 502} +{"info/global_step": 503, "train_info/time_within_train_step": 27.720680236816406, "step": 503} +{"train_info/time_between_train_steps": 0.005321025848388672, "step": 503} +{"info/global_step": 504, "train_info/time_within_train_step": 27.72151207923889, "step": 504} +{"train_info/time_between_train_steps": 0.00521087646484375, "step": 504} +{"info/global_step": 505, "train_info/time_within_train_step": 27.83202075958252, "step": 505} +{"train_info/time_between_train_steps": 0.005124330520629883, "step": 505} +{"info/global_step": 506, "train_info/time_within_train_step": 27.72769856452942, "step": 506} +{"train_info/time_between_train_steps": 0.005220174789428711, "step": 506} +{"info/global_step": 507, "train_info/time_within_train_step": 27.75665855407715, "step": 507} +{"train_info/time_between_train_steps": 0.00526738166809082, "step": 507} +{"info/global_step": 508, "train_info/time_within_train_step": 27.747904539108276, "step": 508} +{"train_info/time_between_train_steps": 0.0053293704986572266, "step": 508} +{"info/global_step": 509, "train_info/time_within_train_step": 27.737080097198486, "step": 509} +{"train_info/time_between_train_steps": 0.005377531051635742, "step": 509} +{"info/global_step": 510, "train_info/time_within_train_step": 27.73171591758728, "step": 510} +{"train_info/time_between_train_steps": 0.005522966384887695, "step": 510} +{"info/global_step": 511, "train_info/time_within_train_step": 27.738766193389893, "step": 511} +{"train_info/time_between_train_steps": 0.0053560733795166016, "step": 511} +{"info/global_step": 512, "train_info/time_within_train_step": 27.740710258483887, "step": 512} +{"train_info/time_between_train_steps": 0.00559687614440918, "step": 512} +{"info/global_step": 513, "train_info/time_within_train_step": 27.755672216415405, "step": 513} +{"train_info/time_between_train_steps": 0.005822896957397461, "step": 513} +{"train_info/time_between_train_steps": 20.566756010055542, "step": 513} +{"info/global_step": 514, "train_info/time_within_train_step": 27.724671363830566, "step": 514} +{"train_info/time_between_train_steps": 0.005094051361083984, "step": 514} +{"info/global_step": 515, "train_info/time_within_train_step": 27.82692527770996, "step": 515} +{"train_info/time_between_train_steps": 0.0051686763763427734, "step": 515} +{"info/global_step": 516, "train_info/time_within_train_step": 27.71712875366211, "step": 516} +{"train_info/time_between_train_steps": 0.005147695541381836, "step": 516} +{"info/global_step": 517, "train_info/time_within_train_step": 27.833961725234985, "step": 517} +{"train_info/time_between_train_steps": 0.005131959915161133, "step": 517} +{"info/global_step": 518, "train_info/time_within_train_step": 27.73602318763733, "step": 518} +{"train_info/time_between_train_steps": 0.00538945198059082, "step": 518} +{"info/global_step": 519, "train_info/time_within_train_step": 27.904099464416504, "step": 519} +{"train_info/time_between_train_steps": 0.005404233932495117, "step": 519} +{"info/global_step": 520, "train_info/time_within_train_step": 27.84563660621643, "step": 520} +{"train_info/time_between_train_steps": 0.005421161651611328, "step": 520} +{"info/global_step": 521, "train_info/time_within_train_step": 27.766014575958252, "step": 521} +{"train_info/time_between_train_steps": 0.00552058219909668, "step": 521} +{"info/global_step": 522, "train_info/time_within_train_step": 27.730560064315796, "step": 522} +{"train_info/time_between_train_steps": 0.005372524261474609, "step": 522} +{"info/global_step": 523, "train_info/time_within_train_step": 27.814233779907227, "step": 523} +{"train_info/time_between_train_steps": 0.005303621292114258, "step": 523} +{"info/global_step": 524, "train_info/time_within_train_step": 27.73629593849182, "step": 524} +{"train_info/time_between_train_steps": 0.005917072296142578, "step": 524} +{"info/global_step": 525, "train_info/time_within_train_step": 27.759125232696533, "step": 525} +{"train_info/time_between_train_steps": 0.006957530975341797, "step": 525} +{"info/global_step": 526, "train_info/time_within_train_step": 27.829918384552002, "step": 526} +{"train_info/time_between_train_steps": 0.006223917007446289, "step": 526} +{"info/global_step": 527, "train_info/time_within_train_step": 27.811914920806885, "step": 527} +{"train_info/time_between_train_steps": 0.00650787353515625, "step": 527} +{"info/global_step": 528, "train_info/time_within_train_step": 27.827170848846436, "step": 528} +{"train_info/time_between_train_steps": 0.006116390228271484, "step": 528} +{"info/global_step": 529, "train_info/time_within_train_step": 27.80693030357361, "step": 529} +{"train_info/time_between_train_steps": 0.0062258243560791016, "step": 529} +{"info/global_step": 530, "train_info/time_within_train_step": 27.800200700759888, "step": 530} +{"train_info/time_between_train_steps": 0.005957126617431641, "step": 530} +{"info/global_step": 531, "train_info/time_within_train_step": 27.95720362663269, "step": 531} +{"train_info/time_between_train_steps": 0.005984067916870117, "step": 531} +{"info/global_step": 532, "train_info/time_within_train_step": 27.798444986343384, "step": 532} +{"train_info/time_between_train_steps": 0.0060100555419921875, "step": 532} +{"info/global_step": 533, "train_info/time_within_train_step": 27.79438328742981, "step": 533} +{"train_info/time_between_train_steps": 0.006074666976928711, "step": 533} +{"info/global_step": 534, "train_info/time_within_train_step": 27.924410581588745, "step": 534} +{"train_info/time_between_train_steps": 0.0061948299407958984, "step": 534} +{"info/global_step": 535, "train_info/time_within_train_step": 27.891329765319824, "step": 535} +{"train_info/time_between_train_steps": 0.005620241165161133, "step": 535} +{"info/global_step": 536, "train_info/time_within_train_step": 27.74669885635376, "step": 536} +{"train_info/time_between_train_steps": 0.0052950382232666016, "step": 536} +{"info/global_step": 537, "train_info/time_within_train_step": 27.755208492279053, "step": 537} +{"train_info/time_between_train_steps": 0.0060999393463134766, "step": 537} +{"info/global_step": 538, "train_info/time_within_train_step": 27.763245820999146, "step": 538} +{"train_info/time_between_train_steps": 0.005489826202392578, "step": 538} +{"info/global_step": 539, "train_info/time_within_train_step": 27.771830320358276, "step": 539} +{"train_info/time_between_train_steps": 0.005984783172607422, "step": 539} +{"info/global_step": 540, "train_info/time_within_train_step": 27.8265962600708, "step": 540} +{"train_info/time_between_train_steps": 0.006003618240356445, "step": 540} +{"train_info/time_between_train_steps": 21.038755178451538, "step": 540} +{"info/global_step": 541, "train_info/time_within_train_step": 27.770321369171143, "step": 541} +{"train_info/time_between_train_steps": 0.005379438400268555, "step": 541} +{"info/global_step": 542, "train_info/time_within_train_step": 27.833483695983887, "step": 542} +{"train_info/time_between_train_steps": 0.005345821380615234, "step": 542} +{"info/global_step": 543, "train_info/time_within_train_step": 27.731684923171997, "step": 543} +{"train_info/time_between_train_steps": 0.005345821380615234, "step": 543} +{"info/global_step": 544, "train_info/time_within_train_step": 27.852859258651733, "step": 544} +{"train_info/time_between_train_steps": 0.00570225715637207, "step": 544} +{"info/global_step": 545, "train_info/time_within_train_step": 27.749220371246338, "step": 545} +{"train_info/time_between_train_steps": 0.0056416988372802734, "step": 545} +{"info/global_step": 546, "train_info/time_within_train_step": 27.864115238189697, "step": 546} +{"train_info/time_between_train_steps": 0.005595684051513672, "step": 546} +{"info/global_step": 547, "train_info/time_within_train_step": 27.749605655670166, "step": 547} +{"train_info/time_between_train_steps": 0.006085395812988281, "step": 547} +{"info/global_step": 548, "train_info/time_within_train_step": 27.77486491203308, "step": 548} +{"train_info/time_between_train_steps": 0.005651712417602539, "step": 548} +{"info/global_step": 549, "train_info/time_within_train_step": 27.743253707885742, "step": 549} +{"train_info/time_between_train_steps": 0.005225658416748047, "step": 549} +{"info/global_step": 550, "train_info/time_within_train_step": 27.81158995628357, "step": 550} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733783375, "_runtime": 16756}, "step": 550} +{"logs": {"train/loss": 6.0415, "train/learning_rate": 0.0003611111111111111, "train/epoch": 20.01, "_timestamp": 1733783375, "_runtime": 16756}, "step": 550} +{"train_info/time_between_train_steps": 0.007755279541015625, "step": 550} +{"info/global_step": 551, "train_info/time_within_train_step": 27.718382120132446, "step": 551} +{"train_info/time_between_train_steps": 0.005197286605834961, "step": 551} +{"info/global_step": 552, "train_info/time_within_train_step": 27.763626098632812, "step": 552} +{"train_info/time_between_train_steps": 0.005301952362060547, "step": 552} +{"info/global_step": 553, "train_info/time_within_train_step": 27.716792583465576, "step": 553} +{"train_info/time_between_train_steps": 0.0053594112396240234, "step": 553} +{"info/global_step": 554, "train_info/time_within_train_step": 27.72883367538452, "step": 554} +{"train_info/time_between_train_steps": 0.005585908889770508, "step": 554} +{"info/global_step": 555, "train_info/time_within_train_step": 27.74198341369629, "step": 555} +{"train_info/time_between_train_steps": 0.005482673645019531, "step": 555} +{"info/global_step": 556, "train_info/time_within_train_step": 27.73981499671936, "step": 556} +{"train_info/time_between_train_steps": 0.005612611770629883, "step": 556} +{"info/global_step": 557, "train_info/time_within_train_step": 27.733312845230103, "step": 557} +{"train_info/time_between_train_steps": 0.0054590702056884766, "step": 557} +{"info/global_step": 558, "train_info/time_within_train_step": 27.7398624420166, "step": 558} +{"train_info/time_between_train_steps": 0.005491495132446289, "step": 558} +{"info/global_step": 559, "train_info/time_within_train_step": 27.730668306350708, "step": 559} +{"train_info/time_between_train_steps": 0.005542278289794922, "step": 559} +{"info/global_step": 560, "train_info/time_within_train_step": 27.73592448234558, "step": 560} +{"train_info/time_between_train_steps": 0.005524396896362305, "step": 560} +{"info/global_step": 561, "train_info/time_within_train_step": 27.727267026901245, "step": 561} +{"train_info/time_between_train_steps": 0.005481719970703125, "step": 561} +{"info/global_step": 562, "train_info/time_within_train_step": 27.72666835784912, "step": 562} +{"train_info/time_between_train_steps": 0.005474567413330078, "step": 562} +{"info/global_step": 563, "train_info/time_within_train_step": 27.736587047576904, "step": 563} +{"train_info/time_between_train_steps": 0.005452871322631836, "step": 563} +{"info/global_step": 564, "train_info/time_within_train_step": 27.75309920310974, "step": 564} +{"train_info/time_between_train_steps": 0.0059680938720703125, "step": 564} +{"info/global_step": 565, "train_info/time_within_train_step": 27.728589057922363, "step": 565} +{"train_info/time_between_train_steps": 0.005448579788208008, "step": 565} +{"info/global_step": 566, "train_info/time_within_train_step": 27.84877848625183, "step": 566} +{"train_info/time_between_train_steps": 0.005629777908325195, "step": 566} +{"info/global_step": 567, "train_info/time_within_train_step": 27.752301931381226, "step": 567} +{"train_info/time_between_train_steps": 0.0060346126556396484, "step": 567} +{"train_info/time_between_train_steps": 20.387155771255493, "step": 567} +{"info/global_step": 568, "train_info/time_within_train_step": 27.711697101593018, "step": 568} +{"train_info/time_between_train_steps": 0.005171060562133789, "step": 568} +{"info/global_step": 569, "train_info/time_within_train_step": 27.81398630142212, "step": 569} +{"train_info/time_between_train_steps": 0.005208253860473633, "step": 569} +{"info/global_step": 570, "train_info/time_within_train_step": 27.70987105369568, "step": 570} +{"train_info/time_between_train_steps": 0.005160331726074219, "step": 570} +{"info/global_step": 571, "train_info/time_within_train_step": 27.872828483581543, "step": 571} +{"train_info/time_between_train_steps": 0.005309104919433594, "step": 571} +{"info/global_step": 572, "train_info/time_within_train_step": 27.72855043411255, "step": 572} +{"train_info/time_between_train_steps": 0.005650997161865234, "step": 572} +{"info/global_step": 573, "train_info/time_within_train_step": 27.85317373275757, "step": 573} +{"train_info/time_between_train_steps": 0.00555109977722168, "step": 573} +{"info/global_step": 574, "train_info/time_within_train_step": 27.736456871032715, "step": 574} +{"train_info/time_between_train_steps": 0.005531787872314453, "step": 574} +{"info/global_step": 575, "train_info/time_within_train_step": 27.76633906364441, "step": 575} +{"train_info/time_between_train_steps": 0.005552530288696289, "step": 575} +{"info/global_step": 576, "train_info/time_within_train_step": 27.72655415534973, "step": 576} +{"train_info/time_between_train_steps": 0.0051424503326416016, "step": 576} +{"info/global_step": 577, "train_info/time_within_train_step": 27.71582341194153, "step": 577} +{"train_info/time_between_train_steps": 0.0052490234375, "step": 577} +{"info/global_step": 578, "train_info/time_within_train_step": 27.714125633239746, "step": 578} +{"train_info/time_between_train_steps": 0.005158185958862305, "step": 578} +{"info/global_step": 579, "train_info/time_within_train_step": 27.712554454803467, "step": 579} +{"train_info/time_between_train_steps": 0.005213737487792969, "step": 579} +{"info/global_step": 580, "train_info/time_within_train_step": 27.726455450057983, "step": 580} +{"train_info/time_between_train_steps": 0.005186557769775391, "step": 580} +{"info/global_step": 581, "train_info/time_within_train_step": 27.813934087753296, "step": 581} +{"train_info/time_between_train_steps": 0.005249500274658203, "step": 581} +{"info/global_step": 582, "train_info/time_within_train_step": 27.719745635986328, "step": 582} +{"train_info/time_between_train_steps": 0.005381584167480469, "step": 582} +{"info/global_step": 583, "train_info/time_within_train_step": 27.71498465538025, "step": 583} +{"train_info/time_between_train_steps": 0.005284547805786133, "step": 583} +{"info/global_step": 584, "train_info/time_within_train_step": 27.710731506347656, "step": 584} +{"train_info/time_between_train_steps": 0.0053005218505859375, "step": 584} +{"info/global_step": 585, "train_info/time_within_train_step": 27.717923164367676, "step": 585} +{"train_info/time_between_train_steps": 0.00522160530090332, "step": 585} +{"info/global_step": 586, "train_info/time_within_train_step": 27.713150024414062, "step": 586} +{"train_info/time_between_train_steps": 0.005185604095458984, "step": 586} +{"info/global_step": 587, "train_info/time_within_train_step": 27.710695028305054, "step": 587} +{"train_info/time_between_train_steps": 0.0051727294921875, "step": 587} +{"info/global_step": 588, "train_info/time_within_train_step": 27.71364736557007, "step": 588} +{"train_info/time_between_train_steps": 0.005177974700927734, "step": 588} +{"info/global_step": 589, "train_info/time_within_train_step": 27.725160121917725, "step": 589} +{"train_info/time_between_train_steps": 0.005325794219970703, "step": 589} +{"info/global_step": 590, "train_info/time_within_train_step": 27.71025824546814, "step": 590} +{"train_info/time_between_train_steps": 0.005187034606933594, "step": 590} +{"info/global_step": 591, "train_info/time_within_train_step": 27.7237069606781, "step": 591} +{"train_info/time_between_train_steps": 0.005504608154296875, "step": 591} +{"info/global_step": 592, "train_info/time_within_train_step": 27.7268545627594, "step": 592} +{"train_info/time_between_train_steps": 0.005591630935668945, "step": 592} +{"info/global_step": 593, "train_info/time_within_train_step": 27.72862982749939, "step": 593} +{"train_info/time_between_train_steps": 0.005491495132446289, "step": 593} +{"info/global_step": 594, "train_info/time_within_train_step": 27.739279747009277, "step": 594} +{"train_info/time_between_train_steps": 0.005704641342163086, "step": 594} +{"train_info/time_between_train_steps": 20.535616874694824, "step": 594} +{"info/global_step": 595, "train_info/time_within_train_step": 27.7108314037323, "step": 595} +{"train_info/time_between_train_steps": 0.005213022232055664, "step": 595} +{"info/global_step": 596, "train_info/time_within_train_step": 28.02286386489868, "step": 596} +{"train_info/time_between_train_steps": 0.005221366882324219, "step": 596} +{"info/global_step": 597, "train_info/time_within_train_step": 27.713415145874023, "step": 597} +{"train_info/time_between_train_steps": 0.005324602127075195, "step": 597} +{"info/global_step": 598, "train_info/time_within_train_step": 27.854549407958984, "step": 598} +{"train_info/time_between_train_steps": 0.0053174495697021484, "step": 598} +{"info/global_step": 599, "train_info/time_within_train_step": 27.806285858154297, "step": 599} +{"train_info/time_between_train_steps": 0.005359649658203125, "step": 599} +{"info/global_step": 600, "train_info/time_within_train_step": 27.835862398147583, "step": 600} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733784804, "_runtime": 18185}, "step": 600} +{"logs": {"train/loss": 5.9176, "train/learning_rate": 0.0003333333333333333, "train/epoch": 22.0, "_timestamp": 1733784804, "_runtime": 18185}, "step": 600} +{"train_info/time_between_train_steps": 4.266299486160278, "step": 600} +{"info/global_step": 601, "train_info/time_within_train_step": 27.727527618408203, "step": 601} +{"train_info/time_between_train_steps": 0.00532841682434082, "step": 601} +{"info/global_step": 602, "train_info/time_within_train_step": 27.75957751274109, "step": 602} +{"train_info/time_between_train_steps": 0.005376577377319336, "step": 602} +{"info/global_step": 603, "train_info/time_within_train_step": 27.713665008544922, "step": 603} +{"train_info/time_between_train_steps": 0.004990339279174805, "step": 603} +{"info/global_step": 604, "train_info/time_within_train_step": 27.711345434188843, "step": 604} +{"train_info/time_between_train_steps": 0.005063772201538086, "step": 604} +{"info/global_step": 605, "train_info/time_within_train_step": 27.70863914489746, "step": 605} +{"train_info/time_between_train_steps": 0.0052640438079833984, "step": 605} +{"info/global_step": 606, "train_info/time_within_train_step": 27.715288162231445, "step": 606} +{"train_info/time_between_train_steps": 0.0051348209381103516, "step": 606} +{"info/global_step": 607, "train_info/time_within_train_step": 27.713682174682617, "step": 607} +{"train_info/time_between_train_steps": 0.005317211151123047, "step": 607} +{"info/global_step": 608, "train_info/time_within_train_step": 27.70889687538147, "step": 608} +{"train_info/time_between_train_steps": 0.0052280426025390625, "step": 608} +{"info/global_step": 609, "train_info/time_within_train_step": 27.71346402168274, "step": 609} +{"train_info/time_between_train_steps": 0.005179643630981445, "step": 609} +{"info/global_step": 610, "train_info/time_within_train_step": 27.712467908859253, "step": 610} +{"train_info/time_between_train_steps": 0.005241870880126953, "step": 610} +{"info/global_step": 611, "train_info/time_within_train_step": 27.706255435943604, "step": 611} +{"train_info/time_between_train_steps": 0.005326986312866211, "step": 611} +{"info/global_step": 612, "train_info/time_within_train_step": 27.812878370285034, "step": 612} +{"train_info/time_between_train_steps": 0.005146026611328125, "step": 612} +{"info/global_step": 613, "train_info/time_within_train_step": 27.71316385269165, "step": 613} +{"train_info/time_between_train_steps": 0.005307912826538086, "step": 613} +{"info/global_step": 614, "train_info/time_within_train_step": 27.708442211151123, "step": 614} +{"train_info/time_between_train_steps": 0.005200624465942383, "step": 614} +{"info/global_step": 615, "train_info/time_within_train_step": 27.712831735610962, "step": 615} +{"train_info/time_between_train_steps": 0.005200624465942383, "step": 615} +{"info/global_step": 616, "train_info/time_within_train_step": 27.713005542755127, "step": 616} +{"train_info/time_between_train_steps": 0.005129098892211914, "step": 616} +{"info/global_step": 617, "train_info/time_within_train_step": 27.718692302703857, "step": 617} +{"train_info/time_between_train_steps": 0.0051424503326416016, "step": 617} +{"info/global_step": 618, "train_info/time_within_train_step": 27.71594524383545, "step": 618} +{"train_info/time_between_train_steps": 0.005438089370727539, "step": 618} +{"info/global_step": 619, "train_info/time_within_train_step": 27.722145557403564, "step": 619} +{"train_info/time_between_train_steps": 0.0055408477783203125, "step": 619} +{"info/global_step": 620, "train_info/time_within_train_step": 27.726533889770508, "step": 620} +{"train_info/time_between_train_steps": 0.005636692047119141, "step": 620} +{"info/global_step": 621, "train_info/time_within_train_step": 27.743335008621216, "step": 621} +{"train_info/time_between_train_steps": 0.0057070255279541016, "step": 621} +{"train_info/time_between_train_steps": 20.607364177703857, "step": 621} +{"info/global_step": 622, "train_info/time_within_train_step": 27.71657109260559, "step": 622} +{"train_info/time_between_train_steps": 0.005101203918457031, "step": 622} +{"info/global_step": 623, "train_info/time_within_train_step": 27.81575632095337, "step": 623} +{"train_info/time_between_train_steps": 0.005205631256103516, "step": 623} +{"info/global_step": 624, "train_info/time_within_train_step": 27.708341598510742, "step": 624} +{"train_info/time_between_train_steps": 0.005097150802612305, "step": 624} +{"info/global_step": 625, "train_info/time_within_train_step": 27.839420080184937, "step": 625} +{"train_info/time_between_train_steps": 0.005322694778442383, "step": 625} +{"info/global_step": 626, "train_info/time_within_train_step": 27.727411031723022, "step": 626} +{"train_info/time_between_train_steps": 0.005389213562011719, "step": 626} +{"info/global_step": 627, "train_info/time_within_train_step": 27.94776964187622, "step": 627} +{"train_info/time_between_train_steps": 0.005367279052734375, "step": 627} +{"info/global_step": 628, "train_info/time_within_train_step": 27.716511249542236, "step": 628} +{"train_info/time_between_train_steps": 0.005344867706298828, "step": 628} +{"info/global_step": 629, "train_info/time_within_train_step": 27.75241231918335, "step": 629} +{"train_info/time_between_train_steps": 0.005403757095336914, "step": 629} +{"info/global_step": 630, "train_info/time_within_train_step": 27.715848207473755, "step": 630} +{"train_info/time_between_train_steps": 0.005059719085693359, "step": 630} +{"info/global_step": 631, "train_info/time_within_train_step": 27.700239181518555, "step": 631} +{"train_info/time_between_train_steps": 0.005076408386230469, "step": 631} +{"info/global_step": 632, "train_info/time_within_train_step": 27.70506000518799, "step": 632} +{"train_info/time_between_train_steps": 0.005063056945800781, "step": 632} +{"info/global_step": 633, "train_info/time_within_train_step": 27.70943307876587, "step": 633} +{"train_info/time_between_train_steps": 0.0051195621490478516, "step": 633} +{"info/global_step": 634, "train_info/time_within_train_step": 27.70059895515442, "step": 634} +{"train_info/time_between_train_steps": 0.0051593780517578125, "step": 634} +{"info/global_step": 635, "train_info/time_within_train_step": 27.7137610912323, "step": 635} +{"train_info/time_between_train_steps": 0.005328655242919922, "step": 635} +{"info/global_step": 636, "train_info/time_within_train_step": 27.71393656730652, "step": 636} +{"train_info/time_between_train_steps": 0.01093745231628418, "step": 636} +{"info/global_step": 637, "train_info/time_within_train_step": 27.712388515472412, "step": 637} +{"train_info/time_between_train_steps": 0.010264158248901367, "step": 637} +{"info/global_step": 638, "train_info/time_within_train_step": 27.713550567626953, "step": 638} +{"train_info/time_between_train_steps": 0.010260820388793945, "step": 638} +{"info/global_step": 639, "train_info/time_within_train_step": 27.713217973709106, "step": 639} +{"train_info/time_between_train_steps": 0.010183095932006836, "step": 639} +{"info/global_step": 640, "train_info/time_within_train_step": 27.722036123275757, "step": 640} +{"train_info/time_between_train_steps": 0.005474567413330078, "step": 640} +{"info/global_step": 641, "train_info/time_within_train_step": 27.743147373199463, "step": 641} +{"train_info/time_between_train_steps": 0.005151987075805664, "step": 641} +{"info/global_step": 642, "train_info/time_within_train_step": 27.822325944900513, "step": 642} +{"train_info/time_between_train_steps": 0.005288124084472656, "step": 642} +{"info/global_step": 643, "train_info/time_within_train_step": 27.71394419670105, "step": 643} +{"train_info/time_between_train_steps": 0.005268573760986328, "step": 643} +{"info/global_step": 644, "train_info/time_within_train_step": 27.71914315223694, "step": 644} +{"train_info/time_between_train_steps": 0.00530242919921875, "step": 644} +{"info/global_step": 645, "train_info/time_within_train_step": 27.71682095527649, "step": 645} +{"train_info/time_between_train_steps": 0.005506753921508789, "step": 645} +{"info/global_step": 646, "train_info/time_within_train_step": 27.719871282577515, "step": 646} +{"train_info/time_between_train_steps": 0.005345582962036133, "step": 646} +{"info/global_step": 647, "train_info/time_within_train_step": 27.72776699066162, "step": 647} +{"train_info/time_between_train_steps": 0.0056111812591552734, "step": 647} +{"info/global_step": 648, "train_info/time_within_train_step": 27.738758325576782, "step": 648} +{"train_info/time_between_train_steps": 0.005819082260131836, "step": 648} +{"train_info/time_between_train_steps": 20.3405499458313, "step": 648} +{"info/global_step": 649, "train_info/time_within_train_step": 27.725836753845215, "step": 649} +{"train_info/time_between_train_steps": 0.0054738521575927734, "step": 649} +{"info/global_step": 650, "train_info/time_within_train_step": 27.855989933013916, "step": 650} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733786236, "_runtime": 19617}, "step": 650} +{"logs": {"train/loss": 5.7948, "train/learning_rate": 0.00030555555555555555, "train/epoch": 24.0, "_timestamp": 1733786236, "_runtime": 19617}, "step": 650} +{"train_info/time_between_train_steps": 0.007226467132568359, "step": 650} +{"info/global_step": 651, "train_info/time_within_train_step": 27.721267461776733, "step": 651} +{"train_info/time_between_train_steps": 0.005258083343505859, "step": 651} +{"info/global_step": 652, "train_info/time_within_train_step": 27.853062868118286, "step": 652} +{"train_info/time_between_train_steps": 0.005333662033081055, "step": 652} +{"info/global_step": 653, "train_info/time_within_train_step": 27.727018356323242, "step": 653} +{"train_info/time_between_train_steps": 0.00551915168762207, "step": 653} +{"info/global_step": 654, "train_info/time_within_train_step": 27.854568243026733, "step": 654} +{"train_info/time_between_train_steps": 0.005257129669189453, "step": 654} +{"info/global_step": 655, "train_info/time_within_train_step": 27.75527000427246, "step": 655} +{"train_info/time_between_train_steps": 0.005318403244018555, "step": 655} +{"info/global_step": 656, "train_info/time_within_train_step": 27.74813437461853, "step": 656} +{"train_info/time_between_train_steps": 0.00532221794128418, "step": 656} +{"info/global_step": 657, "train_info/time_within_train_step": 27.82487964630127, "step": 657} +{"train_info/time_between_train_steps": 0.005124568939208984, "step": 657} +{"info/global_step": 658, "train_info/time_within_train_step": 27.70989441871643, "step": 658} +{"train_info/time_between_train_steps": 0.005183219909667969, "step": 658} +{"info/global_step": 659, "train_info/time_within_train_step": 27.711559534072876, "step": 659} +{"train_info/time_between_train_steps": 0.005219697952270508, "step": 659} +{"info/global_step": 660, "train_info/time_within_train_step": 27.71470046043396, "step": 660} +{"train_info/time_between_train_steps": 0.005318403244018555, "step": 660} +{"info/global_step": 661, "train_info/time_within_train_step": 27.714060068130493, "step": 661} +{"train_info/time_between_train_steps": 0.005311012268066406, "step": 661} +{"info/global_step": 662, "train_info/time_within_train_step": 27.714988470077515, "step": 662} +{"train_info/time_between_train_steps": 0.005238533020019531, "step": 662} +{"info/global_step": 663, "train_info/time_within_train_step": 27.716995000839233, "step": 663} +{"train_info/time_between_train_steps": 0.0052280426025390625, "step": 663} +{"info/global_step": 664, "train_info/time_within_train_step": 27.70900535583496, "step": 664} +{"train_info/time_between_train_steps": 0.005229949951171875, "step": 664} +{"info/global_step": 665, "train_info/time_within_train_step": 27.71096396446228, "step": 665} +{"train_info/time_between_train_steps": 0.0052225589752197266, "step": 665} +{"info/global_step": 666, "train_info/time_within_train_step": 27.71536636352539, "step": 666} +{"train_info/time_between_train_steps": 0.005227088928222656, "step": 666} +{"info/global_step": 667, "train_info/time_within_train_step": 27.718122005462646, "step": 667} +{"train_info/time_between_train_steps": 0.0054666996002197266, "step": 667} +{"info/global_step": 668, "train_info/time_within_train_step": 27.71695375442505, "step": 668} +{"train_info/time_between_train_steps": 0.005222320556640625, "step": 668} +{"info/global_step": 669, "train_info/time_within_train_step": 27.7179434299469, "step": 669} +{"train_info/time_between_train_steps": 0.005234479904174805, "step": 669} +{"info/global_step": 670, "train_info/time_within_train_step": 27.714807748794556, "step": 670} +{"train_info/time_between_train_steps": 0.005201816558837891, "step": 670} +{"info/global_step": 671, "train_info/time_within_train_step": 27.717024564743042, "step": 671} +{"train_info/time_between_train_steps": 0.005215644836425781, "step": 671} +{"info/global_step": 672, "train_info/time_within_train_step": 27.720632553100586, "step": 672} +{"train_info/time_between_train_steps": 0.0054967403411865234, "step": 672} +{"info/global_step": 673, "train_info/time_within_train_step": 27.828168869018555, "step": 673} +{"train_info/time_between_train_steps": 0.005404233932495117, "step": 673} +{"info/global_step": 674, "train_info/time_within_train_step": 27.7313494682312, "step": 674} +{"train_info/time_between_train_steps": 0.0056760311126708984, "step": 674} +{"info/global_step": 675, "train_info/time_within_train_step": 27.738940477371216, "step": 675} +{"train_info/time_between_train_steps": 0.005786895751953125, "step": 675} +{"train_info/time_between_train_steps": 20.484272480010986, "step": 675} +{"info/global_step": 676, "train_info/time_within_train_step": 27.70919394493103, "step": 676} +{"train_info/time_between_train_steps": 0.0050504207611083984, "step": 676} +{"info/global_step": 677, "train_info/time_within_train_step": 27.81149649620056, "step": 677} +{"train_info/time_between_train_steps": 0.005137920379638672, "step": 677} +{"info/global_step": 678, "train_info/time_within_train_step": 27.70486569404602, "step": 678} +{"train_info/time_between_train_steps": 0.005109071731567383, "step": 678} +{"info/global_step": 679, "train_info/time_within_train_step": 27.857587337493896, "step": 679} +{"train_info/time_between_train_steps": 0.005306720733642578, "step": 679} +{"info/global_step": 680, "train_info/time_within_train_step": 27.72094988822937, "step": 680} +{"train_info/time_between_train_steps": 0.0053408145904541016, "step": 680} +{"info/global_step": 681, "train_info/time_within_train_step": 27.846789598464966, "step": 681} +{"train_info/time_between_train_steps": 0.005354642868041992, "step": 681} +{"info/global_step": 682, "train_info/time_within_train_step": 27.725337982177734, "step": 682} +{"train_info/time_between_train_steps": 0.005377769470214844, "step": 682} +{"info/global_step": 683, "train_info/time_within_train_step": 27.75242805480957, "step": 683} +{"train_info/time_between_train_steps": 0.005381345748901367, "step": 683} +{"info/global_step": 684, "train_info/time_within_train_step": 27.70965051651001, "step": 684} +{"train_info/time_between_train_steps": 0.0050296783447265625, "step": 684} +{"info/global_step": 685, "train_info/time_within_train_step": 27.70731496810913, "step": 685} +{"train_info/time_between_train_steps": 0.005069255828857422, "step": 685} +{"info/global_step": 686, "train_info/time_within_train_step": 27.703362464904785, "step": 686} +{"train_info/time_between_train_steps": 0.005159139633178711, "step": 686} +{"info/global_step": 687, "train_info/time_within_train_step": 27.71030592918396, "step": 687} +{"train_info/time_between_train_steps": 0.005197048187255859, "step": 687} +{"info/global_step": 688, "train_info/time_within_train_step": 27.9756338596344, "step": 688} +{"train_info/time_between_train_steps": 0.0052149295806884766, "step": 688} +{"info/global_step": 689, "train_info/time_within_train_step": 27.70952558517456, "step": 689} +{"train_info/time_between_train_steps": 0.005257844924926758, "step": 689} +{"info/global_step": 690, "train_info/time_within_train_step": 27.709233283996582, "step": 690} +{"train_info/time_between_train_steps": 0.005185604095458984, "step": 690} +{"info/global_step": 691, "train_info/time_within_train_step": 27.712592840194702, "step": 691} +{"train_info/time_between_train_steps": 0.0051577091217041016, "step": 691} +{"info/global_step": 692, "train_info/time_within_train_step": 27.708759307861328, "step": 692} +{"train_info/time_between_train_steps": 0.005213499069213867, "step": 692} +{"info/global_step": 693, "train_info/time_within_train_step": 27.706703662872314, "step": 693} +{"train_info/time_between_train_steps": 0.0051953792572021484, "step": 693} +{"info/global_step": 694, "train_info/time_within_train_step": 27.708815813064575, "step": 694} +{"train_info/time_between_train_steps": 0.0052492618560791016, "step": 694} +{"info/global_step": 695, "train_info/time_within_train_step": 27.712486743927002, "step": 695} +{"train_info/time_between_train_steps": 0.005295276641845703, "step": 695} +{"info/global_step": 696, "train_info/time_within_train_step": 27.7090322971344, "step": 696} +{"train_info/time_between_train_steps": 0.005208015441894531, "step": 696} +{"info/global_step": 697, "train_info/time_within_train_step": 27.712116718292236, "step": 697} +{"train_info/time_between_train_steps": 0.005167961120605469, "step": 697} +{"info/global_step": 698, "train_info/time_within_train_step": 27.707263231277466, "step": 698} +{"train_info/time_between_train_steps": 0.00514674186706543, "step": 698} +{"info/global_step": 699, "train_info/time_within_train_step": 27.711621522903442, "step": 699} +{"train_info/time_between_train_steps": 0.005442619323730469, "step": 699} +{"info/global_step": 700, "train_info/time_within_train_step": 27.722087860107422, "step": 700} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733787644, "_runtime": 21025}, "step": 700} +{"logs": {"train/loss": 5.6087, "train/learning_rate": 0.0002777777777777778, "train/epoch": 25.02, "_timestamp": 1733787644, "_runtime": 21025}, "step": 700} +{"train_info/time_between_train_steps": 2.2482388019561768, "step": 700} +{"info/global_step": 701, "train_info/time_within_train_step": 27.727149963378906, "step": 701} +{"train_info/time_between_train_steps": 0.0057353973388671875, "step": 701} +{"info/global_step": 702, "train_info/time_within_train_step": 27.73650074005127, "step": 702} +{"train_info/time_between_train_steps": 0.005633831024169922, "step": 702} +{"train_info/time_between_train_steps": 20.583053827285767, "step": 702} +{"info/global_step": 703, "train_info/time_within_train_step": 27.823906898498535, "step": 703} +{"train_info/time_between_train_steps": 0.005166530609130859, "step": 703} +{"info/global_step": 704, "train_info/time_within_train_step": 27.812964916229248, "step": 704} +{"train_info/time_between_train_steps": 0.005113124847412109, "step": 704} +{"info/global_step": 705, "train_info/time_within_train_step": 27.70541787147522, "step": 705} +{"train_info/time_between_train_steps": 0.0052263736724853516, "step": 705} +{"info/global_step": 706, "train_info/time_within_train_step": 27.82674288749695, "step": 706} +{"train_info/time_between_train_steps": 0.005339860916137695, "step": 706} +{"info/global_step": 707, "train_info/time_within_train_step": 27.729567766189575, "step": 707} +{"train_info/time_between_train_steps": 0.005471706390380859, "step": 707} +{"info/global_step": 708, "train_info/time_within_train_step": 27.858155727386475, "step": 708} +{"train_info/time_between_train_steps": 0.005360841751098633, "step": 708} +{"info/global_step": 709, "train_info/time_within_train_step": 27.721942901611328, "step": 709} +{"train_info/time_between_train_steps": 0.005292177200317383, "step": 709} +{"info/global_step": 710, "train_info/time_within_train_step": 27.7759747505188, "step": 710} +{"train_info/time_between_train_steps": 0.0054280757904052734, "step": 710} +{"info/global_step": 711, "train_info/time_within_train_step": 27.709402799606323, "step": 711} +{"train_info/time_between_train_steps": 0.0050525665283203125, "step": 711} +{"info/global_step": 712, "train_info/time_within_train_step": 27.705705642700195, "step": 712} +{"train_info/time_between_train_steps": 0.0050373077392578125, "step": 712} +{"info/global_step": 713, "train_info/time_within_train_step": 27.701940298080444, "step": 713} +{"train_info/time_between_train_steps": 0.004984140396118164, "step": 713} +{"info/global_step": 714, "train_info/time_within_train_step": 27.700701475143433, "step": 714} +{"train_info/time_between_train_steps": 0.005061626434326172, "step": 714} +{"info/global_step": 715, "train_info/time_within_train_step": 27.70499300956726, "step": 715} +{"train_info/time_between_train_steps": 0.0051937103271484375, "step": 715} +{"info/global_step": 716, "train_info/time_within_train_step": 27.706270456314087, "step": 716} +{"train_info/time_between_train_steps": 0.005315065383911133, "step": 716} +{"info/global_step": 717, "train_info/time_within_train_step": 27.71243715286255, "step": 717} +{"train_info/time_between_train_steps": 0.005292415618896484, "step": 717} +{"info/global_step": 718, "train_info/time_within_train_step": 27.715280532836914, "step": 718} +{"train_info/time_between_train_steps": 0.005202054977416992, "step": 718} +{"info/global_step": 719, "train_info/time_within_train_step": 27.81066083908081, "step": 719} +{"train_info/time_between_train_steps": 0.005167484283447266, "step": 719} +{"info/global_step": 720, "train_info/time_within_train_step": 27.710790157318115, "step": 720} +{"train_info/time_between_train_steps": 0.005116939544677734, "step": 720} +{"info/global_step": 721, "train_info/time_within_train_step": 27.7087562084198, "step": 721} +{"train_info/time_between_train_steps": 0.005136251449584961, "step": 721} +{"info/global_step": 722, "train_info/time_within_train_step": 27.711397886276245, "step": 722} +{"train_info/time_between_train_steps": 0.005283832550048828, "step": 722} +{"info/global_step": 723, "train_info/time_within_train_step": 27.715529441833496, "step": 723} +{"train_info/time_between_train_steps": 0.0053751468658447266, "step": 723} +{"info/global_step": 724, "train_info/time_within_train_step": 27.712781190872192, "step": 724} +{"train_info/time_between_train_steps": 0.005200862884521484, "step": 724} +{"info/global_step": 725, "train_info/time_within_train_step": 27.710554122924805, "step": 725} +{"train_info/time_between_train_steps": 0.005129814147949219, "step": 725} +{"info/global_step": 726, "train_info/time_within_train_step": 27.718453645706177, "step": 726} +{"train_info/time_between_train_steps": 0.005445957183837891, "step": 726} +{"info/global_step": 727, "train_info/time_within_train_step": 27.714922428131104, "step": 727} +{"train_info/time_between_train_steps": 0.005316495895385742, "step": 727} +{"info/global_step": 728, "train_info/time_within_train_step": 27.72647738456726, "step": 728} +{"train_info/time_between_train_steps": 0.005577802658081055, "step": 728} +{"info/global_step": 729, "train_info/time_within_train_step": 27.74191927909851, "step": 729} +{"train_info/time_between_train_steps": 0.0057373046875, "step": 729} +{"train_info/time_between_train_steps": 20.343745946884155, "step": 729} +{"info/global_step": 730, "train_info/time_within_train_step": 27.71099853515625, "step": 730} +{"train_info/time_between_train_steps": 0.005179166793823242, "step": 730} +{"info/global_step": 731, "train_info/time_within_train_step": 27.81301259994507, "step": 731} +{"train_info/time_between_train_steps": 0.005141019821166992, "step": 731} +{"info/global_step": 732, "train_info/time_within_train_step": 27.707287549972534, "step": 732} +{"train_info/time_between_train_steps": 0.005116701126098633, "step": 732} +{"info/global_step": 733, "train_info/time_within_train_step": 27.819475889205933, "step": 733} +{"train_info/time_between_train_steps": 0.005151033401489258, "step": 733} +{"info/global_step": 734, "train_info/time_within_train_step": 27.801271438598633, "step": 734} +{"train_info/time_between_train_steps": 0.005197286605834961, "step": 734} +{"info/global_step": 735, "train_info/time_within_train_step": 27.86262607574463, "step": 735} +{"train_info/time_between_train_steps": 0.005539894104003906, "step": 735} +{"info/global_step": 736, "train_info/time_within_train_step": 27.72641944885254, "step": 736} +{"train_info/time_between_train_steps": 0.005430936813354492, "step": 736} +{"info/global_step": 737, "train_info/time_within_train_step": 27.756712436676025, "step": 737} +{"train_info/time_between_train_steps": 0.005395412445068359, "step": 737} +{"info/global_step": 738, "train_info/time_within_train_step": 27.71564817428589, "step": 738} +{"train_info/time_between_train_steps": 0.005171060562133789, "step": 738} +{"info/global_step": 739, "train_info/time_within_train_step": 27.708048105239868, "step": 739} +{"train_info/time_between_train_steps": 0.005049228668212891, "step": 739} +{"info/global_step": 740, "train_info/time_within_train_step": 27.707250356674194, "step": 740} +{"train_info/time_between_train_steps": 0.005103111267089844, "step": 740} +{"info/global_step": 741, "train_info/time_within_train_step": 27.706034183502197, "step": 741} +{"train_info/time_between_train_steps": 0.0051746368408203125, "step": 741} +{"info/global_step": 742, "train_info/time_within_train_step": 27.710402965545654, "step": 742} +{"train_info/time_between_train_steps": 0.005103349685668945, "step": 742} +{"info/global_step": 743, "train_info/time_within_train_step": 27.709529876708984, "step": 743} +{"train_info/time_between_train_steps": 0.0050945281982421875, "step": 743} +{"info/global_step": 744, "train_info/time_within_train_step": 27.71019673347473, "step": 744} +{"train_info/time_between_train_steps": 0.005236625671386719, "step": 744} +{"info/global_step": 745, "train_info/time_within_train_step": 27.718459606170654, "step": 745} +{"train_info/time_between_train_steps": 0.005295991897583008, "step": 745} +{"info/global_step": 746, "train_info/time_within_train_step": 27.713711738586426, "step": 746} +{"train_info/time_between_train_steps": 0.00538945198059082, "step": 746} +{"info/global_step": 747, "train_info/time_within_train_step": 27.713026523590088, "step": 747} +{"train_info/time_between_train_steps": 0.005173683166503906, "step": 747} +{"info/global_step": 748, "train_info/time_within_train_step": 27.71451687812805, "step": 748} +{"train_info/time_between_train_steps": 0.0051860809326171875, "step": 748} +{"info/global_step": 749, "train_info/time_within_train_step": 27.804904222488403, "step": 749} +{"train_info/time_between_train_steps": 0.00513911247253418, "step": 749} +{"info/global_step": 750, "train_info/time_within_train_step": 27.709951400756836, "step": 750} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733789075, "_runtime": 22456}, "step": 750} +{"logs": {"train/loss": 5.5749, "train/learning_rate": 0.00025, "train/epoch": 27.02, "_timestamp": 1733789075, "_runtime": 22456}, "step": 750} +{"train_info/time_between_train_steps": 0.006907224655151367, "step": 750} +{"info/global_step": 751, "train_info/time_within_train_step": 27.716103553771973, "step": 751} +{"train_info/time_between_train_steps": 0.005266427993774414, "step": 751} +{"info/global_step": 752, "train_info/time_within_train_step": 27.716997385025024, "step": 752} +{"train_info/time_between_train_steps": 0.005218505859375, "step": 752} +{"info/global_step": 753, "train_info/time_within_train_step": 27.719446420669556, "step": 753} +{"train_info/time_between_train_steps": 0.00563359260559082, "step": 753} +{"info/global_step": 754, "train_info/time_within_train_step": 27.72204279899597, "step": 754} +{"train_info/time_between_train_steps": 0.005505800247192383, "step": 754} +{"info/global_step": 755, "train_info/time_within_train_step": 27.725513219833374, "step": 755} +{"train_info/time_between_train_steps": 0.005609035491943359, "step": 755} +{"info/global_step": 756, "train_info/time_within_train_step": 27.739434003829956, "step": 756} +{"train_info/time_between_train_steps": 0.005630970001220703, "step": 756} +{"train_info/time_between_train_steps": 20.426955938339233, "step": 756} +{"info/global_step": 757, "train_info/time_within_train_step": 27.729626893997192, "step": 757} +{"train_info/time_between_train_steps": 0.0054438114166259766, "step": 757} +{"info/global_step": 758, "train_info/time_within_train_step": 27.858184576034546, "step": 758} +{"train_info/time_between_train_steps": 0.005381584167480469, "step": 758} +{"info/global_step": 759, "train_info/time_within_train_step": 27.71963334083557, "step": 759} +{"train_info/time_between_train_steps": 0.00531315803527832, "step": 759} +{"info/global_step": 760, "train_info/time_within_train_step": 27.84357500076294, "step": 760} +{"train_info/time_between_train_steps": 0.005425453186035156, "step": 760} +{"info/global_step": 761, "train_info/time_within_train_step": 27.730714559555054, "step": 761} +{"train_info/time_between_train_steps": 0.005571126937866211, "step": 761} +{"info/global_step": 762, "train_info/time_within_train_step": 27.829345703125, "step": 762} +{"train_info/time_between_train_steps": 0.010449886322021484, "step": 762} +{"info/global_step": 763, "train_info/time_within_train_step": 27.720138788223267, "step": 763} +{"train_info/time_between_train_steps": 0.010361909866333008, "step": 763} +{"info/global_step": 764, "train_info/time_within_train_step": 27.839035749435425, "step": 764} +{"train_info/time_between_train_steps": 0.009246349334716797, "step": 764} +{"info/global_step": 765, "train_info/time_within_train_step": 27.709754943847656, "step": 765} +{"train_info/time_between_train_steps": 0.0050394535064697266, "step": 765} +{"info/global_step": 766, "train_info/time_within_train_step": 27.702430963516235, "step": 766} +{"train_info/time_between_train_steps": 0.010311126708984375, "step": 766} +{"info/global_step": 767, "train_info/time_within_train_step": 27.708892583847046, "step": 767} +{"train_info/time_between_train_steps": 0.010244607925415039, "step": 767} +{"info/global_step": 768, "train_info/time_within_train_step": 27.70833992958069, "step": 768} +{"train_info/time_between_train_steps": 0.005339384078979492, "step": 768} +{"info/global_step": 769, "train_info/time_within_train_step": 27.708509922027588, "step": 769} +{"train_info/time_between_train_steps": 0.005108833312988281, "step": 769} +{"info/global_step": 770, "train_info/time_within_train_step": 27.706814527511597, "step": 770} +{"train_info/time_between_train_steps": 0.010429143905639648, "step": 770} +{"info/global_step": 771, "train_info/time_within_train_step": 27.708596229553223, "step": 771} +{"train_info/time_between_train_steps": 0.0051610469818115234, "step": 771} +{"info/global_step": 772, "train_info/time_within_train_step": 27.709717273712158, "step": 772} +{"train_info/time_between_train_steps": 0.010172605514526367, "step": 772} +{"info/global_step": 773, "train_info/time_within_train_step": 27.704963445663452, "step": 773} +{"train_info/time_between_train_steps": 0.0052490234375, "step": 773} +{"info/global_step": 774, "train_info/time_within_train_step": 27.704702377319336, "step": 774} +{"train_info/time_between_train_steps": 0.010315179824829102, "step": 774} +{"info/global_step": 775, "train_info/time_within_train_step": 27.712239503860474, "step": 775} +{"train_info/time_between_train_steps": 0.010195255279541016, "step": 775} +{"info/global_step": 776, "train_info/time_within_train_step": 27.710826635360718, "step": 776} +{"train_info/time_between_train_steps": 0.010623693466186523, "step": 776} +{"info/global_step": 777, "train_info/time_within_train_step": 27.707417964935303, "step": 777} +{"train_info/time_between_train_steps": 0.010400533676147461, "step": 777} +{"info/global_step": 778, "train_info/time_within_train_step": 27.72619938850403, "step": 778} +{"train_info/time_between_train_steps": 0.010149478912353516, "step": 778} +{"info/global_step": 779, "train_info/time_within_train_step": 27.710745573043823, "step": 779} +{"train_info/time_between_train_steps": 0.005258321762084961, "step": 779} +{"info/global_step": 780, "train_info/time_within_train_step": 27.835665464401245, "step": 780} +{"train_info/time_between_train_steps": 0.005376338958740234, "step": 780} +{"info/global_step": 781, "train_info/time_within_train_step": 27.72098731994629, "step": 781} +{"train_info/time_between_train_steps": 0.005284547805786133, "step": 781} +{"info/global_step": 782, "train_info/time_within_train_step": 27.72776961326599, "step": 782} +{"train_info/time_between_train_steps": 0.010768413543701172, "step": 782} +{"info/global_step": 783, "train_info/time_within_train_step": 27.740514278411865, "step": 783} +{"train_info/time_between_train_steps": 0.005561113357543945, "step": 783} +{"train_info/time_between_train_steps": 20.595609664916992, "step": 783} +{"info/global_step": 784, "train_info/time_within_train_step": 27.70769953727722, "step": 784} +{"train_info/time_between_train_steps": 0.005068778991699219, "step": 784} +{"info/global_step": 785, "train_info/time_within_train_step": 27.814242124557495, "step": 785} +{"train_info/time_between_train_steps": 0.005171298980712891, "step": 785} +{"info/global_step": 786, "train_info/time_within_train_step": 27.709964275360107, "step": 786} +{"train_info/time_between_train_steps": 0.005095958709716797, "step": 786} +{"info/global_step": 787, "train_info/time_within_train_step": 27.820482969284058, "step": 787} +{"train_info/time_between_train_steps": 0.0051839351654052734, "step": 787} +{"info/global_step": 788, "train_info/time_within_train_step": 27.71790838241577, "step": 788} +{"train_info/time_between_train_steps": 0.005510807037353516, "step": 788} +{"info/global_step": 789, "train_info/time_within_train_step": 27.862032413482666, "step": 789} +{"train_info/time_between_train_steps": 0.005400896072387695, "step": 789} +{"info/global_step": 790, "train_info/time_within_train_step": 27.719432592391968, "step": 790} +{"train_info/time_between_train_steps": 0.005329132080078125, "step": 790} +{"info/global_step": 791, "train_info/time_within_train_step": 27.761430978775024, "step": 791} +{"train_info/time_between_train_steps": 0.0054738521575927734, "step": 791} +{"info/global_step": 792, "train_info/time_within_train_step": 27.71486473083496, "step": 792} +{"train_info/time_between_train_steps": 0.005098104476928711, "step": 792} +{"info/global_step": 793, "train_info/time_within_train_step": 27.70523452758789, "step": 793} +{"train_info/time_between_train_steps": 0.005061149597167969, "step": 793} +{"info/global_step": 794, "train_info/time_within_train_step": 27.7073016166687, "step": 794} +{"train_info/time_between_train_steps": 0.005123615264892578, "step": 794} +{"info/global_step": 795, "train_info/time_within_train_step": 27.79643940925598, "step": 795} +{"train_info/time_between_train_steps": 0.005238533020019531, "step": 795} +{"info/global_step": 796, "train_info/time_within_train_step": 27.704506874084473, "step": 796} +{"train_info/time_between_train_steps": 0.010352849960327148, "step": 796} +{"info/global_step": 797, "train_info/time_within_train_step": 27.71424102783203, "step": 797} +{"train_info/time_between_train_steps": 0.011022567749023438, "step": 797} +{"info/global_step": 798, "train_info/time_within_train_step": 27.710432529449463, "step": 798} +{"train_info/time_between_train_steps": 0.005392551422119141, "step": 798} +{"info/global_step": 799, "train_info/time_within_train_step": 27.71053647994995, "step": 799} +{"train_info/time_between_train_steps": 0.005177974700927734, "step": 799} +{"info/global_step": 800, "train_info/time_within_train_step": 27.712616443634033, "step": 800} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733790503, "_runtime": 23884}, "step": 800} +{"logs": {"train/loss": 5.4786, "train/learning_rate": 0.00022222222222222218, "train/epoch": 29.01, "_timestamp": 1733790503, "_runtime": 23884}, "step": 800} +{"train_info/time_between_train_steps": 2.3383657932281494, "step": 800} +{"info/global_step": 801, "train_info/time_within_train_step": 27.707674980163574, "step": 801} +{"train_info/time_between_train_steps": 0.005152225494384766, "step": 801} +{"info/global_step": 802, "train_info/time_within_train_step": 27.710288047790527, "step": 802} +{"train_info/time_between_train_steps": 0.005098104476928711, "step": 802} +{"info/global_step": 803, "train_info/time_within_train_step": 27.71124005317688, "step": 803} +{"train_info/time_between_train_steps": 0.005166053771972656, "step": 803} +{"info/global_step": 804, "train_info/time_within_train_step": 27.709278345108032, "step": 804} +{"train_info/time_between_train_steps": 0.005146026611328125, "step": 804} +{"info/global_step": 805, "train_info/time_within_train_step": 27.713525772094727, "step": 805} +{"train_info/time_between_train_steps": 0.005146503448486328, "step": 805} +{"info/global_step": 806, "train_info/time_within_train_step": 27.71790862083435, "step": 806} +{"train_info/time_between_train_steps": 0.005339860916137695, "step": 806} +{"info/global_step": 807, "train_info/time_within_train_step": 27.716212272644043, "step": 807} +{"train_info/time_between_train_steps": 0.010483980178833008, "step": 807} +{"info/global_step": 808, "train_info/time_within_train_step": 27.718798398971558, "step": 808} +{"train_info/time_between_train_steps": 0.010604143142700195, "step": 808} +{"info/global_step": 809, "train_info/time_within_train_step": 27.726027011871338, "step": 809} +{"train_info/time_between_train_steps": 0.010477542877197266, "step": 809} +{"info/global_step": 810, "train_info/time_within_train_step": 27.82737398147583, "step": 810} +{"train_info/time_between_train_steps": 0.01160883903503418, "step": 810} +{"train_info/time_between_train_steps": 20.33428454399109, "step": 810} +{"info/global_step": 811, "train_info/time_within_train_step": 27.72048592567444, "step": 811} +{"train_info/time_between_train_steps": 0.005472421646118164, "step": 811} +{"info/global_step": 812, "train_info/time_within_train_step": 27.857327222824097, "step": 812} +{"train_info/time_between_train_steps": 0.005354166030883789, "step": 812} +{"info/global_step": 813, "train_info/time_within_train_step": 27.723658323287964, "step": 813} +{"train_info/time_between_train_steps": 0.010537385940551758, "step": 813} +{"info/global_step": 814, "train_info/time_within_train_step": 27.844048500061035, "step": 814} +{"train_info/time_between_train_steps": 0.010542631149291992, "step": 814} +{"info/global_step": 815, "train_info/time_within_train_step": 27.72451901435852, "step": 815} +{"train_info/time_between_train_steps": 0.010607481002807617, "step": 815} +{"info/global_step": 816, "train_info/time_within_train_step": 27.842698335647583, "step": 816} +{"train_info/time_between_train_steps": 0.0052869319915771484, "step": 816} +{"info/global_step": 817, "train_info/time_within_train_step": 27.71958041191101, "step": 817} +{"train_info/time_between_train_steps": 0.005518913269042969, "step": 817} +{"info/global_step": 818, "train_info/time_within_train_step": 27.748716115951538, "step": 818} +{"train_info/time_between_train_steps": 0.0053408145904541016, "step": 818} +{"info/global_step": 819, "train_info/time_within_train_step": 27.715468168258667, "step": 819} +{"train_info/time_between_train_steps": 0.00507664680480957, "step": 819} +{"info/global_step": 820, "train_info/time_within_train_step": 27.703967094421387, "step": 820} +{"train_info/time_between_train_steps": 0.005206584930419922, "step": 820} +{"info/global_step": 821, "train_info/time_within_train_step": 27.715110063552856, "step": 821} +{"train_info/time_between_train_steps": 0.005280971527099609, "step": 821} +{"info/global_step": 822, "train_info/time_within_train_step": 27.71424436569214, "step": 822} +{"train_info/time_between_train_steps": 0.005375862121582031, "step": 822} +{"info/global_step": 823, "train_info/time_within_train_step": 27.71112561225891, "step": 823} +{"train_info/time_between_train_steps": 0.005225419998168945, "step": 823} +{"info/global_step": 824, "train_info/time_within_train_step": 27.711541175842285, "step": 824} +{"train_info/time_between_train_steps": 0.005158424377441406, "step": 824} +{"info/global_step": 825, "train_info/time_within_train_step": 27.804274320602417, "step": 825} +{"train_info/time_between_train_steps": 0.005189418792724609, "step": 825} +{"info/global_step": 826, "train_info/time_within_train_step": 27.70910406112671, "step": 826} +{"train_info/time_between_train_steps": 0.005238771438598633, "step": 826} +{"info/global_step": 827, "train_info/time_within_train_step": 27.7127525806427, "step": 827} +{"train_info/time_between_train_steps": 0.005201578140258789, "step": 827} +{"info/global_step": 828, "train_info/time_within_train_step": 27.709790468215942, "step": 828} +{"train_info/time_between_train_steps": 0.005311250686645508, "step": 828} +{"info/global_step": 829, "train_info/time_within_train_step": 27.715779781341553, "step": 829} +{"train_info/time_between_train_steps": 0.005263328552246094, "step": 829} +{"info/global_step": 830, "train_info/time_within_train_step": 27.7187237739563, "step": 830} +{"train_info/time_between_train_steps": 0.005297183990478516, "step": 830} +{"info/global_step": 831, "train_info/time_within_train_step": 27.71279215812683, "step": 831} +{"train_info/time_between_train_steps": 0.0053064823150634766, "step": 831} +{"info/global_step": 832, "train_info/time_within_train_step": 27.713099718093872, "step": 832} +{"train_info/time_between_train_steps": 0.005251407623291016, "step": 832} +{"info/global_step": 833, "train_info/time_within_train_step": 27.720495223999023, "step": 833} +{"train_info/time_between_train_steps": 0.005221843719482422, "step": 833} +{"info/global_step": 834, "train_info/time_within_train_step": 27.71812915802002, "step": 834} +{"train_info/time_between_train_steps": 0.005320310592651367, "step": 834} +{"info/global_step": 835, "train_info/time_within_train_step": 27.720616817474365, "step": 835} +{"train_info/time_between_train_steps": 0.005514860153198242, "step": 835} +{"info/global_step": 836, "train_info/time_within_train_step": 27.735843420028687, "step": 836} +{"train_info/time_between_train_steps": 0.005868673324584961, "step": 836} +{"info/global_step": 837, "train_info/time_within_train_step": 27.741660594940186, "step": 837} +{"train_info/time_between_train_steps": 0.00575566291809082, "step": 837} +{"train_info/time_between_train_steps": 20.44518280029297, "step": 837} +{"info/global_step": 838, "train_info/time_within_train_step": 27.71090531349182, "step": 838} +{"train_info/time_between_train_steps": 0.005184650421142578, "step": 838} +{"info/global_step": 839, "train_info/time_within_train_step": 27.81063723564148, "step": 839} +{"train_info/time_between_train_steps": 0.005167484283447266, "step": 839} +{"info/global_step": 840, "train_info/time_within_train_step": 27.799445629119873, "step": 840} +{"train_info/time_between_train_steps": 0.005091190338134766, "step": 840} +{"info/global_step": 841, "train_info/time_within_train_step": 27.82066512107849, "step": 841} +{"train_info/time_between_train_steps": 0.005136728286743164, "step": 841} +{"info/global_step": 842, "train_info/time_within_train_step": 27.70643639564514, "step": 842} +{"train_info/time_between_train_steps": 0.005394697189331055, "step": 842} +{"info/global_step": 843, "train_info/time_within_train_step": 27.862022399902344, "step": 843} +{"train_info/time_between_train_steps": 0.005361318588256836, "step": 843} +{"info/global_step": 844, "train_info/time_within_train_step": 27.723599672317505, "step": 844} +{"train_info/time_between_train_steps": 0.005439281463623047, "step": 844} +{"info/global_step": 845, "train_info/time_within_train_step": 27.755582809448242, "step": 845} +{"train_info/time_between_train_steps": 0.005339622497558594, "step": 845} +{"info/global_step": 846, "train_info/time_within_train_step": 27.722445249557495, "step": 846} +{"train_info/time_between_train_steps": 0.00516057014465332, "step": 846} +{"info/global_step": 847, "train_info/time_within_train_step": 27.709300756454468, "step": 847} +{"train_info/time_between_train_steps": 0.0050847530364990234, "step": 847} +{"info/global_step": 848, "train_info/time_within_train_step": 27.704508066177368, "step": 848} +{"train_info/time_between_train_steps": 0.005070209503173828, "step": 848} +{"info/global_step": 849, "train_info/time_within_train_step": 27.707288026809692, "step": 849} +{"train_info/time_between_train_steps": 0.005128622055053711, "step": 849} +{"info/global_step": 850, "train_info/time_within_train_step": 27.705979108810425, "step": 850} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733791933, "_runtime": 25314}, "step": 850} +{"logs": {"train/loss": 5.3877, "train/learning_rate": 0.00019444444444444443, "train/epoch": 31.01, "_timestamp": 1733791933, "_runtime": 25314}, "step": 850} +{"train_info/time_between_train_steps": 0.0068111419677734375, "step": 850} +{"info/global_step": 851, "train_info/time_within_train_step": 27.70562243461609, "step": 851} +{"train_info/time_between_train_steps": 0.00504612922668457, "step": 851} +{"info/global_step": 852, "train_info/time_within_train_step": 27.7128586769104, "step": 852} +{"train_info/time_between_train_steps": 0.005222797393798828, "step": 852} +{"info/global_step": 853, "train_info/time_within_train_step": 27.71263837814331, "step": 853} +{"train_info/time_between_train_steps": 0.005182981491088867, "step": 853} +{"info/global_step": 854, "train_info/time_within_train_step": 27.713521242141724, "step": 854} +{"train_info/time_between_train_steps": 0.005314350128173828, "step": 854} +{"info/global_step": 855, "train_info/time_within_train_step": 27.715628147125244, "step": 855} +{"train_info/time_between_train_steps": 0.005186319351196289, "step": 855} +{"info/global_step": 856, "train_info/time_within_train_step": 27.80452251434326, "step": 856} +{"train_info/time_between_train_steps": 0.005207538604736328, "step": 856} +{"info/global_step": 857, "train_info/time_within_train_step": 27.71081519126892, "step": 857} +{"train_info/time_between_train_steps": 0.010307788848876953, "step": 857} +{"info/global_step": 858, "train_info/time_within_train_step": 27.71207356452942, "step": 858} +{"train_info/time_between_train_steps": 0.010320186614990234, "step": 858} +{"info/global_step": 859, "train_info/time_within_train_step": 27.714695692062378, "step": 859} +{"train_info/time_between_train_steps": 0.010441303253173828, "step": 859} +{"info/global_step": 860, "train_info/time_within_train_step": 27.72012495994568, "step": 860} +{"train_info/time_between_train_steps": 0.005358457565307617, "step": 860} +{"info/global_step": 861, "train_info/time_within_train_step": 27.7236328125, "step": 861} +{"train_info/time_between_train_steps": 0.005481243133544922, "step": 861} +{"info/global_step": 862, "train_info/time_within_train_step": 27.717998504638672, "step": 862} +{"train_info/time_between_train_steps": 0.0052988529205322266, "step": 862} +{"info/global_step": 863, "train_info/time_within_train_step": 27.723294258117676, "step": 863} +{"train_info/time_between_train_steps": 0.010696649551391602, "step": 863} +{"info/global_step": 864, "train_info/time_within_train_step": 27.75514054298401, "step": 864} +{"train_info/time_between_train_steps": 0.005484580993652344, "step": 864} +{"train_info/time_between_train_steps": 20.5726900100708, "step": 864} +{"info/global_step": 865, "train_info/time_within_train_step": 27.738317012786865, "step": 865} +{"train_info/time_between_train_steps": 0.005490779876708984, "step": 865} +{"info/global_step": 866, "train_info/time_within_train_step": 27.87927484512329, "step": 866} +{"train_info/time_between_train_steps": 0.00543212890625, "step": 866} +{"info/global_step": 867, "train_info/time_within_train_step": 27.743062257766724, "step": 867} +{"train_info/time_between_train_steps": 0.005254030227661133, "step": 867} +{"info/global_step": 868, "train_info/time_within_train_step": 27.922926425933838, "step": 868} +{"train_info/time_between_train_steps": 0.005395650863647461, "step": 868} +{"info/global_step": 869, "train_info/time_within_train_step": 27.731603860855103, "step": 869} +{"train_info/time_between_train_steps": 0.00547027587890625, "step": 869} +{"info/global_step": 870, "train_info/time_within_train_step": 27.856475114822388, "step": 870} +{"train_info/time_between_train_steps": 0.005249500274658203, "step": 870} +{"info/global_step": 871, "train_info/time_within_train_step": 27.81139588356018, "step": 871} +{"train_info/time_between_train_steps": 0.005386829376220703, "step": 871} +{"info/global_step": 872, "train_info/time_within_train_step": 27.762829542160034, "step": 872} +{"train_info/time_between_train_steps": 0.005400180816650391, "step": 872} +{"info/global_step": 873, "train_info/time_within_train_step": 27.862295627593994, "step": 873} +{"train_info/time_between_train_steps": 0.005052804946899414, "step": 873} +{"info/global_step": 874, "train_info/time_within_train_step": 27.717337608337402, "step": 874} +{"train_info/time_between_train_steps": 0.005173921585083008, "step": 874} +{"info/global_step": 875, "train_info/time_within_train_step": 27.724077701568604, "step": 875} +{"train_info/time_between_train_steps": 0.005385637283325195, "step": 875} +{"info/global_step": 876, "train_info/time_within_train_step": 27.711233139038086, "step": 876} +{"train_info/time_between_train_steps": 0.0052263736724853516, "step": 876} +{"info/global_step": 877, "train_info/time_within_train_step": 27.71998691558838, "step": 877} +{"train_info/time_between_train_steps": 0.005141735076904297, "step": 877} +{"info/global_step": 878, "train_info/time_within_train_step": 27.717175245285034, "step": 878} +{"train_info/time_between_train_steps": 0.005527496337890625, "step": 878} +{"info/global_step": 879, "train_info/time_within_train_step": 27.718796014785767, "step": 879} +{"train_info/time_between_train_steps": 0.005221366882324219, "step": 879} +{"info/global_step": 880, "train_info/time_within_train_step": 27.718549728393555, "step": 880} +{"train_info/time_between_train_steps": 0.005204916000366211, "step": 880} +{"info/global_step": 881, "train_info/time_within_train_step": 27.71528172492981, "step": 881} +{"train_info/time_between_train_steps": 0.005410671234130859, "step": 881} +{"info/global_step": 882, "train_info/time_within_train_step": 27.72004532814026, "step": 882} +{"train_info/time_between_train_steps": 0.005238533020019531, "step": 882} +{"info/global_step": 883, "train_info/time_within_train_step": 27.725552082061768, "step": 883} +{"train_info/time_between_train_steps": 0.005188703536987305, "step": 883} +{"info/global_step": 884, "train_info/time_within_train_step": 27.71218252182007, "step": 884} +{"train_info/time_between_train_steps": 0.005150794982910156, "step": 884} +{"info/global_step": 885, "train_info/time_within_train_step": 27.71878743171692, "step": 885} +{"train_info/time_between_train_steps": 0.00515294075012207, "step": 885} +{"info/global_step": 886, "train_info/time_within_train_step": 27.71459674835205, "step": 886} +{"train_info/time_between_train_steps": 0.005214214324951172, "step": 886} +{"info/global_step": 887, "train_info/time_within_train_step": 27.818555116653442, "step": 887} +{"train_info/time_between_train_steps": 0.0052225589752197266, "step": 887} +{"info/global_step": 888, "train_info/time_within_train_step": 27.7254478931427, "step": 888} +{"train_info/time_between_train_steps": 0.00537562370300293, "step": 888} +{"info/global_step": 889, "train_info/time_within_train_step": 27.72662901878357, "step": 889} +{"train_info/time_between_train_steps": 0.005426168441772461, "step": 889} +{"info/global_step": 890, "train_info/time_within_train_step": 27.731294631958008, "step": 890} +{"train_info/time_between_train_steps": 0.005697011947631836, "step": 890} +{"info/global_step": 891, "train_info/time_within_train_step": 27.74392342567444, "step": 891} +{"train_info/time_between_train_steps": 0.005918979644775391, "step": 891} +{"train_info/time_between_train_steps": 20.694220542907715, "step": 891} +{"info/global_step": 892, "train_info/time_within_train_step": 27.72251057624817, "step": 892} +{"train_info/time_between_train_steps": 0.0051422119140625, "step": 892} +{"info/global_step": 893, "train_info/time_within_train_step": 27.856131553649902, "step": 893} +{"train_info/time_between_train_steps": 0.005264997482299805, "step": 893} +{"info/global_step": 894, "train_info/time_within_train_step": 27.730921983718872, "step": 894} +{"train_info/time_between_train_steps": 0.005330801010131836, "step": 894} +{"info/global_step": 895, "train_info/time_within_train_step": 27.83579182624817, "step": 895} +{"train_info/time_between_train_steps": 0.005349159240722656, "step": 895} +{"info/global_step": 896, "train_info/time_within_train_step": 27.734115600585938, "step": 896} +{"train_info/time_between_train_steps": 0.0053904056549072266, "step": 896} +{"info/global_step": 897, "train_info/time_within_train_step": 27.841628074645996, "step": 897} +{"train_info/time_between_train_steps": 0.005380868911743164, "step": 897} +{"info/global_step": 898, "train_info/time_within_train_step": 27.72293210029602, "step": 898} +{"train_info/time_between_train_steps": 0.005307435989379883, "step": 898} +{"info/global_step": 899, "train_info/time_within_train_step": 27.74983859062195, "step": 899} +{"train_info/time_between_train_steps": 0.010775566101074219, "step": 899} +{"info/global_step": 900, "train_info/time_within_train_step": 27.71818995475769, "step": 900} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733793362, "_runtime": 26743}, "step": 900} +{"logs": {"train/loss": 5.3031, "train/learning_rate": 0.00016666666666666666, "train/epoch": 33.01, "_timestamp": 1733793362, "_runtime": 26743}, "step": 900} +{"train_info/time_between_train_steps": 2.3128349781036377, "step": 900} +{"info/global_step": 901, "train_info/time_within_train_step": 27.711379051208496, "step": 901} +{"train_info/time_between_train_steps": 0.005078315734863281, "step": 901} +{"info/global_step": 902, "train_info/time_within_train_step": 27.823373317718506, "step": 902} +{"train_info/time_between_train_steps": 0.0052568912506103516, "step": 902} +{"info/global_step": 903, "train_info/time_within_train_step": 27.723978519439697, "step": 903} +{"train_info/time_between_train_steps": 0.005383014678955078, "step": 903} +{"info/global_step": 904, "train_info/time_within_train_step": 27.744813680648804, "step": 904} +{"train_info/time_between_train_steps": 0.006250143051147461, "step": 904} +{"info/global_step": 905, "train_info/time_within_train_step": 27.72986125946045, "step": 905} +{"train_info/time_between_train_steps": 0.005467653274536133, "step": 905} +{"info/global_step": 906, "train_info/time_within_train_step": 27.728463172912598, "step": 906} +{"train_info/time_between_train_steps": 0.005395650863647461, "step": 906} +{"info/global_step": 907, "train_info/time_within_train_step": 27.720160961151123, "step": 907} +{"train_info/time_between_train_steps": 0.005337953567504883, "step": 907} +{"info/global_step": 908, "train_info/time_within_train_step": 27.71186900138855, "step": 908} +{"train_info/time_between_train_steps": 0.005262851715087891, "step": 908} +{"info/global_step": 909, "train_info/time_within_train_step": 27.7156982421875, "step": 909} +{"train_info/time_between_train_steps": 0.0053942203521728516, "step": 909} +{"info/global_step": 910, "train_info/time_within_train_step": 27.715722799301147, "step": 910} +{"train_info/time_between_train_steps": 0.005174160003662109, "step": 910} +{"info/global_step": 911, "train_info/time_within_train_step": 27.708389282226562, "step": 911} +{"train_info/time_between_train_steps": 0.0063631534576416016, "step": 911} +{"info/global_step": 912, "train_info/time_within_train_step": 27.71051287651062, "step": 912} +{"train_info/time_between_train_steps": 0.00560307502746582, "step": 912} +{"info/global_step": 913, "train_info/time_within_train_step": 27.726656913757324, "step": 913} +{"train_info/time_between_train_steps": 0.005303382873535156, "step": 913} +{"info/global_step": 914, "train_info/time_within_train_step": 27.713444232940674, "step": 914} +{"train_info/time_between_train_steps": 0.00528264045715332, "step": 914} +{"info/global_step": 915, "train_info/time_within_train_step": 27.719370365142822, "step": 915} +{"train_info/time_between_train_steps": 0.005335569381713867, "step": 915} +{"info/global_step": 916, "train_info/time_within_train_step": 27.732205152511597, "step": 916} +{"train_info/time_between_train_steps": 0.00537419319152832, "step": 916} +{"info/global_step": 917, "train_info/time_within_train_step": 27.81624722480774, "step": 917} +{"train_info/time_between_train_steps": 0.005586862564086914, "step": 917} +{"info/global_step": 918, "train_info/time_within_train_step": 27.75248908996582, "step": 918} +{"train_info/time_between_train_steps": 0.005774497985839844, "step": 918} +{"train_info/time_between_train_steps": 20.325047731399536, "step": 918} +{"info/global_step": 919, "train_info/time_within_train_step": 27.71854829788208, "step": 919} +{"train_info/time_between_train_steps": 0.005185365676879883, "step": 919} +{"info/global_step": 920, "train_info/time_within_train_step": 27.823044538497925, "step": 920} +{"train_info/time_between_train_steps": 0.005135297775268555, "step": 920} +{"info/global_step": 921, "train_info/time_within_train_step": 27.713748693466187, "step": 921} +{"train_info/time_between_train_steps": 0.0054111480712890625, "step": 921} +{"info/global_step": 922, "train_info/time_within_train_step": 27.86349630355835, "step": 922} +{"train_info/time_between_train_steps": 0.005366086959838867, "step": 922} +{"info/global_step": 923, "train_info/time_within_train_step": 27.727901935577393, "step": 923} +{"train_info/time_between_train_steps": 0.005488872528076172, "step": 923} +{"info/global_step": 924, "train_info/time_within_train_step": 27.9730122089386, "step": 924} +{"train_info/time_between_train_steps": 0.0056378841400146484, "step": 924} +{"info/global_step": 925, "train_info/time_within_train_step": 27.79627299308777, "step": 925} +{"train_info/time_between_train_steps": 0.005930900573730469, "step": 925} +{"info/global_step": 926, "train_info/time_within_train_step": 27.879438161849976, "step": 926} +{"train_info/time_between_train_steps": 0.005875110626220703, "step": 926} +{"info/global_step": 927, "train_info/time_within_train_step": 27.761509656906128, "step": 927} +{"train_info/time_between_train_steps": 0.005412578582763672, "step": 927} +{"info/global_step": 928, "train_info/time_within_train_step": 27.75823950767517, "step": 928} +{"train_info/time_between_train_steps": 0.011960983276367188, "step": 928} +{"info/global_step": 929, "train_info/time_within_train_step": 27.76857900619507, "step": 929} +{"train_info/time_between_train_steps": 0.011620521545410156, "step": 929} +{"info/global_step": 930, "train_info/time_within_train_step": 27.762378931045532, "step": 930} +{"train_info/time_between_train_steps": 0.0052700042724609375, "step": 930} +{"info/global_step": 931, "train_info/time_within_train_step": 27.740817070007324, "step": 931} +{"train_info/time_between_train_steps": 0.005373477935791016, "step": 931} +{"info/global_step": 932, "train_info/time_within_train_step": 27.899746894836426, "step": 932} +{"train_info/time_between_train_steps": 0.005509138107299805, "step": 932} +{"info/global_step": 933, "train_info/time_within_train_step": 27.75979709625244, "step": 933} +{"train_info/time_between_train_steps": 0.005361080169677734, "step": 933} +{"info/global_step": 934, "train_info/time_within_train_step": 27.74633002281189, "step": 934} +{"train_info/time_between_train_steps": 0.010348796844482422, "step": 934} +{"info/global_step": 935, "train_info/time_within_train_step": 27.740569353103638, "step": 935} +{"train_info/time_between_train_steps": 0.010508537292480469, "step": 935} +{"info/global_step": 936, "train_info/time_within_train_step": 27.740073442459106, "step": 936} +{"train_info/time_between_train_steps": 0.0053637027740478516, "step": 936} +{"info/global_step": 937, "train_info/time_within_train_step": 27.747174501419067, "step": 937} +{"train_info/time_between_train_steps": 0.0054776668548583984, "step": 937} +{"info/global_step": 938, "train_info/time_within_train_step": 27.78195548057556, "step": 938} +{"train_info/time_between_train_steps": 0.012047290802001953, "step": 938} +{"info/global_step": 939, "train_info/time_within_train_step": 27.753451347351074, "step": 939} +{"train_info/time_between_train_steps": 0.005415678024291992, "step": 939} +{"info/global_step": 940, "train_info/time_within_train_step": 27.76328468322754, "step": 940} +{"train_info/time_between_train_steps": 0.005534172058105469, "step": 940} +{"info/global_step": 941, "train_info/time_within_train_step": 27.7477126121521, "step": 941} +{"train_info/time_between_train_steps": 0.0052602291107177734, "step": 941} +{"info/global_step": 942, "train_info/time_within_train_step": 27.758410930633545, "step": 942} +{"train_info/time_between_train_steps": 0.006340503692626953, "step": 942} +{"info/global_step": 943, "train_info/time_within_train_step": 27.7735116481781, "step": 943} +{"train_info/time_between_train_steps": 0.005578041076660156, "step": 943} +{"info/global_step": 944, "train_info/time_within_train_step": 27.776403665542603, "step": 944} +{"train_info/time_between_train_steps": 0.005496978759765625, "step": 944} +{"info/global_step": 945, "train_info/time_within_train_step": 27.770196437835693, "step": 945} +{"train_info/time_between_train_steps": 0.005621433258056641, "step": 945} +{"train_info/time_between_train_steps": 20.6078884601593, "step": 945} +{"info/global_step": 946, "train_info/time_within_train_step": 27.77573013305664, "step": 946} +{"train_info/time_between_train_steps": 0.005971670150756836, "step": 946} +{"info/global_step": 947, "train_info/time_within_train_step": 28.074360847473145, "step": 947} +{"train_info/time_between_train_steps": 0.0057201385498046875, "step": 947} +{"info/global_step": 948, "train_info/time_within_train_step": 27.82479691505432, "step": 948} +{"train_info/time_between_train_steps": 0.005433082580566406, "step": 948} +{"info/global_step": 949, "train_info/time_within_train_step": 27.93803095817566, "step": 949} +{"train_info/time_between_train_steps": 0.005591630935668945, "step": 949} +{"info/global_step": 950, "train_info/time_within_train_step": 27.79521083831787, "step": 950} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733794794, "_runtime": 28175}, "step": 950} +{"logs": {"train/loss": 5.2301, "train/learning_rate": 0.0001388888888888889, "train/epoch": 35.0, "_timestamp": 1733794794, "_runtime": 28175}, "step": 950} +{"train_info/time_between_train_steps": 0.007996320724487305, "step": 950} +{"info/global_step": 951, "train_info/time_within_train_step": 27.92046880722046, "step": 951} +{"train_info/time_between_train_steps": 0.005840778350830078, "step": 951} +{"info/global_step": 952, "train_info/time_within_train_step": 27.773765563964844, "step": 952} +{"train_info/time_between_train_steps": 0.005754947662353516, "step": 952} +{"info/global_step": 953, "train_info/time_within_train_step": 27.821911334991455, "step": 953} +{"train_info/time_between_train_steps": 0.005495786666870117, "step": 953} +{"info/global_step": 954, "train_info/time_within_train_step": 27.744524717330933, "step": 954} +{"train_info/time_between_train_steps": 0.005181074142456055, "step": 954} +{"info/global_step": 955, "train_info/time_within_train_step": 27.746463537216187, "step": 955} +{"train_info/time_between_train_steps": 0.005190849304199219, "step": 955} +{"info/global_step": 956, "train_info/time_within_train_step": 27.961994171142578, "step": 956} +{"train_info/time_between_train_steps": 0.005338191986083984, "step": 956} +{"info/global_step": 957, "train_info/time_within_train_step": 27.76461124420166, "step": 957} +{"train_info/time_between_train_steps": 0.005162477493286133, "step": 957} +{"info/global_step": 958, "train_info/time_within_train_step": 27.740721464157104, "step": 958} +{"train_info/time_between_train_steps": 0.005562543869018555, "step": 958} +{"info/global_step": 959, "train_info/time_within_train_step": 27.739822387695312, "step": 959} +{"train_info/time_between_train_steps": 0.00535273551940918, "step": 959} +{"info/global_step": 960, "train_info/time_within_train_step": 27.742841482162476, "step": 960} +{"train_info/time_between_train_steps": 0.0053730010986328125, "step": 960} +{"info/global_step": 961, "train_info/time_within_train_step": 27.747514963150024, "step": 961} +{"train_info/time_between_train_steps": 0.005506992340087891, "step": 961} +{"info/global_step": 962, "train_info/time_within_train_step": 27.770897388458252, "step": 962} +{"train_info/time_between_train_steps": 0.005518674850463867, "step": 962} +{"info/global_step": 963, "train_info/time_within_train_step": 27.85232973098755, "step": 963} +{"train_info/time_between_train_steps": 0.005341768264770508, "step": 963} +{"info/global_step": 964, "train_info/time_within_train_step": 27.76482081413269, "step": 964} +{"train_info/time_between_train_steps": 0.005445241928100586, "step": 964} +{"info/global_step": 965, "train_info/time_within_train_step": 27.73627281188965, "step": 965} +{"train_info/time_between_train_steps": 0.0052411556243896484, "step": 965} +{"info/global_step": 966, "train_info/time_within_train_step": 27.756598472595215, "step": 966} +{"train_info/time_between_train_steps": 0.0054128170013427734, "step": 966} +{"info/global_step": 967, "train_info/time_within_train_step": 27.821573972702026, "step": 967} +{"train_info/time_between_train_steps": 0.005594015121459961, "step": 967} +{"info/global_step": 968, "train_info/time_within_train_step": 27.754364728927612, "step": 968} +{"train_info/time_between_train_steps": 0.005769014358520508, "step": 968} +{"info/global_step": 969, "train_info/time_within_train_step": 27.75213098526001, "step": 969} +{"train_info/time_between_train_steps": 0.005655765533447266, "step": 969} +{"info/global_step": 970, "train_info/time_within_train_step": 27.75457739830017, "step": 970} +{"train_info/time_between_train_steps": 0.005613088607788086, "step": 970} +{"info/global_step": 971, "train_info/time_within_train_step": 27.754677057266235, "step": 971} +{"train_info/time_between_train_steps": 0.005723237991333008, "step": 971} +{"info/global_step": 972, "train_info/time_within_train_step": 27.7614803314209, "step": 972} +{"train_info/time_between_train_steps": 0.005797863006591797, "step": 972} +{"train_info/time_between_train_steps": 20.725215196609497, "step": 972} +{"info/global_step": 973, "train_info/time_within_train_step": 27.744470596313477, "step": 973} +{"train_info/time_between_train_steps": 0.005174398422241211, "step": 973} +{"info/global_step": 974, "train_info/time_within_train_step": 27.84145188331604, "step": 974} +{"train_info/time_between_train_steps": 0.005268096923828125, "step": 974} +{"info/global_step": 975, "train_info/time_within_train_step": 27.782726764678955, "step": 975} +{"train_info/time_between_train_steps": 0.005945920944213867, "step": 975} +{"info/global_step": 976, "train_info/time_within_train_step": 27.941328048706055, "step": 976} +{"train_info/time_between_train_steps": 0.005614757537841797, "step": 976} +{"info/global_step": 977, "train_info/time_within_train_step": 27.760984182357788, "step": 977} +{"train_info/time_between_train_steps": 0.005478382110595703, "step": 977} +{"info/global_step": 978, "train_info/time_within_train_step": 28.00813102722168, "step": 978} +{"train_info/time_between_train_steps": 0.0055730342864990234, "step": 978} +{"info/global_step": 979, "train_info/time_within_train_step": 27.74243927001953, "step": 979} +{"train_info/time_between_train_steps": 0.005778789520263672, "step": 979} +{"info/global_step": 980, "train_info/time_within_train_step": 27.78297710418701, "step": 980} +{"train_info/time_between_train_steps": 0.005359172821044922, "step": 980} +{"info/global_step": 981, "train_info/time_within_train_step": 27.721980810165405, "step": 981} +{"train_info/time_between_train_steps": 0.005311012268066406, "step": 981} +{"info/global_step": 982, "train_info/time_within_train_step": 27.713310956954956, "step": 982} +{"train_info/time_between_train_steps": 0.005204200744628906, "step": 982} +{"info/global_step": 983, "train_info/time_within_train_step": 27.7063729763031, "step": 983} +{"train_info/time_between_train_steps": 0.005177497863769531, "step": 983} +{"info/global_step": 984, "train_info/time_within_train_step": 27.713244199752808, "step": 984} +{"train_info/time_between_train_steps": 0.005157947540283203, "step": 984} +{"info/global_step": 985, "train_info/time_within_train_step": 27.712939977645874, "step": 985} +{"train_info/time_between_train_steps": 0.005412101745605469, "step": 985} +{"info/global_step": 986, "train_info/time_within_train_step": 27.71129846572876, "step": 986} +{"train_info/time_between_train_steps": 0.005219221115112305, "step": 986} +{"info/global_step": 987, "train_info/time_within_train_step": 27.71236300468445, "step": 987} +{"train_info/time_between_train_steps": 0.005188703536987305, "step": 987} +{"info/global_step": 988, "train_info/time_within_train_step": 27.712196111679077, "step": 988} +{"train_info/time_between_train_steps": 0.005225658416748047, "step": 988} +{"info/global_step": 989, "train_info/time_within_train_step": 27.70950984954834, "step": 989} +{"train_info/time_between_train_steps": 0.005205631256103516, "step": 989} +{"info/global_step": 990, "train_info/time_within_train_step": 27.719724893569946, "step": 990} +{"train_info/time_between_train_steps": 0.005128383636474609, "step": 990} +{"info/global_step": 991, "train_info/time_within_train_step": 27.71013617515564, "step": 991} +{"train_info/time_between_train_steps": 0.005198955535888672, "step": 991} +{"info/global_step": 992, "train_info/time_within_train_step": 27.708768129348755, "step": 992} +{"train_info/time_between_train_steps": 0.0052073001861572266, "step": 992} +{"info/global_step": 993, "train_info/time_within_train_step": 27.717350721359253, "step": 993} +{"train_info/time_between_train_steps": 0.005298614501953125, "step": 993} +{"info/global_step": 994, "train_info/time_within_train_step": 27.839568614959717, "step": 994} +{"train_info/time_between_train_steps": 0.00513911247253418, "step": 994} +{"info/global_step": 995, "train_info/time_within_train_step": 27.715038537979126, "step": 995} +{"train_info/time_between_train_steps": 0.005128145217895508, "step": 995} +{"info/global_step": 996, "train_info/time_within_train_step": 27.79600429534912, "step": 996} +{"train_info/time_between_train_steps": 0.005433559417724609, "step": 996} +{"info/global_step": 997, "train_info/time_within_train_step": 27.716649055480957, "step": 997} +{"train_info/time_between_train_steps": 0.005323886871337891, "step": 997} +{"info/global_step": 998, "train_info/time_within_train_step": 27.733755588531494, "step": 998} +{"train_info/time_between_train_steps": 0.005895853042602539, "step": 998} +{"info/global_step": 999, "train_info/time_within_train_step": 27.73686718940735, "step": 999} +{"train_info/time_between_train_steps": 0.005820035934448242, "step": 999} +{"train_info/time_between_train_steps": 20.375328302383423, "step": 999} +{"info/global_step": 1000, "train_info/time_within_train_step": 27.713494062423706, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 22626.0, "train_info/memory_max_reserved": 22626.0, "_timestamp": 1733796224, "_runtime": 29605}, "step": 1000} +{"logs": {"train/loss": 5.1653, "train/learning_rate": 0.00011111111111111109, "train/epoch": 37.0, "_timestamp": 1733796224, "_runtime": 29605}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1733796226, "_runtime": 29607}, "step": 1000} +{"logs": {"eval/loss": 5.702059268951416, "eval/runtime": 1.9567, "eval/samples_per_second": 51.105, "eval/steps_per_second": 3.577, "train/epoch": 37.0, "_timestamp": 1733796226, "_runtime": 29607}, "step": 1000} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1733796226, "_runtime": 29607}, "step": 1000} +{"logs": {"eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 5.702059268951416, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 299.4834834485122, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 1.9567, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 51.105, "train/epoch": 37.0, "_timestamp": 1733796226, "_runtime": 29607}, "step": 1000} +{"train_info/time_between_train_steps": 4.208975315093994, "step": 1000} +{"info/global_step": 1001, "train_info/time_within_train_step": 27.865995407104492, "step": 1001} +{"train_info/time_between_train_steps": 0.005201578140258789, "step": 1001} +{"info/global_step": 1002, "train_info/time_within_train_step": 27.706790924072266, "step": 1002} +{"train_info/time_between_train_steps": 0.005058765411376953, "step": 1002} +{"info/global_step": 1003, "train_info/time_within_train_step": 27.84881615638733, "step": 1003} +{"train_info/time_between_train_steps": 0.005249977111816406, "step": 1003} +{"info/global_step": 1004, "train_info/time_within_train_step": 27.723446369171143, "step": 1004} +{"train_info/time_between_train_steps": 0.00538325309753418, "step": 1004} +{"info/global_step": 1005, "train_info/time_within_train_step": 27.850038528442383, "step": 1005} +{"train_info/time_between_train_steps": 0.005414247512817383, "step": 1005} +{"info/global_step": 1006, "train_info/time_within_train_step": 27.72363567352295, "step": 1006} +{"train_info/time_between_train_steps": 0.005733966827392578, "step": 1006} +{"info/global_step": 1007, "train_info/time_within_train_step": 27.75078272819519, "step": 1007} +{"train_info/time_between_train_steps": 0.005341291427612305, "step": 1007} +{"info/global_step": 1008, "train_info/time_within_train_step": 27.713919162750244, "step": 1008} +{"train_info/time_between_train_steps": 0.0050046443939208984, "step": 1008} +{"info/global_step": 1009, "train_info/time_within_train_step": 27.821474313735962, "step": 1009} +{"train_info/time_between_train_steps": 0.005072355270385742, "step": 1009} +{"info/global_step": 1010, "train_info/time_within_train_step": 27.70011568069458, "step": 1010} +{"train_info/time_between_train_steps": 0.0050237178802490234, "step": 1010} +{"info/global_step": 1011, "train_info/time_within_train_step": 27.702332258224487, "step": 1011} +{"train_info/time_between_train_steps": 0.00509333610534668, "step": 1011} +{"info/global_step": 1012, "train_info/time_within_train_step": 27.70991802215576, "step": 1012} +{"train_info/time_between_train_steps": 0.0051653385162353516, "step": 1012} +{"info/global_step": 1013, "train_info/time_within_train_step": 27.708499908447266, "step": 1013} +{"train_info/time_between_train_steps": 0.005341529846191406, "step": 1013} +{"info/global_step": 1014, "train_info/time_within_train_step": 27.710054397583008, "step": 1014} +{"train_info/time_between_train_steps": 0.0051610469818115234, "step": 1014} +{"info/global_step": 1015, "train_info/time_within_train_step": 27.708974361419678, "step": 1015} +{"train_info/time_between_train_steps": 0.005152702331542969, "step": 1015} +{"info/global_step": 1016, "train_info/time_within_train_step": 27.71091628074646, "step": 1016} +{"train_info/time_between_train_steps": 0.005164623260498047, "step": 1016} +{"info/global_step": 1017, "train_info/time_within_train_step": 27.706139087677002, "step": 1017} +{"train_info/time_between_train_steps": 0.00509953498840332, "step": 1017} +{"info/global_step": 1018, "train_info/time_within_train_step": 27.70916771888733, "step": 1018} +{"train_info/time_between_train_steps": 0.005162715911865234, "step": 1018} +{"info/global_step": 1019, "train_info/time_within_train_step": 27.70973539352417, "step": 1019} +{"train_info/time_between_train_steps": 0.0056781768798828125, "step": 1019} +{"info/global_step": 1020, "train_info/time_within_train_step": 27.713720083236694, "step": 1020} +{"train_info/time_between_train_steps": 0.005291461944580078, "step": 1020} +{"info/global_step": 1021, "train_info/time_within_train_step": 27.716161966323853, "step": 1021} +{"train_info/time_between_train_steps": 0.00527644157409668, "step": 1021} +{"info/global_step": 1022, "train_info/time_within_train_step": 27.712278127670288, "step": 1022} +{"train_info/time_between_train_steps": 0.005187511444091797, "step": 1022} +{"info/global_step": 1023, "train_info/time_within_train_step": 27.717721939086914, "step": 1023} +{"train_info/time_between_train_steps": 0.005276918411254883, "step": 1023} +{"info/global_step": 1024, "train_info/time_within_train_step": 27.8114812374115, "step": 1024} +{"train_info/time_between_train_steps": 0.005355358123779297, "step": 1024} +{"info/global_step": 1025, "train_info/time_within_train_step": 27.721397161483765, "step": 1025} +{"train_info/time_between_train_steps": 0.005516529083251953, "step": 1025} +{"info/global_step": 1026, "train_info/time_within_train_step": 27.744322776794434, "step": 1026} +{"train_info/time_between_train_steps": 0.005740642547607422, "step": 1026} +{"train_info/time_between_train_steps": 20.53538227081299, "step": 1026} +{"info/global_step": 1027, "train_info/time_within_train_step": 27.71237277984619, "step": 1027} +{"train_info/time_between_train_steps": 0.005066871643066406, "step": 1027} +{"info/global_step": 1028, "train_info/time_within_train_step": 27.822900772094727, "step": 1028} +{"train_info/time_between_train_steps": 0.005264997482299805, "step": 1028} +{"info/global_step": 1029, "train_info/time_within_train_step": 27.704650163650513, "step": 1029} +{"train_info/time_between_train_steps": 0.0051724910736083984, "step": 1029} +{"info/global_step": 1030, "train_info/time_within_train_step": 27.835002660751343, "step": 1030} +{"train_info/time_between_train_steps": 0.005117177963256836, "step": 1030} +{"info/global_step": 1031, "train_info/time_within_train_step": 27.70960307121277, "step": 1031} +{"train_info/time_between_train_steps": 0.00506591796875, "step": 1031} +{"info/global_step": 1032, "train_info/time_within_train_step": 27.822142362594604, "step": 1032} +{"train_info/time_between_train_steps": 0.005103111267089844, "step": 1032} +{"info/global_step": 1033, "train_info/time_within_train_step": 27.719225883483887, "step": 1033} +{"train_info/time_between_train_steps": 0.005102396011352539, "step": 1033} +{"info/global_step": 1034, "train_info/time_within_train_step": 27.748772382736206, "step": 1034} +{"train_info/time_between_train_steps": 0.00515437126159668, "step": 1034} +{"info/global_step": 1035, "train_info/time_within_train_step": 27.71235489845276, "step": 1035} +{"train_info/time_between_train_steps": 0.0051043033599853516, "step": 1035} +{"info/global_step": 1036, "train_info/time_within_train_step": 27.708975553512573, "step": 1036} +{"train_info/time_between_train_steps": 0.005099058151245117, "step": 1036} +{"info/global_step": 1037, "train_info/time_within_train_step": 27.722375631332397, "step": 1037} +{"train_info/time_between_train_steps": 0.005141496658325195, "step": 1037} +{"info/global_step": 1038, "train_info/time_within_train_step": 27.708645820617676, "step": 1038} +{"train_info/time_between_train_steps": 0.005213499069213867, "step": 1038} +{"info/global_step": 1039, "train_info/time_within_train_step": 27.80334448814392, "step": 1039} +{"train_info/time_between_train_steps": 0.00509190559387207, "step": 1039} +{"info/global_step": 1040, "train_info/time_within_train_step": 27.717952489852905, "step": 1040} +{"train_info/time_between_train_steps": 0.005114316940307617, "step": 1040} +{"info/global_step": 1041, "train_info/time_within_train_step": 27.714118719100952, "step": 1041} +{"train_info/time_between_train_steps": 0.00513458251953125, "step": 1041} +{"info/global_step": 1042, "train_info/time_within_train_step": 27.71554136276245, "step": 1042} +{"train_info/time_between_train_steps": 0.005090951919555664, "step": 1042} +{"info/global_step": 1043, "train_info/time_within_train_step": 27.77607488632202, "step": 1043} +{"train_info/time_between_train_steps": 0.0052967071533203125, "step": 1043} +{"info/global_step": 1044, "train_info/time_within_train_step": 27.811903476715088, "step": 1044} +{"train_info/time_between_train_steps": 0.0052945613861083984, "step": 1044} +{"info/global_step": 1045, "train_info/time_within_train_step": 27.717835426330566, "step": 1045} +{"train_info/time_between_train_steps": 0.0053937435150146484, "step": 1045} +{"info/global_step": 1046, "train_info/time_within_train_step": 27.724453926086426, "step": 1046} +{"train_info/time_between_train_steps": 0.005321502685546875, "step": 1046} +{"info/global_step": 1047, "train_info/time_within_train_step": 27.711814880371094, "step": 1047} +{"train_info/time_between_train_steps": 0.005208730697631836, "step": 1047} +{"info/global_step": 1048, "train_info/time_within_train_step": 27.715394258499146, "step": 1048} +{"train_info/time_between_train_steps": 0.0052340030670166016, "step": 1048} +{"info/global_step": 1049, "train_info/time_within_train_step": 27.71609854698181, "step": 1049} +{"train_info/time_between_train_steps": 0.005162954330444336, "step": 1049} +{"info/global_step": 1050, "train_info/time_within_train_step": 27.72682023048401, "step": 1050} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1733797636, "_runtime": 31017}, "step": 1050} +{"logs": {"train/loss": 5.0501, "train/learning_rate": 8.333333333333333e-05, "train/epoch": 38.02, "_timestamp": 1733797636, "_runtime": 31017}, "step": 1050} +{"train_info/time_between_train_steps": 0.007471323013305664, "step": 1050} +{"info/global_step": 1051, "train_info/time_within_train_step": 27.748229026794434, "step": 1051} +{"train_info/time_between_train_steps": 0.005460262298583984, "step": 1051} +{"info/global_step": 1052, "train_info/time_within_train_step": 27.7559974193573, "step": 1052} +{"train_info/time_between_train_steps": 0.005614757537841797, "step": 1052} +{"info/global_step": 1053, "train_info/time_within_train_step": 27.744328260421753, "step": 1053} +{"train_info/time_between_train_steps": 0.005728483200073242, "step": 1053} +{"train_info/time_between_train_steps": 20.815574169158936, "step": 1053} +{"info/global_step": 1054, "train_info/time_within_train_step": 27.81493878364563, "step": 1054} +{"train_info/time_between_train_steps": 0.006951093673706055, "step": 1054} +{"info/global_step": 1055, "train_info/time_within_train_step": 27.814767360687256, "step": 1055} +{"train_info/time_between_train_steps": 0.010226964950561523, "step": 1055} +{"info/global_step": 1056, "train_info/time_within_train_step": 27.71216106414795, "step": 1056} +{"train_info/time_between_train_steps": 0.005201101303100586, "step": 1056} +{"info/global_step": 1057, "train_info/time_within_train_step": 27.815857410430908, "step": 1057} +{"train_info/time_between_train_steps": 0.005257129669189453, "step": 1057} +{"info/global_step": 1058, "train_info/time_within_train_step": 27.709444522857666, "step": 1058} +{"train_info/time_between_train_steps": 0.005219936370849609, "step": 1058} +{"info/global_step": 1059, "train_info/time_within_train_step": 27.824488878250122, "step": 1059} +{"train_info/time_between_train_steps": 0.0050580501556396484, "step": 1059} +{"info/global_step": 1060, "train_info/time_within_train_step": 27.711521863937378, "step": 1060} +{"train_info/time_between_train_steps": 0.005232572555541992, "step": 1060} +{"info/global_step": 1061, "train_info/time_within_train_step": 27.74112105369568, "step": 1061} +{"train_info/time_between_train_steps": 0.00536036491394043, "step": 1061} +{"info/global_step": 1062, "train_info/time_within_train_step": 27.7125346660614, "step": 1062} +{"train_info/time_between_train_steps": 0.005025148391723633, "step": 1062} +{"info/global_step": 1063, "train_info/time_within_train_step": 27.70795965194702, "step": 1063} +{"train_info/time_between_train_steps": 0.005115032196044922, "step": 1063} +{"info/global_step": 1064, "train_info/time_within_train_step": 27.711294651031494, "step": 1064} +{"train_info/time_between_train_steps": 0.005320072174072266, "step": 1064} +{"info/global_step": 1065, "train_info/time_within_train_step": 27.72729992866516, "step": 1065} +{"train_info/time_between_train_steps": 0.010207414627075195, "step": 1065} +{"info/global_step": 1066, "train_info/time_within_train_step": 27.710737943649292, "step": 1066} +{"train_info/time_between_train_steps": 0.010372400283813477, "step": 1066} +{"info/global_step": 1067, "train_info/time_within_train_step": 27.709416389465332, "step": 1067} +{"train_info/time_between_train_steps": 0.00521087646484375, "step": 1067} +{"info/global_step": 1068, "train_info/time_within_train_step": 27.71155309677124, "step": 1068} +{"train_info/time_between_train_steps": 0.005116701126098633, "step": 1068} +{"info/global_step": 1069, "train_info/time_within_train_step": 27.708678245544434, "step": 1069} +{"train_info/time_between_train_steps": 0.005255699157714844, "step": 1069} +{"info/global_step": 1070, "train_info/time_within_train_step": 27.80260396003723, "step": 1070} +{"train_info/time_between_train_steps": 0.005162239074707031, "step": 1070} +{"info/global_step": 1071, "train_info/time_within_train_step": 27.71450972557068, "step": 1071} +{"train_info/time_between_train_steps": 0.005180835723876953, "step": 1071} +{"info/global_step": 1072, "train_info/time_within_train_step": 27.714221000671387, "step": 1072} +{"train_info/time_between_train_steps": 0.005276203155517578, "step": 1072} +{"info/global_step": 1073, "train_info/time_within_train_step": 27.714940071105957, "step": 1073} +{"train_info/time_between_train_steps": 0.0052487850189208984, "step": 1073} +{"info/global_step": 1074, "train_info/time_within_train_step": 27.715246438980103, "step": 1074} +{"train_info/time_between_train_steps": 0.005193948745727539, "step": 1074} +{"info/global_step": 1075, "train_info/time_within_train_step": 27.71509599685669, "step": 1075} +{"train_info/time_between_train_steps": 0.005216836929321289, "step": 1075} +{"info/global_step": 1076, "train_info/time_within_train_step": 27.71106719970703, "step": 1076} +{"train_info/time_between_train_steps": 0.005217313766479492, "step": 1076} +{"info/global_step": 1077, "train_info/time_within_train_step": 27.717045783996582, "step": 1077} +{"train_info/time_between_train_steps": 0.005345582962036133, "step": 1077} +{"info/global_step": 1078, "train_info/time_within_train_step": 27.720515966415405, "step": 1078} +{"train_info/time_between_train_steps": 0.005284547805786133, "step": 1078} +{"info/global_step": 1079, "train_info/time_within_train_step": 27.73443031311035, "step": 1079} +{"train_info/time_between_train_steps": 0.00606846809387207, "step": 1079} +{"info/global_step": 1080, "train_info/time_within_train_step": 27.7448251247406, "step": 1080} +{"train_info/time_between_train_steps": 0.005885124206542969, "step": 1080} +{"train_info/time_between_train_steps": 20.42482352256775, "step": 1080} +{"info/global_step": 1081, "train_info/time_within_train_step": 27.713562488555908, "step": 1081} +{"train_info/time_between_train_steps": 0.00509953498840332, "step": 1081} +{"info/global_step": 1082, "train_info/time_within_train_step": 27.817538738250732, "step": 1082} +{"train_info/time_between_train_steps": 0.005184173583984375, "step": 1082} +{"info/global_step": 1083, "train_info/time_within_train_step": 27.712091207504272, "step": 1083} +{"train_info/time_between_train_steps": 0.005116701126098633, "step": 1083} +{"info/global_step": 1084, "train_info/time_within_train_step": 27.832679986953735, "step": 1084} +{"train_info/time_between_train_steps": 0.0052547454833984375, "step": 1084} +{"info/global_step": 1085, "train_info/time_within_train_step": 27.81897473335266, "step": 1085} +{"train_info/time_between_train_steps": 0.005551815032958984, "step": 1085} +{"info/global_step": 1086, "train_info/time_within_train_step": 27.865581512451172, "step": 1086} +{"train_info/time_between_train_steps": 0.005423784255981445, "step": 1086} +{"info/global_step": 1087, "train_info/time_within_train_step": 27.724391222000122, "step": 1087} +{"train_info/time_between_train_steps": 0.005364894866943359, "step": 1087} +{"info/global_step": 1088, "train_info/time_within_train_step": 27.758854627609253, "step": 1088} +{"train_info/time_between_train_steps": 0.0054242610931396484, "step": 1088} +{"info/global_step": 1089, "train_info/time_within_train_step": 27.71587610244751, "step": 1089} +{"train_info/time_between_train_steps": 0.005062580108642578, "step": 1089} +{"info/global_step": 1090, "train_info/time_within_train_step": 27.70565700531006, "step": 1090} +{"train_info/time_between_train_steps": 0.005151987075805664, "step": 1090} +{"info/global_step": 1091, "train_info/time_within_train_step": 27.710023164749146, "step": 1091} +{"train_info/time_between_train_steps": 0.005017757415771484, "step": 1091} +{"info/global_step": 1092, "train_info/time_within_train_step": 27.706892490386963, "step": 1092} +{"train_info/time_between_train_steps": 0.0051953792572021484, "step": 1092} +{"info/global_step": 1093, "train_info/time_within_train_step": 27.714144468307495, "step": 1093} +{"train_info/time_between_train_steps": 0.005151033401489258, "step": 1093} +{"info/global_step": 1094, "train_info/time_within_train_step": 27.71676731109619, "step": 1094} +{"train_info/time_between_train_steps": 0.005323648452758789, "step": 1094} +{"info/global_step": 1095, "train_info/time_within_train_step": 27.710670232772827, "step": 1095} +{"train_info/time_between_train_steps": 0.0052759647369384766, "step": 1095} +{"info/global_step": 1096, "train_info/time_within_train_step": 27.70660138130188, "step": 1096} +{"train_info/time_between_train_steps": 0.005182981491088867, "step": 1096} +{"info/global_step": 1097, "train_info/time_within_train_step": 27.711443662643433, "step": 1097} +{"train_info/time_between_train_steps": 0.005187511444091797, "step": 1097} +{"info/global_step": 1098, "train_info/time_within_train_step": 27.710567712783813, "step": 1098} +{"train_info/time_between_train_steps": 0.005213499069213867, "step": 1098} +{"info/global_step": 1099, "train_info/time_within_train_step": 27.71142888069153, "step": 1099} +{"train_info/time_between_train_steps": 0.005303144454956055, "step": 1099} +{"info/global_step": 1100, "train_info/time_within_train_step": 27.839042901992798, "step": 1100} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1733799065, "_runtime": 32446}, "step": 1100} +{"logs": {"train/loss": 5.0694, "train/learning_rate": 5.5555555555555545e-05, "train/epoch": 40.02, "_timestamp": 1733799065, "_runtime": 32446}, "step": 1100} +{"train_info/time_between_train_steps": 2.4306821823120117, "step": 1100} +{"info/global_step": 1101, "train_info/time_within_train_step": 27.711300134658813, "step": 1101} +{"train_info/time_between_train_steps": 0.005175113677978516, "step": 1101} +{"info/global_step": 1102, "train_info/time_within_train_step": 27.70832371711731, "step": 1102} +{"train_info/time_between_train_steps": 0.0052127838134765625, "step": 1102} +{"info/global_step": 1103, "train_info/time_within_train_step": 27.716282844543457, "step": 1103} +{"train_info/time_between_train_steps": 0.005013227462768555, "step": 1103} +{"info/global_step": 1104, "train_info/time_within_train_step": 27.715436458587646, "step": 1104} +{"train_info/time_between_train_steps": 0.0054018497467041016, "step": 1104} +{"info/global_step": 1105, "train_info/time_within_train_step": 27.714054346084595, "step": 1105} +{"train_info/time_between_train_steps": 0.005385160446166992, "step": 1105} +{"info/global_step": 1106, "train_info/time_within_train_step": 27.73233723640442, "step": 1106} +{"train_info/time_between_train_steps": 0.005783796310424805, "step": 1106} +{"info/global_step": 1107, "train_info/time_within_train_step": 27.750361680984497, "step": 1107} +{"train_info/time_between_train_steps": 0.005928993225097656, "step": 1107} +{"train_info/time_between_train_steps": 20.729090213775635, "step": 1107} +{"info/global_step": 1108, "train_info/time_within_train_step": 27.711256504058838, "step": 1108} +{"train_info/time_between_train_steps": 0.005100250244140625, "step": 1108} +{"info/global_step": 1109, "train_info/time_within_train_step": 27.812416076660156, "step": 1109} +{"train_info/time_between_train_steps": 0.005129575729370117, "step": 1109} +{"info/global_step": 1110, "train_info/time_within_train_step": 27.703954458236694, "step": 1110} +{"train_info/time_between_train_steps": 0.005101203918457031, "step": 1110} +{"info/global_step": 1111, "train_info/time_within_train_step": 27.819048404693604, "step": 1111} +{"train_info/time_between_train_steps": 0.005103349685668945, "step": 1111} +{"info/global_step": 1112, "train_info/time_within_train_step": 27.716280698776245, "step": 1112} +{"train_info/time_between_train_steps": 0.005529880523681641, "step": 1112} +{"info/global_step": 1113, "train_info/time_within_train_step": 27.872912168502808, "step": 1113} +{"train_info/time_between_train_steps": 0.00542449951171875, "step": 1113} +{"info/global_step": 1114, "train_info/time_within_train_step": 27.72262716293335, "step": 1114} +{"train_info/time_between_train_steps": 0.005317211151123047, "step": 1114} +{"info/global_step": 1115, "train_info/time_within_train_step": 27.849186897277832, "step": 1115} +{"train_info/time_between_train_steps": 0.0053958892822265625, "step": 1115} +{"info/global_step": 1116, "train_info/time_within_train_step": 27.715822458267212, "step": 1116} +{"train_info/time_between_train_steps": 0.0051441192626953125, "step": 1116} +{"info/global_step": 1117, "train_info/time_within_train_step": 27.706358194351196, "step": 1117} +{"train_info/time_between_train_steps": 0.005114555358886719, "step": 1117} +{"info/global_step": 1118, "train_info/time_within_train_step": 27.70519709587097, "step": 1118} +{"train_info/time_between_train_steps": 0.0050470829010009766, "step": 1118} +{"info/global_step": 1119, "train_info/time_within_train_step": 27.70290517807007, "step": 1119} +{"train_info/time_between_train_steps": 0.0051348209381103516, "step": 1119} +{"info/global_step": 1120, "train_info/time_within_train_step": 27.704933404922485, "step": 1120} +{"train_info/time_between_train_steps": 0.005051136016845703, "step": 1120} +{"info/global_step": 1121, "train_info/time_within_train_step": 27.709423065185547, "step": 1121} +{"train_info/time_between_train_steps": 0.005056619644165039, "step": 1121} +{"info/global_step": 1122, "train_info/time_within_train_step": 27.70383310317993, "step": 1122} +{"train_info/time_between_train_steps": 0.005174160003662109, "step": 1122} +{"info/global_step": 1123, "train_info/time_within_train_step": 27.71107792854309, "step": 1123} +{"train_info/time_between_train_steps": 0.005245685577392578, "step": 1123} +{"info/global_step": 1124, "train_info/time_within_train_step": 27.71373987197876, "step": 1124} +{"train_info/time_between_train_steps": 0.005297183990478516, "step": 1124} +{"info/global_step": 1125, "train_info/time_within_train_step": 27.712130308151245, "step": 1125} +{"train_info/time_between_train_steps": 0.00529026985168457, "step": 1125} +{"info/global_step": 1126, "train_info/time_within_train_step": 27.708998203277588, "step": 1126} +{"train_info/time_between_train_steps": 0.005160093307495117, "step": 1126} +{"info/global_step": 1127, "train_info/time_within_train_step": 27.712517976760864, "step": 1127} +{"train_info/time_between_train_steps": 0.005234718322753906, "step": 1127} +{"info/global_step": 1128, "train_info/time_within_train_step": 27.710004091262817, "step": 1128} +{"train_info/time_between_train_steps": 0.0051801204681396484, "step": 1128} +{"info/global_step": 1129, "train_info/time_within_train_step": 27.70821261405945, "step": 1129} +{"train_info/time_between_train_steps": 0.0052340030670166016, "step": 1129} +{"info/global_step": 1130, "train_info/time_within_train_step": 27.713714122772217, "step": 1130} +{"train_info/time_between_train_steps": 0.005100727081298828, "step": 1130} +{"info/global_step": 1131, "train_info/time_within_train_step": 27.82764983177185, "step": 1131} +{"train_info/time_between_train_steps": 0.0054624080657958984, "step": 1131} +{"info/global_step": 1132, "train_info/time_within_train_step": 27.723060369491577, "step": 1132} +{"train_info/time_between_train_steps": 0.00552678108215332, "step": 1132} +{"info/global_step": 1133, "train_info/time_within_train_step": 27.733927488327026, "step": 1133} +{"train_info/time_between_train_steps": 0.005617856979370117, "step": 1133} +{"info/global_step": 1134, "train_info/time_within_train_step": 27.737345457077026, "step": 1134} +{"train_info/time_between_train_steps": 0.005853176116943359, "step": 1134} +{"train_info/time_between_train_steps": 20.667433261871338, "step": 1134} +{"info/global_step": 1135, "train_info/time_within_train_step": 27.709441661834717, "step": 1135} +{"train_info/time_between_train_steps": 0.0050811767578125, "step": 1135} +{"info/global_step": 1136, "train_info/time_within_train_step": 27.810336112976074, "step": 1136} +{"train_info/time_between_train_steps": 0.0050983428955078125, "step": 1136} +{"info/global_step": 1137, "train_info/time_within_train_step": 27.708481311798096, "step": 1137} +{"train_info/time_between_train_steps": 0.005066871643066406, "step": 1137} +{"info/global_step": 1138, "train_info/time_within_train_step": 27.83409881591797, "step": 1138} +{"train_info/time_between_train_steps": 0.00520634651184082, "step": 1138} +{"info/global_step": 1139, "train_info/time_within_train_step": 27.7212975025177, "step": 1139} +{"train_info/time_between_train_steps": 0.005444049835205078, "step": 1139} +{"info/global_step": 1140, "train_info/time_within_train_step": 27.851502656936646, "step": 1140} +{"train_info/time_between_train_steps": 0.005346059799194336, "step": 1140} +{"info/global_step": 1141, "train_info/time_within_train_step": 27.72391653060913, "step": 1141} +{"train_info/time_between_train_steps": 0.0053555965423583984, "step": 1141} +{"info/global_step": 1142, "train_info/time_within_train_step": 27.751240730285645, "step": 1142} +{"train_info/time_between_train_steps": 0.005421161651611328, "step": 1142} +{"info/global_step": 1143, "train_info/time_within_train_step": 27.714173555374146, "step": 1143} +{"train_info/time_between_train_steps": 0.005037784576416016, "step": 1143} +{"info/global_step": 1144, "train_info/time_within_train_step": 27.708738803863525, "step": 1144} +{"train_info/time_between_train_steps": 0.0050792694091796875, "step": 1144} +{"info/global_step": 1145, "train_info/time_within_train_step": 27.70580744743347, "step": 1145} +{"train_info/time_between_train_steps": 0.005028724670410156, "step": 1145} +{"info/global_step": 1146, "train_info/time_within_train_step": 27.794313669204712, "step": 1146} +{"train_info/time_between_train_steps": 0.005229949951171875, "step": 1146} +{"info/global_step": 1147, "train_info/time_within_train_step": 27.717153787612915, "step": 1147} +{"train_info/time_between_train_steps": 0.005186557769775391, "step": 1147} +{"info/global_step": 1148, "train_info/time_within_train_step": 27.714354276657104, "step": 1148} +{"train_info/time_between_train_steps": 0.005358695983886719, "step": 1148} +{"info/global_step": 1149, "train_info/time_within_train_step": 27.7146053314209, "step": 1149} +{"train_info/time_between_train_steps": 0.005233287811279297, "step": 1149} +{"info/global_step": 1150, "train_info/time_within_train_step": 27.71490716934204, "step": 1150} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1733800496, "_runtime": 33877}, "step": 1150} +{"logs": {"train/loss": 5.0386, "train/learning_rate": 2.7777777777777772e-05, "train/epoch": 42.01, "_timestamp": 1733800496, "_runtime": 33877}, "step": 1150} +{"train_info/time_between_train_steps": 0.006963491439819336, "step": 1150} +{"info/global_step": 1151, "train_info/time_within_train_step": 27.713624715805054, "step": 1151} +{"train_info/time_between_train_steps": 0.005457401275634766, "step": 1151} +{"info/global_step": 1152, "train_info/time_within_train_step": 27.709513425827026, "step": 1152} +{"train_info/time_between_train_steps": 0.005366086959838867, "step": 1152} +{"info/global_step": 1153, "train_info/time_within_train_step": 27.7127206325531, "step": 1153} +{"train_info/time_between_train_steps": 0.005124807357788086, "step": 1153} +{"info/global_step": 1154, "train_info/time_within_train_step": 27.71472692489624, "step": 1154} +{"train_info/time_between_train_steps": 0.010766267776489258, "step": 1154} +{"info/global_step": 1155, "train_info/time_within_train_step": 27.71681547164917, "step": 1155} +{"train_info/time_between_train_steps": 0.0053730010986328125, "step": 1155} +{"info/global_step": 1156, "train_info/time_within_train_step": 27.719517946243286, "step": 1156} +{"train_info/time_between_train_steps": 0.005273103713989258, "step": 1156} +{"info/global_step": 1157, "train_info/time_within_train_step": 27.718448400497437, "step": 1157} +{"train_info/time_between_train_steps": 0.005262851715087891, "step": 1157} +{"info/global_step": 1158, "train_info/time_within_train_step": 27.71640920639038, "step": 1158} +{"train_info/time_between_train_steps": 0.00555109977722168, "step": 1158} +{"info/global_step": 1159, "train_info/time_within_train_step": 27.720975160598755, "step": 1159} +{"train_info/time_between_train_steps": 0.005211591720581055, "step": 1159} +{"info/global_step": 1160, "train_info/time_within_train_step": 27.727521896362305, "step": 1160} +{"train_info/time_between_train_steps": 0.0061266422271728516, "step": 1160} +{"info/global_step": 1161, "train_info/time_within_train_step": 27.747395753860474, "step": 1161} +{"train_info/time_between_train_steps": 0.005887508392333984, "step": 1161} +{"train_info/time_between_train_steps": 20.541022777557373, "step": 1161} +{"info/global_step": 1162, "train_info/time_within_train_step": 27.712311506271362, "step": 1162} +{"train_info/time_between_train_steps": 0.005087852478027344, "step": 1162} +{"info/global_step": 1163, "train_info/time_within_train_step": 27.811830043792725, "step": 1163} +{"train_info/time_between_train_steps": 0.0051233768463134766, "step": 1163} +{"info/global_step": 1164, "train_info/time_within_train_step": 27.70773959159851, "step": 1164} +{"train_info/time_between_train_steps": 0.005233049392700195, "step": 1164} +{"info/global_step": 1165, "train_info/time_within_train_step": 27.849902153015137, "step": 1165} +{"train_info/time_between_train_steps": 0.005250692367553711, "step": 1165} +{"info/global_step": 1166, "train_info/time_within_train_step": 27.72392225265503, "step": 1166} +{"train_info/time_between_train_steps": 0.0053920745849609375, "step": 1166} +{"info/global_step": 1167, "train_info/time_within_train_step": 27.889155626296997, "step": 1167} +{"train_info/time_between_train_steps": 0.005555391311645508, "step": 1167} +{"info/global_step": 1168, "train_info/time_within_train_step": 27.757288217544556, "step": 1168} +{"train_info/time_between_train_steps": 0.0055751800537109375, "step": 1168} +{"info/global_step": 1169, "train_info/time_within_train_step": 27.772268772125244, "step": 1169} +{"train_info/time_between_train_steps": 0.0055272579193115234, "step": 1169} +{"info/global_step": 1170, "train_info/time_within_train_step": 27.736252069473267, "step": 1170} +{"train_info/time_between_train_steps": 0.005231618881225586, "step": 1170} +{"info/global_step": 1171, "train_info/time_within_train_step": 27.71947741508484, "step": 1171} +{"train_info/time_between_train_steps": 0.005170583724975586, "step": 1171} +{"info/global_step": 1172, "train_info/time_within_train_step": 27.71733593940735, "step": 1172} +{"train_info/time_between_train_steps": 0.00515437126159668, "step": 1172} +{"info/global_step": 1173, "train_info/time_within_train_step": 27.717931509017944, "step": 1173} +{"train_info/time_between_train_steps": 0.0051975250244140625, "step": 1173} +{"info/global_step": 1174, "train_info/time_within_train_step": 27.718406915664673, "step": 1174} +{"train_info/time_between_train_steps": 0.0051670074462890625, "step": 1174} +{"info/global_step": 1175, "train_info/time_within_train_step": 27.714184045791626, "step": 1175} +{"train_info/time_between_train_steps": 0.0051267147064208984, "step": 1175} +{"info/global_step": 1176, "train_info/time_within_train_step": 27.72164487838745, "step": 1176} +{"train_info/time_between_train_steps": 0.005347728729248047, "step": 1176} +{"info/global_step": 1177, "train_info/time_within_train_step": 27.81582760810852, "step": 1177} +{"train_info/time_between_train_steps": 0.005232572555541992, "step": 1177} +{"info/global_step": 1178, "train_info/time_within_train_step": 27.721343755722046, "step": 1178} +{"train_info/time_between_train_steps": 0.0054552555084228516, "step": 1178} +{"info/global_step": 1179, "train_info/time_within_train_step": 27.72182297706604, "step": 1179} +{"train_info/time_between_train_steps": 0.005270242691040039, "step": 1179} +{"info/global_step": 1180, "train_info/time_within_train_step": 27.72156286239624, "step": 1180} +{"train_info/time_between_train_steps": 0.0052340030670166016, "step": 1180} +{"info/global_step": 1181, "train_info/time_within_train_step": 27.721149444580078, "step": 1181} +{"train_info/time_between_train_steps": 0.005296468734741211, "step": 1181} +{"info/global_step": 1182, "train_info/time_within_train_step": 27.718312978744507, "step": 1182} +{"train_info/time_between_train_steps": 0.005288124084472656, "step": 1182} +{"info/global_step": 1183, "train_info/time_within_train_step": 27.728678941726685, "step": 1183} +{"train_info/time_between_train_steps": 0.005540370941162109, "step": 1183} +{"info/global_step": 1184, "train_info/time_within_train_step": 27.72779607772827, "step": 1184} +{"train_info/time_between_train_steps": 0.005568504333496094, "step": 1184} +{"info/global_step": 1185, "train_info/time_within_train_step": 27.735156774520874, "step": 1185} +{"train_info/time_between_train_steps": 0.0058634281158447266, "step": 1185} +{"info/global_step": 1186, "train_info/time_within_train_step": 27.739386558532715, "step": 1186} +{"train_info/time_between_train_steps": 0.005518436431884766, "step": 1186} +{"info/global_step": 1187, "train_info/time_within_train_step": 27.744056940078735, "step": 1187} +{"train_info/time_between_train_steps": 0.005736351013183594, "step": 1187} +{"info/global_step": 1188, "train_info/time_within_train_step": 27.75426435470581, "step": 1188} +{"train_info/time_between_train_steps": 0.005719900131225586, "step": 1188} +{"train_info/time_between_train_steps": 20.623891830444336, "step": 1188} +{"info/global_step": 1189, "train_info/time_within_train_step": 27.72651171684265, "step": 1189} +{"train_info/time_between_train_steps": 0.0052814483642578125, "step": 1189} +{"info/global_step": 1190, "train_info/time_within_train_step": 27.832427263259888, "step": 1190} +{"train_info/time_between_train_steps": 0.00521397590637207, "step": 1190} +{"info/global_step": 1191, "train_info/time_within_train_step": 27.745303869247437, "step": 1191} +{"train_info/time_between_train_steps": 0.00550079345703125, "step": 1191} +{"info/global_step": 1192, "train_info/time_within_train_step": 27.964462995529175, "step": 1192} +{"train_info/time_between_train_steps": 0.005496025085449219, "step": 1192} +{"info/global_step": 1193, "train_info/time_within_train_step": 27.734492301940918, "step": 1193} +{"train_info/time_between_train_steps": 0.005471467971801758, "step": 1193} +{"info/global_step": 1194, "train_info/time_within_train_step": 27.845815658569336, "step": 1194} +{"train_info/time_between_train_steps": 0.005367755889892578, "step": 1194} +{"info/global_step": 1195, "train_info/time_within_train_step": 27.728920698165894, "step": 1195} +{"train_info/time_between_train_steps": 0.005408525466918945, "step": 1195} +{"info/global_step": 1196, "train_info/time_within_train_step": 27.760444402694702, "step": 1196} +{"train_info/time_between_train_steps": 0.005421161651611328, "step": 1196} +{"info/global_step": 1197, "train_info/time_within_train_step": 27.727322816848755, "step": 1197} +{"train_info/time_between_train_steps": 0.005108833312988281, "step": 1197} +{"info/global_step": 1198, "train_info/time_within_train_step": 27.71959924697876, "step": 1198} +{"train_info/time_between_train_steps": 0.00526118278503418, "step": 1198} +{"info/global_step": 1199, "train_info/time_within_train_step": 27.72154664993286, "step": 1199} +{"train_info/time_between_train_steps": 0.005254268646240234, "step": 1199} +{"info/global_step": 1200, "train_info/time_within_train_step": 27.726475954055786, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1733801924, "_runtime": 35305}, "step": 1200} +{"logs": {"train/loss": 5.019, "train/learning_rate": 0.0, "train/epoch": 44.01, "_timestamp": 1733801924, "_runtime": 35305}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4462890625, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1733801927, "_runtime": 35308}, "step": 1200} +{"logs": {"train/train_runtime": 35308.8988, "train/train_samples_per_second": 17.401, "train/train_steps_per_second": 0.034, "train/total_flos": 3.2843363254272e+17, "train/train_loss": 5.94017751455307, "train/epoch": 44.01, "_timestamp": 1733801927, "_runtime": 35308}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4453125, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1733801930, "_runtime": 35311}, "step": 1200} +{"logs": {"eval/loss": 5.6305365562438965, "eval/runtime": 1.9614, "eval/samples_per_second": 50.983, "eval/steps_per_second": 3.569, "train/epoch": 44.01, "_timestamp": 1733801930, "_runtime": 35311}, "step": 1200} +{"train_info": {"train_info/memory_allocated": 1922.4453125, "train_info/memory_max_allocated": 20713.490234375, "train_info/memory_reserved": 27336.0, "train_info/memory_max_reserved": 27336.0, "_timestamp": 1733801930, "_runtime": 35311}, "step": 1200} +{"logs": {"eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_loss": 5.6305365562438965, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_ppl": 278.81167565174974, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_runtime": 1.9614, "eval//local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py_samples_per_second": 50.983, "train/epoch": 44.01, "_timestamp": 1733801930, "_runtime": 35311}, "step": 1200} diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..15d72e26860b84ce66d0557941ba7380ec63d4a8 --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:264f42ae84028911e12cb342c2258bd2ee82c6eae74d62c32226255df708095a +size 510396521 diff --git a/shuffle_deterministic84_en_EN_randinit_seed53.log b/shuffle_deterministic84_en_EN_randinit_seed53.log new file mode 100755 index 0000000000000000000000000000000000000000..c32578c882f3cf4450ca7926f5bb086933360708 --- /dev/null +++ b/shuffle_deterministic84_en_EN_randinit_seed53.log @@ -0,0 +1,121 @@ +|=>> 12/09 [18:49:33] - mistral - INFO :: Starting Run: shuffle_deterministic84_en_EN_randinit_seed53... +|=>> 12/09 [18:49:33] - mistral - INFO :: Setting Random Seed to 53! +|=>> 12/09 [18:49:33] - mistral - INFO :: Building Tokenize and Initializing `gpt2-small` via AutoModel/AutoConfig... +|=>> 12/09 [18:49:33] - mistral - INFO :: Using Configs For Model From: /local/xiulyang/mission-impossible-language-models/mistral/conf/models/gpt2-small-50257.json ... +|=>> 12/09 [18:49:33] - mistral.models.auto - INFO :: Building Hugging Face GPT2Config from provided configs: {'activation_function': 'gelu_new', 'architectures': ['GPT2LMHeadModel'], 'attn_pdrop': 0.1, 'embd_pdrop': 0.1, 'eos_token_id': 50256, 'bos_token_id': 50256, 'initializer_range': 0.02, 'layer_norm_epsilon': 1e-05, 'model_type': 'gpt2', 'n_ctx': 1024, 'n_embd': 768, 'n_head': 12, 'n_inner': None, 'n_layer': 12, 'n_positions': 1024, 'reorder_and_upcast_attn': True, 'resid_pdrop': 0.1, 'scale_attn_by_inverse_layer_idx': True, 'scale_attn_weights': True, 'summary_activation': None, 'summary_first_dropout': 0.2, 'summary_proj_to_labels': True, 'summary_type': 'cls_index', 'summary_use_proj': True, 'task_specific_params': {'text-generation': {'do_sample': True, 'max_length': 1024}}, 'torch_dtype': 'float32', 'transformers_version': '4.35.2', 'use_cache': False, 'vocab_size': 50257} ... +|=>> 12/09 [18:49:33] - mistral.models.auto - INFO :: Fetching Hugging Face [Fast] AutoTokenizer for Model: `gpt2`... +|=>> 12/09 [18:49:33] - mistral.models.auto - INFO :: Using a Pretokenized Dataset +|=>> 12/09 [18:49:33] - mistral.models.auto - INFO :: Initializing Custom GPT-2 Model from Configuration: `gpt2`... +|=>> 12/09 [18:49:37] - mistral - INFO :: Setting Training Arguments from Quinfig... +|=>> 12/09 [18:49:37] - mistral.args.training - INFO :: Setting Gradient Accumulation Steps = `64` [BSZ: 512 World Size: 1 Device BSZ: 8] +|=>> 12/09 [18:49:37] - mistral - INFO :: Downloading and Preprocessing Dataset `/local/xiulyang/mission-impossible-language-models/training/babylm_dataset.py`... +|=>> 12/09 [18:49:37] - datasets_modules.datasets.babylm_dataset.fff6b8bdf961d84c2d3bc04820fc732ee53a8c3a0e82cfad28a8e9dec40ceec0.babylm_dataset - INFO :: Generating examples from = /local/xiulyang/mission-impossible-language-models/data/multilingual/multilingual_data_perturbed/shuffle_deterministic84_en/train +|=>> 12/09 [18:49:37] - datasets_modules.datasets.babylm_dataset.fff6b8bdf961d84c2d3bc04820fc732ee53a8c3a0e82cfad28a8e9dec40ceec0.babylm_dataset - INFO :: Total sentences: 1032638 +|=>> 12/09 [18:49:37] - datasets_modules.datasets.babylm_dataset.fff6b8bdf961d84c2d3bc04820fc732ee53a8c3a0e82cfad28a8e9dec40ceec0.babylm_dataset - INFO :: Loading pre-tokenized data +|=>> 12/09 [18:49:42] - datasets_modules.datasets.babylm_dataset.fff6b8bdf961d84c2d3bc04820fc732ee53a8c3a0e82cfad28a8e9dec40ceec0.babylm_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 12/09 [18:49:43] - datasets_modules.datasets.babylm_dataset.fff6b8bdf961d84c2d3bc04820fc732ee53a8c3a0e82cfad28a8e9dec40ceec0.babylm_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 12/09 [18:49:43] - datasets_modules.datasets.babylm_dataset.fff6b8bdf961d84c2d3bc04820fc732ee53a8c3a0e82cfad28a8e9dec40ceec0.babylm_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 12/09 [18:49:46] - datasets_modules.datasets.babylm_dataset.fff6b8bdf961d84c2d3bc04820fc732ee53a8c3a0e82cfad28a8e9dec40ceec0.babylm_dataset - INFO :: Generating examples from = /local/xiulyang/mission-impossible-language-models/data/multilingual/multilingual_data_perturbed/shuffle_deterministic84_en/dev +|=>> 12/09 [18:49:46] - datasets_modules.datasets.babylm_dataset.fff6b8bdf961d84c2d3bc04820fc732ee53a8c3a0e82cfad28a8e9dec40ceec0.babylm_dataset - INFO :: Total sentences: 5498 +|=>> 12/09 [18:49:46] - datasets_modules.datasets.babylm_dataset.fff6b8bdf961d84c2d3bc04820fc732ee53a8c3a0e82cfad28a8e9dec40ceec0.babylm_dataset - INFO :: Loading pre-tokenized data +|=>> 12/09 [18:49:46] - datasets_modules.datasets.babylm_dataset.fff6b8bdf961d84c2d3bc04820fc732ee53a8c3a0e82cfad28a8e9dec40ceec0.babylm_dataset - INFO :: Concatenating tokenized data using EOS token +|=>> 12/09 [18:49:46] - datasets_modules.datasets.babylm_dataset.fff6b8bdf961d84c2d3bc04820fc732ee53a8c3a0e82cfad28a8e9dec40ceec0.babylm_dataset - INFO :: Chunking tokens into sublists of 1024 +|=>> 12/09 [18:49:46] - datasets_modules.datasets.babylm_dataset.fff6b8bdf961d84c2d3bc04820fc732ee53a8c3a0e82cfad28a8e9dec40ceec0.babylm_dataset - INFO :: Writing dataset as space-separated sequences of tokens +|=>> 12/09 [18:49:46] - mistral.corpora.auto - INFO :: Building Tokenized Indexed Dataset for {dataset_id}/{dataset_name}... +|=>> 12/09 [18:49:46] - mistral.corpora.auto - INFO :: Building Indexed Dataset for train +|=>> 12/09 [18:50:15] - mistral.corpora.auto - INFO :: Building Indexed Dataset for validation +|=>> 12/09 [18:50:15] - mistral - INFO :: Initializing Model Trainer... +|=>> 12/09 [18:50:15] - mistral - INFO :: Training Arguments: TrainingArguments( +_n_gpu=1, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +bf16=False, +bf16_full_eval=False, +data_seed=53, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=IntervalStrategy.STEPS, +fp16=True, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +gradient_accumulation_steps=64, +gradient_checkpointing=False, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_model_id=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0006, +length_column_name=length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=logs, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=50, +logging_strategy=IntervalStrategy.STEPS, +lr_scheduler_type=SchedulerType.LINEAR, +max_grad_norm=1.0, +max_steps=1200, +metric_for_best_model=None, +mp_parameters=, +no_cuda=False, +num_train_epochs=3.0, +optim=OptimizerNames.ADAMW_HF, +output_dir=//local/xiulyang/babylm_models/shuffle_deterministic84_en_EN_randinit/babylm_shuffle_deterministic84_en_EN_randinit_seed53/runs/shuffle_deterministic84_en_EN_randinit_seed53, +overwrite_output_dir=False, +past_index=-1, +per_device_eval_batch_size=16, +per_device_train_batch_size=8, +prediction_loss_only=True, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +remove_unused_columns=True, +report_to=[], +resume_from_checkpoint=None, +run_name=shuffle_deterministic84_en_EN_randinit_seed53, +save_on_each_node=False, +save_steps=1000, +save_strategy=IntervalStrategy.STEPS, +save_total_limit=None, +seed=53, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=120, +weight_decay=0.1, +xpu_backend=None, +) +|=>> 12/09 [18:50:15] - mistral.core.callbacks - INFO :: Setting W&B Project: xiulin-yang-compling +|=>> 12/09 [18:50:18] - mistral - INFO :: Training... +|=>> 12/09 [18:50:18] - mistral.core.callbacks - INFO :: Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" +|=>> 12/10 [04:38:48] - mistral - INFO :: ...and that's all folks! +|=>> 12/10 [04:38:48] - mistral - INFO :: Running final evaluation... diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d0bb84ef853fda188d996b93c143bd905b3674 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1 @@ +{"tokenizer_class": "PassthroughTokenizer"} \ No newline at end of file diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eaccfd8316af7736489690ab9f6e133305fda6a5 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293690fbed5a45d87250b9feccf0dd9aa14b6f83c629ffe357c2693865f5f1cd +size 3183