| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.25103830179972314, | |
| "eval_steps": 500, | |
| "global_step": 34, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.007383479464697739, | |
| "grad_norm": 1.9591929912567139, | |
| "learning_rate": 0.0, | |
| "loss": 1.6228, | |
| "memory/device_mem_reserved(gib)": 21.61, | |
| "memory/max_mem_active(gib)": 21.2, | |
| "memory/max_mem_allocated(gib)": 21.2, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.014766958929395477, | |
| "grad_norm": 1.4523507356643677, | |
| "learning_rate": 1.5384615384615387e-05, | |
| "loss": 1.5769, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.022150438394093218, | |
| "grad_norm": 1.1918187141418457, | |
| "learning_rate": 3.0769230769230774e-05, | |
| "loss": 1.5435, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.029533917858790955, | |
| "grad_norm": 0.8260876536369324, | |
| "learning_rate": 4.615384615384616e-05, | |
| "loss": 1.6523, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.03691739732348869, | |
| "grad_norm": 0.8584926128387451, | |
| "learning_rate": 6.153846153846155e-05, | |
| "loss": 1.5745, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.044300876788186436, | |
| "grad_norm": 0.6466429829597473, | |
| "learning_rate": 7.692307692307693e-05, | |
| "loss": 1.4759, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.05168435625288417, | |
| "grad_norm": 0.5014482140541077, | |
| "learning_rate": 9.230769230769232e-05, | |
| "loss": 1.602, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.05906783571758191, | |
| "grad_norm": 0.6017433404922485, | |
| "learning_rate": 0.0001076923076923077, | |
| "loss": 1.4176, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.06645131518227965, | |
| "grad_norm": 0.4612258970737457, | |
| "learning_rate": 0.0001230769230769231, | |
| "loss": 1.5819, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.07383479464697738, | |
| "grad_norm": 0.4430214464664459, | |
| "learning_rate": 0.00013846153846153847, | |
| "loss": 1.561, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.08121827411167512, | |
| "grad_norm": 0.3746771216392517, | |
| "learning_rate": 0.00015384615384615385, | |
| "loss": 1.6744, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.08860175357637287, | |
| "grad_norm": 0.38248857855796814, | |
| "learning_rate": 0.00016923076923076923, | |
| "loss": 1.5629, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.09598523304107061, | |
| "grad_norm": 0.515844464302063, | |
| "learning_rate": 0.00018461538461538463, | |
| "loss": 1.5264, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.10336871250576835, | |
| "grad_norm": 0.3964424431324005, | |
| "learning_rate": 0.0002, | |
| "loss": 1.5398, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.11075219197046608, | |
| "grad_norm": 0.4010593891143799, | |
| "learning_rate": 0.0001999668467514313, | |
| "loss": 1.4618, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.11813567143516382, | |
| "grad_norm": 0.3192802965641022, | |
| "learning_rate": 0.00019986740898848306, | |
| "loss": 1.6994, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.12551915089986157, | |
| "grad_norm": 0.410099059343338, | |
| "learning_rate": 0.00019970175264485266, | |
| "loss": 1.5913, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.1329026303645593, | |
| "grad_norm": 0.312429815530777, | |
| "learning_rate": 0.0001994699875614589, | |
| "loss": 1.5701, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.14028610982925704, | |
| "grad_norm": 0.2831230163574219, | |
| "learning_rate": 0.00019917226741361015, | |
| "loss": 1.5744, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.14766958929395477, | |
| "grad_norm": 0.3618868291378021, | |
| "learning_rate": 0.00019880878960910772, | |
| "loss": 1.5185, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.15505306875865252, | |
| "grad_norm": 0.3151628077030182, | |
| "learning_rate": 0.00019837979515735166, | |
| "loss": 1.5086, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.16243654822335024, | |
| "grad_norm": 0.31955838203430176, | |
| "learning_rate": 0.0001978855685095358, | |
| "loss": 1.6329, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.169820027688048, | |
| "grad_norm": 0.3030437231063843, | |
| "learning_rate": 0.00019732643737003827, | |
| "loss": 1.6697, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.17720350715274574, | |
| "grad_norm": 0.41288134455680847, | |
| "learning_rate": 0.00019670277247913205, | |
| "loss": 1.7094, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.18458698661744347, | |
| "grad_norm": 0.2887294888496399, | |
| "learning_rate": 0.00019601498736716017, | |
| "loss": 1.5554, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.19197046608214122, | |
| "grad_norm": 0.3173791170120239, | |
| "learning_rate": 0.00019526353808033825, | |
| "loss": 1.4404, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.19935394554683894, | |
| "grad_norm": 0.2877439558506012, | |
| "learning_rate": 0.00019444892287836613, | |
| "loss": 1.4766, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.2067374250115367, | |
| "grad_norm": 0.29286038875579834, | |
| "learning_rate": 0.00019357168190404936, | |
| "loss": 1.5156, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.2141209044762344, | |
| "grad_norm": 0.27713659405708313, | |
| "learning_rate": 0.00019263239682514952, | |
| "loss": 1.5153, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.22150438394093216, | |
| "grad_norm": 0.29187655448913574, | |
| "learning_rate": 0.0001916316904487005, | |
| "loss": 1.6036, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.22888786340562992, | |
| "grad_norm": 0.2671583890914917, | |
| "learning_rate": 0.00019057022630804716, | |
| "loss": 1.4675, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.23627134287032764, | |
| "grad_norm": 0.2679831087589264, | |
| "learning_rate": 0.00018944870822287956, | |
| "loss": 1.581, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.2436548223350254, | |
| "grad_norm": 0.26359617710113525, | |
| "learning_rate": 0.00018826787983255473, | |
| "loss": 1.4674, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.25103830179972314, | |
| "grad_norm": 0.30446046590805054, | |
| "learning_rate": 0.00018702852410301554, | |
| "loss": 1.5038, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 34 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 135, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 34, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.17292722381783e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |