| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.5020766035994463, | |
| "eval_steps": 500, | |
| "global_step": 68, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.007383479464697739, | |
| "grad_norm": 1.9591929912567139, | |
| "learning_rate": 0.0, | |
| "loss": 1.6228, | |
| "memory/device_mem_reserved(gib)": 21.61, | |
| "memory/max_mem_active(gib)": 21.2, | |
| "memory/max_mem_allocated(gib)": 21.2, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.014766958929395477, | |
| "grad_norm": 1.4523507356643677, | |
| "learning_rate": 1.5384615384615387e-05, | |
| "loss": 1.5769, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.022150438394093218, | |
| "grad_norm": 1.1918187141418457, | |
| "learning_rate": 3.0769230769230774e-05, | |
| "loss": 1.5435, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.029533917858790955, | |
| "grad_norm": 0.8260876536369324, | |
| "learning_rate": 4.615384615384616e-05, | |
| "loss": 1.6523, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.03691739732348869, | |
| "grad_norm": 0.8584926128387451, | |
| "learning_rate": 6.153846153846155e-05, | |
| "loss": 1.5745, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.044300876788186436, | |
| "grad_norm": 0.6466429829597473, | |
| "learning_rate": 7.692307692307693e-05, | |
| "loss": 1.4759, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.05168435625288417, | |
| "grad_norm": 0.5014482140541077, | |
| "learning_rate": 9.230769230769232e-05, | |
| "loss": 1.602, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.05906783571758191, | |
| "grad_norm": 0.6017433404922485, | |
| "learning_rate": 0.0001076923076923077, | |
| "loss": 1.4176, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.06645131518227965, | |
| "grad_norm": 0.4612258970737457, | |
| "learning_rate": 0.0001230769230769231, | |
| "loss": 1.5819, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.07383479464697738, | |
| "grad_norm": 0.4430214464664459, | |
| "learning_rate": 0.00013846153846153847, | |
| "loss": 1.561, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.08121827411167512, | |
| "grad_norm": 0.3746771216392517, | |
| "learning_rate": 0.00015384615384615385, | |
| "loss": 1.6744, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.08860175357637287, | |
| "grad_norm": 0.38248857855796814, | |
| "learning_rate": 0.00016923076923076923, | |
| "loss": 1.5629, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.09598523304107061, | |
| "grad_norm": 0.515844464302063, | |
| "learning_rate": 0.00018461538461538463, | |
| "loss": 1.5264, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.10336871250576835, | |
| "grad_norm": 0.3964424431324005, | |
| "learning_rate": 0.0002, | |
| "loss": 1.5398, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.11075219197046608, | |
| "grad_norm": 0.4010593891143799, | |
| "learning_rate": 0.0001999668467514313, | |
| "loss": 1.4618, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.11813567143516382, | |
| "grad_norm": 0.3192802965641022, | |
| "learning_rate": 0.00019986740898848306, | |
| "loss": 1.6994, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.12551915089986157, | |
| "grad_norm": 0.410099059343338, | |
| "learning_rate": 0.00019970175264485266, | |
| "loss": 1.5913, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.1329026303645593, | |
| "grad_norm": 0.312429815530777, | |
| "learning_rate": 0.0001994699875614589, | |
| "loss": 1.5701, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.14028610982925704, | |
| "grad_norm": 0.2831230163574219, | |
| "learning_rate": 0.00019917226741361015, | |
| "loss": 1.5744, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.14766958929395477, | |
| "grad_norm": 0.3618868291378021, | |
| "learning_rate": 0.00019880878960910772, | |
| "loss": 1.5185, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.15505306875865252, | |
| "grad_norm": 0.3151628077030182, | |
| "learning_rate": 0.00019837979515735166, | |
| "loss": 1.5086, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.16243654822335024, | |
| "grad_norm": 0.31955838203430176, | |
| "learning_rate": 0.0001978855685095358, | |
| "loss": 1.6329, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.169820027688048, | |
| "grad_norm": 0.3030437231063843, | |
| "learning_rate": 0.00019732643737003827, | |
| "loss": 1.6697, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.17720350715274574, | |
| "grad_norm": 0.41288134455680847, | |
| "learning_rate": 0.00019670277247913205, | |
| "loss": 1.7094, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.18458698661744347, | |
| "grad_norm": 0.2887294888496399, | |
| "learning_rate": 0.00019601498736716017, | |
| "loss": 1.5554, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.19197046608214122, | |
| "grad_norm": 0.3173791170120239, | |
| "learning_rate": 0.00019526353808033825, | |
| "loss": 1.4404, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.19935394554683894, | |
| "grad_norm": 0.2877439558506012, | |
| "learning_rate": 0.00019444892287836613, | |
| "loss": 1.4766, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.2067374250115367, | |
| "grad_norm": 0.29286038875579834, | |
| "learning_rate": 0.00019357168190404936, | |
| "loss": 1.5156, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.2141209044762344, | |
| "grad_norm": 0.27713659405708313, | |
| "learning_rate": 0.00019263239682514952, | |
| "loss": 1.5153, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.22150438394093216, | |
| "grad_norm": 0.29187655448913574, | |
| "learning_rate": 0.0001916316904487005, | |
| "loss": 1.6036, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.22888786340562992, | |
| "grad_norm": 0.2671583890914917, | |
| "learning_rate": 0.00019057022630804716, | |
| "loss": 1.4675, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.23627134287032764, | |
| "grad_norm": 0.2679831087589264, | |
| "learning_rate": 0.00018944870822287956, | |
| "loss": 1.581, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.2436548223350254, | |
| "grad_norm": 0.26359617710113525, | |
| "learning_rate": 0.00018826787983255473, | |
| "loss": 1.4674, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.25103830179972314, | |
| "grad_norm": 0.30446046590805054, | |
| "learning_rate": 0.00018702852410301554, | |
| "loss": 1.5038, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.25842178126442084, | |
| "grad_norm": 0.3004315197467804, | |
| "learning_rate": 0.00018573146280763324, | |
| "loss": 1.4024, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.2658052607291186, | |
| "grad_norm": 0.27353399991989136, | |
| "learning_rate": 0.00018437755598231856, | |
| "loss": 1.3652, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.27318874019381634, | |
| "grad_norm": 0.2659265995025635, | |
| "learning_rate": 0.0001829677013552619, | |
| "loss": 1.4905, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.2805722196585141, | |
| "grad_norm": 0.2703750431537628, | |
| "learning_rate": 0.00018150283375168114, | |
| "loss": 1.3298, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.28795569912321184, | |
| "grad_norm": 0.29322877526283264, | |
| "learning_rate": 0.00017998392447397197, | |
| "loss": 1.3639, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.29533917858790953, | |
| "grad_norm": 0.26927265524864197, | |
| "learning_rate": 0.00017841198065767107, | |
| "loss": 1.4849, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.3027226580526073, | |
| "grad_norm": 0.26683205366134644, | |
| "learning_rate": 0.00017678804460366, | |
| "loss": 1.4523, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.31010613751730504, | |
| "grad_norm": 0.26909339427948, | |
| "learning_rate": 0.00017511319308705198, | |
| "loss": 1.6047, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.3174896169820028, | |
| "grad_norm": 0.2938016355037689, | |
| "learning_rate": 0.00017338853664321992, | |
| "loss": 1.3858, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.3248730964467005, | |
| "grad_norm": 0.28290048241615295, | |
| "learning_rate": 0.00017161521883143934, | |
| "loss": 1.3671, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.33225657591139823, | |
| "grad_norm": 0.2870519161224365, | |
| "learning_rate": 0.00016979441547663435, | |
| "loss": 1.5334, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.339640055376096, | |
| "grad_norm": 0.26375913619995117, | |
| "learning_rate": 0.00016792733388972932, | |
| "loss": 1.5627, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.34702353484079373, | |
| "grad_norm": 0.26975446939468384, | |
| "learning_rate": 0.00016601521206712318, | |
| "loss": 1.4987, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.3544070143054915, | |
| "grad_norm": 0.30166226625442505, | |
| "learning_rate": 0.00016405931786981755, | |
| "loss": 1.5128, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.3617904937701892, | |
| "grad_norm": 0.29190436005592346, | |
| "learning_rate": 0.00016206094818274229, | |
| "loss": 1.4914, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.36917397323488693, | |
| "grad_norm": 0.2847207486629486, | |
| "learning_rate": 0.00016002142805483685, | |
| "loss": 1.4755, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.3765574526995847, | |
| "grad_norm": 0.2877140939235687, | |
| "learning_rate": 0.00015794210982045636, | |
| "loss": 1.422, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.38394093216428243, | |
| "grad_norm": 0.2705255150794983, | |
| "learning_rate": 0.00015582437220268647, | |
| "loss": 1.4822, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.3913244116289802, | |
| "grad_norm": 0.2597866654396057, | |
| "learning_rate": 0.00015366961939916008, | |
| "loss": 1.5088, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.3987078910936779, | |
| "grad_norm": 0.2965547740459442, | |
| "learning_rate": 0.0001514792801509831, | |
| "loss": 1.5346, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.40609137055837563, | |
| "grad_norm": 0.2670862078666687, | |
| "learning_rate": 0.00014925480679538647, | |
| "loss": 1.5695, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.4134748500230734, | |
| "grad_norm": 0.30679062008857727, | |
| "learning_rate": 0.000146997674302732, | |
| "loss": 1.6911, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.42085832948777113, | |
| "grad_norm": 0.2812528908252716, | |
| "learning_rate": 0.0001447093792985114, | |
| "loss": 1.664, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.4282418089524688, | |
| "grad_norm": 0.26079821586608887, | |
| "learning_rate": 0.0001423914390709861, | |
| "loss": 1.3911, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.4356252884171666, | |
| "grad_norm": 0.3026648461818695, | |
| "learning_rate": 0.00014004539056512667, | |
| "loss": 1.5685, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.44300876788186433, | |
| "grad_norm": 0.28435277938842773, | |
| "learning_rate": 0.00013767278936351854, | |
| "loss": 1.3599, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.4503922473465621, | |
| "grad_norm": 0.29877325892448425, | |
| "learning_rate": 0.0001352752086549095, | |
| "loss": 1.5173, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.45777572681125983, | |
| "grad_norm": 0.29666033387184143, | |
| "learning_rate": 0.0001328542381910835, | |
| "loss": 1.5017, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.4651592062759575, | |
| "grad_norm": 0.26934438943862915, | |
| "learning_rate": 0.0001304114832327518, | |
| "loss": 1.4674, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.4725426857406553, | |
| "grad_norm": 0.30038923025131226, | |
| "learning_rate": 0.00012794856348516095, | |
| "loss": 1.5306, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.47992616520535303, | |
| "grad_norm": 0.2799171805381775, | |
| "learning_rate": 0.00012546711202412287, | |
| "loss": 1.4696, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.4873096446700508, | |
| "grad_norm": 0.313919335603714, | |
| "learning_rate": 0.0001229687742131796, | |
| "loss": 1.5927, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.4946931241347485, | |
| "grad_norm": 0.27579808235168457, | |
| "learning_rate": 0.0001204552066126201, | |
| "loss": 1.4654, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.5020766035994463, | |
| "grad_norm": 0.28434908390045166, | |
| "learning_rate": 0.00011792807588107357, | |
| "loss": 1.5039, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 68 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 135, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 34, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.234585444763566e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |