| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.7531149053991694, | |
| "eval_steps": 500, | |
| "global_step": 102, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.007383479464697739, | |
| "grad_norm": 1.9591929912567139, | |
| "learning_rate": 0.0, | |
| "loss": 1.6228, | |
| "memory/device_mem_reserved(gib)": 21.61, | |
| "memory/max_mem_active(gib)": 21.2, | |
| "memory/max_mem_allocated(gib)": 21.2, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.014766958929395477, | |
| "grad_norm": 1.4523507356643677, | |
| "learning_rate": 1.5384615384615387e-05, | |
| "loss": 1.5769, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.022150438394093218, | |
| "grad_norm": 1.1918187141418457, | |
| "learning_rate": 3.0769230769230774e-05, | |
| "loss": 1.5435, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.029533917858790955, | |
| "grad_norm": 0.8260876536369324, | |
| "learning_rate": 4.615384615384616e-05, | |
| "loss": 1.6523, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.03691739732348869, | |
| "grad_norm": 0.8584926128387451, | |
| "learning_rate": 6.153846153846155e-05, | |
| "loss": 1.5745, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.044300876788186436, | |
| "grad_norm": 0.6466429829597473, | |
| "learning_rate": 7.692307692307693e-05, | |
| "loss": 1.4759, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.05168435625288417, | |
| "grad_norm": 0.5014482140541077, | |
| "learning_rate": 9.230769230769232e-05, | |
| "loss": 1.602, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.05906783571758191, | |
| "grad_norm": 0.6017433404922485, | |
| "learning_rate": 0.0001076923076923077, | |
| "loss": 1.4176, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.06645131518227965, | |
| "grad_norm": 0.4612258970737457, | |
| "learning_rate": 0.0001230769230769231, | |
| "loss": 1.5819, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.07383479464697738, | |
| "grad_norm": 0.4430214464664459, | |
| "learning_rate": 0.00013846153846153847, | |
| "loss": 1.561, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.08121827411167512, | |
| "grad_norm": 0.3746771216392517, | |
| "learning_rate": 0.00015384615384615385, | |
| "loss": 1.6744, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.08860175357637287, | |
| "grad_norm": 0.38248857855796814, | |
| "learning_rate": 0.00016923076923076923, | |
| "loss": 1.5629, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.09598523304107061, | |
| "grad_norm": 0.515844464302063, | |
| "learning_rate": 0.00018461538461538463, | |
| "loss": 1.5264, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.10336871250576835, | |
| "grad_norm": 0.3964424431324005, | |
| "learning_rate": 0.0002, | |
| "loss": 1.5398, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.11075219197046608, | |
| "grad_norm": 0.4010593891143799, | |
| "learning_rate": 0.0001999668467514313, | |
| "loss": 1.4618, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.11813567143516382, | |
| "grad_norm": 0.3192802965641022, | |
| "learning_rate": 0.00019986740898848306, | |
| "loss": 1.6994, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.12551915089986157, | |
| "grad_norm": 0.410099059343338, | |
| "learning_rate": 0.00019970175264485266, | |
| "loss": 1.5913, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.1329026303645593, | |
| "grad_norm": 0.312429815530777, | |
| "learning_rate": 0.0001994699875614589, | |
| "loss": 1.5701, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.14028610982925704, | |
| "grad_norm": 0.2831230163574219, | |
| "learning_rate": 0.00019917226741361015, | |
| "loss": 1.5744, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.14766958929395477, | |
| "grad_norm": 0.3618868291378021, | |
| "learning_rate": 0.00019880878960910772, | |
| "loss": 1.5185, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.15505306875865252, | |
| "grad_norm": 0.3151628077030182, | |
| "learning_rate": 0.00019837979515735166, | |
| "loss": 1.5086, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.16243654822335024, | |
| "grad_norm": 0.31955838203430176, | |
| "learning_rate": 0.0001978855685095358, | |
| "loss": 1.6329, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.169820027688048, | |
| "grad_norm": 0.3030437231063843, | |
| "learning_rate": 0.00019732643737003827, | |
| "loss": 1.6697, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.17720350715274574, | |
| "grad_norm": 0.41288134455680847, | |
| "learning_rate": 0.00019670277247913205, | |
| "loss": 1.7094, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.18458698661744347, | |
| "grad_norm": 0.2887294888496399, | |
| "learning_rate": 0.00019601498736716017, | |
| "loss": 1.5554, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.19197046608214122, | |
| "grad_norm": 0.3173791170120239, | |
| "learning_rate": 0.00019526353808033825, | |
| "loss": 1.4404, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.19935394554683894, | |
| "grad_norm": 0.2877439558506012, | |
| "learning_rate": 0.00019444892287836613, | |
| "loss": 1.4766, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.2067374250115367, | |
| "grad_norm": 0.29286038875579834, | |
| "learning_rate": 0.00019357168190404936, | |
| "loss": 1.5156, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.2141209044762344, | |
| "grad_norm": 0.27713659405708313, | |
| "learning_rate": 0.00019263239682514952, | |
| "loss": 1.5153, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.22150438394093216, | |
| "grad_norm": 0.29187655448913574, | |
| "learning_rate": 0.0001916316904487005, | |
| "loss": 1.6036, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.22888786340562992, | |
| "grad_norm": 0.2671583890914917, | |
| "learning_rate": 0.00019057022630804716, | |
| "loss": 1.4675, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.23627134287032764, | |
| "grad_norm": 0.2679831087589264, | |
| "learning_rate": 0.00018944870822287956, | |
| "loss": 1.581, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.2436548223350254, | |
| "grad_norm": 0.26359617710113525, | |
| "learning_rate": 0.00018826787983255473, | |
| "loss": 1.4674, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.25103830179972314, | |
| "grad_norm": 0.30446046590805054, | |
| "learning_rate": 0.00018702852410301554, | |
| "loss": 1.5038, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.25842178126442084, | |
| "grad_norm": 0.3004315197467804, | |
| "learning_rate": 0.00018573146280763324, | |
| "loss": 1.4024, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.2658052607291186, | |
| "grad_norm": 0.27353399991989136, | |
| "learning_rate": 0.00018437755598231856, | |
| "loss": 1.3652, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.27318874019381634, | |
| "grad_norm": 0.2659265995025635, | |
| "learning_rate": 0.0001829677013552619, | |
| "loss": 1.4905, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.2805722196585141, | |
| "grad_norm": 0.2703750431537628, | |
| "learning_rate": 0.00018150283375168114, | |
| "loss": 1.3298, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.28795569912321184, | |
| "grad_norm": 0.29322877526283264, | |
| "learning_rate": 0.00017998392447397197, | |
| "loss": 1.3639, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.29533917858790953, | |
| "grad_norm": 0.26927265524864197, | |
| "learning_rate": 0.00017841198065767107, | |
| "loss": 1.4849, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.3027226580526073, | |
| "grad_norm": 0.26683205366134644, | |
| "learning_rate": 0.00017678804460366, | |
| "loss": 1.4523, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.31010613751730504, | |
| "grad_norm": 0.26909339427948, | |
| "learning_rate": 0.00017511319308705198, | |
| "loss": 1.6047, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.3174896169820028, | |
| "grad_norm": 0.2938016355037689, | |
| "learning_rate": 0.00017338853664321992, | |
| "loss": 1.3858, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.3248730964467005, | |
| "grad_norm": 0.28290048241615295, | |
| "learning_rate": 0.00017161521883143934, | |
| "loss": 1.3671, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.33225657591139823, | |
| "grad_norm": 0.2870519161224365, | |
| "learning_rate": 0.00016979441547663435, | |
| "loss": 1.5334, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.339640055376096, | |
| "grad_norm": 0.26375913619995117, | |
| "learning_rate": 0.00016792733388972932, | |
| "loss": 1.5627, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.34702353484079373, | |
| "grad_norm": 0.26975446939468384, | |
| "learning_rate": 0.00016601521206712318, | |
| "loss": 1.4987, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.3544070143054915, | |
| "grad_norm": 0.30166226625442505, | |
| "learning_rate": 0.00016405931786981755, | |
| "loss": 1.5128, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.3617904937701892, | |
| "grad_norm": 0.29190436005592346, | |
| "learning_rate": 0.00016206094818274229, | |
| "loss": 1.4914, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.36917397323488693, | |
| "grad_norm": 0.2847207486629486, | |
| "learning_rate": 0.00016002142805483685, | |
| "loss": 1.4755, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.3765574526995847, | |
| "grad_norm": 0.2877140939235687, | |
| "learning_rate": 0.00015794210982045636, | |
| "loss": 1.422, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.38394093216428243, | |
| "grad_norm": 0.2705255150794983, | |
| "learning_rate": 0.00015582437220268647, | |
| "loss": 1.4822, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.3913244116289802, | |
| "grad_norm": 0.2597866654396057, | |
| "learning_rate": 0.00015366961939916008, | |
| "loss": 1.5088, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.3987078910936779, | |
| "grad_norm": 0.2965547740459442, | |
| "learning_rate": 0.0001514792801509831, | |
| "loss": 1.5346, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.40609137055837563, | |
| "grad_norm": 0.2670862078666687, | |
| "learning_rate": 0.00014925480679538647, | |
| "loss": 1.5695, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.4134748500230734, | |
| "grad_norm": 0.30679062008857727, | |
| "learning_rate": 0.000146997674302732, | |
| "loss": 1.6911, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.42085832948777113, | |
| "grad_norm": 0.2812528908252716, | |
| "learning_rate": 0.0001447093792985114, | |
| "loss": 1.664, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.4282418089524688, | |
| "grad_norm": 0.26079821586608887, | |
| "learning_rate": 0.0001423914390709861, | |
| "loss": 1.3911, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.4356252884171666, | |
| "grad_norm": 0.3026648461818695, | |
| "learning_rate": 0.00014004539056512667, | |
| "loss": 1.5685, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.44300876788186433, | |
| "grad_norm": 0.28435277938842773, | |
| "learning_rate": 0.00013767278936351854, | |
| "loss": 1.3599, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.4503922473465621, | |
| "grad_norm": 0.29877325892448425, | |
| "learning_rate": 0.0001352752086549095, | |
| "loss": 1.5173, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.45777572681125983, | |
| "grad_norm": 0.29666033387184143, | |
| "learning_rate": 0.0001328542381910835, | |
| "loss": 1.5017, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.4651592062759575, | |
| "grad_norm": 0.26934438943862915, | |
| "learning_rate": 0.0001304114832327518, | |
| "loss": 1.4674, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.4725426857406553, | |
| "grad_norm": 0.30038923025131226, | |
| "learning_rate": 0.00012794856348516095, | |
| "loss": 1.5306, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.47992616520535303, | |
| "grad_norm": 0.2799171805381775, | |
| "learning_rate": 0.00012546711202412287, | |
| "loss": 1.4696, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.4873096446700508, | |
| "grad_norm": 0.313919335603714, | |
| "learning_rate": 0.0001229687742131796, | |
| "loss": 1.5927, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.4946931241347485, | |
| "grad_norm": 0.27579808235168457, | |
| "learning_rate": 0.0001204552066126201, | |
| "loss": 1.4654, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.5020766035994463, | |
| "grad_norm": 0.28434908390045166, | |
| "learning_rate": 0.00011792807588107357, | |
| "loss": 1.5039, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.509460083064144, | |
| "grad_norm": 0.2828267514705658, | |
| "learning_rate": 0.0001153890576704062, | |
| "loss": 1.6203, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.5168435625288417, | |
| "grad_norm": 0.30293530225753784, | |
| "learning_rate": 0.00011283983551465511, | |
| "loss": 1.5628, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.5242270419935394, | |
| "grad_norm": 0.2633240520954132, | |
| "learning_rate": 0.00011028209971373605, | |
| "loss": 1.5666, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.5316105214582372, | |
| "grad_norm": 0.24525777995586395, | |
| "learning_rate": 0.00010771754621266466, | |
| "loss": 1.592, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.5389940009229349, | |
| "grad_norm": 0.27619901299476624, | |
| "learning_rate": 0.00010514787547703466, | |
| "loss": 1.6212, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.5463774803876327, | |
| "grad_norm": 0.2783447504043579, | |
| "learning_rate": 0.00010257479136549889, | |
| "loss": 1.457, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.5537609598523304, | |
| "grad_norm": 0.2755623459815979, | |
| "learning_rate": 0.0001, | |
| "loss": 1.3652, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.5611444393170282, | |
| "grad_norm": 0.28049108386039734, | |
| "learning_rate": 9.742520863450115e-05, | |
| "loss": 1.4187, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.5685279187817259, | |
| "grad_norm": 0.27499496936798096, | |
| "learning_rate": 9.485212452296535e-05, | |
| "loss": 1.5488, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.5759113982464237, | |
| "grad_norm": 0.288296639919281, | |
| "learning_rate": 9.228245378733537e-05, | |
| "loss": 1.412, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.5832948777111213, | |
| "grad_norm": 0.35849323868751526, | |
| "learning_rate": 8.971790028626395e-05, | |
| "loss": 1.5214, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.5906783571758191, | |
| "grad_norm": 0.2951800525188446, | |
| "learning_rate": 8.71601644853449e-05, | |
| "loss": 1.4837, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.5980618366405168, | |
| "grad_norm": 0.2763918340206146, | |
| "learning_rate": 8.461094232959381e-05, | |
| "loss": 1.5419, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.6054453161052146, | |
| "grad_norm": 0.2745305895805359, | |
| "learning_rate": 8.207192411892646e-05, | |
| "loss": 1.4921, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.6128287955699123, | |
| "grad_norm": 0.30081912875175476, | |
| "learning_rate": 7.954479338737995e-05, | |
| "loss": 1.4745, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.6202122750346101, | |
| "grad_norm": 0.27640482783317566, | |
| "learning_rate": 7.703122578682046e-05, | |
| "loss": 1.4461, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.6275957544993078, | |
| "grad_norm": 0.3407949209213257, | |
| "learning_rate": 7.453288797587714e-05, | |
| "loss": 1.617, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.6349792339640056, | |
| "grad_norm": 0.2856479287147522, | |
| "learning_rate": 7.205143651483906e-05, | |
| "loss": 1.6538, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.6423627134287033, | |
| "grad_norm": 0.39668434858322144, | |
| "learning_rate": 6.958851676724823e-05, | |
| "loss": 1.5753, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.649746192893401, | |
| "grad_norm": 0.28521108627319336, | |
| "learning_rate": 6.714576180891654e-05, | |
| "loss": 1.3448, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.6571296723580987, | |
| "grad_norm": 0.2806543707847595, | |
| "learning_rate": 6.472479134509052e-05, | |
| "loss": 1.6431, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.6645131518227965, | |
| "grad_norm": 0.28897204995155334, | |
| "learning_rate": 6.232721063648148e-05, | |
| "loss": 1.5744, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.6718966312874942, | |
| "grad_norm": 0.25295817852020264, | |
| "learning_rate": 5.9954609434873344e-05, | |
| "loss": 1.4552, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.679280110752192, | |
| "grad_norm": 0.2824646234512329, | |
| "learning_rate": 5.7608560929013946e-05, | |
| "loss": 1.4193, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.6866635902168897, | |
| "grad_norm": 0.27227288484573364, | |
| "learning_rate": 5.5290620701488594e-05, | |
| "loss": 1.4602, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.6940470696815875, | |
| "grad_norm": 0.26284658908843994, | |
| "learning_rate": 5.300232569726804e-05, | |
| "loss": 1.5947, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.7014305491462852, | |
| "grad_norm": 0.26174792647361755, | |
| "learning_rate": 5.074519320461357e-05, | |
| "loss": 1.4122, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.708814028610983, | |
| "grad_norm": 0.3069747984409332, | |
| "learning_rate": 4.852071984901696e-05, | |
| "loss": 1.4805, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.7161975080756806, | |
| "grad_norm": 0.32007896900177, | |
| "learning_rate": 4.633038060083996e-05, | |
| "loss": 1.3798, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.7235809875403784, | |
| "grad_norm": 0.26006585359573364, | |
| "learning_rate": 4.417562779731355e-05, | |
| "loss": 1.4974, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.7309644670050761, | |
| "grad_norm": 0.2552710473537445, | |
| "learning_rate": 4.205789017954364e-05, | |
| "loss": 1.4851, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.7383479464697739, | |
| "grad_norm": 0.23933957517147064, | |
| "learning_rate": 3.997857194516319e-05, | |
| "loss": 1.5046, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.7457314259344716, | |
| "grad_norm": 0.2627595067024231, | |
| "learning_rate": 3.793905181725772e-05, | |
| "loss": 1.4958, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.7531149053991694, | |
| "grad_norm": 0.3138872981071472, | |
| "learning_rate": 3.594068213018249e-05, | |
| "loss": 1.4714, | |
| "memory/device_mem_reserved(gib)": 21.8, | |
| "memory/max_mem_active(gib)": 21.55, | |
| "memory/max_mem_allocated(gib)": 21.55, | |
| "step": 102 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 135, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 34, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.851878167145349e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |