| { |
| "best_metric": 0.60154372, |
| "best_model_checkpoint": "/nfs4/models/Qwen2.5-VL/Reject_sft_Qwen2.5-VL-3B-Instruct/v7-20250617-161549/checkpoint-800", |
| "epoch": 181.8372093023256, |
| "eval_steps": 200, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.09302325581395349, |
| "grad_norm": 2.441588224180784, |
| "learning_rate": 2e-09, |
| "loss": 0.7878831624984741, |
| "memory(GiB)": 65.48, |
| "step": 1, |
| "token_acc": 0.7861313342463778, |
| "train_speed(iter/s)": 0.055435 |
| }, |
| { |
| "epoch": 0.46511627906976744, |
| "grad_norm": 2.8780390737909576, |
| "learning_rate": 1e-08, |
| "loss": 0.8473173379898071, |
| "memory(GiB)": 65.5, |
| "step": 5, |
| "token_acc": 0.7882713944766451, |
| "train_speed(iter/s)": 0.101539 |
| }, |
| { |
| "epoch": 0.9302325581395349, |
| "grad_norm": 2.5438182772777616, |
| "learning_rate": 2e-08, |
| "loss": 0.8371296882629394, |
| "memory(GiB)": 66.93, |
| "step": 10, |
| "token_acc": 0.7700506562717737, |
| "train_speed(iter/s)": 0.110961 |
| }, |
| { |
| "epoch": 1.372093023255814, |
| "grad_norm": 2.4572297135546735, |
| "learning_rate": 3e-08, |
| "loss": 0.8263990402221679, |
| "memory(GiB)": 66.93, |
| "step": 15, |
| "token_acc": 0.7914247785857225, |
| "train_speed(iter/s)": 0.118864 |
| }, |
| { |
| "epoch": 1.8372093023255816, |
| "grad_norm": 2.817513169380205, |
| "learning_rate": 4e-08, |
| "loss": 0.8524192810058594, |
| "memory(GiB)": 66.93, |
| "step": 20, |
| "token_acc": 0.8086610622604439, |
| "train_speed(iter/s)": 0.120417 |
| }, |
| { |
| "epoch": 2.2790697674418605, |
| "grad_norm": 2.5728578917158496, |
| "learning_rate": 5e-08, |
| "loss": 0.8347753524780274, |
| "memory(GiB)": 66.93, |
| "step": 25, |
| "token_acc": 0.7975967163791022, |
| "train_speed(iter/s)": 0.1197 |
| }, |
| { |
| "epoch": 2.744186046511628, |
| "grad_norm": 7.697786718127836, |
| "learning_rate": 6e-08, |
| "loss": 0.8387296676635743, |
| "memory(GiB)": 66.93, |
| "step": 30, |
| "token_acc": 0.7844551282051282, |
| "train_speed(iter/s)": 0.121618 |
| }, |
| { |
| "epoch": 3.186046511627907, |
| "grad_norm": 2.6215878302524973, |
| "learning_rate": 6.999999999999999e-08, |
| "loss": 0.8212770462036133, |
| "memory(GiB)": 66.94, |
| "step": 35, |
| "token_acc": 0.7918978074644326, |
| "train_speed(iter/s)": 0.121919 |
| }, |
| { |
| "epoch": 3.6511627906976747, |
| "grad_norm": 2.3356396729850886, |
| "learning_rate": 8e-08, |
| "loss": 0.8247488021850586, |
| "memory(GiB)": 66.94, |
| "step": 40, |
| "token_acc": 0.7788534837627688, |
| "train_speed(iter/s)": 0.122256 |
| }, |
| { |
| "epoch": 4.093023255813954, |
| "grad_norm": 2.24036454294963, |
| "learning_rate": 9e-08, |
| "loss": 0.8493685722351074, |
| "memory(GiB)": 66.94, |
| "step": 45, |
| "token_acc": 0.7944452759188386, |
| "train_speed(iter/s)": 0.124476 |
| }, |
| { |
| "epoch": 4.558139534883721, |
| "grad_norm": 2.3710774237116135, |
| "learning_rate": 1e-07, |
| "loss": 0.8277470588684082, |
| "memory(GiB)": 66.94, |
| "step": 50, |
| "token_acc": 0.8012501821832845, |
| "train_speed(iter/s)": 0.124595 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 3.0036536630441435, |
| "learning_rate": 1.1e-07, |
| "loss": 0.8450939178466796, |
| "memory(GiB)": 66.94, |
| "step": 55, |
| "token_acc": 0.7760649403867543, |
| "train_speed(iter/s)": 0.125053 |
| }, |
| { |
| "epoch": 5.465116279069767, |
| "grad_norm": 2.5881257114238627, |
| "learning_rate": 1.2e-07, |
| "loss": 0.8497460365295411, |
| "memory(GiB)": 66.94, |
| "step": 60, |
| "token_acc": 0.796596503868389, |
| "train_speed(iter/s)": 0.125941 |
| }, |
| { |
| "epoch": 5.930232558139535, |
| "grad_norm": 2.3744096454020376, |
| "learning_rate": 1.3e-07, |
| "loss": 0.8116294860839843, |
| "memory(GiB)": 66.94, |
| "step": 65, |
| "token_acc": 0.7651333807767786, |
| "train_speed(iter/s)": 0.125394 |
| }, |
| { |
| "epoch": 6.372093023255814, |
| "grad_norm": 2.235166994874194, |
| "learning_rate": 1.3999999999999998e-07, |
| "loss": 0.8369662284851074, |
| "memory(GiB)": 66.94, |
| "step": 70, |
| "token_acc": 0.7880281843764316, |
| "train_speed(iter/s)": 0.126206 |
| }, |
| { |
| "epoch": 6.837209302325581, |
| "grad_norm": 8.546638989645471, |
| "learning_rate": 1.5e-07, |
| "loss": 0.8286456108093262, |
| "memory(GiB)": 66.94, |
| "step": 75, |
| "token_acc": 0.7748851144806365, |
| "train_speed(iter/s)": 0.126162 |
| }, |
| { |
| "epoch": 7.27906976744186, |
| "grad_norm": 2.1654224438993, |
| "learning_rate": 1.6e-07, |
| "loss": 0.8329730033874512, |
| "memory(GiB)": 66.94, |
| "step": 80, |
| "token_acc": 0.80111933970264, |
| "train_speed(iter/s)": 0.126637 |
| }, |
| { |
| "epoch": 7.7441860465116275, |
| "grad_norm": 3.346508648878843, |
| "learning_rate": 1.7e-07, |
| "loss": 0.8077556610107421, |
| "memory(GiB)": 66.94, |
| "step": 85, |
| "token_acc": 0.7893712675300275, |
| "train_speed(iter/s)": 0.126958 |
| }, |
| { |
| "epoch": 8.186046511627907, |
| "grad_norm": 2.1195939617622908, |
| "learning_rate": 1.8e-07, |
| "loss": 0.8190940856933594, |
| "memory(GiB)": 66.94, |
| "step": 90, |
| "token_acc": 0.7837902316300859, |
| "train_speed(iter/s)": 0.127783 |
| }, |
| { |
| "epoch": 8.651162790697674, |
| "grad_norm": 2.196504569870541, |
| "learning_rate": 1.8999999999999998e-07, |
| "loss": 0.7897569179534912, |
| "memory(GiB)": 66.94, |
| "step": 95, |
| "token_acc": 0.8031453890349596, |
| "train_speed(iter/s)": 0.12748 |
| }, |
| { |
| "epoch": 9.093023255813954, |
| "grad_norm": 2.4902973786655798, |
| "learning_rate": 2e-07, |
| "loss": 0.8305625915527344, |
| "memory(GiB)": 66.94, |
| "step": 100, |
| "token_acc": 0.7491283167239546, |
| "train_speed(iter/s)": 0.127599 |
| }, |
| { |
| "epoch": 9.55813953488372, |
| "grad_norm": 1.9236232576368646, |
| "learning_rate": 1.9999658256641745e-07, |
| "loss": 0.8344329833984375, |
| "memory(GiB)": 66.94, |
| "step": 105, |
| "token_acc": 0.7713534087092802, |
| "train_speed(iter/s)": 0.128253 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 1.9264695576885342, |
| "learning_rate": 1.999863304992469e-07, |
| "loss": 0.772977876663208, |
| "memory(GiB)": 66.94, |
| "step": 110, |
| "token_acc": 0.7893902319663306, |
| "train_speed(iter/s)": 0.128413 |
| }, |
| { |
| "epoch": 10.465116279069768, |
| "grad_norm": 1.7921071186450859, |
| "learning_rate": 1.9996924449920347e-07, |
| "loss": 0.7723042488098144, |
| "memory(GiB)": 66.94, |
| "step": 115, |
| "token_acc": 0.8070967633232802, |
| "train_speed(iter/s)": 0.128698 |
| }, |
| { |
| "epoch": 10.930232558139535, |
| "grad_norm": 2.0488653924721487, |
| "learning_rate": 1.999453257340926e-07, |
| "loss": 0.805912971496582, |
| "memory(GiB)": 66.94, |
| "step": 120, |
| "token_acc": 0.7839763435738668, |
| "train_speed(iter/s)": 0.128633 |
| }, |
| { |
| "epoch": 11.372093023255815, |
| "grad_norm": 1.8887448764254238, |
| "learning_rate": 1.9991457583873009e-07, |
| "loss": 0.7916177272796631, |
| "memory(GiB)": 66.94, |
| "step": 125, |
| "token_acc": 0.7835127698472789, |
| "train_speed(iter/s)": 0.129044 |
| }, |
| { |
| "epoch": 11.837209302325581, |
| "grad_norm": 2.087347255237122, |
| "learning_rate": 1.9987699691483047e-07, |
| "loss": 0.7750067710876465, |
| "memory(GiB)": 66.94, |
| "step": 130, |
| "token_acc": 0.79361802524478, |
| "train_speed(iter/s)": 0.128698 |
| }, |
| { |
| "epoch": 12.279069767441861, |
| "grad_norm": 1.8497770530709863, |
| "learning_rate": 1.9983259153086325e-07, |
| "loss": 0.7334749698638916, |
| "memory(GiB)": 66.94, |
| "step": 135, |
| "token_acc": 0.8016341430131004, |
| "train_speed(iter/s)": 0.129134 |
| }, |
| { |
| "epoch": 12.744186046511627, |
| "grad_norm": 1.3780662997872353, |
| "learning_rate": 1.9978136272187745e-07, |
| "loss": 0.7617583274841309, |
| "memory(GiB)": 66.94, |
| "step": 140, |
| "token_acc": 0.8071877904067482, |
| "train_speed(iter/s)": 0.128965 |
| }, |
| { |
| "epoch": 13.186046511627907, |
| "grad_norm": 1.4470094463921936, |
| "learning_rate": 1.997233139892941e-07, |
| "loss": 0.7472479820251465, |
| "memory(GiB)": 66.94, |
| "step": 145, |
| "token_acc": 0.7839292328474627, |
| "train_speed(iter/s)": 0.129158 |
| }, |
| { |
| "epoch": 13.651162790697674, |
| "grad_norm": 1.969343282689861, |
| "learning_rate": 1.9965844930066698e-07, |
| "loss": 0.7178962707519532, |
| "memory(GiB)": 66.94, |
| "step": 150, |
| "token_acc": 0.7930578931176141, |
| "train_speed(iter/s)": 0.129381 |
| }, |
| { |
| "epoch": 14.093023255813954, |
| "grad_norm": 1.659886865517498, |
| "learning_rate": 1.9958677308941136e-07, |
| "loss": 0.7550750255584717, |
| "memory(GiB)": 66.94, |
| "step": 155, |
| "token_acc": 0.7681622703125359, |
| "train_speed(iter/s)": 0.129371 |
| }, |
| { |
| "epoch": 14.55813953488372, |
| "grad_norm": 1.3482878555174083, |
| "learning_rate": 1.9950829025450114e-07, |
| "loss": 0.7135652542114258, |
| "memory(GiB)": 66.94, |
| "step": 160, |
| "token_acc": 0.7849006160641636, |
| "train_speed(iter/s)": 0.129416 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 1.6524876656316168, |
| "learning_rate": 1.9942300616013377e-07, |
| "loss": 0.7475796699523926, |
| "memory(GiB)": 74.95, |
| "step": 165, |
| "token_acc": 0.796426354182834, |
| "train_speed(iter/s)": 0.129454 |
| }, |
| { |
| "epoch": 15.465116279069768, |
| "grad_norm": 1.4018266466879952, |
| "learning_rate": 1.993309266353638e-07, |
| "loss": 0.7252517223358155, |
| "memory(GiB)": 74.96, |
| "step": 170, |
| "token_acc": 0.8084311921640781, |
| "train_speed(iter/s)": 0.129786 |
| }, |
| { |
| "epoch": 15.930232558139535, |
| "grad_norm": 1.2395976325818243, |
| "learning_rate": 1.992320579737045e-07, |
| "loss": 0.7289777755737304, |
| "memory(GiB)": 74.96, |
| "step": 175, |
| "token_acc": 0.810318895442178, |
| "train_speed(iter/s)": 0.129608 |
| }, |
| { |
| "epoch": 16.372093023255815, |
| "grad_norm": 2.5675852224219553, |
| "learning_rate": 1.9912640693269751e-07, |
| "loss": 0.6915578365325927, |
| "memory(GiB)": 74.96, |
| "step": 180, |
| "token_acc": 0.7905717628859845, |
| "train_speed(iter/s)": 0.129489 |
| }, |
| { |
| "epoch": 16.837209302325583, |
| "grad_norm": 1.4358606025818346, |
| "learning_rate": 1.9901398073345117e-07, |
| "loss": 0.7248753547668457, |
| "memory(GiB)": 74.96, |
| "step": 185, |
| "token_acc": 0.8124335543968005, |
| "train_speed(iter/s)": 0.129395 |
| }, |
| { |
| "epoch": 17.27906976744186, |
| "grad_norm": 1.262748163163051, |
| "learning_rate": 1.9889478706014683e-07, |
| "loss": 0.7250626564025879, |
| "memory(GiB)": 74.96, |
| "step": 190, |
| "token_acc": 0.7930634826915087, |
| "train_speed(iter/s)": 0.129717 |
| }, |
| { |
| "epoch": 17.74418604651163, |
| "grad_norm": 1.467844482343943, |
| "learning_rate": 1.9876883405951376e-07, |
| "loss": 0.7151264190673828, |
| "memory(GiB)": 74.96, |
| "step": 195, |
| "token_acc": 0.8009663075081238, |
| "train_speed(iter/s)": 0.129659 |
| }, |
| { |
| "epoch": 18.186046511627907, |
| "grad_norm": 1.1965217379377007, |
| "learning_rate": 1.9863613034027222e-07, |
| "loss": 0.667814064025879, |
| "memory(GiB)": 74.96, |
| "step": 200, |
| "token_acc": 0.8150907451820857, |
| "train_speed(iter/s)": 0.12962 |
| }, |
| { |
| "epoch": 18.186046511627907, |
| "eval_loss": 0.708366334438324, |
| "eval_runtime": 0.7627, |
| "eval_samples_per_second": 17.045, |
| "eval_steps_per_second": 2.622, |
| "eval_token_acc": 0.8243126736277421, |
| "step": 200 |
| }, |
| { |
| "epoch": 18.651162790697676, |
| "grad_norm": 1.1855878327264966, |
| "learning_rate": 1.9849668497254518e-07, |
| "loss": 0.7150158882141113, |
| "memory(GiB)": 74.96, |
| "step": 205, |
| "token_acc": 0.8079891491231421, |
| "train_speed(iter/s)": 0.128041 |
| }, |
| { |
| "epoch": 19.093023255813954, |
| "grad_norm": 1.0438364306476957, |
| "learning_rate": 1.9835050748723822e-07, |
| "loss": 0.6731427669525146, |
| "memory(GiB)": 74.96, |
| "step": 210, |
| "token_acc": 0.8069213383230205, |
| "train_speed(iter/s)": 0.128417 |
| }, |
| { |
| "epoch": 19.558139534883722, |
| "grad_norm": 0.9853618641588676, |
| "learning_rate": 1.9819760787538837e-07, |
| "loss": 0.6843628883361816, |
| "memory(GiB)": 74.96, |
| "step": 215, |
| "token_acc": 0.8041327124563445, |
| "train_speed(iter/s)": 0.128537 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 1.1619660544446906, |
| "learning_rate": 1.9803799658748093e-07, |
| "loss": 0.6671229839324951, |
| "memory(GiB)": 74.97, |
| "step": 220, |
| "token_acc": 0.8175119885190241, |
| "train_speed(iter/s)": 0.12854 |
| }, |
| { |
| "epoch": 20.46511627906977, |
| "grad_norm": 1.2626864222948397, |
| "learning_rate": 1.9787168453273545e-07, |
| "loss": 0.6970132827758789, |
| "memory(GiB)": 74.97, |
| "step": 225, |
| "token_acc": 0.8270612144784331, |
| "train_speed(iter/s)": 0.128495 |
| }, |
| { |
| "epoch": 20.930232558139537, |
| "grad_norm": 0.7941657042448518, |
| "learning_rate": 1.9769868307835993e-07, |
| "loss": 0.6455688953399659, |
| "memory(GiB)": 74.97, |
| "step": 230, |
| "token_acc": 0.8348736389299637, |
| "train_speed(iter/s)": 0.128518 |
| }, |
| { |
| "epoch": 21.372093023255815, |
| "grad_norm": 1.1822694017861601, |
| "learning_rate": 1.9751900404877398e-07, |
| "loss": 0.6348400115966797, |
| "memory(GiB)": 74.97, |
| "step": 235, |
| "token_acc": 0.8331182941735705, |
| "train_speed(iter/s)": 0.128694 |
| }, |
| { |
| "epoch": 21.837209302325583, |
| "grad_norm": 0.9081549570182597, |
| "learning_rate": 1.9733265972480058e-07, |
| "loss": 0.6620560646057129, |
| "memory(GiB)": 74.97, |
| "step": 240, |
| "token_acc": 0.8267432385239223, |
| "train_speed(iter/s)": 0.128483 |
| }, |
| { |
| "epoch": 22.27906976744186, |
| "grad_norm": 0.8139654483754177, |
| "learning_rate": 1.9713966284282674e-07, |
| "loss": 0.6350464820861816, |
| "memory(GiB)": 74.97, |
| "step": 245, |
| "token_acc": 0.8140620540628695, |
| "train_speed(iter/s)": 0.128624 |
| }, |
| { |
| "epoch": 22.74418604651163, |
| "grad_norm": 1.39238370567191, |
| "learning_rate": 1.9694002659393302e-07, |
| "loss": 0.6755290031433105, |
| "memory(GiB)": 74.97, |
| "step": 250, |
| "token_acc": 0.8166282714604026, |
| "train_speed(iter/s)": 0.128733 |
| }, |
| { |
| "epoch": 23.186046511627907, |
| "grad_norm": 1.0689850821114422, |
| "learning_rate": 1.9673376462299182e-07, |
| "loss": 0.6278616905212402, |
| "memory(GiB)": 74.97, |
| "step": 255, |
| "token_acc": 0.8235556962260989, |
| "train_speed(iter/s)": 0.128805 |
| }, |
| { |
| "epoch": 23.651162790697676, |
| "grad_norm": 0.826203146475013, |
| "learning_rate": 1.9652089102773487e-07, |
| "loss": 0.6573570728302002, |
| "memory(GiB)": 74.97, |
| "step": 260, |
| "token_acc": 0.8203604745946925, |
| "train_speed(iter/s)": 0.128858 |
| }, |
| { |
| "epoch": 24.093023255813954, |
| "grad_norm": 1.0075863589078984, |
| "learning_rate": 1.963014203577896e-07, |
| "loss": 0.6461727619171143, |
| "memory(GiB)": 74.97, |
| "step": 265, |
| "token_acc": 0.799577569399313, |
| "train_speed(iter/s)": 0.128878 |
| }, |
| { |
| "epoch": 24.558139534883722, |
| "grad_norm": 1.1793630828397141, |
| "learning_rate": 1.9607536761368482e-07, |
| "loss": 0.634314775466919, |
| "memory(GiB)": 74.97, |
| "step": 270, |
| "token_acc": 0.7944581869582389, |
| "train_speed(iter/s)": 0.128808 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 0.9698025031342606, |
| "learning_rate": 1.9584274824582527e-07, |
| "loss": 0.6515589714050293, |
| "memory(GiB)": 74.97, |
| "step": 275, |
| "token_acc": 0.8235917962402285, |
| "train_speed(iter/s)": 0.128916 |
| }, |
| { |
| "epoch": 25.46511627906977, |
| "grad_norm": 3.191013934790137, |
| "learning_rate": 1.9560357815343574e-07, |
| "loss": 0.6280710697174072, |
| "memory(GiB)": 74.97, |
| "step": 280, |
| "token_acc": 0.8348407138350025, |
| "train_speed(iter/s)": 0.128889 |
| }, |
| { |
| "epoch": 25.930232558139537, |
| "grad_norm": 2.6857457017550184, |
| "learning_rate": 1.9535787368347442e-07, |
| "loss": 0.6403141498565674, |
| "memory(GiB)": 74.97, |
| "step": 285, |
| "token_acc": 0.8304662656603196, |
| "train_speed(iter/s)": 0.128946 |
| }, |
| { |
| "epoch": 26.372093023255815, |
| "grad_norm": 0.7179384373982066, |
| "learning_rate": 1.9510565162951537e-07, |
| "loss": 0.6483189582824707, |
| "memory(GiB)": 74.97, |
| "step": 290, |
| "token_acc": 0.8042392190472208, |
| "train_speed(iter/s)": 0.129194 |
| }, |
| { |
| "epoch": 26.837209302325583, |
| "grad_norm": 0.8336349818317007, |
| "learning_rate": 1.9484692923060094e-07, |
| "loss": 0.6260199546813965, |
| "memory(GiB)": 74.97, |
| "step": 295, |
| "token_acc": 0.8142812170144997, |
| "train_speed(iter/s)": 0.129275 |
| }, |
| { |
| "epoch": 27.27906976744186, |
| "grad_norm": 0.8073425015755772, |
| "learning_rate": 1.9458172417006346e-07, |
| "loss": 0.6319057464599609, |
| "memory(GiB)": 74.97, |
| "step": 300, |
| "token_acc": 0.8167601892733382, |
| "train_speed(iter/s)": 0.129283 |
| }, |
| { |
| "epoch": 27.74418604651163, |
| "grad_norm": 0.8293440372694967, |
| "learning_rate": 1.943100545743165e-07, |
| "loss": 0.6321963310241699, |
| "memory(GiB)": 74.97, |
| "step": 305, |
| "token_acc": 0.8145223890527623, |
| "train_speed(iter/s)": 0.129158 |
| }, |
| { |
| "epoch": 28.186046511627907, |
| "grad_norm": 0.8851271223039491, |
| "learning_rate": 1.9403193901161612e-07, |
| "loss": 0.6186152935028076, |
| "memory(GiB)": 74.97, |
| "step": 310, |
| "token_acc": 0.8423929547525053, |
| "train_speed(iter/s)": 0.129305 |
| }, |
| { |
| "epoch": 28.651162790697676, |
| "grad_norm": 0.9560469073452553, |
| "learning_rate": 1.9374739649079154e-07, |
| "loss": 0.6388277053833008, |
| "memory(GiB)": 74.97, |
| "step": 315, |
| "token_acc": 0.8255307825359716, |
| "train_speed(iter/s)": 0.129291 |
| }, |
| { |
| "epoch": 29.093023255813954, |
| "grad_norm": 1.0797696361091218, |
| "learning_rate": 1.9345644645994608e-07, |
| "loss": 0.6270732879638672, |
| "memory(GiB)": 74.97, |
| "step": 320, |
| "token_acc": 0.8329987798638171, |
| "train_speed(iter/s)": 0.129427 |
| }, |
| { |
| "epoch": 29.558139534883722, |
| "grad_norm": 1.035746534298127, |
| "learning_rate": 1.9315910880512788e-07, |
| "loss": 0.6154883861541748, |
| "memory(GiB)": 74.97, |
| "step": 325, |
| "token_acc": 0.8229807039658683, |
| "train_speed(iter/s)": 0.129368 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 0.9448004935095479, |
| "learning_rate": 1.928554038489707e-07, |
| "loss": 0.6246993541717529, |
| "memory(GiB)": 74.97, |
| "step": 330, |
| "token_acc": 0.8252855659397715, |
| "train_speed(iter/s)": 0.129558 |
| }, |
| { |
| "epoch": 30.46511627906977, |
| "grad_norm": 0.7400543933440672, |
| "learning_rate": 1.9254535234930483e-07, |
| "loss": 0.6015793323516846, |
| "memory(GiB)": 74.97, |
| "step": 335, |
| "token_acc": 0.8212677580369298, |
| "train_speed(iter/s)": 0.129568 |
| }, |
| { |
| "epoch": 30.930232558139537, |
| "grad_norm": 0.6862921067098382, |
| "learning_rate": 1.9222897549773846e-07, |
| "loss": 0.627756404876709, |
| "memory(GiB)": 74.97, |
| "step": 340, |
| "token_acc": 0.8131175537754646, |
| "train_speed(iter/s)": 0.129532 |
| }, |
| { |
| "epoch": 31.372093023255815, |
| "grad_norm": 1.0706787922118046, |
| "learning_rate": 1.9190629491820908e-07, |
| "loss": 0.6050760269165039, |
| "memory(GiB)": 74.97, |
| "step": 345, |
| "token_acc": 0.8153731376034056, |
| "train_speed(iter/s)": 0.129658 |
| }, |
| { |
| "epoch": 31.837209302325583, |
| "grad_norm": 0.7747208875253631, |
| "learning_rate": 1.9157733266550572e-07, |
| "loss": 0.6289189338684082, |
| "memory(GiB)": 74.97, |
| "step": 350, |
| "token_acc": 0.8139119876370594, |
| "train_speed(iter/s)": 0.129542 |
| }, |
| { |
| "epoch": 32.27906976744186, |
| "grad_norm": 0.773459886431363, |
| "learning_rate": 1.9124211122376135e-07, |
| "loss": 0.6157156944274902, |
| "memory(GiB)": 74.97, |
| "step": 355, |
| "token_acc": 0.8152114721365039, |
| "train_speed(iter/s)": 0.129801 |
| }, |
| { |
| "epoch": 32.74418604651163, |
| "grad_norm": 1.1738935206395225, |
| "learning_rate": 1.9090065350491624e-07, |
| "loss": 0.6239834785461426, |
| "memory(GiB)": 74.97, |
| "step": 360, |
| "token_acc": 0.833327410355734, |
| "train_speed(iter/s)": 0.129897 |
| }, |
| { |
| "epoch": 33.18604651162791, |
| "grad_norm": 0.848966063311304, |
| "learning_rate": 1.905529828471519e-07, |
| "loss": 0.5887202262878418, |
| "memory(GiB)": 74.97, |
| "step": 365, |
| "token_acc": 0.8398133748055988, |
| "train_speed(iter/s)": 0.129873 |
| }, |
| { |
| "epoch": 33.651162790697676, |
| "grad_norm": 2.144137430723947, |
| "learning_rate": 1.901991230132959e-07, |
| "loss": 0.6359727859497071, |
| "memory(GiB)": 74.97, |
| "step": 370, |
| "token_acc": 0.8069930345126126, |
| "train_speed(iter/s)": 0.129943 |
| }, |
| { |
| "epoch": 34.093023255813954, |
| "grad_norm": 0.7367545693321746, |
| "learning_rate": 1.8983909818919788e-07, |
| "loss": 0.5804174900054931, |
| "memory(GiB)": 74.97, |
| "step": 375, |
| "token_acc": 0.8437245411415153, |
| "train_speed(iter/s)": 0.129967 |
| }, |
| { |
| "epoch": 34.55813953488372, |
| "grad_norm": 0.7507232728161667, |
| "learning_rate": 1.8947293298207635e-07, |
| "loss": 0.5902613639831543, |
| "memory(GiB)": 74.97, |
| "step": 380, |
| "token_acc": 0.8308984660336012, |
| "train_speed(iter/s)": 0.129921 |
| }, |
| { |
| "epoch": 35.0, |
| "grad_norm": 1.3494911901833562, |
| "learning_rate": 1.8910065241883678e-07, |
| "loss": 0.6213099479675293, |
| "memory(GiB)": 74.97, |
| "step": 385, |
| "token_acc": 0.8180765456329735, |
| "train_speed(iter/s)": 0.129994 |
| }, |
| { |
| "epoch": 35.46511627906977, |
| "grad_norm": 0.9327927885382011, |
| "learning_rate": 1.8872228194436116e-07, |
| "loss": 0.61426682472229, |
| "memory(GiB)": 74.97, |
| "step": 390, |
| "token_acc": 0.8016005335111704, |
| "train_speed(iter/s)": 0.130043 |
| }, |
| { |
| "epoch": 35.93023255813954, |
| "grad_norm": 0.8590493021171992, |
| "learning_rate": 1.8833784741976886e-07, |
| "loss": 0.5930656433105469, |
| "memory(GiB)": 74.97, |
| "step": 395, |
| "token_acc": 0.8238509177734666, |
| "train_speed(iter/s)": 0.129952 |
| }, |
| { |
| "epoch": 36.372093023255815, |
| "grad_norm": 0.692718053612059, |
| "learning_rate": 1.8794737512064888e-07, |
| "loss": 0.601491117477417, |
| "memory(GiB)": 74.97, |
| "step": 400, |
| "token_acc": 0.8390804597701149, |
| "train_speed(iter/s)": 0.130015 |
| }, |
| { |
| "epoch": 36.372093023255815, |
| "eval_loss": 0.6246538758277893, |
| "eval_runtime": 0.7289, |
| "eval_samples_per_second": 17.836, |
| "eval_steps_per_second": 2.744, |
| "eval_token_acc": 0.8392566337771817, |
| "step": 400 |
| }, |
| { |
| "epoch": 36.83720930232558, |
| "grad_norm": 0.8580896624897943, |
| "learning_rate": 1.875508917352643e-07, |
| "loss": 0.6003564834594727, |
| "memory(GiB)": 74.97, |
| "step": 405, |
| "token_acc": 0.8357933251629633, |
| "train_speed(iter/s)": 0.129242 |
| }, |
| { |
| "epoch": 37.27906976744186, |
| "grad_norm": 0.9684611433600051, |
| "learning_rate": 1.871484243627277e-07, |
| "loss": 0.6055225372314453, |
| "memory(GiB)": 74.97, |
| "step": 410, |
| "token_acc": 0.8125408092339449, |
| "train_speed(iter/s)": 0.129415 |
| }, |
| { |
| "epoch": 37.74418604651163, |
| "grad_norm": 0.8148508280992611, |
| "learning_rate": 1.867400005111495e-07, |
| "loss": 0.5952893257141113, |
| "memory(GiB)": 74.97, |
| "step": 415, |
| "token_acc": 0.8260123541523678, |
| "train_speed(iter/s)": 0.129433 |
| }, |
| { |
| "epoch": 38.18604651162791, |
| "grad_norm": 0.7992095356192797, |
| "learning_rate": 1.8632564809575738e-07, |
| "loss": 0.6156826496124268, |
| "memory(GiB)": 74.97, |
| "step": 420, |
| "token_acc": 0.8205879974118409, |
| "train_speed(iter/s)": 0.12938 |
| }, |
| { |
| "epoch": 38.651162790697676, |
| "grad_norm": 3.6721651326108775, |
| "learning_rate": 1.859053954369885e-07, |
| "loss": 0.610502815246582, |
| "memory(GiB)": 74.97, |
| "step": 425, |
| "token_acc": 0.8172398589065256, |
| "train_speed(iter/s)": 0.129325 |
| }, |
| { |
| "epoch": 39.093023255813954, |
| "grad_norm": 0.9272484168885702, |
| "learning_rate": 1.854792712585539e-07, |
| "loss": 0.5535663604736328, |
| "memory(GiB)": 74.97, |
| "step": 430, |
| "token_acc": 0.8236255683739807, |
| "train_speed(iter/s)": 0.129482 |
| }, |
| { |
| "epoch": 39.55813953488372, |
| "grad_norm": 0.8018214646006986, |
| "learning_rate": 1.8504730468547506e-07, |
| "loss": 0.5991367340087891, |
| "memory(GiB)": 74.97, |
| "step": 435, |
| "token_acc": 0.8261135086719322, |
| "train_speed(iter/s)": 0.129405 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 1.2379263967079543, |
| "learning_rate": 1.846095252420935e-07, |
| "loss": 0.585663890838623, |
| "memory(GiB)": 74.97, |
| "step": 440, |
| "token_acc": 0.8266845321477151, |
| "train_speed(iter/s)": 0.129434 |
| }, |
| { |
| "epoch": 40.46511627906977, |
| "grad_norm": 0.833466025772104, |
| "learning_rate": 1.841659628500527e-07, |
| "loss": 0.5750086784362793, |
| "memory(GiB)": 74.97, |
| "step": 445, |
| "token_acc": 0.8343643862202814, |
| "train_speed(iter/s)": 0.129525 |
| }, |
| { |
| "epoch": 40.93023255813954, |
| "grad_norm": 0.7870441769315963, |
| "learning_rate": 1.8371664782625284e-07, |
| "loss": 0.5996095180511475, |
| "memory(GiB)": 74.97, |
| "step": 450, |
| "token_acc": 0.8262060770106785, |
| "train_speed(iter/s)": 0.129426 |
| }, |
| { |
| "epoch": 41.372093023255815, |
| "grad_norm": 0.7270750065258582, |
| "learning_rate": 1.8326161088077904e-07, |
| "loss": 0.5774937629699707, |
| "memory(GiB)": 74.97, |
| "step": 455, |
| "token_acc": 0.8339674588455729, |
| "train_speed(iter/s)": 0.129531 |
| }, |
| { |
| "epoch": 41.83720930232558, |
| "grad_norm": 0.6345626674708744, |
| "learning_rate": 1.82800883114802e-07, |
| "loss": 0.5982451438903809, |
| "memory(GiB)": 74.97, |
| "step": 460, |
| "token_acc": 0.83098393668337, |
| "train_speed(iter/s)": 0.129577 |
| }, |
| { |
| "epoch": 42.27906976744186, |
| "grad_norm": 0.8020609888197409, |
| "learning_rate": 1.8233449601845256e-07, |
| "loss": 0.5845087051391602, |
| "memory(GiB)": 74.97, |
| "step": 465, |
| "token_acc": 0.8175882797882081, |
| "train_speed(iter/s)": 0.129629 |
| }, |
| { |
| "epoch": 42.74418604651163, |
| "grad_norm": 0.8480884031667174, |
| "learning_rate": 1.8186248146866925e-07, |
| "loss": 0.591459846496582, |
| "memory(GiB)": 74.97, |
| "step": 470, |
| "token_acc": 0.8345550327140474, |
| "train_speed(iter/s)": 0.129656 |
| }, |
| { |
| "epoch": 43.18604651162791, |
| "grad_norm": 4.128756169670704, |
| "learning_rate": 1.8138487172701948e-07, |
| "loss": 0.5832277297973633, |
| "memory(GiB)": 74.97, |
| "step": 475, |
| "token_acc": 0.8327794561933535, |
| "train_speed(iter/s)": 0.129649 |
| }, |
| { |
| "epoch": 43.651162790697676, |
| "grad_norm": 0.691292587718326, |
| "learning_rate": 1.8090169943749475e-07, |
| "loss": 0.5771265029907227, |
| "memory(GiB)": 74.97, |
| "step": 480, |
| "token_acc": 0.8235854875792071, |
| "train_speed(iter/s)": 0.129594 |
| }, |
| { |
| "epoch": 44.093023255813954, |
| "grad_norm": 0.9411447489425482, |
| "learning_rate": 1.8041299762427914e-07, |
| "loss": 0.5849340438842774, |
| "memory(GiB)": 74.97, |
| "step": 485, |
| "token_acc": 0.8348119811167182, |
| "train_speed(iter/s)": 0.12971 |
| }, |
| { |
| "epoch": 44.55813953488372, |
| "grad_norm": 0.9906151143939281, |
| "learning_rate": 1.7991879968949247e-07, |
| "loss": 0.6044949531555176, |
| "memory(GiB)": 74.97, |
| "step": 490, |
| "token_acc": 0.8391592252658489, |
| "train_speed(iter/s)": 0.129794 |
| }, |
| { |
| "epoch": 45.0, |
| "grad_norm": 0.6320054379409873, |
| "learning_rate": 1.794191394109071e-07, |
| "loss": 0.5554977893829346, |
| "memory(GiB)": 74.97, |
| "step": 495, |
| "token_acc": 0.8345945945945946, |
| "train_speed(iter/s)": 0.12979 |
| }, |
| { |
| "epoch": 45.46511627906977, |
| "grad_norm": 0.7061286584704719, |
| "learning_rate": 1.7891405093963936e-07, |
| "loss": 0.5755014896392823, |
| "memory(GiB)": 74.97, |
| "step": 500, |
| "token_acc": 0.8254359194017598, |
| "train_speed(iter/s)": 0.129688 |
| }, |
| { |
| "epoch": 45.93023255813954, |
| "grad_norm": 0.7195669164082512, |
| "learning_rate": 1.7840356879781529e-07, |
| "loss": 0.5827363014221192, |
| "memory(GiB)": 74.97, |
| "step": 505, |
| "token_acc": 0.839882368874185, |
| "train_speed(iter/s)": 0.129779 |
| }, |
| { |
| "epoch": 46.372093023255815, |
| "grad_norm": 0.6968950428332337, |
| "learning_rate": 1.7788772787621125e-07, |
| "loss": 0.5568270683288574, |
| "memory(GiB)": 74.97, |
| "step": 510, |
| "token_acc": 0.8614190870002142, |
| "train_speed(iter/s)": 0.129886 |
| }, |
| { |
| "epoch": 46.83720930232558, |
| "grad_norm": 0.7064063028804808, |
| "learning_rate": 1.7736656343186894e-07, |
| "loss": 0.5865127563476562, |
| "memory(GiB)": 74.97, |
| "step": 515, |
| "token_acc": 0.8082950799781602, |
| "train_speed(iter/s)": 0.129852 |
| }, |
| { |
| "epoch": 47.27906976744186, |
| "grad_norm": 0.6403030213655208, |
| "learning_rate": 1.768401110856859e-07, |
| "loss": 0.5599156379699707, |
| "memory(GiB)": 74.97, |
| "step": 520, |
| "token_acc": 0.8375492061100334, |
| "train_speed(iter/s)": 0.129883 |
| }, |
| { |
| "epoch": 47.74418604651163, |
| "grad_norm": 0.9706781013185869, |
| "learning_rate": 1.7630840681998066e-07, |
| "loss": 0.5808145523071289, |
| "memory(GiB)": 74.97, |
| "step": 525, |
| "token_acc": 0.8431429663747747, |
| "train_speed(iter/s)": 0.129855 |
| }, |
| { |
| "epoch": 48.18604651162791, |
| "grad_norm": 0.7377603527859908, |
| "learning_rate": 1.7577148697603348e-07, |
| "loss": 0.5715710639953613, |
| "memory(GiB)": 74.97, |
| "step": 530, |
| "token_acc": 0.8220905089196077, |
| "train_speed(iter/s)": 0.129985 |
| }, |
| { |
| "epoch": 48.651162790697676, |
| "grad_norm": 0.8535997732414037, |
| "learning_rate": 1.7522938825160247e-07, |
| "loss": 0.5609760284423828, |
| "memory(GiB)": 74.97, |
| "step": 535, |
| "token_acc": 0.8485186181454867, |
| "train_speed(iter/s)": 0.12997 |
| }, |
| { |
| "epoch": 49.093023255813954, |
| "grad_norm": 1.6196917405162314, |
| "learning_rate": 1.7468214769841538e-07, |
| "loss": 0.5788634777069092, |
| "memory(GiB)": 74.97, |
| "step": 540, |
| "token_acc": 0.8494809430899153, |
| "train_speed(iter/s)": 0.129998 |
| }, |
| { |
| "epoch": 49.55813953488372, |
| "grad_norm": 0.8074628776305832, |
| "learning_rate": 1.7412980271963708e-07, |
| "loss": 0.5682050704956054, |
| "memory(GiB)": 74.97, |
| "step": 545, |
| "token_acc": 0.8164148196748201, |
| "train_speed(iter/s)": 0.129923 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 0.9098109454481578, |
| "learning_rate": 1.7357239106731316e-07, |
| "loss": 0.5588317394256592, |
| "memory(GiB)": 74.97, |
| "step": 550, |
| "token_acc": 0.8282426370196996, |
| "train_speed(iter/s)": 0.130049 |
| }, |
| { |
| "epoch": 50.46511627906977, |
| "grad_norm": 0.8717894931304141, |
| "learning_rate": 1.7300995083978961e-07, |
| "loss": 0.560645866394043, |
| "memory(GiB)": 74.97, |
| "step": 555, |
| "token_acc": 0.8580127632625887, |
| "train_speed(iter/s)": 0.129978 |
| }, |
| { |
| "epoch": 50.93023255813954, |
| "grad_norm": 0.901907102378853, |
| "learning_rate": 1.724425204791089e-07, |
| "loss": 0.5699704647064209, |
| "memory(GiB)": 74.97, |
| "step": 560, |
| "token_acc": 0.8169467583456241, |
| "train_speed(iter/s)": 0.129949 |
| }, |
| { |
| "epoch": 51.372093023255815, |
| "grad_norm": 0.6850047743663971, |
| "learning_rate": 1.7187013876838238e-07, |
| "loss": 0.5511385917663574, |
| "memory(GiB)": 74.97, |
| "step": 565, |
| "token_acc": 0.8470804299681305, |
| "train_speed(iter/s)": 0.130045 |
| }, |
| { |
| "epoch": 51.83720930232558, |
| "grad_norm": 0.693108198878134, |
| "learning_rate": 1.712928448291397e-07, |
| "loss": 0.560858964920044, |
| "memory(GiB)": 74.97, |
| "step": 570, |
| "token_acc": 0.8014341226733077, |
| "train_speed(iter/s)": 0.130065 |
| }, |
| { |
| "epoch": 52.27906976744186, |
| "grad_norm": 1.168154430184055, |
| "learning_rate": 1.7071067811865473e-07, |
| "loss": 0.5584731578826905, |
| "memory(GiB)": 74.97, |
| "step": 575, |
| "token_acc": 0.8305429323128438, |
| "train_speed(iter/s)": 0.130047 |
| }, |
| { |
| "epoch": 52.74418604651163, |
| "grad_norm": 0.8940504753420614, |
| "learning_rate": 1.7012367842724884e-07, |
| "loss": 0.5449427127838135, |
| "memory(GiB)": 74.97, |
| "step": 580, |
| "token_acc": 0.8454416804487562, |
| "train_speed(iter/s)": 0.13012 |
| }, |
| { |
| "epoch": 53.18604651162791, |
| "grad_norm": 1.3182438739088296, |
| "learning_rate": 1.695318858755712e-07, |
| "loss": 0.5867147445678711, |
| "memory(GiB)": 74.97, |
| "step": 585, |
| "token_acc": 0.8317076233934776, |
| "train_speed(iter/s)": 0.130155 |
| }, |
| { |
| "epoch": 53.651162790697676, |
| "grad_norm": 5.89431265738365, |
| "learning_rate": 1.6893534091185658e-07, |
| "loss": 0.5429623603820801, |
| "memory(GiB)": 74.97, |
| "step": 590, |
| "token_acc": 0.8596458176337604, |
| "train_speed(iter/s)": 0.130144 |
| }, |
| { |
| "epoch": 54.093023255813954, |
| "grad_norm": 0.8228392742664287, |
| "learning_rate": 1.6833408430916082e-07, |
| "loss": 0.5783446311950684, |
| "memory(GiB)": 74.97, |
| "step": 595, |
| "token_acc": 0.8510537851964256, |
| "train_speed(iter/s)": 0.130222 |
| }, |
| { |
| "epoch": 54.55813953488372, |
| "grad_norm": 0.6782178805084175, |
| "learning_rate": 1.6772815716257412e-07, |
| "loss": 0.5568069458007813, |
| "memory(GiB)": 74.97, |
| "step": 600, |
| "token_acc": 0.8492520719628057, |
| "train_speed(iter/s)": 0.130138 |
| }, |
| { |
| "epoch": 54.55813953488372, |
| "eval_loss": 0.6069812774658203, |
| "eval_runtime": 0.7244, |
| "eval_samples_per_second": 17.946, |
| "eval_steps_per_second": 2.761, |
| "eval_token_acc": 0.8424178561164862, |
| "step": 600 |
| }, |
| { |
| "epoch": 55.0, |
| "grad_norm": 0.9571327352378861, |
| "learning_rate": 1.6711760088641197e-07, |
| "loss": 0.549845027923584, |
| "memory(GiB)": 74.97, |
| "step": 605, |
| "token_acc": 0.8441368444744543, |
| "train_speed(iter/s)": 0.129683 |
| }, |
| { |
| "epoch": 55.46511627906977, |
| "grad_norm": 0.6574337050432097, |
| "learning_rate": 1.665024572113848e-07, |
| "loss": 0.5540960311889649, |
| "memory(GiB)": 74.97, |
| "step": 610, |
| "token_acc": 0.8468528296996988, |
| "train_speed(iter/s)": 0.12963 |
| }, |
| { |
| "epoch": 55.93023255813954, |
| "grad_norm": 1.3701583003213704, |
| "learning_rate": 1.6588276818174578e-07, |
| "loss": 0.5496389389038085, |
| "memory(GiB)": 74.97, |
| "step": 615, |
| "token_acc": 0.8450532311656608, |
| "train_speed(iter/s)": 0.129682 |
| }, |
| { |
| "epoch": 56.372093023255815, |
| "grad_norm": 0.6379537701462664, |
| "learning_rate": 1.6525857615241686e-07, |
| "loss": 0.5491930484771729, |
| "memory(GiB)": 74.97, |
| "step": 620, |
| "token_acc": 0.8525308496423799, |
| "train_speed(iter/s)": 0.129772 |
| }, |
| { |
| "epoch": 56.83720930232558, |
| "grad_norm": 1.0493433605209441, |
| "learning_rate": 1.6462992378609406e-07, |
| "loss": 0.5360322952270508, |
| "memory(GiB)": 74.97, |
| "step": 625, |
| "token_acc": 0.8368756439119319, |
| "train_speed(iter/s)": 0.129772 |
| }, |
| { |
| "epoch": 57.27906976744186, |
| "grad_norm": 1.1362722651257062, |
| "learning_rate": 1.6399685405033166e-07, |
| "loss": 0.5665555000305176, |
| "memory(GiB)": 74.97, |
| "step": 630, |
| "token_acc": 0.8487739334900907, |
| "train_speed(iter/s)": 0.129826 |
| }, |
| { |
| "epoch": 57.74418604651163, |
| "grad_norm": 0.6512954800566325, |
| "learning_rate": 1.6335941021460504e-07, |
| "loss": 0.5384564399719238, |
| "memory(GiB)": 74.97, |
| "step": 635, |
| "token_acc": 0.8314396783289121, |
| "train_speed(iter/s)": 0.129882 |
| }, |
| { |
| "epoch": 58.18604651162791, |
| "grad_norm": 0.6514693162473681, |
| "learning_rate": 1.627176358473537e-07, |
| "loss": 0.5575238227844238, |
| "memory(GiB)": 74.97, |
| "step": 640, |
| "token_acc": 0.8362654193227916, |
| "train_speed(iter/s)": 0.129896 |
| }, |
| { |
| "epoch": 58.651162790697676, |
| "grad_norm": 0.6211369831346565, |
| "learning_rate": 1.6207157481300312e-07, |
| "loss": 0.5277935981750488, |
| "memory(GiB)": 74.97, |
| "step": 645, |
| "token_acc": 0.8476069720412159, |
| "train_speed(iter/s)": 0.129829 |
| }, |
| { |
| "epoch": 59.093023255813954, |
| "grad_norm": 0.93341409437694, |
| "learning_rate": 1.614212712689668e-07, |
| "loss": 0.5535923480987549, |
| "memory(GiB)": 74.97, |
| "step": 650, |
| "token_acc": 0.8373809799159632, |
| "train_speed(iter/s)": 0.129933 |
| }, |
| { |
| "epoch": 59.55813953488372, |
| "grad_norm": 0.7951026197647952, |
| "learning_rate": 1.607667696626281e-07, |
| "loss": 0.5427175045013428, |
| "memory(GiB)": 74.97, |
| "step": 655, |
| "token_acc": 0.845807408479236, |
| "train_speed(iter/s)": 0.129879 |
| }, |
| { |
| "epoch": 60.0, |
| "grad_norm": 0.8112289345971331, |
| "learning_rate": 1.601081147283025e-07, |
| "loss": 0.544118070602417, |
| "memory(GiB)": 74.97, |
| "step": 660, |
| "token_acc": 0.8465872536213518, |
| "train_speed(iter/s)": 0.130007 |
| }, |
| { |
| "epoch": 60.46511627906977, |
| "grad_norm": 0.8973071989809348, |
| "learning_rate": 1.594453514841798e-07, |
| "loss": 0.5551681041717529, |
| "memory(GiB)": 74.97, |
| "step": 665, |
| "token_acc": 0.8406223717409588, |
| "train_speed(iter/s)": 0.129985 |
| }, |
| { |
| "epoch": 60.93023255813954, |
| "grad_norm": 0.6961112129897833, |
| "learning_rate": 1.5877852522924732e-07, |
| "loss": 0.5278561592102051, |
| "memory(GiB)": 74.97, |
| "step": 670, |
| "token_acc": 0.8361272191105745, |
| "train_speed(iter/s)": 0.12996 |
| }, |
| { |
| "epoch": 61.372093023255815, |
| "grad_norm": 0.8454621530526435, |
| "learning_rate": 1.5810768154019382e-07, |
| "loss": 0.5304566383361816, |
| "memory(GiB)": 74.97, |
| "step": 675, |
| "token_acc": 0.8467184191954834, |
| "train_speed(iter/s)": 0.130101 |
| }, |
| { |
| "epoch": 61.83720930232558, |
| "grad_norm": 0.8048317682461219, |
| "learning_rate": 1.5743286626829435e-07, |
| "loss": 0.556386137008667, |
| "memory(GiB)": 74.97, |
| "step": 680, |
| "token_acc": 0.8513160602079739, |
| "train_speed(iter/s)": 0.130049 |
| }, |
| { |
| "epoch": 62.27906976744186, |
| "grad_norm": 1.2555197833096778, |
| "learning_rate": 1.5675412553627636e-07, |
| "loss": 0.5487345695495606, |
| "memory(GiB)": 74.97, |
| "step": 685, |
| "token_acc": 0.8283330021855752, |
| "train_speed(iter/s)": 0.130158 |
| }, |
| { |
| "epoch": 62.74418604651163, |
| "grad_norm": 0.6737924387221673, |
| "learning_rate": 1.5607150573516727e-07, |
| "loss": 0.5273719787597656, |
| "memory(GiB)": 74.97, |
| "step": 690, |
| "token_acc": 0.8344278568974075, |
| "train_speed(iter/s)": 0.130149 |
| }, |
| { |
| "epoch": 63.18604651162791, |
| "grad_norm": 0.6321277650100168, |
| "learning_rate": 1.5538505352112372e-07, |
| "loss": 0.5302412986755372, |
| "memory(GiB)": 74.97, |
| "step": 695, |
| "token_acc": 0.838855421686747, |
| "train_speed(iter/s)": 0.130224 |
| }, |
| { |
| "epoch": 63.651162790697676, |
| "grad_norm": 0.6665444493375449, |
| "learning_rate": 1.546948158122427e-07, |
| "loss": 0.5358945846557617, |
| "memory(GiB)": 74.97, |
| "step": 700, |
| "token_acc": 0.826061751191652, |
| "train_speed(iter/s)": 0.130179 |
| }, |
| { |
| "epoch": 64.09302325581395, |
| "grad_norm": 0.7540141255217923, |
| "learning_rate": 1.540008397853547e-07, |
| "loss": 0.5356395244598389, |
| "memory(GiB)": 74.97, |
| "step": 705, |
| "token_acc": 0.8476590569896634, |
| "train_speed(iter/s)": 0.130248 |
| }, |
| { |
| "epoch": 64.55813953488372, |
| "grad_norm": 0.7630626447995367, |
| "learning_rate": 1.5330317287279937e-07, |
| "loss": 0.5312513828277587, |
| "memory(GiB)": 74.97, |
| "step": 710, |
| "token_acc": 0.8489824739281576, |
| "train_speed(iter/s)": 0.130176 |
| }, |
| { |
| "epoch": 65.0, |
| "grad_norm": 1.2266930256462827, |
| "learning_rate": 1.526018627591834e-07, |
| "loss": 0.5403413295745849, |
| "memory(GiB)": 74.97, |
| "step": 715, |
| "token_acc": 0.8551674468851278, |
| "train_speed(iter/s)": 0.130251 |
| }, |
| { |
| "epoch": 65.46511627906976, |
| "grad_norm": 0.7496283095791967, |
| "learning_rate": 1.5189695737812152e-07, |
| "loss": 0.5270286560058594, |
| "memory(GiB)": 74.97, |
| "step": 720, |
| "token_acc": 0.8398781740525149, |
| "train_speed(iter/s)": 0.130162 |
| }, |
| { |
| "epoch": 65.93023255813954, |
| "grad_norm": 0.8680329950142557, |
| "learning_rate": 1.511885049089601e-07, |
| "loss": 0.5444748878479004, |
| "memory(GiB)": 74.97, |
| "step": 725, |
| "token_acc": 0.8464486183074266, |
| "train_speed(iter/s)": 0.130252 |
| }, |
| { |
| "epoch": 66.37209302325581, |
| "grad_norm": 0.6415609894652046, |
| "learning_rate": 1.5047655377348439e-07, |
| "loss": 0.5128337383270264, |
| "memory(GiB)": 74.97, |
| "step": 730, |
| "token_acc": 0.864701716521094, |
| "train_speed(iter/s)": 0.130315 |
| }, |
| { |
| "epoch": 66.83720930232558, |
| "grad_norm": 0.6939531108133022, |
| "learning_rate": 1.4976115263260874e-07, |
| "loss": 0.5571429252624511, |
| "memory(GiB)": 74.97, |
| "step": 735, |
| "token_acc": 0.8357370669769121, |
| "train_speed(iter/s)": 0.130322 |
| }, |
| { |
| "epoch": 67.27906976744185, |
| "grad_norm": 0.7218530264815206, |
| "learning_rate": 1.4904235038305082e-07, |
| "loss": 0.5194293975830078, |
| "memory(GiB)": 74.97, |
| "step": 740, |
| "token_acc": 0.8460596389007441, |
| "train_speed(iter/s)": 0.130299 |
| }, |
| { |
| "epoch": 67.74418604651163, |
| "grad_norm": 1.285168120381986, |
| "learning_rate": 1.483201961539896e-07, |
| "loss": 0.5455545425415039, |
| "memory(GiB)": 74.97, |
| "step": 745, |
| "token_acc": 0.8313979656134666, |
| "train_speed(iter/s)": 0.130272 |
| }, |
| { |
| "epoch": 68.18604651162791, |
| "grad_norm": 1.94952748533025, |
| "learning_rate": 1.4759473930370737e-07, |
| "loss": 0.5241846084594727, |
| "memory(GiB)": 74.97, |
| "step": 750, |
| "token_acc": 0.8599992655699178, |
| "train_speed(iter/s)": 0.130347 |
| }, |
| { |
| "epoch": 68.65116279069767, |
| "grad_norm": 0.7193543863488733, |
| "learning_rate": 1.4686602941621615e-07, |
| "loss": 0.5322785377502441, |
| "memory(GiB)": 74.97, |
| "step": 755, |
| "token_acc": 0.8371367656348705, |
| "train_speed(iter/s)": 0.130295 |
| }, |
| { |
| "epoch": 69.09302325581395, |
| "grad_norm": 1.0867783614431274, |
| "learning_rate": 1.4613411629786877e-07, |
| "loss": 0.521461296081543, |
| "memory(GiB)": 74.97, |
| "step": 760, |
| "token_acc": 0.8467171046810017, |
| "train_speed(iter/s)": 0.130339 |
| }, |
| { |
| "epoch": 69.55813953488372, |
| "grad_norm": 0.7455956742708548, |
| "learning_rate": 1.4539904997395468e-07, |
| "loss": 0.5118254661560059, |
| "memory(GiB)": 74.97, |
| "step": 765, |
| "token_acc": 0.8578669369898095, |
| "train_speed(iter/s)": 0.13034 |
| }, |
| { |
| "epoch": 70.0, |
| "grad_norm": 0.8528350805883835, |
| "learning_rate": 1.4466088068528067e-07, |
| "loss": 0.5299886703491211, |
| "memory(GiB)": 74.97, |
| "step": 770, |
| "token_acc": 0.8476385063027893, |
| "train_speed(iter/s)": 0.130365 |
| }, |
| { |
| "epoch": 70.46511627906976, |
| "grad_norm": 0.6395748070686201, |
| "learning_rate": 1.4391965888473702e-07, |
| "loss": 0.5287624359130859, |
| "memory(GiB)": 74.97, |
| "step": 775, |
| "token_acc": 0.8381954887218045, |
| "train_speed(iter/s)": 0.130324 |
| }, |
| { |
| "epoch": 70.93023255813954, |
| "grad_norm": 0.842531216333987, |
| "learning_rate": 1.4317543523384928e-07, |
| "loss": 0.5287698745727539, |
| "memory(GiB)": 74.97, |
| "step": 780, |
| "token_acc": 0.8566830651213208, |
| "train_speed(iter/s)": 0.130359 |
| }, |
| { |
| "epoch": 71.37209302325581, |
| "grad_norm": 0.722140572381901, |
| "learning_rate": 1.4242826059931536e-07, |
| "loss": 0.5152388572692871, |
| "memory(GiB)": 74.97, |
| "step": 785, |
| "token_acc": 0.8451972291311229, |
| "train_speed(iter/s)": 0.130398 |
| }, |
| { |
| "epoch": 71.83720930232558, |
| "grad_norm": 1.1033571214972513, |
| "learning_rate": 1.4167818604952903e-07, |
| "loss": 0.5234486579895019, |
| "memory(GiB)": 74.97, |
| "step": 790, |
| "token_acc": 0.8461698837673958, |
| "train_speed(iter/s)": 0.130382 |
| }, |
| { |
| "epoch": 72.27906976744185, |
| "grad_norm": 0.7546592396468452, |
| "learning_rate": 1.4092526285108939e-07, |
| "loss": 0.5231525897979736, |
| "memory(GiB)": 74.97, |
| "step": 795, |
| "token_acc": 0.8471460044061686, |
| "train_speed(iter/s)": 0.130495 |
| }, |
| { |
| "epoch": 72.74418604651163, |
| "grad_norm": 0.7665462491639092, |
| "learning_rate": 1.4016954246529695e-07, |
| "loss": 0.5139668941497803, |
| "memory(GiB)": 74.97, |
| "step": 800, |
| "token_acc": 0.8447760249371035, |
| "train_speed(iter/s)": 0.130432 |
| }, |
| { |
| "epoch": 72.74418604651163, |
| "eval_loss": 0.6015437245368958, |
| "eval_runtime": 0.7284, |
| "eval_samples_per_second": 17.847, |
| "eval_steps_per_second": 2.746, |
| "eval_token_acc": 0.8434715968962544, |
| "step": 800 |
| }, |
| { |
| "epoch": 73.18604651162791, |
| "grad_norm": 0.7272029873141171, |
| "learning_rate": 1.3941107654463616e-07, |
| "loss": 0.5379150390625, |
| "memory(GiB)": 74.97, |
| "step": 805, |
| "token_acc": 0.8408949295116442, |
| "train_speed(iter/s)": 0.130058 |
| }, |
| { |
| "epoch": 73.65116279069767, |
| "grad_norm": 0.7995205555897585, |
| "learning_rate": 1.3864991692924522e-07, |
| "loss": 0.5211355209350585, |
| "memory(GiB)": 74.97, |
| "step": 810, |
| "token_acc": 0.8419526596025093, |
| "train_speed(iter/s)": 0.130097 |
| }, |
| { |
| "epoch": 74.09302325581395, |
| "grad_norm": 0.9263844311361451, |
| "learning_rate": 1.3788611564337276e-07, |
| "loss": 0.5166553497314453, |
| "memory(GiB)": 74.97, |
| "step": 815, |
| "token_acc": 0.8460784079221183, |
| "train_speed(iter/s)": 0.130043 |
| }, |
| { |
| "epoch": 74.55813953488372, |
| "grad_norm": 0.9100848478509656, |
| "learning_rate": 1.3711972489182207e-07, |
| "loss": 0.5152887344360352, |
| "memory(GiB)": 74.97, |
| "step": 820, |
| "token_acc": 0.8641304347826086, |
| "train_speed(iter/s)": 0.129973 |
| }, |
| { |
| "epoch": 75.0, |
| "grad_norm": 0.8520157723565999, |
| "learning_rate": 1.3635079705638297e-07, |
| "loss": 0.5118432998657226, |
| "memory(GiB)": 74.97, |
| "step": 825, |
| "token_acc": 0.8406333086780081, |
| "train_speed(iter/s)": 0.130038 |
| }, |
| { |
| "epoch": 75.46511627906976, |
| "grad_norm": 1.89559334384708, |
| "learning_rate": 1.3557938469225164e-07, |
| "loss": 0.5238603591918946, |
| "memory(GiB)": 74.97, |
| "step": 830, |
| "token_acc": 0.8296420958151015, |
| "train_speed(iter/s)": 0.129953 |
| }, |
| { |
| "epoch": 75.93023255813954, |
| "grad_norm": 0.8445066662231647, |
| "learning_rate": 1.3480554052443843e-07, |
| "loss": 0.5140830516815186, |
| "memory(GiB)": 74.97, |
| "step": 835, |
| "token_acc": 0.8494189687565236, |
| "train_speed(iter/s)": 0.13002 |
| }, |
| { |
| "epoch": 76.37209302325581, |
| "grad_norm": 1.5322849456525907, |
| "learning_rate": 1.340293174441643e-07, |
| "loss": 0.5148379325866699, |
| "memory(GiB)": 74.97, |
| "step": 840, |
| "token_acc": 0.8386292834890966, |
| "train_speed(iter/s)": 0.13 |
| }, |
| { |
| "epoch": 76.83720930232558, |
| "grad_norm": 0.7284489005308602, |
| "learning_rate": 1.332507685052457e-07, |
| "loss": 0.5148776531219482, |
| "memory(GiB)": 74.97, |
| "step": 845, |
| "token_acc": 0.8438160869248159, |
| "train_speed(iter/s)": 0.130024 |
| }, |
| { |
| "epoch": 77.27906976744185, |
| "grad_norm": 0.8254251521761937, |
| "learning_rate": 1.3246994692046836e-07, |
| "loss": 0.5172486305236816, |
| "memory(GiB)": 74.97, |
| "step": 850, |
| "token_acc": 0.8467165799851403, |
| "train_speed(iter/s)": 0.130065 |
| }, |
| { |
| "epoch": 77.74418604651163, |
| "grad_norm": 0.888794754410688, |
| "learning_rate": 1.3168690605795043e-07, |
| "loss": 0.515445613861084, |
| "memory(GiB)": 74.97, |
| "step": 855, |
| "token_acc": 0.8480349170918368, |
| "train_speed(iter/s)": 0.130098 |
| }, |
| { |
| "epoch": 78.18604651162791, |
| "grad_norm": 0.8024083233168969, |
| "learning_rate": 1.3090169943749475e-07, |
| "loss": 0.5077299118041992, |
| "memory(GiB)": 74.97, |
| "step": 860, |
| "token_acc": 0.8461068818804495, |
| "train_speed(iter/s)": 0.130157 |
| }, |
| { |
| "epoch": 78.65116279069767, |
| "grad_norm": 0.7968691650808981, |
| "learning_rate": 1.3011438072693074e-07, |
| "loss": 0.5154001235961914, |
| "memory(GiB)": 74.97, |
| "step": 865, |
| "token_acc": 0.8603395311236863, |
| "train_speed(iter/s)": 0.130118 |
| }, |
| { |
| "epoch": 79.09302325581395, |
| "grad_norm": 1.4489088486628856, |
| "learning_rate": 1.2932500373844649e-07, |
| "loss": 0.5220766544342041, |
| "memory(GiB)": 74.97, |
| "step": 870, |
| "token_acc": 0.8575108409621586, |
| "train_speed(iter/s)": 0.130187 |
| }, |
| { |
| "epoch": 79.55813953488372, |
| "grad_norm": 0.833164944608322, |
| "learning_rate": 1.2853362242491051e-07, |
| "loss": 0.5146864414215088, |
| "memory(GiB)": 74.97, |
| "step": 875, |
| "token_acc": 0.8354072612769832, |
| "train_speed(iter/s)": 0.130231 |
| }, |
| { |
| "epoch": 80.0, |
| "grad_norm": 1.0334544104049193, |
| "learning_rate": 1.2774029087618446e-07, |
| "loss": 0.5196131706237793, |
| "memory(GiB)": 74.97, |
| "step": 880, |
| "token_acc": 0.8273188610093036, |
| "train_speed(iter/s)": 0.130243 |
| }, |
| { |
| "epoch": 80.46511627906976, |
| "grad_norm": 0.7327428116602168, |
| "learning_rate": 1.2694506331542577e-07, |
| "loss": 0.5012516975402832, |
| "memory(GiB)": 74.97, |
| "step": 885, |
| "token_acc": 0.8552629297640307, |
| "train_speed(iter/s)": 0.130266 |
| }, |
| { |
| "epoch": 80.93023255813954, |
| "grad_norm": 0.7823436928202996, |
| "learning_rate": 1.2614799409538198e-07, |
| "loss": 0.5132665634155273, |
| "memory(GiB)": 74.97, |
| "step": 890, |
| "token_acc": 0.8614560088497263, |
| "train_speed(iter/s)": 0.130236 |
| }, |
| { |
| "epoch": 81.37209302325581, |
| "grad_norm": 0.8496813139641767, |
| "learning_rate": 1.253491376946754e-07, |
| "loss": 0.5047847747802734, |
| "memory(GiB)": 74.97, |
| "step": 895, |
| "token_acc": 0.8672797358731915, |
| "train_speed(iter/s)": 0.130316 |
| }, |
| { |
| "epoch": 81.83720930232558, |
| "grad_norm": 0.7662540093111049, |
| "learning_rate": 1.2454854871407992e-07, |
| "loss": 0.5070115566253662, |
| "memory(GiB)": 74.97, |
| "step": 900, |
| "token_acc": 0.8437890633276128, |
| "train_speed(iter/s)": 0.130345 |
| }, |
| { |
| "epoch": 82.27906976744185, |
| "grad_norm": 1.1403186852474703, |
| "learning_rate": 1.2374628187278885e-07, |
| "loss": 0.5135304450988769, |
| "memory(GiB)": 74.97, |
| "step": 905, |
| "token_acc": 0.8760885832099473, |
| "train_speed(iter/s)": 0.130362 |
| }, |
| { |
| "epoch": 82.74418604651163, |
| "grad_norm": 0.6850775896882327, |
| "learning_rate": 1.2294239200467515e-07, |
| "loss": 0.48610854148864746, |
| "memory(GiB)": 74.97, |
| "step": 910, |
| "token_acc": 0.864081524616199, |
| "train_speed(iter/s)": 0.130315 |
| }, |
| { |
| "epoch": 83.18604651162791, |
| "grad_norm": 1.7277139603374756, |
| "learning_rate": 1.2213693405454345e-07, |
| "loss": 0.5195373058319092, |
| "memory(GiB)": 74.97, |
| "step": 915, |
| "token_acc": 0.842862242005585, |
| "train_speed(iter/s)": 0.130334 |
| }, |
| { |
| "epoch": 83.65116279069767, |
| "grad_norm": 1.562225291111122, |
| "learning_rate": 1.213299630743747e-07, |
| "loss": 0.5000184059143067, |
| "memory(GiB)": 74.97, |
| "step": 920, |
| "token_acc": 0.8502656832421286, |
| "train_speed(iter/s)": 0.130352 |
| }, |
| { |
| "epoch": 84.09302325581395, |
| "grad_norm": 0.7432167354378622, |
| "learning_rate": 1.205215342195634e-07, |
| "loss": 0.4978955745697021, |
| "memory(GiB)": 74.97, |
| "step": 925, |
| "token_acc": 0.8535459925769887, |
| "train_speed(iter/s)": 0.130407 |
| }, |
| { |
| "epoch": 84.55813953488372, |
| "grad_norm": 2.2667269366172267, |
| "learning_rate": 1.1971170274514802e-07, |
| "loss": 0.5232599258422852, |
| "memory(GiB)": 74.97, |
| "step": 930, |
| "token_acc": 0.8631094983089064, |
| "train_speed(iter/s)": 0.130392 |
| }, |
| { |
| "epoch": 85.0, |
| "grad_norm": 0.7640021499203492, |
| "learning_rate": 1.1890052400203402e-07, |
| "loss": 0.48494710922241213, |
| "memory(GiB)": 74.97, |
| "step": 935, |
| "token_acc": 0.8383060054320491, |
| "train_speed(iter/s)": 0.130461 |
| }, |
| { |
| "epoch": 85.46511627906976, |
| "grad_norm": 0.797825246843515, |
| "learning_rate": 1.18088053433211e-07, |
| "loss": 0.4894867897033691, |
| "memory(GiB)": 74.97, |
| "step": 940, |
| "token_acc": 0.862217698107348, |
| "train_speed(iter/s)": 0.130536 |
| }, |
| { |
| "epoch": 85.93023255813954, |
| "grad_norm": 1.118805326320862, |
| "learning_rate": 1.1727434656996305e-07, |
| "loss": 0.5085083961486816, |
| "memory(GiB)": 74.97, |
| "step": 945, |
| "token_acc": 0.8468460041903622, |
| "train_speed(iter/s)": 0.130472 |
| }, |
| { |
| "epoch": 86.37209302325581, |
| "grad_norm": 0.8642381524493187, |
| "learning_rate": 1.1645945902807339e-07, |
| "loss": 0.501039457321167, |
| "memory(GiB)": 74.97, |
| "step": 950, |
| "token_acc": 0.8637289013917678, |
| "train_speed(iter/s)": 0.130524 |
| }, |
| { |
| "epoch": 86.83720930232558, |
| "grad_norm": 0.876594093463965, |
| "learning_rate": 1.1564344650402309e-07, |
| "loss": 0.5047001838684082, |
| "memory(GiB)": 74.97, |
| "step": 955, |
| "token_acc": 0.8469405442884382, |
| "train_speed(iter/s)": 0.130517 |
| }, |
| { |
| "epoch": 87.27906976744185, |
| "grad_norm": 1.2339377952227535, |
| "learning_rate": 1.1482636477118419e-07, |
| "loss": 0.5183281898498535, |
| "memory(GiB)": 74.97, |
| "step": 960, |
| "token_acc": 0.848177734504658, |
| "train_speed(iter/s)": 0.130587 |
| }, |
| { |
| "epoch": 87.74418604651163, |
| "grad_norm": 0.631851683029857, |
| "learning_rate": 1.1400826967600779e-07, |
| "loss": 0.483397912979126, |
| "memory(GiB)": 74.97, |
| "step": 965, |
| "token_acc": 0.8719364241861677, |
| "train_speed(iter/s)": 0.130556 |
| }, |
| { |
| "epoch": 88.18604651162791, |
| "grad_norm": 1.0951446409255636, |
| "learning_rate": 1.131892171342069e-07, |
| "loss": 0.5028903007507324, |
| "memory(GiB)": 74.97, |
| "step": 970, |
| "token_acc": 0.8738672544697527, |
| "train_speed(iter/s)": 0.130594 |
| }, |
| { |
| "epoch": 88.65116279069767, |
| "grad_norm": 0.7683275760751048, |
| "learning_rate": 1.1236926312693478e-07, |
| "loss": 0.4880162239074707, |
| "memory(GiB)": 74.97, |
| "step": 975, |
| "token_acc": 0.8594904599095622, |
| "train_speed(iter/s)": 0.130573 |
| }, |
| { |
| "epoch": 89.09302325581395, |
| "grad_norm": 6.943858471099767, |
| "learning_rate": 1.1154846369695863e-07, |
| "loss": 0.5035033226013184, |
| "memory(GiB)": 74.97, |
| "step": 980, |
| "token_acc": 0.8718237375361853, |
| "train_speed(iter/s)": 0.130588 |
| }, |
| { |
| "epoch": 89.55813953488372, |
| "grad_norm": 0.722153826562248, |
| "learning_rate": 1.1072687494482918e-07, |
| "loss": 0.5015533447265625, |
| "memory(GiB)": 74.97, |
| "step": 985, |
| "token_acc": 0.8497986934062595, |
| "train_speed(iter/s)": 0.130571 |
| }, |
| { |
| "epoch": 90.0, |
| "grad_norm": 0.7791081924406386, |
| "learning_rate": 1.0990455302504628e-07, |
| "loss": 0.4978206157684326, |
| "memory(GiB)": 74.97, |
| "step": 990, |
| "token_acc": 0.8598159926863901, |
| "train_speed(iter/s)": 0.130602 |
| }, |
| { |
| "epoch": 90.46511627906976, |
| "grad_norm": 0.7993364463951824, |
| "learning_rate": 1.0908155414222082e-07, |
| "loss": 0.47749814987182615, |
| "memory(GiB)": 74.97, |
| "step": 995, |
| "token_acc": 0.8648952240771585, |
| "train_speed(iter/s)": 0.13055 |
| }, |
| { |
| "epoch": 90.93023255813954, |
| "grad_norm": 0.9293826361291836, |
| "learning_rate": 1.0825793454723325e-07, |
| "loss": 0.4996511936187744, |
| "memory(GiB)": 74.97, |
| "step": 1000, |
| "token_acc": 0.8631259732808786, |
| "train_speed(iter/s)": 0.13058 |
| }, |
| { |
| "epoch": 90.93023255813954, |
| "eval_loss": 0.602931022644043, |
| "eval_runtime": 0.716, |
| "eval_samples_per_second": 18.155, |
| "eval_steps_per_second": 2.793, |
| "eval_token_acc": 0.8428010345818565, |
| "step": 1000 |
| }, |
| { |
| "epoch": 91.37209302325581, |
| "grad_norm": 1.1841722413103843, |
| "learning_rate": 1.0743375053338877e-07, |
| "loss": 0.5005837440490722, |
| "memory(GiB)": 74.97, |
| "step": 1005, |
| "token_acc": 0.8554249955862447, |
| "train_speed(iter/s)": 0.130297 |
| }, |
| { |
| "epoch": 91.83720930232558, |
| "grad_norm": 1.4009925352920263, |
| "learning_rate": 1.0660905843256993e-07, |
| "loss": 0.504381799697876, |
| "memory(GiB)": 74.97, |
| "step": 1010, |
| "token_acc": 0.8340634861704103, |
| "train_speed(iter/s)": 0.130275 |
| }, |
| { |
| "epoch": 92.27906976744185, |
| "grad_norm": 0.7236486242500604, |
| "learning_rate": 1.057839146113864e-07, |
| "loss": 0.4767627716064453, |
| "memory(GiB)": 74.97, |
| "step": 1015, |
| "token_acc": 0.8686680165507527, |
| "train_speed(iter/s)": 0.130259 |
| }, |
| { |
| "epoch": 92.74418604651163, |
| "grad_norm": 1.436377509073585, |
| "learning_rate": 1.0495837546732223e-07, |
| "loss": 0.5024114131927491, |
| "memory(GiB)": 74.97, |
| "step": 1020, |
| "token_acc": 0.8457889431344258, |
| "train_speed(iter/s)": 0.1303 |
| }, |
| { |
| "epoch": 93.18604651162791, |
| "grad_norm": 0.9279689257580228, |
| "learning_rate": 1.0413249742488131e-07, |
| "loss": 0.48839874267578126, |
| "memory(GiB)": 74.97, |
| "step": 1025, |
| "token_acc": 0.8741351653515239, |
| "train_speed(iter/s)": 0.130339 |
| }, |
| { |
| "epoch": 93.65116279069767, |
| "grad_norm": 1.5611563038818324, |
| "learning_rate": 1.033063369317308e-07, |
| "loss": 0.48693456649780276, |
| "memory(GiB)": 74.97, |
| "step": 1030, |
| "token_acc": 0.8771067535162163, |
| "train_speed(iter/s)": 0.130324 |
| }, |
| { |
| "epoch": 94.09302325581395, |
| "grad_norm": 1.1172420689296867, |
| "learning_rate": 1.0247995045484301e-07, |
| "loss": 0.5037758350372314, |
| "memory(GiB)": 74.97, |
| "step": 1035, |
| "token_acc": 0.8510888627433569, |
| "train_speed(iter/s)": 0.130325 |
| }, |
| { |
| "epoch": 94.55813953488372, |
| "grad_norm": 0.8609018320733309, |
| "learning_rate": 1.0165339447663586e-07, |
| "loss": 0.4941869258880615, |
| "memory(GiB)": 74.97, |
| "step": 1040, |
| "token_acc": 0.8680718468508801, |
| "train_speed(iter/s)": 0.130309 |
| }, |
| { |
| "epoch": 95.0, |
| "grad_norm": 1.9807352700715366, |
| "learning_rate": 1.0082672549111248e-07, |
| "loss": 0.4907430648803711, |
| "memory(GiB)": 74.97, |
| "step": 1045, |
| "token_acc": 0.8594207248443011, |
| "train_speed(iter/s)": 0.130342 |
| }, |
| { |
| "epoch": 95.46511627906976, |
| "grad_norm": 1.2132659915520214, |
| "learning_rate": 1e-07, |
| "loss": 0.5144547462463379, |
| "memory(GiB)": 74.97, |
| "step": 1050, |
| "token_acc": 0.8396730861192019, |
| "train_speed(iter/s)": 0.130346 |
| }, |
| { |
| "epoch": 95.93023255813954, |
| "grad_norm": 1.6515430345069437, |
| "learning_rate": 9.917327450888751e-08, |
| "loss": 0.46764235496520995, |
| "memory(GiB)": 74.97, |
| "step": 1055, |
| "token_acc": 0.8370761686275335, |
| "train_speed(iter/s)": 0.130367 |
| }, |
| { |
| "epoch": 96.37209302325581, |
| "grad_norm": 0.7140536621397322, |
| "learning_rate": 9.834660552336415e-08, |
| "loss": 0.48370823860168455, |
| "memory(GiB)": 74.97, |
| "step": 1060, |
| "token_acc": 0.8530308955807587, |
| "train_speed(iter/s)": 0.13034 |
| }, |
| { |
| "epoch": 96.83720930232558, |
| "grad_norm": 1.0809702853567489, |
| "learning_rate": 9.752004954515699e-08, |
| "loss": 0.49426803588867185, |
| "memory(GiB)": 74.97, |
| "step": 1065, |
| "token_acc": 0.8571793110216901, |
| "train_speed(iter/s)": 0.130299 |
| }, |
| { |
| "epoch": 97.27906976744185, |
| "grad_norm": 0.7163522482069422, |
| "learning_rate": 9.669366306826918e-08, |
| "loss": 0.4718944072723389, |
| "memory(GiB)": 74.97, |
| "step": 1070, |
| "token_acc": 0.8619141314767166, |
| "train_speed(iter/s)": 0.130378 |
| }, |
| { |
| "epoch": 97.74418604651163, |
| "grad_norm": 0.83946396188462, |
| "learning_rate": 9.586750257511866e-08, |
| "loss": 0.4911818504333496, |
| "memory(GiB)": 74.97, |
| "step": 1075, |
| "token_acc": 0.8650800071189347, |
| "train_speed(iter/s)": 0.1303 |
| }, |
| { |
| "epoch": 98.18604651162791, |
| "grad_norm": 0.9728064150742605, |
| "learning_rate": 9.504162453267775e-08, |
| "loss": 0.4725058078765869, |
| "memory(GiB)": 74.97, |
| "step": 1080, |
| "token_acc": 0.876843910806175, |
| "train_speed(iter/s)": 0.130339 |
| }, |
| { |
| "epoch": 98.65116279069767, |
| "grad_norm": 0.7909880351612323, |
| "learning_rate": 9.421608538861361e-08, |
| "loss": 0.4865569114685059, |
| "memory(GiB)": 74.97, |
| "step": 1085, |
| "token_acc": 0.8610528723363702, |
| "train_speed(iter/s)": 0.130299 |
| }, |
| { |
| "epoch": 99.09302325581395, |
| "grad_norm": 0.8239796766786783, |
| "learning_rate": 9.339094156743006e-08, |
| "loss": 0.49038195610046387, |
| "memory(GiB)": 74.97, |
| "step": 1090, |
| "token_acc": 0.8451910122126125, |
| "train_speed(iter/s)": 0.130321 |
| }, |
| { |
| "epoch": 99.55813953488372, |
| "grad_norm": 0.714832653552484, |
| "learning_rate": 9.256624946661125e-08, |
| "loss": 0.47361068725585936, |
| "memory(GiB)": 74.97, |
| "step": 1095, |
| "token_acc": 0.8569815516103255, |
| "train_speed(iter/s)": 0.13029 |
| }, |
| { |
| "epoch": 100.0, |
| "grad_norm": 1.1976610090490132, |
| "learning_rate": 9.174206545276677e-08, |
| "loss": 0.49490890502929685, |
| "memory(GiB)": 74.97, |
| "step": 1100, |
| "token_acc": 0.8424860734638123, |
| "train_speed(iter/s)": 0.130347 |
| }, |
| { |
| "epoch": 100.46511627906976, |
| "grad_norm": 0.8008632586934444, |
| "learning_rate": 9.091844585777917e-08, |
| "loss": 0.4697834014892578, |
| "memory(GiB)": 74.97, |
| "step": 1105, |
| "token_acc": 0.8580395195660596, |
| "train_speed(iter/s)": 0.130358 |
| }, |
| { |
| "epoch": 100.93023255813954, |
| "grad_norm": 0.6845439357302979, |
| "learning_rate": 9.009544697495372e-08, |
| "loss": 0.48686370849609373, |
| "memory(GiB)": 74.97, |
| "step": 1110, |
| "token_acc": 0.8574517231821122, |
| "train_speed(iter/s)": 0.13037 |
| }, |
| { |
| "epoch": 101.37209302325581, |
| "grad_norm": 0.7958237623480675, |
| "learning_rate": 8.927312505517084e-08, |
| "loss": 0.4824103832244873, |
| "memory(GiB)": 74.97, |
| "step": 1115, |
| "token_acc": 0.8406652121643884, |
| "train_speed(iter/s)": 0.130425 |
| }, |
| { |
| "epoch": 101.83720930232558, |
| "grad_norm": 0.7562640332446442, |
| "learning_rate": 8.845153630304139e-08, |
| "loss": 0.4883410453796387, |
| "memory(GiB)": 74.97, |
| "step": 1120, |
| "token_acc": 0.8654994502241394, |
| "train_speed(iter/s)": 0.130404 |
| }, |
| { |
| "epoch": 102.27906976744185, |
| "grad_norm": 0.91816981994612, |
| "learning_rate": 8.763073687306523e-08, |
| "loss": 0.47723941802978515, |
| "memory(GiB)": 74.97, |
| "step": 1125, |
| "token_acc": 0.8617350394493566, |
| "train_speed(iter/s)": 0.13041 |
| }, |
| { |
| "epoch": 102.74418604651163, |
| "grad_norm": 1.0207292255363964, |
| "learning_rate": 8.68107828657931e-08, |
| "loss": 0.48489856719970703, |
| "memory(GiB)": 74.97, |
| "step": 1130, |
| "token_acc": 0.8609944029573764, |
| "train_speed(iter/s)": 0.130389 |
| }, |
| { |
| "epoch": 103.18604651162791, |
| "grad_norm": 1.0123784499736115, |
| "learning_rate": 8.59917303239922e-08, |
| "loss": 0.4814739227294922, |
| "memory(GiB)": 74.97, |
| "step": 1135, |
| "token_acc": 0.8705958429561201, |
| "train_speed(iter/s)": 0.130431 |
| }, |
| { |
| "epoch": 103.65116279069767, |
| "grad_norm": 0.7408077875426933, |
| "learning_rate": 8.517363522881579e-08, |
| "loss": 0.47219066619873046, |
| "memory(GiB)": 74.97, |
| "step": 1140, |
| "token_acc": 0.8524354155002799, |
| "train_speed(iter/s)": 0.130432 |
| }, |
| { |
| "epoch": 104.09302325581395, |
| "grad_norm": 0.7314596110135979, |
| "learning_rate": 8.435655349597689e-08, |
| "loss": 0.4839695930480957, |
| "memory(GiB)": 74.97, |
| "step": 1145, |
| "token_acc": 0.8638605778320128, |
| "train_speed(iter/s)": 0.130438 |
| }, |
| { |
| "epoch": 104.55813953488372, |
| "grad_norm": 0.8022214373595549, |
| "learning_rate": 8.354054097192658e-08, |
| "loss": 0.4761360168457031, |
| "memory(GiB)": 74.97, |
| "step": 1150, |
| "token_acc": 0.8594843717513341, |
| "train_speed(iter/s)": 0.130347 |
| }, |
| { |
| "epoch": 105.0, |
| "grad_norm": 0.9319907606891521, |
| "learning_rate": 8.2725653430037e-08, |
| "loss": 0.4859612941741943, |
| "memory(GiB)": 74.97, |
| "step": 1155, |
| "token_acc": 0.8549445575922154, |
| "train_speed(iter/s)": 0.130405 |
| }, |
| { |
| "epoch": 105.46511627906976, |
| "grad_norm": 1.2930176911390905, |
| "learning_rate": 8.191194656678904e-08, |
| "loss": 0.4661128044128418, |
| "memory(GiB)": 74.97, |
| "step": 1160, |
| "token_acc": 0.8626862925482981, |
| "train_speed(iter/s)": 0.130393 |
| }, |
| { |
| "epoch": 105.93023255813954, |
| "grad_norm": 0.9575779480555059, |
| "learning_rate": 8.109947599796598e-08, |
| "loss": 0.484060001373291, |
| "memory(GiB)": 74.97, |
| "step": 1165, |
| "token_acc": 0.8556487381611823, |
| "train_speed(iter/s)": 0.130404 |
| }, |
| { |
| "epoch": 106.37209302325581, |
| "grad_norm": 0.7486234774787734, |
| "learning_rate": 8.028829725485198e-08, |
| "loss": 0.4818765640258789, |
| "memory(GiB)": 74.97, |
| "step": 1170, |
| "token_acc": 0.8624224886316659, |
| "train_speed(iter/s)": 0.130438 |
| }, |
| { |
| "epoch": 106.83720930232558, |
| "grad_norm": 0.7280471700597845, |
| "learning_rate": 7.947846578043658e-08, |
| "loss": 0.48406553268432617, |
| "memory(GiB)": 74.97, |
| "step": 1175, |
| "token_acc": 0.8612848675893546, |
| "train_speed(iter/s)": 0.130403 |
| }, |
| { |
| "epoch": 107.27906976744185, |
| "grad_norm": 0.9703016724934369, |
| "learning_rate": 7.867003692562532e-08, |
| "loss": 0.46012191772460936, |
| "memory(GiB)": 74.97, |
| "step": 1180, |
| "token_acc": 0.8720765414599575, |
| "train_speed(iter/s)": 0.13046 |
| }, |
| { |
| "epoch": 107.74418604651163, |
| "grad_norm": 1.7207486244429357, |
| "learning_rate": 7.786306594545656e-08, |
| "loss": 0.47897043228149416, |
| "memory(GiB)": 74.97, |
| "step": 1185, |
| "token_acc": 0.8613559838243008, |
| "train_speed(iter/s)": 0.130449 |
| }, |
| { |
| "epoch": 108.18604651162791, |
| "grad_norm": 1.0944806454073215, |
| "learning_rate": 7.705760799532485e-08, |
| "loss": 0.48472142219543457, |
| "memory(GiB)": 74.97, |
| "step": 1190, |
| "token_acc": 0.8510737233682787, |
| "train_speed(iter/s)": 0.130447 |
| }, |
| { |
| "epoch": 108.65116279069767, |
| "grad_norm": 0.7340918962562681, |
| "learning_rate": 7.625371812721114e-08, |
| "loss": 0.46958436965942385, |
| "memory(GiB)": 74.97, |
| "step": 1195, |
| "token_acc": 0.8719202394209354, |
| "train_speed(iter/s)": 0.130463 |
| }, |
| { |
| "epoch": 109.09302325581395, |
| "grad_norm": 0.939464587476609, |
| "learning_rate": 7.545145128592009e-08, |
| "loss": 0.47149295806884767, |
| "memory(GiB)": 74.97, |
| "step": 1200, |
| "token_acc": 0.8800350262697023, |
| "train_speed(iter/s)": 0.130453 |
| }, |
| { |
| "epoch": 109.09302325581395, |
| "eval_loss": 0.6058527827262878, |
| "eval_runtime": 0.7066, |
| "eval_samples_per_second": 18.397, |
| "eval_steps_per_second": 2.83, |
| "eval_token_acc": 0.8434715968962544, |
| "step": 1200 |
| }, |
| { |
| "epoch": 109.55813953488372, |
| "grad_norm": 0.8652359563773929, |
| "learning_rate": 7.465086230532459e-08, |
| "loss": 0.476532506942749, |
| "memory(GiB)": 74.97, |
| "step": 1205, |
| "token_acc": 0.8694151027245068, |
| "train_speed(iter/s)": 0.130187 |
| }, |
| { |
| "epoch": 110.0, |
| "grad_norm": 0.8098360520222708, |
| "learning_rate": 7.385200590461802e-08, |
| "loss": 0.4804817199707031, |
| "memory(GiB)": 74.97, |
| "step": 1210, |
| "token_acc": 0.8504993058976311, |
| "train_speed(iter/s)": 0.130211 |
| }, |
| { |
| "epoch": 110.46511627906976, |
| "grad_norm": 0.7864179053648999, |
| "learning_rate": 7.305493668457419e-08, |
| "loss": 0.46163101196289064, |
| "memory(GiB)": 74.97, |
| "step": 1215, |
| "token_acc": 0.8520807581376184, |
| "train_speed(iter/s)": 0.130209 |
| }, |
| { |
| "epoch": 110.93023255813954, |
| "grad_norm": 1.2076707405286862, |
| "learning_rate": 7.225970912381556e-08, |
| "loss": 0.4753293991088867, |
| "memory(GiB)": 74.97, |
| "step": 1220, |
| "token_acc": 0.8554707472061939, |
| "train_speed(iter/s)": 0.1302 |
| }, |
| { |
| "epoch": 111.37209302325581, |
| "grad_norm": 0.871709312109685, |
| "learning_rate": 7.146637757508949e-08, |
| "loss": 0.47620530128479005, |
| "memory(GiB)": 74.97, |
| "step": 1225, |
| "token_acc": 0.8760574752720532, |
| "train_speed(iter/s)": 0.13021 |
| }, |
| { |
| "epoch": 111.83720930232558, |
| "grad_norm": 0.7334760311164147, |
| "learning_rate": 7.067499626155353e-08, |
| "loss": 0.46177024841308595, |
| "memory(GiB)": 74.97, |
| "step": 1230, |
| "token_acc": 0.8513760840189522, |
| "train_speed(iter/s)": 0.130203 |
| }, |
| { |
| "epoch": 112.27906976744185, |
| "grad_norm": 0.9429213919362676, |
| "learning_rate": 6.988561927306926e-08, |
| "loss": 0.4705217361450195, |
| "memory(GiB)": 74.97, |
| "step": 1235, |
| "token_acc": 0.8782852564102565, |
| "train_speed(iter/s)": 0.130244 |
| }, |
| { |
| "epoch": 112.74418604651163, |
| "grad_norm": 1.0006229504211153, |
| "learning_rate": 6.909830056250527e-08, |
| "loss": 0.46991333961486814, |
| "memory(GiB)": 74.97, |
| "step": 1240, |
| "token_acc": 0.8570367690462136, |
| "train_speed(iter/s)": 0.130239 |
| }, |
| { |
| "epoch": 113.18604651162791, |
| "grad_norm": 1.5600658321413452, |
| "learning_rate": 6.831309394204956e-08, |
| "loss": 0.5063477039337159, |
| "memory(GiB)": 74.97, |
| "step": 1245, |
| "token_acc": 0.8328871703351179, |
| "train_speed(iter/s)": 0.130265 |
| }, |
| { |
| "epoch": 113.65116279069767, |
| "grad_norm": 0.7100324996989047, |
| "learning_rate": 6.753005307953166e-08, |
| "loss": 0.4718203544616699, |
| "memory(GiB)": 74.97, |
| "step": 1250, |
| "token_acc": 0.846406587098945, |
| "train_speed(iter/s)": 0.130269 |
| }, |
| { |
| "epoch": 114.09302325581395, |
| "grad_norm": 1.02085122390004, |
| "learning_rate": 6.674923149475432e-08, |
| "loss": 0.46040911674499513, |
| "memory(GiB)": 74.97, |
| "step": 1255, |
| "token_acc": 0.8600188738597043, |
| "train_speed(iter/s)": 0.130273 |
| }, |
| { |
| "epoch": 114.55813953488372, |
| "grad_norm": 0.7602372463858895, |
| "learning_rate": 6.597068255583569e-08, |
| "loss": 0.4706200122833252, |
| "memory(GiB)": 74.97, |
| "step": 1260, |
| "token_acc": 0.850320256204964, |
| "train_speed(iter/s)": 0.130273 |
| }, |
| { |
| "epoch": 115.0, |
| "grad_norm": 0.880014706373256, |
| "learning_rate": 6.519445947556154e-08, |
| "loss": 0.4695608139038086, |
| "memory(GiB)": 74.97, |
| "step": 1265, |
| "token_acc": 0.8624032731477363, |
| "train_speed(iter/s)": 0.1303 |
| }, |
| { |
| "epoch": 115.46511627906976, |
| "grad_norm": 1.2127086778344998, |
| "learning_rate": 6.442061530774834e-08, |
| "loss": 0.47931528091430664, |
| "memory(GiB)": 74.97, |
| "step": 1270, |
| "token_acc": 0.844140842826416, |
| "train_speed(iter/s)": 0.130299 |
| }, |
| { |
| "epoch": 115.93023255813954, |
| "grad_norm": 1.083099670256692, |
| "learning_rate": 6.3649202943617e-08, |
| "loss": 0.4720285415649414, |
| "memory(GiB)": 74.97, |
| "step": 1275, |
| "token_acc": 0.8607366273040511, |
| "train_speed(iter/s)": 0.130282 |
| }, |
| { |
| "epoch": 116.37209302325581, |
| "grad_norm": 0.767737493501071, |
| "learning_rate": 6.288027510817791e-08, |
| "loss": 0.4558729648590088, |
| "memory(GiB)": 74.97, |
| "step": 1280, |
| "token_acc": 0.8583624139902605, |
| "train_speed(iter/s)": 0.130308 |
| }, |
| { |
| "epoch": 116.83720930232558, |
| "grad_norm": 1.6440663696409548, |
| "learning_rate": 6.211388435662721e-08, |
| "loss": 0.47510428428649903, |
| "memory(GiB)": 74.97, |
| "step": 1285, |
| "token_acc": 0.8627756653992396, |
| "train_speed(iter/s)": 0.130302 |
| }, |
| { |
| "epoch": 117.27906976744185, |
| "grad_norm": 0.8947253671514697, |
| "learning_rate": 6.135008307075479e-08, |
| "loss": 0.48160324096679685, |
| "memory(GiB)": 74.97, |
| "step": 1290, |
| "token_acc": 0.8668218530666949, |
| "train_speed(iter/s)": 0.130333 |
| }, |
| { |
| "epoch": 117.74418604651163, |
| "grad_norm": 0.7850295846326071, |
| "learning_rate": 6.058892345536387e-08, |
| "loss": 0.4656852722167969, |
| "memory(GiB)": 74.97, |
| "step": 1295, |
| "token_acc": 0.8760795485278474, |
| "train_speed(iter/s)": 0.13033 |
| }, |
| { |
| "epoch": 118.18604651162791, |
| "grad_norm": 0.7825259584750254, |
| "learning_rate": 5.983045753470308e-08, |
| "loss": 0.4575822830200195, |
| "memory(GiB)": 74.97, |
| "step": 1300, |
| "token_acc": 0.8609389541215373, |
| "train_speed(iter/s)": 0.130341 |
| }, |
| { |
| "epoch": 118.65116279069767, |
| "grad_norm": 1.3046914177444136, |
| "learning_rate": 5.9074737148910606e-08, |
| "loss": 0.45604352951049804, |
| "memory(GiB)": 74.97, |
| "step": 1305, |
| "token_acc": 0.8543227692364619, |
| "train_speed(iter/s)": 0.130361 |
| }, |
| { |
| "epoch": 119.09302325581395, |
| "grad_norm": 1.1212563362731731, |
| "learning_rate": 5.832181395047098e-08, |
| "loss": 0.4669440269470215, |
| "memory(GiB)": 74.97, |
| "step": 1310, |
| "token_acc": 0.868457034673772, |
| "train_speed(iter/s)": 0.130368 |
| }, |
| { |
| "epoch": 119.55813953488372, |
| "grad_norm": 0.8339959692059283, |
| "learning_rate": 5.7571739400684635e-08, |
| "loss": 0.47755279541015627, |
| "memory(GiB)": 74.97, |
| "step": 1315, |
| "token_acc": 0.8567007810897974, |
| "train_speed(iter/s)": 0.130336 |
| }, |
| { |
| "epoch": 120.0, |
| "grad_norm": 0.9954950376422352, |
| "learning_rate": 5.682456476615072e-08, |
| "loss": 0.4645816802978516, |
| "memory(GiB)": 74.97, |
| "step": 1320, |
| "token_acc": 0.8440125792344356, |
| "train_speed(iter/s)": 0.130377 |
| }, |
| { |
| "epoch": 120.46511627906976, |
| "grad_norm": 0.6612384359472665, |
| "learning_rate": 5.6080341115262976e-08, |
| "loss": 0.45533552169799807, |
| "memory(GiB)": 74.97, |
| "step": 1325, |
| "token_acc": 0.8586772074823821, |
| "train_speed(iter/s)": 0.130382 |
| }, |
| { |
| "epoch": 120.93023255813954, |
| "grad_norm": 1.098228237433943, |
| "learning_rate": 5.533911931471935e-08, |
| "loss": 0.4692089080810547, |
| "memory(GiB)": 74.97, |
| "step": 1330, |
| "token_acc": 0.8699830311690632, |
| "train_speed(iter/s)": 0.13038 |
| }, |
| { |
| "epoch": 121.37209302325581, |
| "grad_norm": 0.7854095634086957, |
| "learning_rate": 5.460095002604532e-08, |
| "loss": 0.46064138412475586, |
| "memory(GiB)": 74.97, |
| "step": 1335, |
| "token_acc": 0.8677652211026369, |
| "train_speed(iter/s)": 0.130369 |
| }, |
| { |
| "epoch": 121.83720930232558, |
| "grad_norm": 2.1438550225472506, |
| "learning_rate": 5.386588370213123e-08, |
| "loss": 0.47399129867553713, |
| "memory(GiB)": 74.97, |
| "step": 1340, |
| "token_acc": 0.8342529761205946, |
| "train_speed(iter/s)": 0.130402 |
| }, |
| { |
| "epoch": 122.27906976744185, |
| "grad_norm": 0.7685065811470108, |
| "learning_rate": 5.313397058378386e-08, |
| "loss": 0.46064081192016604, |
| "memory(GiB)": 74.97, |
| "step": 1345, |
| "token_acc": 0.8655901006480077, |
| "train_speed(iter/s)": 0.130445 |
| }, |
| { |
| "epoch": 122.74418604651163, |
| "grad_norm": 0.7484657906315015, |
| "learning_rate": 5.240526069629264e-08, |
| "loss": 0.4805141925811768, |
| "memory(GiB)": 74.97, |
| "step": 1350, |
| "token_acc": 0.8551861286142021, |
| "train_speed(iter/s)": 0.13041 |
| }, |
| { |
| "epoch": 123.18604651162791, |
| "grad_norm": 0.7881353244361399, |
| "learning_rate": 5.1679803846010403e-08, |
| "loss": 0.4467328071594238, |
| "memory(GiB)": 74.97, |
| "step": 1355, |
| "token_acc": 0.8620426261271331, |
| "train_speed(iter/s)": 0.13044 |
| }, |
| { |
| "epoch": 123.65116279069767, |
| "grad_norm": 0.9062139816497382, |
| "learning_rate": 5.0957649616949215e-08, |
| "loss": 0.4628152847290039, |
| "memory(GiB)": 74.97, |
| "step": 1360, |
| "token_acc": 0.8650447427293065, |
| "train_speed(iter/s)": 0.13041 |
| }, |
| { |
| "epoch": 124.09302325581395, |
| "grad_norm": 0.7919450228717162, |
| "learning_rate": 5.0238847367391314e-08, |
| "loss": 0.45865640640258787, |
| "memory(GiB)": 74.97, |
| "step": 1365, |
| "token_acc": 0.8601923709624354, |
| "train_speed(iter/s)": 0.130427 |
| }, |
| { |
| "epoch": 124.55813953488372, |
| "grad_norm": 0.6379402091206297, |
| "learning_rate": 4.952344622651565e-08, |
| "loss": 0.4563908576965332, |
| "memory(GiB)": 74.97, |
| "step": 1370, |
| "token_acc": 0.8681956209045869, |
| "train_speed(iter/s)": 0.130458 |
| }, |
| { |
| "epoch": 125.0, |
| "grad_norm": 1.558446245583931, |
| "learning_rate": 4.8811495091039923e-08, |
| "loss": 0.4724306106567383, |
| "memory(GiB)": 74.97, |
| "step": 1375, |
| "token_acc": 0.860769332539525, |
| "train_speed(iter/s)": 0.13045 |
| }, |
| { |
| "epoch": 125.46511627906976, |
| "grad_norm": 0.7458216850987389, |
| "learning_rate": 4.810304262187851e-08, |
| "loss": 0.46082763671875, |
| "memory(GiB)": 74.97, |
| "step": 1380, |
| "token_acc": 0.8532610918012676, |
| "train_speed(iter/s)": 0.130461 |
| }, |
| { |
| "epoch": 125.93023255813954, |
| "grad_norm": 3.0851446793520743, |
| "learning_rate": 4.739813724081661e-08, |
| "loss": 0.47005910873413087, |
| "memory(GiB)": 74.97, |
| "step": 1385, |
| "token_acc": 0.8665938394822649, |
| "train_speed(iter/s)": 0.13044 |
| }, |
| { |
| "epoch": 126.37209302325581, |
| "grad_norm": 0.6792623717144913, |
| "learning_rate": 4.6696827127200644e-08, |
| "loss": 0.44311208724975587, |
| "memory(GiB)": 74.97, |
| "step": 1390, |
| "token_acc": 0.8821935667868566, |
| "train_speed(iter/s)": 0.130476 |
| }, |
| { |
| "epoch": 126.83720930232558, |
| "grad_norm": 0.8236864003533888, |
| "learning_rate": 4.599916021464531e-08, |
| "loss": 0.4629988670349121, |
| "memory(GiB)": 74.97, |
| "step": 1395, |
| "token_acc": 0.8768711824231926, |
| "train_speed(iter/s)": 0.130449 |
| }, |
| { |
| "epoch": 127.27906976744185, |
| "grad_norm": 1.9627254088333494, |
| "learning_rate": 4.530518418775733e-08, |
| "loss": 0.48299736976623536, |
| "memory(GiB)": 74.97, |
| "step": 1400, |
| "token_acc": 0.8665078296300133, |
| "train_speed(iter/s)": 0.130474 |
| }, |
| { |
| "epoch": 127.27906976744185, |
| "eval_loss": 0.6098422408103943, |
| "eval_runtime": 0.6984, |
| "eval_samples_per_second": 18.613, |
| "eval_steps_per_second": 2.864, |
| "eval_token_acc": 0.8434715968962544, |
| "step": 1400 |
| }, |
| { |
| "epoch": 127.74418604651163, |
| "grad_norm": 0.8045410987121008, |
| "learning_rate": 4.4614946478876306e-08, |
| "loss": 0.45166778564453125, |
| "memory(GiB)": 74.97, |
| "step": 1405, |
| "token_acc": 0.8502284891267625, |
| "train_speed(iter/s)": 0.130135 |
| }, |
| { |
| "epoch": 128.1860465116279, |
| "grad_norm": 3.0860005142595193, |
| "learning_rate": 4.392849426483274e-08, |
| "loss": 0.4591231822967529, |
| "memory(GiB)": 74.97, |
| "step": 1410, |
| "token_acc": 0.8607654013690106, |
| "train_speed(iter/s)": 0.130182 |
| }, |
| { |
| "epoch": 128.65116279069767, |
| "grad_norm": 1.1087292922703431, |
| "learning_rate": 4.324587446372364e-08, |
| "loss": 0.474017858505249, |
| "memory(GiB)": 74.97, |
| "step": 1415, |
| "token_acc": 0.8642677323380807, |
| "train_speed(iter/s)": 0.130185 |
| }, |
| { |
| "epoch": 129.09302325581396, |
| "grad_norm": 1.0228218711643116, |
| "learning_rate": 4.256713373170564e-08, |
| "loss": 0.4518399715423584, |
| "memory(GiB)": 74.97, |
| "step": 1420, |
| "token_acc": 0.8715422807155804, |
| "train_speed(iter/s)": 0.130231 |
| }, |
| { |
| "epoch": 129.5581395348837, |
| "grad_norm": 0.8006830274772974, |
| "learning_rate": 4.1892318459806175e-08, |
| "loss": 0.46432695388793943, |
| "memory(GiB)": 74.97, |
| "step": 1425, |
| "token_acc": 0.8545170039641143, |
| "train_speed(iter/s)": 0.130209 |
| }, |
| { |
| "epoch": 130.0, |
| "grad_norm": 1.0265209199413956, |
| "learning_rate": 4.1221474770752695e-08, |
| "loss": 0.44231014251708983, |
| "memory(GiB)": 74.97, |
| "step": 1430, |
| "token_acc": 0.8699077672962582, |
| "train_speed(iter/s)": 0.130226 |
| }, |
| { |
| "epoch": 130.46511627906978, |
| "grad_norm": 0.8188302695487818, |
| "learning_rate": 4.055464851582021e-08, |
| "loss": 0.4583402156829834, |
| "memory(GiB)": 74.97, |
| "step": 1435, |
| "token_acc": 0.8707192214769637, |
| "train_speed(iter/s)": 0.130227 |
| }, |
| { |
| "epoch": 130.93023255813952, |
| "grad_norm": 0.8252804258656437, |
| "learning_rate": 3.989188527169749e-08, |
| "loss": 0.46385898590087893, |
| "memory(GiB)": 74.97, |
| "step": 1440, |
| "token_acc": 0.8788993882124901, |
| "train_speed(iter/s)": 0.130217 |
| }, |
| { |
| "epoch": 131.37209302325581, |
| "grad_norm": 0.8872565038088099, |
| "learning_rate": 3.923323033737188e-08, |
| "loss": 0.4746572017669678, |
| "memory(GiB)": 74.97, |
| "step": 1445, |
| "token_acc": 0.8457805814107371, |
| "train_speed(iter/s)": 0.130274 |
| }, |
| { |
| "epoch": 131.8372093023256, |
| "grad_norm": 1.1550562475118538, |
| "learning_rate": 3.857872873103322e-08, |
| "loss": 0.44470739364624023, |
| "memory(GiB)": 74.97, |
| "step": 1450, |
| "token_acc": 0.8544268219233085, |
| "train_speed(iter/s)": 0.130237 |
| }, |
| { |
| "epoch": 132.27906976744185, |
| "grad_norm": 0.7549641151940925, |
| "learning_rate": 3.7928425186996883e-08, |
| "loss": 0.46361541748046875, |
| "memory(GiB)": 74.97, |
| "step": 1455, |
| "token_acc": 0.8666913610733835, |
| "train_speed(iter/s)": 0.130263 |
| }, |
| { |
| "epoch": 132.74418604651163, |
| "grad_norm": 0.8290416052434509, |
| "learning_rate": 3.7282364152646295e-08, |
| "loss": 0.45833826065063477, |
| "memory(GiB)": 74.97, |
| "step": 1460, |
| "token_acc": 0.8540965869971476, |
| "train_speed(iter/s)": 0.130268 |
| }, |
| { |
| "epoch": 133.1860465116279, |
| "grad_norm": 0.8534414388843884, |
| "learning_rate": 3.664058978539495e-08, |
| "loss": 0.4486083507537842, |
| "memory(GiB)": 74.97, |
| "step": 1465, |
| "token_acc": 0.8745001477395844, |
| "train_speed(iter/s)": 0.130303 |
| }, |
| { |
| "epoch": 133.65116279069767, |
| "grad_norm": 0.8212409711926915, |
| "learning_rate": 3.600314594966833e-08, |
| "loss": 0.4511223316192627, |
| "memory(GiB)": 74.97, |
| "step": 1470, |
| "token_acc": 0.8836138231258182, |
| "train_speed(iter/s)": 0.130277 |
| }, |
| { |
| "epoch": 134.09302325581396, |
| "grad_norm": 1.0201258607355366, |
| "learning_rate": 3.53700762139059e-08, |
| "loss": 0.48140726089477537, |
| "memory(GiB)": 74.97, |
| "step": 1475, |
| "token_acc": 0.8690138329330979, |
| "train_speed(iter/s)": 0.130297 |
| }, |
| { |
| "epoch": 134.5581395348837, |
| "grad_norm": 0.7617400291414114, |
| "learning_rate": 3.474142384758313e-08, |
| "loss": 0.4485898017883301, |
| "memory(GiB)": 74.97, |
| "step": 1480, |
| "token_acc": 0.864516129032258, |
| "train_speed(iter/s)": 0.130302 |
| }, |
| { |
| "epoch": 135.0, |
| "grad_norm": 1.0462722965857336, |
| "learning_rate": 3.41172318182542e-08, |
| "loss": 0.45436367988586424, |
| "memory(GiB)": 74.97, |
| "step": 1485, |
| "token_acc": 0.8761111111111111, |
| "train_speed(iter/s)": 0.130327 |
| }, |
| { |
| "epoch": 135.46511627906978, |
| "grad_norm": 0.8458888970103854, |
| "learning_rate": 3.349754278861516e-08, |
| "loss": 0.4582218170166016, |
| "memory(GiB)": 74.97, |
| "step": 1490, |
| "token_acc": 0.8546937352291658, |
| "train_speed(iter/s)": 0.130305 |
| }, |
| { |
| "epoch": 135.93023255813952, |
| "grad_norm": 1.0227265853515555, |
| "learning_rate": 3.2882399113588066e-08, |
| "loss": 0.44946842193603515, |
| "memory(GiB)": 74.97, |
| "step": 1495, |
| "token_acc": 0.8783018139714396, |
| "train_speed(iter/s)": 0.130303 |
| }, |
| { |
| "epoch": 136.37209302325581, |
| "grad_norm": 0.9319234599915691, |
| "learning_rate": 3.227184283742591e-08, |
| "loss": 0.4635480880737305, |
| "memory(GiB)": 74.97, |
| "step": 1500, |
| "token_acc": 0.8700904636260837, |
| "train_speed(iter/s)": 0.130364 |
| }, |
| { |
| "epoch": 136.8372093023256, |
| "grad_norm": 0.7448189618376913, |
| "learning_rate": 3.166591569083916e-08, |
| "loss": 0.45705451965332033, |
| "memory(GiB)": 74.97, |
| "step": 1505, |
| "token_acc": 0.8637782801950199, |
| "train_speed(iter/s)": 0.130342 |
| }, |
| { |
| "epoch": 137.27906976744185, |
| "grad_norm": 1.1529755818910967, |
| "learning_rate": 3.106465908814342e-08, |
| "loss": 0.45585179328918457, |
| "memory(GiB)": 74.97, |
| "step": 1510, |
| "token_acc": 0.8591232839778012, |
| "train_speed(iter/s)": 0.130363 |
| }, |
| { |
| "epoch": 137.74418604651163, |
| "grad_norm": 0.8260584253674346, |
| "learning_rate": 3.04681141244288e-08, |
| "loss": 0.46056065559387205, |
| "memory(GiB)": 74.97, |
| "step": 1515, |
| "token_acc": 0.8692046456648592, |
| "train_speed(iter/s)": 0.130323 |
| }, |
| { |
| "epoch": 138.1860465116279, |
| "grad_norm": 1.052950893981831, |
| "learning_rate": 2.987632157275114e-08, |
| "loss": 0.45586233139038085, |
| "memory(GiB)": 74.97, |
| "step": 1520, |
| "token_acc": 0.863406408094435, |
| "train_speed(iter/s)": 0.130349 |
| }, |
| { |
| "epoch": 138.65116279069767, |
| "grad_norm": 0.7991045733474148, |
| "learning_rate": 2.928932188134525e-08, |
| "loss": 0.4538632869720459, |
| "memory(GiB)": 74.97, |
| "step": 1525, |
| "token_acc": 0.8717533864610406, |
| "train_speed(iter/s)": 0.130328 |
| }, |
| { |
| "epoch": 139.09302325581396, |
| "grad_norm": 1.434227664193626, |
| "learning_rate": 2.8707155170860297e-08, |
| "loss": 0.46680850982666017, |
| "memory(GiB)": 74.97, |
| "step": 1530, |
| "token_acc": 0.8410292981517798, |
| "train_speed(iter/s)": 0.130343 |
| }, |
| { |
| "epoch": 139.5581395348837, |
| "grad_norm": 0.7631653651545482, |
| "learning_rate": 2.8129861231617612e-08, |
| "loss": 0.44613943099975584, |
| "memory(GiB)": 74.97, |
| "step": 1535, |
| "token_acc": 0.8678071275982503, |
| "train_speed(iter/s)": 0.130345 |
| }, |
| { |
| "epoch": 140.0, |
| "grad_norm": 1.1321589909418222, |
| "learning_rate": 2.7557479520891104e-08, |
| "loss": 0.4599461078643799, |
| "memory(GiB)": 74.97, |
| "step": 1540, |
| "token_acc": 0.8688507394846334, |
| "train_speed(iter/s)": 0.130373 |
| }, |
| { |
| "epoch": 140.46511627906978, |
| "grad_norm": 0.9020009960345104, |
| "learning_rate": 2.699004916021038e-08, |
| "loss": 0.4559918403625488, |
| "memory(GiB)": 74.97, |
| "step": 1545, |
| "token_acc": 0.8607777938412606, |
| "train_speed(iter/s)": 0.130363 |
| }, |
| { |
| "epoch": 140.93023255813952, |
| "grad_norm": 0.7719651412897752, |
| "learning_rate": 2.642760893268684e-08, |
| "loss": 0.459440279006958, |
| "memory(GiB)": 74.97, |
| "step": 1550, |
| "token_acc": 0.881872014598279, |
| "train_speed(iter/s)": 0.130356 |
| }, |
| { |
| "epoch": 141.37209302325581, |
| "grad_norm": 0.7388402954698886, |
| "learning_rate": 2.5870197280362915e-08, |
| "loss": 0.42969484329223634, |
| "memory(GiB)": 74.97, |
| "step": 1555, |
| "token_acc": 0.8883613399742432, |
| "train_speed(iter/s)": 0.130344 |
| }, |
| { |
| "epoch": 141.8372093023256, |
| "grad_norm": 0.7267745408294942, |
| "learning_rate": 2.5317852301584643e-08, |
| "loss": 0.4578805923461914, |
| "memory(GiB)": 74.97, |
| "step": 1560, |
| "token_acc": 0.8614507600793126, |
| "train_speed(iter/s)": 0.130359 |
| }, |
| { |
| "epoch": 142.27906976744185, |
| "grad_norm": 0.7380347392311346, |
| "learning_rate": 2.477061174839755e-08, |
| "loss": 0.465103816986084, |
| "memory(GiB)": 74.97, |
| "step": 1565, |
| "token_acc": 0.8470271187879302, |
| "train_speed(iter/s)": 0.130395 |
| }, |
| { |
| "epoch": 142.74418604651163, |
| "grad_norm": 0.8136693379385729, |
| "learning_rate": 2.4228513023966547e-08, |
| "loss": 0.45352745056152344, |
| "memory(GiB)": 74.97, |
| "step": 1570, |
| "token_acc": 0.8680695298875026, |
| "train_speed(iter/s)": 0.130411 |
| }, |
| { |
| "epoch": 143.1860465116279, |
| "grad_norm": 0.8229594379364835, |
| "learning_rate": 2.3691593180019364e-08, |
| "loss": 0.46236839294433596, |
| "memory(GiB)": 74.97, |
| "step": 1575, |
| "token_acc": 0.8841950432568365, |
| "train_speed(iter/s)": 0.130422 |
| }, |
| { |
| "epoch": 143.65116279069767, |
| "grad_norm": 0.8764589511746724, |
| "learning_rate": 2.315988891431412e-08, |
| "loss": 0.44404191970825196, |
| "memory(GiB)": 74.97, |
| "step": 1580, |
| "token_acc": 0.8711640164847799, |
| "train_speed(iter/s)": 0.130418 |
| }, |
| { |
| "epoch": 144.09302325581396, |
| "grad_norm": 0.7989067686578916, |
| "learning_rate": 2.263343656813107e-08, |
| "loss": 0.46502885818481443, |
| "memory(GiB)": 74.97, |
| "step": 1585, |
| "token_acc": 0.8433810096689391, |
| "train_speed(iter/s)": 0.130431 |
| }, |
| { |
| "epoch": 144.5581395348837, |
| "grad_norm": 0.7139519442470533, |
| "learning_rate": 2.2112272123788767e-08, |
| "loss": 0.4445913314819336, |
| "memory(GiB)": 74.97, |
| "step": 1590, |
| "token_acc": 0.8719508074869924, |
| "train_speed(iter/s)": 0.130449 |
| }, |
| { |
| "epoch": 145.0, |
| "grad_norm": 0.854611201984833, |
| "learning_rate": 2.1596431202184705e-08, |
| "loss": 0.45667543411254885, |
| "memory(GiB)": 74.97, |
| "step": 1595, |
| "token_acc": 0.8543597957753529, |
| "train_speed(iter/s)": 0.130485 |
| }, |
| { |
| "epoch": 145.46511627906978, |
| "grad_norm": 0.8370879177525832, |
| "learning_rate": 2.108594906036065e-08, |
| "loss": 0.45542278289794924, |
| "memory(GiB)": 74.97, |
| "step": 1600, |
| "token_acc": 0.8767741127199183, |
| "train_speed(iter/s)": 0.130498 |
| }, |
| { |
| "epoch": 145.46511627906978, |
| "eval_loss": 0.612120509147644, |
| "eval_runtime": 0.6973, |
| "eval_samples_per_second": 18.643, |
| "eval_steps_per_second": 2.868, |
| "eval_token_acc": 0.8432800076635693, |
| "step": 1600 |
| }, |
| { |
| "epoch": 145.93023255813952, |
| "grad_norm": 1.8496079436558843, |
| "learning_rate": 2.0580860589092895e-08, |
| "loss": 0.4458228588104248, |
| "memory(GiB)": 74.97, |
| "step": 1605, |
| "token_acc": 0.8655583853748735, |
| "train_speed(iter/s)": 0.130281 |
| }, |
| { |
| "epoch": 146.37209302325581, |
| "grad_norm": 0.8949636135857424, |
| "learning_rate": 2.008120031050753e-08, |
| "loss": 0.4534448146820068, |
| "memory(GiB)": 74.97, |
| "step": 1610, |
| "token_acc": 0.8604011376099039, |
| "train_speed(iter/s)": 0.130303 |
| }, |
| { |
| "epoch": 146.8372093023256, |
| "grad_norm": 1.4041818864948623, |
| "learning_rate": 1.9587002375720862e-08, |
| "loss": 0.46073060035705565, |
| "memory(GiB)": 74.97, |
| "step": 1615, |
| "token_acc": 0.8637630263007214, |
| "train_speed(iter/s)": 0.130307 |
| }, |
| { |
| "epoch": 147.27906976744185, |
| "grad_norm": 0.8001120538073951, |
| "learning_rate": 1.9098300562505266e-08, |
| "loss": 0.44887795448303225, |
| "memory(GiB)": 74.97, |
| "step": 1620, |
| "token_acc": 0.8565026887074288, |
| "train_speed(iter/s)": 0.130322 |
| }, |
| { |
| "epoch": 147.74418604651163, |
| "grad_norm": 1.3994263394516653, |
| "learning_rate": 1.8615128272980507e-08, |
| "loss": 0.4529706001281738, |
| "memory(GiB)": 74.97, |
| "step": 1625, |
| "token_acc": 0.8614175728232399, |
| "train_speed(iter/s)": 0.130317 |
| }, |
| { |
| "epoch": 148.1860465116279, |
| "grad_norm": 0.7986626000623837, |
| "learning_rate": 1.8137518531330763e-08, |
| "loss": 0.45129976272583006, |
| "memory(GiB)": 74.97, |
| "step": 1630, |
| "token_acc": 0.88025613660619, |
| "train_speed(iter/s)": 0.130337 |
| }, |
| { |
| "epoch": 148.65116279069767, |
| "grad_norm": 0.7053069152982997, |
| "learning_rate": 1.7665503981547425e-08, |
| "loss": 0.45789132118225095, |
| "memory(GiB)": 74.97, |
| "step": 1635, |
| "token_acc": 0.8718905932360007, |
| "train_speed(iter/s)": 0.13032 |
| }, |
| { |
| "epoch": 149.09302325581396, |
| "grad_norm": 1.327231576897276, |
| "learning_rate": 1.7199116885197997e-08, |
| "loss": 0.45948057174682616, |
| "memory(GiB)": 74.97, |
| "step": 1640, |
| "token_acc": 0.8687992670776631, |
| "train_speed(iter/s)": 0.13036 |
| }, |
| { |
| "epoch": 149.5581395348837, |
| "grad_norm": 1.8690818535078901, |
| "learning_rate": 1.6738389119220965e-08, |
| "loss": 0.4487407684326172, |
| "memory(GiB)": 74.97, |
| "step": 1645, |
| "token_acc": 0.8717857813184292, |
| "train_speed(iter/s)": 0.130356 |
| }, |
| { |
| "epoch": 150.0, |
| "grad_norm": 1.4986410325133508, |
| "learning_rate": 1.6283352173747144e-08, |
| "loss": 0.46256265640258787, |
| "memory(GiB)": 74.97, |
| "step": 1650, |
| "token_acc": 0.8687363834422658, |
| "train_speed(iter/s)": 0.130382 |
| }, |
| { |
| "epoch": 150.46511627906978, |
| "grad_norm": 0.9212362231868645, |
| "learning_rate": 1.5834037149947288e-08, |
| "loss": 0.45532588958740233, |
| "memory(GiB)": 74.97, |
| "step": 1655, |
| "token_acc": 0.8637431617337635, |
| "train_speed(iter/s)": 0.130388 |
| }, |
| { |
| "epoch": 150.93023255813952, |
| "grad_norm": 0.7589204558012844, |
| "learning_rate": 1.5390474757906446e-08, |
| "loss": 0.4434979438781738, |
| "memory(GiB)": 74.97, |
| "step": 1660, |
| "token_acc": 0.8520731295389292, |
| "train_speed(iter/s)": 0.130379 |
| }, |
| { |
| "epoch": 151.37209302325581, |
| "grad_norm": 0.7171576824463824, |
| "learning_rate": 1.495269531452491e-08, |
| "loss": 0.45127115249633787, |
| "memory(GiB)": 74.97, |
| "step": 1665, |
| "token_acc": 0.8684483740245822, |
| "train_speed(iter/s)": 0.130418 |
| }, |
| { |
| "epoch": 151.8372093023256, |
| "grad_norm": 0.7991722745507821, |
| "learning_rate": 1.4520728741446087e-08, |
| "loss": 0.4588929176330566, |
| "memory(GiB)": 74.97, |
| "step": 1670, |
| "token_acc": 0.8637134079593206, |
| "train_speed(iter/s)": 0.130412 |
| }, |
| { |
| "epoch": 152.27906976744185, |
| "grad_norm": 1.041259181485301, |
| "learning_rate": 1.409460456301147e-08, |
| "loss": 0.4453131675720215, |
| "memory(GiB)": 74.97, |
| "step": 1675, |
| "token_acc": 0.8544123886296139, |
| "train_speed(iter/s)": 0.130435 |
| }, |
| { |
| "epoch": 152.74418604651163, |
| "grad_norm": 0.8313710748011637, |
| "learning_rate": 1.367435190424261e-08, |
| "loss": 0.45928287506103516, |
| "memory(GiB)": 74.97, |
| "step": 1680, |
| "token_acc": 0.8679964503247146, |
| "train_speed(iter/s)": 0.13042 |
| }, |
| { |
| "epoch": 153.1860465116279, |
| "grad_norm": 0.8698596114757391, |
| "learning_rate": 1.3259999488850471e-08, |
| "loss": 0.4635627746582031, |
| "memory(GiB)": 74.97, |
| "step": 1685, |
| "token_acc": 0.8450357565069091, |
| "train_speed(iter/s)": 0.130437 |
| }, |
| { |
| "epoch": 153.65116279069767, |
| "grad_norm": 0.8269395824162962, |
| "learning_rate": 1.285157563727226e-08, |
| "loss": 0.44847860336303713, |
| "memory(GiB)": 74.97, |
| "step": 1690, |
| "token_acc": 0.8680811179277437, |
| "train_speed(iter/s)": 0.130425 |
| }, |
| { |
| "epoch": 154.09302325581396, |
| "grad_norm": 0.9761994911989668, |
| "learning_rate": 1.244910826473572e-08, |
| "loss": 0.45370187759399416, |
| "memory(GiB)": 74.97, |
| "step": 1695, |
| "token_acc": 0.8793547562067264, |
| "train_speed(iter/s)": 0.130443 |
| }, |
| { |
| "epoch": 154.5581395348837, |
| "grad_norm": 1.6323959605839558, |
| "learning_rate": 1.2052624879351104e-08, |
| "loss": 0.4481173515319824, |
| "memory(GiB)": 74.97, |
| "step": 1700, |
| "token_acc": 0.8561244744199944, |
| "train_speed(iter/s)": 0.13047 |
| }, |
| { |
| "epoch": 155.0, |
| "grad_norm": 0.9207580708371824, |
| "learning_rate": 1.1662152580231144e-08, |
| "loss": 0.4539341926574707, |
| "memory(GiB)": 74.97, |
| "step": 1705, |
| "token_acc": 0.8649734464445824, |
| "train_speed(iter/s)": 0.130465 |
| }, |
| { |
| "epoch": 155.46511627906978, |
| "grad_norm": 0.745689965265747, |
| "learning_rate": 1.1277718055638818e-08, |
| "loss": 0.4519050598144531, |
| "memory(GiB)": 74.97, |
| "step": 1710, |
| "token_acc": 0.852934204004224, |
| "train_speed(iter/s)": 0.13047 |
| }, |
| { |
| "epoch": 155.93023255813952, |
| "grad_norm": 0.8506585464720108, |
| "learning_rate": 1.089934758116322e-08, |
| "loss": 0.4458354949951172, |
| "memory(GiB)": 74.97, |
| "step": 1715, |
| "token_acc": 0.874605201263356, |
| "train_speed(iter/s)": 0.130466 |
| }, |
| { |
| "epoch": 156.37209302325581, |
| "grad_norm": 0.8256841894574871, |
| "learning_rate": 1.0527067017923653e-08, |
| "loss": 0.4461174011230469, |
| "memory(GiB)": 74.97, |
| "step": 1720, |
| "token_acc": 0.8700296735905044, |
| "train_speed(iter/s)": 0.130504 |
| }, |
| { |
| "epoch": 156.8372093023256, |
| "grad_norm": 1.507219892035112, |
| "learning_rate": 1.0160901810802114e-08, |
| "loss": 0.45079655647277833, |
| "memory(GiB)": 74.97, |
| "step": 1725, |
| "token_acc": 0.8675626379955842, |
| "train_speed(iter/s)": 0.130482 |
| }, |
| { |
| "epoch": 157.27906976744185, |
| "grad_norm": 0.8360642824375936, |
| "learning_rate": 9.800876986704109e-09, |
| "loss": 0.46644229888916017, |
| "memory(GiB)": 74.97, |
| "step": 1730, |
| "token_acc": 0.8489071709233792, |
| "train_speed(iter/s)": 0.13048 |
| }, |
| { |
| "epoch": 157.74418604651163, |
| "grad_norm": 1.1246527506944004, |
| "learning_rate": 9.447017152848125e-09, |
| "loss": 0.4457961082458496, |
| "memory(GiB)": 74.97, |
| "step": 1735, |
| "token_acc": 0.8624011633190948, |
| "train_speed(iter/s)": 0.130482 |
| }, |
| { |
| "epoch": 158.1860465116279, |
| "grad_norm": 1.166595856803442, |
| "learning_rate": 9.099346495083749e-09, |
| "loss": 0.46271514892578125, |
| "memory(GiB)": 74.97, |
| "step": 1740, |
| "token_acc": 0.870665567772931, |
| "train_speed(iter/s)": 0.130529 |
| }, |
| { |
| "epoch": 158.65116279069767, |
| "grad_norm": 0.997579039313746, |
| "learning_rate": 8.75788877623862e-09, |
| "loss": 0.45302181243896483, |
| "memory(GiB)": 74.97, |
| "step": 1745, |
| "token_acc": 0.8601099764336214, |
| "train_speed(iter/s)": 0.130496 |
| }, |
| { |
| "epoch": 159.09302325581396, |
| "grad_norm": 0.8847455985487748, |
| "learning_rate": 8.422667334494249e-09, |
| "loss": 0.44652571678161623, |
| "memory(GiB)": 74.97, |
| "step": 1750, |
| "token_acc": 0.8695352691736444, |
| "train_speed(iter/s)": 0.130543 |
| }, |
| { |
| "epoch": 159.5581395348837, |
| "grad_norm": 0.7005559155585159, |
| "learning_rate": 8.093705081790891e-09, |
| "loss": 0.45291786193847655, |
| "memory(GiB)": 74.97, |
| "step": 1755, |
| "token_acc": 0.8535476796830787, |
| "train_speed(iter/s)": 0.130532 |
| }, |
| { |
| "epoch": 160.0, |
| "grad_norm": 1.2943680843819054, |
| "learning_rate": 7.771024502261525e-09, |
| "loss": 0.4609353542327881, |
| "memory(GiB)": 74.97, |
| "step": 1760, |
| "token_acc": 0.8666812131791403, |
| "train_speed(iter/s)": 0.130545 |
| }, |
| { |
| "epoch": 160.46511627906978, |
| "grad_norm": 0.7963107860861562, |
| "learning_rate": 7.454647650695157e-09, |
| "loss": 0.44596638679504397, |
| "memory(GiB)": 74.97, |
| "step": 1765, |
| "token_acc": 0.8749486582694413, |
| "train_speed(iter/s)": 0.130569 |
| }, |
| { |
| "epoch": 160.93023255813952, |
| "grad_norm": 0.8631048995115475, |
| "learning_rate": 7.144596151029303e-09, |
| "loss": 0.4524871826171875, |
| "memory(GiB)": 74.97, |
| "step": 1770, |
| "token_acc": 0.8677113770449089, |
| "train_speed(iter/s)": 0.130541 |
| }, |
| { |
| "epoch": 161.37209302325581, |
| "grad_norm": 2.75385310377207, |
| "learning_rate": 6.840891194872111e-09, |
| "loss": 0.4484891891479492, |
| "memory(GiB)": 74.97, |
| "step": 1775, |
| "token_acc": 0.8703089199652366, |
| "train_speed(iter/s)": 0.130568 |
| }, |
| { |
| "epoch": 161.8372093023256, |
| "grad_norm": 0.8722893432749486, |
| "learning_rate": 6.5435535400539254e-09, |
| "loss": 0.45218324661254883, |
| "memory(GiB)": 74.97, |
| "step": 1780, |
| "token_acc": 0.868553358560287, |
| "train_speed(iter/s)": 0.130557 |
| }, |
| { |
| "epoch": 162.27906976744185, |
| "grad_norm": 0.8215719185780701, |
| "learning_rate": 6.252603509208465e-09, |
| "loss": 0.4554037094116211, |
| "memory(GiB)": 74.97, |
| "step": 1785, |
| "token_acc": 0.8677862595419847, |
| "train_speed(iter/s)": 0.130539 |
| }, |
| { |
| "epoch": 162.74418604651163, |
| "grad_norm": 0.7384288783097476, |
| "learning_rate": 5.9680609883838825e-09, |
| "loss": 0.44667611122131345, |
| "memory(GiB)": 74.97, |
| "step": 1790, |
| "token_acc": 0.8610665481183679, |
| "train_speed(iter/s)": 0.130557 |
| }, |
| { |
| "epoch": 163.1860465116279, |
| "grad_norm": 1.061403944970525, |
| "learning_rate": 5.689945425683473e-09, |
| "loss": 0.44474124908447266, |
| "memory(GiB)": 74.97, |
| "step": 1795, |
| "token_acc": 0.8644834307992203, |
| "train_speed(iter/s)": 0.130593 |
| }, |
| { |
| "epoch": 163.65116279069767, |
| "grad_norm": 0.7777597746319437, |
| "learning_rate": 5.418275829936536e-09, |
| "loss": 0.44541053771972655, |
| "memory(GiB)": 74.97, |
| "step": 1800, |
| "token_acc": 0.8673607496095783, |
| "train_speed(iter/s)": 0.130589 |
| }, |
| { |
| "epoch": 163.65116279069767, |
| "eval_loss": 0.6119173765182495, |
| "eval_runtime": 0.6953, |
| "eval_samples_per_second": 18.698, |
| "eval_steps_per_second": 2.877, |
| "eval_token_acc": 0.8430884184308842, |
| "step": 1800 |
| }, |
| { |
| "epoch": 164.09302325581396, |
| "grad_norm": 0.7945916641293757, |
| "learning_rate": 5.15307076939906e-09, |
| "loss": 0.47254066467285155, |
| "memory(GiB)": 74.97, |
| "step": 1805, |
| "token_acc": 0.8580266386260077, |
| "train_speed(iter/s)": 0.130407 |
| }, |
| { |
| "epoch": 164.5581395348837, |
| "grad_norm": 1.0008056456948866, |
| "learning_rate": 4.8943483704846465e-09, |
| "loss": 0.45273590087890625, |
| "memory(GiB)": 74.97, |
| "step": 1810, |
| "token_acc": 0.8646250808015514, |
| "train_speed(iter/s)": 0.130426 |
| }, |
| { |
| "epoch": 165.0, |
| "grad_norm": 1.4076505417290193, |
| "learning_rate": 4.6421263165255855e-09, |
| "loss": 0.4405077934265137, |
| "memory(GiB)": 74.97, |
| "step": 1815, |
| "token_acc": 0.8686048572948059, |
| "train_speed(iter/s)": 0.130467 |
| }, |
| { |
| "epoch": 165.46511627906978, |
| "grad_norm": 0.9122077782409643, |
| "learning_rate": 4.396421846564236e-09, |
| "loss": 0.4534634590148926, |
| "memory(GiB)": 74.97, |
| "step": 1820, |
| "token_acc": 0.8500332069327506, |
| "train_speed(iter/s)": 0.13046 |
| }, |
| { |
| "epoch": 165.93023255813952, |
| "grad_norm": 1.0037794680637797, |
| "learning_rate": 4.157251754174729e-09, |
| "loss": 0.4450718402862549, |
| "memory(GiB)": 74.97, |
| "step": 1825, |
| "token_acc": 0.8572525948963915, |
| "train_speed(iter/s)": 0.130467 |
| }, |
| { |
| "epoch": 166.37209302325581, |
| "grad_norm": 0.7603089866068351, |
| "learning_rate": 3.924632386315185e-09, |
| "loss": 0.44524030685424804, |
| "memory(GiB)": 74.97, |
| "step": 1830, |
| "token_acc": 0.8798618132794068, |
| "train_speed(iter/s)": 0.130461 |
| }, |
| { |
| "epoch": 166.8372093023256, |
| "grad_norm": 0.7741124344133163, |
| "learning_rate": 3.6985796422103977e-09, |
| "loss": 0.4650570392608643, |
| "memory(GiB)": 74.97, |
| "step": 1835, |
| "token_acc": 0.8640802573718376, |
| "train_speed(iter/s)": 0.13049 |
| }, |
| { |
| "epoch": 167.27906976744185, |
| "grad_norm": 0.7769430246424489, |
| "learning_rate": 3.4791089722651433e-09, |
| "loss": 0.4513576507568359, |
| "memory(GiB)": 74.97, |
| "step": 1840, |
| "token_acc": 0.870817885379908, |
| "train_speed(iter/s)": 0.130488 |
| }, |
| { |
| "epoch": 167.74418604651163, |
| "grad_norm": 2.1575767592775823, |
| "learning_rate": 3.266235377008175e-09, |
| "loss": 0.4532448768615723, |
| "memory(GiB)": 74.97, |
| "step": 1845, |
| "token_acc": 0.8802010930626702, |
| "train_speed(iter/s)": 0.130491 |
| }, |
| { |
| "epoch": 168.1860465116279, |
| "grad_norm": 0.8581455080813751, |
| "learning_rate": 3.0599734060669624e-09, |
| "loss": 0.44078683853149414, |
| "memory(GiB)": 74.97, |
| "step": 1850, |
| "token_acc": 0.8616869584293079, |
| "train_speed(iter/s)": 0.130513 |
| }, |
| { |
| "epoch": 168.65116279069767, |
| "grad_norm": 0.7254996365029248, |
| "learning_rate": 2.860337157173243e-09, |
| "loss": 0.45212836265563966, |
| "memory(GiB)": 74.97, |
| "step": 1855, |
| "token_acc": 0.8733549684432675, |
| "train_speed(iter/s)": 0.130513 |
| }, |
| { |
| "epoch": 169.09302325581396, |
| "grad_norm": 1.0391226655473043, |
| "learning_rate": 2.6673402751994255e-09, |
| "loss": 0.45039982795715333, |
| "memory(GiB)": 74.97, |
| "step": 1860, |
| "token_acc": 0.8479883283766343, |
| "train_speed(iter/s)": 0.130544 |
| }, |
| { |
| "epoch": 169.5581395348837, |
| "grad_norm": 4.061114170885048, |
| "learning_rate": 2.480995951226028e-09, |
| "loss": 0.4557363510131836, |
| "memory(GiB)": 74.97, |
| "step": 1865, |
| "token_acc": 0.8549472607052897, |
| "train_speed(iter/s)": 0.130538 |
| }, |
| { |
| "epoch": 170.0, |
| "grad_norm": 0.7661862324584979, |
| "learning_rate": 2.301316921640073e-09, |
| "loss": 0.44440832138061526, |
| "memory(GiB)": 74.97, |
| "step": 1870, |
| "token_acc": 0.864181855416752, |
| "train_speed(iter/s)": 0.130548 |
| }, |
| { |
| "epoch": 170.46511627906978, |
| "grad_norm": 1.0139594423822822, |
| "learning_rate": 2.128315467264552e-09, |
| "loss": 0.44567031860351564, |
| "memory(GiB)": 74.97, |
| "step": 1875, |
| "token_acc": 0.8773299316489919, |
| "train_speed(iter/s)": 0.130532 |
| }, |
| { |
| "epoch": 170.93023255813952, |
| "grad_norm": 4.102723548733547, |
| "learning_rate": 1.962003412519064e-09, |
| "loss": 0.45189361572265624, |
| "memory(GiB)": 74.97, |
| "step": 1880, |
| "token_acc": 0.8593179414523178, |
| "train_speed(iter/s)": 0.130547 |
| }, |
| { |
| "epoch": 171.37209302325581, |
| "grad_norm": 0.8536208597740141, |
| "learning_rate": 1.8023921246116402e-09, |
| "loss": 0.45585269927978517, |
| "memory(GiB)": 74.97, |
| "step": 1885, |
| "token_acc": 0.8772535999691156, |
| "train_speed(iter/s)": 0.130545 |
| }, |
| { |
| "epoch": 171.8372093023256, |
| "grad_norm": 1.2245160632333336, |
| "learning_rate": 1.6494925127617632e-09, |
| "loss": 0.4523616790771484, |
| "memory(GiB)": 74.97, |
| "step": 1890, |
| "token_acc": 0.853437876960193, |
| "train_speed(iter/s)": 0.130555 |
| }, |
| { |
| "epoch": 172.27906976744185, |
| "grad_norm": 0.9530973263407838, |
| "learning_rate": 1.5033150274548324e-09, |
| "loss": 0.4454800605773926, |
| "memory(GiB)": 74.97, |
| "step": 1895, |
| "token_acc": 0.8595509191527256, |
| "train_speed(iter/s)": 0.130569 |
| }, |
| { |
| "epoch": 172.74418604651163, |
| "grad_norm": 0.7332081781662043, |
| "learning_rate": 1.3638696597277677e-09, |
| "loss": 0.443679666519165, |
| "memory(GiB)": 74.97, |
| "step": 1900, |
| "token_acc": 0.8559887049964703, |
| "train_speed(iter/s)": 0.130587 |
| }, |
| { |
| "epoch": 173.1860465116279, |
| "grad_norm": 0.920253466736325, |
| "learning_rate": 1.231165940486234e-09, |
| "loss": 0.469818115234375, |
| "memory(GiB)": 74.97, |
| "step": 1905, |
| "token_acc": 0.8453635280095352, |
| "train_speed(iter/s)": 0.130597 |
| }, |
| { |
| "epoch": 173.65116279069767, |
| "grad_norm": 0.8835815802653249, |
| "learning_rate": 1.1052129398531506e-09, |
| "loss": 0.44182252883911133, |
| "memory(GiB)": 74.97, |
| "step": 1910, |
| "token_acc": 0.8679900339010742, |
| "train_speed(iter/s)": 0.130609 |
| }, |
| { |
| "epoch": 174.09302325581396, |
| "grad_norm": 1.0067834379405356, |
| "learning_rate": 9.86019266548821e-10, |
| "loss": 0.4615338802337646, |
| "memory(GiB)": 74.97, |
| "step": 1915, |
| "token_acc": 0.8516490943498243, |
| "train_speed(iter/s)": 0.130627 |
| }, |
| { |
| "epoch": 174.5581395348837, |
| "grad_norm": 0.7527783775468317, |
| "learning_rate": 8.735930673024805e-10, |
| "loss": 0.4349226951599121, |
| "memory(GiB)": 74.97, |
| "step": 1920, |
| "token_acc": 0.8792523552149395, |
| "train_speed(iter/s)": 0.130639 |
| }, |
| { |
| "epoch": 175.0, |
| "grad_norm": 1.117914154380228, |
| "learning_rate": 7.679420262954983e-10, |
| "loss": 0.45952515602111815, |
| "memory(GiB)": 74.97, |
| "step": 1925, |
| "token_acc": 0.85297977378299, |
| "train_speed(iter/s)": 0.130658 |
| }, |
| { |
| "epoch": 175.46511627906978, |
| "grad_norm": 0.9275458758815365, |
| "learning_rate": 6.690733646361856e-10, |
| "loss": 0.4419642448425293, |
| "memory(GiB)": 74.97, |
| "step": 1930, |
| "token_acc": 0.8648351648351649, |
| "train_speed(iter/s)": 0.130648 |
| }, |
| { |
| "epoch": 175.93023255813952, |
| "grad_norm": 0.9626017497568045, |
| "learning_rate": 5.769938398662355e-10, |
| "loss": 0.4574889659881592, |
| "memory(GiB)": 74.97, |
| "step": 1935, |
| "token_acc": 0.862218660255126, |
| "train_speed(iter/s)": 0.130652 |
| }, |
| { |
| "epoch": 176.37209302325581, |
| "grad_norm": 0.7475798565408234, |
| "learning_rate": 4.917097454988583e-10, |
| "loss": 0.4532492637634277, |
| "memory(GiB)": 74.97, |
| "step": 1940, |
| "token_acc": 0.8698313950032691, |
| "train_speed(iter/s)": 0.130672 |
| }, |
| { |
| "epoch": 176.8372093023256, |
| "grad_norm": 0.8211466589757828, |
| "learning_rate": 4.132269105886155e-10, |
| "loss": 0.4510762691497803, |
| "memory(GiB)": 74.97, |
| "step": 1945, |
| "token_acc": 0.8704986701068692, |
| "train_speed(iter/s)": 0.130681 |
| }, |
| { |
| "epoch": 177.27906976744185, |
| "grad_norm": 1.7704160910518656, |
| "learning_rate": 3.4155069933301526e-10, |
| "loss": 0.44258646965026854, |
| "memory(GiB)": 74.97, |
| "step": 1950, |
| "token_acc": 0.8673919489954778, |
| "train_speed(iter/s)": 0.130706 |
| }, |
| { |
| "epoch": 177.74418604651163, |
| "grad_norm": 0.7984443068601499, |
| "learning_rate": 2.7668601070588436e-10, |
| "loss": 0.4494297027587891, |
| "memory(GiB)": 74.97, |
| "step": 1955, |
| "token_acc": 0.8809769787056883, |
| "train_speed(iter/s)": 0.130709 |
| }, |
| { |
| "epoch": 178.1860465116279, |
| "grad_norm": 1.034280653667984, |
| "learning_rate": 2.186372781225465e-10, |
| "loss": 0.4531251430511475, |
| "memory(GiB)": 74.97, |
| "step": 1960, |
| "token_acc": 0.8546573936837305, |
| "train_speed(iter/s)": 0.130721 |
| }, |
| { |
| "epoch": 178.65116279069767, |
| "grad_norm": 0.7838331091869942, |
| "learning_rate": 1.6740846913674279e-10, |
| "loss": 0.45207509994506834, |
| "memory(GiB)": 74.97, |
| "step": 1965, |
| "token_acc": 0.8692857883279776, |
| "train_speed(iter/s)": 0.130714 |
| }, |
| { |
| "epoch": 179.09302325581396, |
| "grad_norm": 1.030250317436395, |
| "learning_rate": 1.2300308516952628e-10, |
| "loss": 0.45918664932250974, |
| "memory(GiB)": 74.97, |
| "step": 1970, |
| "token_acc": 0.8727756076388888, |
| "train_speed(iter/s)": 0.130734 |
| }, |
| { |
| "epoch": 179.5581395348837, |
| "grad_norm": 0.9787925643257926, |
| "learning_rate": 8.542416126989804e-11, |
| "loss": 0.4371158599853516, |
| "memory(GiB)": 74.97, |
| "step": 1975, |
| "token_acc": 0.8778103770180585, |
| "train_speed(iter/s)": 0.130716 |
| }, |
| { |
| "epoch": 180.0, |
| "grad_norm": 15.04723370590281, |
| "learning_rate": 5.46742659073951e-11, |
| "loss": 0.4714357852935791, |
| "memory(GiB)": 74.97, |
| "step": 1980, |
| "token_acc": 0.8756799192508271, |
| "train_speed(iter/s)": 0.130752 |
| }, |
| { |
| "epoch": 180.46511627906978, |
| "grad_norm": 0.8235554306239253, |
| "learning_rate": 3.0755500796531e-11, |
| "loss": 0.44407100677490235, |
| "memory(GiB)": 74.97, |
| "step": 1985, |
| "token_acc": 0.8770921605870226, |
| "train_speed(iter/s)": 0.130773 |
| }, |
| { |
| "epoch": 180.93023255813952, |
| "grad_norm": 1.1709340797573902, |
| "learning_rate": 1.3669500753099584e-11, |
| "loss": 0.44757466316223143, |
| "memory(GiB)": 74.97, |
| "step": 1990, |
| "token_acc": 0.8692709656348659, |
| "train_speed(iter/s)": 0.130762 |
| }, |
| { |
| "epoch": 181.37209302325581, |
| "grad_norm": 1.040441435930544, |
| "learning_rate": 3.417433582542095e-12, |
| "loss": 0.4524868011474609, |
| "memory(GiB)": 74.97, |
| "step": 1995, |
| "token_acc": 0.8582090965920931, |
| "train_speed(iter/s)": 0.130794 |
| }, |
| { |
| "epoch": 181.8372093023256, |
| "grad_norm": 2.1432807975254313, |
| "learning_rate": 0.0, |
| "loss": 0.46004161834716795, |
| "memory(GiB)": 74.97, |
| "step": 2000, |
| "token_acc": 0.8837088162521162, |
| "train_speed(iter/s)": 0.130787 |
| }, |
| { |
| "epoch": 181.8372093023256, |
| "eval_loss": 0.6126144528388977, |
| "eval_runtime": 0.6963, |
| "eval_samples_per_second": 18.669, |
| "eval_steps_per_second": 2.872, |
| "eval_token_acc": 0.8433758022799118, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 2000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 200, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4205321758179328.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|