| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 16503, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0030298439630359036, | |
| "grad_norm": 1.5333168506622314, | |
| "learning_rate": 9.970308428770528e-06, | |
| "loss": 2.3086, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.006059687926071807, | |
| "grad_norm": 1.427223563194275, | |
| "learning_rate": 9.9400109071078e-06, | |
| "loss": 2.222, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.00908953188910771, | |
| "grad_norm": 1.435951590538025, | |
| "learning_rate": 9.909713385445072e-06, | |
| "loss": 2.1361, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.012119375852143614, | |
| "grad_norm": 1.7074558734893799, | |
| "learning_rate": 9.879415863782343e-06, | |
| "loss": 2.0743, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.015149219815179518, | |
| "grad_norm": 1.5592257976531982, | |
| "learning_rate": 9.849118342119616e-06, | |
| "loss": 2.0526, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.01817906377821542, | |
| "grad_norm": 1.5479929447174072, | |
| "learning_rate": 9.818820820456887e-06, | |
| "loss": 2.0152, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.021208907741251327, | |
| "grad_norm": 1.8903971910476685, | |
| "learning_rate": 9.78852329879416e-06, | |
| "loss": 1.9692, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.02423875170428723, | |
| "grad_norm": 1.7212985754013062, | |
| "learning_rate": 9.758225777131432e-06, | |
| "loss": 1.9869, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.027268595667323134, | |
| "grad_norm": 1.5003551244735718, | |
| "learning_rate": 9.727928255468703e-06, | |
| "loss": 1.9526, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.030298439630359036, | |
| "grad_norm": 1.938314437866211, | |
| "learning_rate": 9.697630733805974e-06, | |
| "loss": 1.9265, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.03332828359339494, | |
| "grad_norm": 2.260500431060791, | |
| "learning_rate": 9.667333212143247e-06, | |
| "loss": 1.9519, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.03635812755643084, | |
| "grad_norm": 1.8720899820327759, | |
| "learning_rate": 9.63703569048052e-06, | |
| "loss": 1.9269, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.03938797151946675, | |
| "grad_norm": 1.6958003044128418, | |
| "learning_rate": 9.606738168817791e-06, | |
| "loss": 1.8689, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.042417815482502653, | |
| "grad_norm": 1.8129528760910034, | |
| "learning_rate": 9.576440647155064e-06, | |
| "loss": 1.9042, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.04544765944553855, | |
| "grad_norm": 1.8754149675369263, | |
| "learning_rate": 9.546143125492335e-06, | |
| "loss": 1.8763, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.04847750340857446, | |
| "grad_norm": 1.8043217658996582, | |
| "learning_rate": 9.515845603829608e-06, | |
| "loss": 1.8885, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.05150734737161036, | |
| "grad_norm": 1.7908295392990112, | |
| "learning_rate": 9.48554808216688e-06, | |
| "loss": 1.894, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.05453719133464627, | |
| "grad_norm": 1.8442623615264893, | |
| "learning_rate": 9.455250560504152e-06, | |
| "loss": 1.8439, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.057567035297682166, | |
| "grad_norm": 2.4318511486053467, | |
| "learning_rate": 9.424953038841423e-06, | |
| "loss": 1.8462, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.06059687926071807, | |
| "grad_norm": 1.9251976013183594, | |
| "learning_rate": 9.394655517178695e-06, | |
| "loss": 1.8359, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.06362672322375397, | |
| "grad_norm": 2.037912368774414, | |
| "learning_rate": 9.364357995515968e-06, | |
| "loss": 1.8454, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.06665656718678988, | |
| "grad_norm": 1.7860537767410278, | |
| "learning_rate": 9.33406047385324e-06, | |
| "loss": 1.8139, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.06968641114982578, | |
| "grad_norm": 2.082537889480591, | |
| "learning_rate": 9.303762952190512e-06, | |
| "loss": 1.8248, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.07271625511286169, | |
| "grad_norm": 1.863715648651123, | |
| "learning_rate": 9.273465430527783e-06, | |
| "loss": 1.8146, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.07574609907589759, | |
| "grad_norm": 1.9475610256195068, | |
| "learning_rate": 9.243167908865056e-06, | |
| "loss": 1.8335, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.0787759430389335, | |
| "grad_norm": 1.8209477663040161, | |
| "learning_rate": 9.212870387202327e-06, | |
| "loss": 1.841, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.0818057870019694, | |
| "grad_norm": 1.7817108631134033, | |
| "learning_rate": 9.1825728655396e-06, | |
| "loss": 1.8559, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.08483563096500531, | |
| "grad_norm": 2.1402652263641357, | |
| "learning_rate": 9.152275343876873e-06, | |
| "loss": 1.7597, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.08786547492804121, | |
| "grad_norm": 1.7914282083511353, | |
| "learning_rate": 9.121977822214144e-06, | |
| "loss": 1.7555, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.0908953188910771, | |
| "grad_norm": 1.8525111675262451, | |
| "learning_rate": 9.091680300551415e-06, | |
| "loss": 1.7685, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.09392516285411301, | |
| "grad_norm": 1.7323678731918335, | |
| "learning_rate": 9.061382778888688e-06, | |
| "loss": 1.7729, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.09695500681714891, | |
| "grad_norm": 1.8842493295669556, | |
| "learning_rate": 9.03108525722596e-06, | |
| "loss": 1.8152, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.09998485078018482, | |
| "grad_norm": 2.3138351440429688, | |
| "learning_rate": 9.000787735563232e-06, | |
| "loss": 1.7888, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.10301469474322072, | |
| "grad_norm": 1.5812338590621948, | |
| "learning_rate": 8.970490213900503e-06, | |
| "loss": 1.736, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.10604453870625663, | |
| "grad_norm": 1.7662309408187866, | |
| "learning_rate": 8.940192692237775e-06, | |
| "loss": 1.7532, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.10907438266929254, | |
| "grad_norm": 1.6153583526611328, | |
| "learning_rate": 8.909895170575048e-06, | |
| "loss": 1.7995, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.11210422663232844, | |
| "grad_norm": 1.7088361978530884, | |
| "learning_rate": 8.87959764891232e-06, | |
| "loss": 1.7612, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.11513407059536433, | |
| "grad_norm": 1.7324771881103516, | |
| "learning_rate": 8.849300127249592e-06, | |
| "loss": 1.7803, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.11816391455840024, | |
| "grad_norm": 2.0461599826812744, | |
| "learning_rate": 8.819002605586863e-06, | |
| "loss": 1.7642, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.12119375852143614, | |
| "grad_norm": 1.9488065242767334, | |
| "learning_rate": 8.788705083924136e-06, | |
| "loss": 1.7281, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.12422360248447205, | |
| "grad_norm": 1.6670262813568115, | |
| "learning_rate": 8.758407562261407e-06, | |
| "loss": 1.7446, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.12725344644750794, | |
| "grad_norm": 2.1428778171539307, | |
| "learning_rate": 8.72811004059868e-06, | |
| "loss": 1.7129, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.13028329041054384, | |
| "grad_norm": 1.863022804260254, | |
| "learning_rate": 8.697812518935953e-06, | |
| "loss": 1.7471, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.13331313437357975, | |
| "grad_norm": 1.9406341314315796, | |
| "learning_rate": 8.667514997273224e-06, | |
| "loss": 1.7381, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.13634297833661566, | |
| "grad_norm": 2.044616222381592, | |
| "learning_rate": 8.637217475610495e-06, | |
| "loss": 1.6925, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.13937282229965156, | |
| "grad_norm": 2.1815521717071533, | |
| "learning_rate": 8.606919953947768e-06, | |
| "loss": 1.7528, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.14240266626268747, | |
| "grad_norm": 1.943009853363037, | |
| "learning_rate": 8.57662243228504e-06, | |
| "loss": 1.7448, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.14543251022572337, | |
| "grad_norm": 1.806156873703003, | |
| "learning_rate": 8.546324910622311e-06, | |
| "loss": 1.7086, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.14846235418875928, | |
| "grad_norm": 1.6711620092391968, | |
| "learning_rate": 8.516027388959584e-06, | |
| "loss": 1.7235, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.15149219815179518, | |
| "grad_norm": 1.807391881942749, | |
| "learning_rate": 8.48633581773011e-06, | |
| "loss": 1.7185, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.1545220421148311, | |
| "grad_norm": 1.809409499168396, | |
| "learning_rate": 8.456038296067382e-06, | |
| "loss": 1.7292, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.157551886077867, | |
| "grad_norm": 1.953994870185852, | |
| "learning_rate": 8.425740774404655e-06, | |
| "loss": 1.7225, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.1605817300409029, | |
| "grad_norm": 1.776473045349121, | |
| "learning_rate": 8.395443252741926e-06, | |
| "loss": 1.707, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.1636115740039388, | |
| "grad_norm": 2.0671963691711426, | |
| "learning_rate": 8.365145731079199e-06, | |
| "loss": 1.6829, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.1666414179669747, | |
| "grad_norm": 1.7150015830993652, | |
| "learning_rate": 8.33484820941647e-06, | |
| "loss": 1.6755, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.16967126193001061, | |
| "grad_norm": 1.7115235328674316, | |
| "learning_rate": 8.304550687753743e-06, | |
| "loss": 1.7345, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.17270110589304652, | |
| "grad_norm": 1.6895759105682373, | |
| "learning_rate": 8.274253166091014e-06, | |
| "loss": 1.6911, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.17573094985608242, | |
| "grad_norm": 1.8337711095809937, | |
| "learning_rate": 8.243955644428287e-06, | |
| "loss": 1.717, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.1787607938191183, | |
| "grad_norm": 1.6573454141616821, | |
| "learning_rate": 8.21365812276556e-06, | |
| "loss": 1.6788, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.1817906377821542, | |
| "grad_norm": 1.6749043464660645, | |
| "learning_rate": 8.18336060110283e-06, | |
| "loss": 1.6658, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.1848204817451901, | |
| "grad_norm": 1.8717437982559204, | |
| "learning_rate": 8.153063079440102e-06, | |
| "loss": 1.6504, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.18785032570822602, | |
| "grad_norm": 1.8825408220291138, | |
| "learning_rate": 8.122765557777375e-06, | |
| "loss": 1.689, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.19088016967126192, | |
| "grad_norm": 1.528969645500183, | |
| "learning_rate": 8.092468036114647e-06, | |
| "loss": 1.6896, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.19391001363429783, | |
| "grad_norm": 1.6371997594833374, | |
| "learning_rate": 8.062170514451918e-06, | |
| "loss": 1.6924, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.19693985759733373, | |
| "grad_norm": 1.6460844278335571, | |
| "learning_rate": 8.03187299278919e-06, | |
| "loss": 1.7174, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.19996970156036964, | |
| "grad_norm": 1.8048419952392578, | |
| "learning_rate": 8.001575471126462e-06, | |
| "loss": 1.6752, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.20299954552340554, | |
| "grad_norm": 1.5663115978240967, | |
| "learning_rate": 7.971277949463735e-06, | |
| "loss": 1.67, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.20602938948644145, | |
| "grad_norm": 1.651563048362732, | |
| "learning_rate": 7.940980427801006e-06, | |
| "loss": 1.676, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.20905923344947736, | |
| "grad_norm": 1.7851747274398804, | |
| "learning_rate": 7.910682906138279e-06, | |
| "loss": 1.6498, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.21208907741251326, | |
| "grad_norm": 1.839625358581543, | |
| "learning_rate": 7.88038538447555e-06, | |
| "loss": 1.6772, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.21511892137554917, | |
| "grad_norm": 1.769681453704834, | |
| "learning_rate": 7.850087862812823e-06, | |
| "loss": 1.6827, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.21814876533858507, | |
| "grad_norm": 1.9082216024398804, | |
| "learning_rate": 7.819790341150094e-06, | |
| "loss": 1.6918, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.22117860930162098, | |
| "grad_norm": 1.7255308628082275, | |
| "learning_rate": 7.789492819487367e-06, | |
| "loss": 1.6702, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.22420845326465688, | |
| "grad_norm": 1.5342808961868286, | |
| "learning_rate": 7.75919529782464e-06, | |
| "loss": 1.6514, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.2272382972276928, | |
| "grad_norm": 1.6255989074707031, | |
| "learning_rate": 7.72889777616191e-06, | |
| "loss": 1.6619, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.23026814119072866, | |
| "grad_norm": 1.726608157157898, | |
| "learning_rate": 7.698600254499182e-06, | |
| "loss": 1.6691, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.23329798515376457, | |
| "grad_norm": 1.7505511045455933, | |
| "learning_rate": 7.668302732836454e-06, | |
| "loss": 1.6603, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.23632782911680048, | |
| "grad_norm": 1.7649147510528564, | |
| "learning_rate": 7.638005211173727e-06, | |
| "loss": 1.7008, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.23935767307983638, | |
| "grad_norm": 1.7847344875335693, | |
| "learning_rate": 7.607707689510999e-06, | |
| "loss": 1.6701, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.24238751704287229, | |
| "grad_norm": 1.7049511671066284, | |
| "learning_rate": 7.57741016784827e-06, | |
| "loss": 1.6446, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.2454173610059082, | |
| "grad_norm": 1.5229992866516113, | |
| "learning_rate": 7.547112646185543e-06, | |
| "loss": 1.6482, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.2484472049689441, | |
| "grad_norm": 1.7003657817840576, | |
| "learning_rate": 7.516815124522815e-06, | |
| "loss": 1.6154, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.25147704893198003, | |
| "grad_norm": 2.140820026397705, | |
| "learning_rate": 7.486517602860086e-06, | |
| "loss": 1.6522, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.2545068928950159, | |
| "grad_norm": 1.580733060836792, | |
| "learning_rate": 7.456220081197358e-06, | |
| "loss": 1.6252, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.2575367368580518, | |
| "grad_norm": 1.912180781364441, | |
| "learning_rate": 7.425922559534631e-06, | |
| "loss": 1.6581, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.2605665808210877, | |
| "grad_norm": 1.6341553926467896, | |
| "learning_rate": 7.395625037871903e-06, | |
| "loss": 1.6569, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.2635964247841236, | |
| "grad_norm": 1.6689584255218506, | |
| "learning_rate": 7.365327516209174e-06, | |
| "loss": 1.67, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.2666262687471595, | |
| "grad_norm": 1.5784380435943604, | |
| "learning_rate": 7.335029994546447e-06, | |
| "loss": 1.6469, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.2696561127101954, | |
| "grad_norm": 1.8624850511550903, | |
| "learning_rate": 7.3047324728837186e-06, | |
| "loss": 1.6654, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.2726859566732313, | |
| "grad_norm": 1.8794349431991577, | |
| "learning_rate": 7.274434951220991e-06, | |
| "loss": 1.6662, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.2757158006362672, | |
| "grad_norm": 1.8854588270187378, | |
| "learning_rate": 7.2441374295582624e-06, | |
| "loss": 1.6837, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.2787456445993031, | |
| "grad_norm": 2.1526780128479004, | |
| "learning_rate": 7.213839907895534e-06, | |
| "loss": 1.6632, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.281775488562339, | |
| "grad_norm": 1.9971776008605957, | |
| "learning_rate": 7.183542386232807e-06, | |
| "loss": 1.6099, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.28480533252537493, | |
| "grad_norm": 1.598711609840393, | |
| "learning_rate": 7.153244864570079e-06, | |
| "loss": 1.6607, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.28783517648841084, | |
| "grad_norm": 1.4923897981643677, | |
| "learning_rate": 7.12294734290735e-06, | |
| "loss": 1.6116, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.29086502045144674, | |
| "grad_norm": 1.8050360679626465, | |
| "learning_rate": 7.092649821244623e-06, | |
| "loss": 1.6655, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.29389486441448265, | |
| "grad_norm": 1.7371289730072021, | |
| "learning_rate": 7.062352299581895e-06, | |
| "loss": 1.6276, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.29692470837751855, | |
| "grad_norm": 1.7267305850982666, | |
| "learning_rate": 7.032054777919167e-06, | |
| "loss": 1.6399, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.29995455234055446, | |
| "grad_norm": 1.5485690832138062, | |
| "learning_rate": 7.001757256256439e-06, | |
| "loss": 1.6188, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.30298439630359036, | |
| "grad_norm": 1.78826904296875, | |
| "learning_rate": 6.971459734593711e-06, | |
| "loss": 1.6426, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.30601424026662627, | |
| "grad_norm": 1.8727006912231445, | |
| "learning_rate": 6.941768163364238e-06, | |
| "loss": 1.6329, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.3090440842296622, | |
| "grad_norm": 1.8767826557159424, | |
| "learning_rate": 6.91147064170151e-06, | |
| "loss": 1.6724, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.3120739281926981, | |
| "grad_norm": 1.4456415176391602, | |
| "learning_rate": 6.881173120038781e-06, | |
| "loss": 1.6698, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.315103772155734, | |
| "grad_norm": 1.652795433998108, | |
| "learning_rate": 6.8508755983760535e-06, | |
| "loss": 1.6425, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.3181336161187699, | |
| "grad_norm": 1.609587550163269, | |
| "learning_rate": 6.8205780767133255e-06, | |
| "loss": 1.5815, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.3211634600818058, | |
| "grad_norm": 1.6337134838104248, | |
| "learning_rate": 6.790280555050598e-06, | |
| "loss": 1.6195, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.3241933040448417, | |
| "grad_norm": 1.3837682008743286, | |
| "learning_rate": 6.759983033387869e-06, | |
| "loss": 1.6397, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.3272231480078776, | |
| "grad_norm": 1.5774863958358765, | |
| "learning_rate": 6.729685511725141e-06, | |
| "loss": 1.6391, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.3302529919709135, | |
| "grad_norm": 1.6407530307769775, | |
| "learning_rate": 6.699387990062414e-06, | |
| "loss": 1.6216, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.3332828359339494, | |
| "grad_norm": 1.7346445322036743, | |
| "learning_rate": 6.669090468399686e-06, | |
| "loss": 1.593, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.3363126798969853, | |
| "grad_norm": 1.907456636428833, | |
| "learning_rate": 6.638792946736957e-06, | |
| "loss": 1.5871, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.33934252386002123, | |
| "grad_norm": 1.8702911138534546, | |
| "learning_rate": 6.60849542507423e-06, | |
| "loss": 1.6464, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.34237236782305713, | |
| "grad_norm": 1.750834345817566, | |
| "learning_rate": 6.578197903411502e-06, | |
| "loss": 1.5719, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.34540221178609304, | |
| "grad_norm": 1.632150650024414, | |
| "learning_rate": 6.547900381748773e-06, | |
| "loss": 1.6209, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.34843205574912894, | |
| "grad_norm": 1.6225661039352417, | |
| "learning_rate": 6.517602860086045e-06, | |
| "loss": 1.6307, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.35146189971216485, | |
| "grad_norm": 1.6222798824310303, | |
| "learning_rate": 6.487305338423318e-06, | |
| "loss": 1.6103, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.35449174367520075, | |
| "grad_norm": 1.9228448867797852, | |
| "learning_rate": 6.45700781676059e-06, | |
| "loss": 1.6603, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.3575215876382366, | |
| "grad_norm": 1.9268038272857666, | |
| "learning_rate": 6.426710295097861e-06, | |
| "loss": 1.6057, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.3605514316012725, | |
| "grad_norm": 1.4592260122299194, | |
| "learning_rate": 6.3964127734351335e-06, | |
| "loss": 1.639, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.3635812755643084, | |
| "grad_norm": 1.8319112062454224, | |
| "learning_rate": 6.366115251772405e-06, | |
| "loss": 1.6403, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.3666111195273443, | |
| "grad_norm": 1.6500194072723389, | |
| "learning_rate": 6.335817730109678e-06, | |
| "loss": 1.6216, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.3696409634903802, | |
| "grad_norm": 1.6576268672943115, | |
| "learning_rate": 6.305520208446949e-06, | |
| "loss": 1.6326, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.37267080745341613, | |
| "grad_norm": 2.2077317237854004, | |
| "learning_rate": 6.275222686784221e-06, | |
| "loss": 1.609, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.37570065141645204, | |
| "grad_norm": 1.4579232931137085, | |
| "learning_rate": 6.244925165121494e-06, | |
| "loss": 1.5816, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.37873049537948794, | |
| "grad_norm": 1.6647648811340332, | |
| "learning_rate": 6.214627643458766e-06, | |
| "loss": 1.6208, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.38176033934252385, | |
| "grad_norm": 1.413583755493164, | |
| "learning_rate": 6.184330121796037e-06, | |
| "loss": 1.6206, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.38479018330555975, | |
| "grad_norm": 1.743116021156311, | |
| "learning_rate": 6.15403260013331e-06, | |
| "loss": 1.5973, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.38782002726859566, | |
| "grad_norm": 1.8276968002319336, | |
| "learning_rate": 6.123735078470582e-06, | |
| "loss": 1.6384, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.39084987123163156, | |
| "grad_norm": 1.3672616481781006, | |
| "learning_rate": 6.093437556807854e-06, | |
| "loss": 1.5883, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.39387971519466747, | |
| "grad_norm": 1.6729201078414917, | |
| "learning_rate": 6.063140035145126e-06, | |
| "loss": 1.6074, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.3969095591577034, | |
| "grad_norm": 1.433160662651062, | |
| "learning_rate": 6.0328425134823976e-06, | |
| "loss": 1.6359, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.3999394031207393, | |
| "grad_norm": 1.6422028541564941, | |
| "learning_rate": 6.0025449918196695e-06, | |
| "loss": 1.6121, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.4029692470837752, | |
| "grad_norm": 1.6269747018814087, | |
| "learning_rate": 5.972247470156942e-06, | |
| "loss": 1.6143, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.4059990910468111, | |
| "grad_norm": 1.5140916109085083, | |
| "learning_rate": 5.941949948494213e-06, | |
| "loss": 1.5386, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.409028935009847, | |
| "grad_norm": 1.6418771743774414, | |
| "learning_rate": 5.911652426831485e-06, | |
| "loss": 1.5685, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.4120587789728829, | |
| "grad_norm": 2.016139268875122, | |
| "learning_rate": 5.881354905168758e-06, | |
| "loss": 1.6092, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.4150886229359188, | |
| "grad_norm": 1.8047841787338257, | |
| "learning_rate": 5.851057383506029e-06, | |
| "loss": 1.6405, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.4181184668989547, | |
| "grad_norm": 1.426552653312683, | |
| "learning_rate": 5.820759861843301e-06, | |
| "loss": 1.5648, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.4211483108619906, | |
| "grad_norm": 1.8354159593582153, | |
| "learning_rate": 5.790462340180574e-06, | |
| "loss": 1.6048, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.4241781548250265, | |
| "grad_norm": 1.475692868232727, | |
| "learning_rate": 5.760164818517846e-06, | |
| "loss": 1.5869, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.4272079987880624, | |
| "grad_norm": 1.6546450853347778, | |
| "learning_rate": 5.729867296855117e-06, | |
| "loss": 1.5866, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.43023784275109833, | |
| "grad_norm": 1.5041762590408325, | |
| "learning_rate": 5.69956977519239e-06, | |
| "loss": 1.6071, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.43326768671413424, | |
| "grad_norm": 1.5460017919540405, | |
| "learning_rate": 5.669272253529662e-06, | |
| "loss": 1.5823, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.43629753067717014, | |
| "grad_norm": 1.6852233409881592, | |
| "learning_rate": 5.6389747318669345e-06, | |
| "loss": 1.6109, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.43932737464020605, | |
| "grad_norm": 1.5145368576049805, | |
| "learning_rate": 5.6086772102042056e-06, | |
| "loss": 1.5887, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.44235721860324195, | |
| "grad_norm": 1.6494066715240479, | |
| "learning_rate": 5.5783796885414775e-06, | |
| "loss": 1.5881, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.44538706256627786, | |
| "grad_norm": 1.7732048034667969, | |
| "learning_rate": 5.54808216687875e-06, | |
| "loss": 1.5923, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.44841690652931376, | |
| "grad_norm": 2.078328847885132, | |
| "learning_rate": 5.517784645216022e-06, | |
| "loss": 1.6117, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.45144675049234967, | |
| "grad_norm": 1.5237518548965454, | |
| "learning_rate": 5.487487123553293e-06, | |
| "loss": 1.6206, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.4544765944553856, | |
| "grad_norm": 1.5857949256896973, | |
| "learning_rate": 5.457189601890566e-06, | |
| "loss": 1.5938, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.4575064384184214, | |
| "grad_norm": 1.8254096508026123, | |
| "learning_rate": 5.426892080227838e-06, | |
| "loss": 1.6217, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.46053628238145733, | |
| "grad_norm": 1.7777276039123535, | |
| "learning_rate": 5.39659455856511e-06, | |
| "loss": 1.5695, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.46356612634449323, | |
| "grad_norm": 1.6662938594818115, | |
| "learning_rate": 5.366297036902381e-06, | |
| "loss": 1.5875, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.46659597030752914, | |
| "grad_norm": 1.6636159420013428, | |
| "learning_rate": 5.335999515239654e-06, | |
| "loss": 1.5969, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.46962581427056505, | |
| "grad_norm": 1.4926279783248901, | |
| "learning_rate": 5.305701993576926e-06, | |
| "loss": 1.5905, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.47265565823360095, | |
| "grad_norm": 1.566446304321289, | |
| "learning_rate": 5.275404471914199e-06, | |
| "loss": 1.6126, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.47568550219663686, | |
| "grad_norm": 1.8684853315353394, | |
| "learning_rate": 5.24510695025147e-06, | |
| "loss": 1.5737, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 0.47871534615967276, | |
| "grad_norm": 1.8654356002807617, | |
| "learning_rate": 5.214809428588742e-06, | |
| "loss": 1.5964, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.48174519012270867, | |
| "grad_norm": 1.3818305730819702, | |
| "learning_rate": 5.184511906926014e-06, | |
| "loss": 1.5757, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.48477503408574457, | |
| "grad_norm": 1.6214017868041992, | |
| "learning_rate": 5.154214385263286e-06, | |
| "loss": 1.559, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.4878048780487805, | |
| "grad_norm": 1.5026991367340088, | |
| "learning_rate": 5.1239168636005574e-06, | |
| "loss": 1.6033, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 0.4908347220118164, | |
| "grad_norm": 1.4133880138397217, | |
| "learning_rate": 5.09361934193783e-06, | |
| "loss": 1.5947, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.4938645659748523, | |
| "grad_norm": 1.5185195207595825, | |
| "learning_rate": 5.063321820275102e-06, | |
| "loss": 1.5858, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 0.4968944099378882, | |
| "grad_norm": 1.5284918546676636, | |
| "learning_rate": 5.033024298612373e-06, | |
| "loss": 1.5998, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.4999242539009241, | |
| "grad_norm": 1.5977320671081543, | |
| "learning_rate": 5.002726776949646e-06, | |
| "loss": 1.5609, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.5029540978639601, | |
| "grad_norm": 1.6577131748199463, | |
| "learning_rate": 4.972429255286918e-06, | |
| "loss": 1.5684, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.5059839418269959, | |
| "grad_norm": 1.5592504739761353, | |
| "learning_rate": 4.94213173362419e-06, | |
| "loss": 1.597, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 0.5090137857900318, | |
| "grad_norm": 1.6733429431915283, | |
| "learning_rate": 4.911834211961462e-06, | |
| "loss": 1.5803, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.5120436297530677, | |
| "grad_norm": 1.390070915222168, | |
| "learning_rate": 4.881536690298734e-06, | |
| "loss": 1.5868, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 0.5150734737161036, | |
| "grad_norm": 1.6423391103744507, | |
| "learning_rate": 4.851239168636006e-06, | |
| "loss": 1.6197, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.5181033176791395, | |
| "grad_norm": 1.5211403369903564, | |
| "learning_rate": 4.820941646973278e-06, | |
| "loss": 1.5985, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 0.5211331616421754, | |
| "grad_norm": 1.3367249965667725, | |
| "learning_rate": 4.7906441253105504e-06, | |
| "loss": 1.563, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.5241630056052113, | |
| "grad_norm": 1.4820023775100708, | |
| "learning_rate": 4.7603466036478215e-06, | |
| "loss": 1.5519, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 0.5271928495682472, | |
| "grad_norm": 1.5887796878814697, | |
| "learning_rate": 4.7306550324183485e-06, | |
| "loss": 1.5901, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.5302226935312832, | |
| "grad_norm": 2.0754947662353516, | |
| "learning_rate": 4.7003575107556205e-06, | |
| "loss": 1.58, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 0.533252537494319, | |
| "grad_norm": 1.545721173286438, | |
| "learning_rate": 4.670059989092892e-06, | |
| "loss": 1.5588, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.536282381457355, | |
| "grad_norm": 1.69772469997406, | |
| "learning_rate": 4.639762467430164e-06, | |
| "loss": 1.5847, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 0.5393122254203908, | |
| "grad_norm": 1.5981988906860352, | |
| "learning_rate": 4.609464945767437e-06, | |
| "loss": 1.5508, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.5423420693834268, | |
| "grad_norm": 1.884867787361145, | |
| "learning_rate": 4.579167424104708e-06, | |
| "loss": 1.5583, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 0.5453719133464626, | |
| "grad_norm": 1.8997677564620972, | |
| "learning_rate": 4.548869902441981e-06, | |
| "loss": 1.5681, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.5484017573094986, | |
| "grad_norm": 1.6099635362625122, | |
| "learning_rate": 4.518572380779253e-06, | |
| "loss": 1.5838, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 0.5514316012725344, | |
| "grad_norm": 1.659430742263794, | |
| "learning_rate": 4.488274859116525e-06, | |
| "loss": 1.5876, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.5544614452355704, | |
| "grad_norm": 1.72199547290802, | |
| "learning_rate": 4.457977337453797e-06, | |
| "loss": 1.5776, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 0.5574912891986062, | |
| "grad_norm": 1.5597950220108032, | |
| "learning_rate": 4.427679815791069e-06, | |
| "loss": 1.5383, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.5605211331616422, | |
| "grad_norm": 1.8483946323394775, | |
| "learning_rate": 4.397382294128341e-06, | |
| "loss": 1.6229, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 0.563550977124678, | |
| "grad_norm": 1.459078311920166, | |
| "learning_rate": 4.367084772465613e-06, | |
| "loss": 1.5796, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.566580821087714, | |
| "grad_norm": 1.5624710321426392, | |
| "learning_rate": 4.336787250802885e-06, | |
| "loss": 1.5648, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 0.5696106650507499, | |
| "grad_norm": 1.5045188665390015, | |
| "learning_rate": 4.306489729140157e-06, | |
| "loss": 1.6078, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.5726405090137858, | |
| "grad_norm": 1.5284470319747925, | |
| "learning_rate": 4.2761922074774285e-06, | |
| "loss": 1.5581, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 0.5756703529768217, | |
| "grad_norm": 1.7991974353790283, | |
| "learning_rate": 4.2458946858147e-06, | |
| "loss": 1.5629, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.5787001969398576, | |
| "grad_norm": 1.5898367166519165, | |
| "learning_rate": 4.215597164151972e-06, | |
| "loss": 1.6102, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 0.5817300409028935, | |
| "grad_norm": 1.5055047273635864, | |
| "learning_rate": 4.185299642489244e-06, | |
| "loss": 1.5602, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.5847598848659294, | |
| "grad_norm": 1.4318591356277466, | |
| "learning_rate": 4.155002120826517e-06, | |
| "loss": 1.5558, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 0.5877897288289653, | |
| "grad_norm": 1.6692588329315186, | |
| "learning_rate": 4.124704599163788e-06, | |
| "loss": 1.573, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.5908195727920013, | |
| "grad_norm": 1.4996757507324219, | |
| "learning_rate": 4.094407077501061e-06, | |
| "loss": 1.5641, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 0.5938494167550371, | |
| "grad_norm": 1.491071343421936, | |
| "learning_rate": 4.064109555838333e-06, | |
| "loss": 1.5546, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.5968792607180731, | |
| "grad_norm": 1.5985503196716309, | |
| "learning_rate": 4.033812034175605e-06, | |
| "loss": 1.5443, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 0.5999091046811089, | |
| "grad_norm": 1.6403000354766846, | |
| "learning_rate": 4.003514512512877e-06, | |
| "loss": 1.591, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.6029389486441449, | |
| "grad_norm": 1.3770664930343628, | |
| "learning_rate": 3.973216990850149e-06, | |
| "loss": 1.5142, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 0.6059687926071807, | |
| "grad_norm": 1.6218574047088623, | |
| "learning_rate": 3.942919469187421e-06, | |
| "loss": 1.5854, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.6089986365702167, | |
| "grad_norm": 1.2990355491638184, | |
| "learning_rate": 3.9126219475246926e-06, | |
| "loss": 1.5595, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 0.6120284805332525, | |
| "grad_norm": 1.4130761623382568, | |
| "learning_rate": 3.8823244258619645e-06, | |
| "loss": 1.5471, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.6150583244962884, | |
| "grad_norm": 1.4140211343765259, | |
| "learning_rate": 3.852026904199237e-06, | |
| "loss": 1.5071, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 0.6180881684593243, | |
| "grad_norm": 1.4824833869934082, | |
| "learning_rate": 3.821729382536508e-06, | |
| "loss": 1.5711, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.6211180124223602, | |
| "grad_norm": 1.6242071390151978, | |
| "learning_rate": 3.791431860873781e-06, | |
| "loss": 1.5181, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 0.6241478563853962, | |
| "grad_norm": 1.4817885160446167, | |
| "learning_rate": 3.7611343392110527e-06, | |
| "loss": 1.5468, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.627177700348432, | |
| "grad_norm": 1.3601789474487305, | |
| "learning_rate": 3.730836817548325e-06, | |
| "loss": 1.5761, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 0.630207544311468, | |
| "grad_norm": 1.3270106315612793, | |
| "learning_rate": 3.700539295885597e-06, | |
| "loss": 1.5336, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.6332373882745038, | |
| "grad_norm": 1.4662185907363892, | |
| "learning_rate": 3.670241774222869e-06, | |
| "loss": 1.5958, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 0.6362672322375398, | |
| "grad_norm": 1.480850100517273, | |
| "learning_rate": 3.639944252560141e-06, | |
| "loss": 1.565, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.6392970762005756, | |
| "grad_norm": 1.4275442361831665, | |
| "learning_rate": 3.6096467308974132e-06, | |
| "loss": 1.5316, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 0.6423269201636116, | |
| "grad_norm": 1.709631323814392, | |
| "learning_rate": 3.5793492092346847e-06, | |
| "loss": 1.5568, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.6453567641266474, | |
| "grad_norm": 1.4881377220153809, | |
| "learning_rate": 3.549051687571957e-06, | |
| "loss": 1.5452, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 0.6483866080896834, | |
| "grad_norm": 1.3687924146652222, | |
| "learning_rate": 3.518754165909229e-06, | |
| "loss": 1.567, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.6514164520527193, | |
| "grad_norm": 1.2314640283584595, | |
| "learning_rate": 3.4884566442465006e-06, | |
| "loss": 1.5678, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 0.6544462960157552, | |
| "grad_norm": 1.5389395952224731, | |
| "learning_rate": 3.458159122583773e-06, | |
| "loss": 1.5624, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.6574761399787911, | |
| "grad_norm": 1.401017189025879, | |
| "learning_rate": 3.427861600921045e-06, | |
| "loss": 1.5287, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 0.660505983941827, | |
| "grad_norm": 1.9174494743347168, | |
| "learning_rate": 3.397564079258317e-06, | |
| "loss": 1.547, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.6635358279048629, | |
| "grad_norm": 1.5186442136764526, | |
| "learning_rate": 3.3672665575955887e-06, | |
| "loss": 1.5489, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 0.6665656718678988, | |
| "grad_norm": 1.3896212577819824, | |
| "learning_rate": 3.336969035932861e-06, | |
| "loss": 1.5368, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.6695955158309347, | |
| "grad_norm": 1.7711433172225952, | |
| "learning_rate": 3.3072774647033872e-06, | |
| "loss": 1.5129, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 0.6726253597939706, | |
| "grad_norm": 1.5082119703292847, | |
| "learning_rate": 3.2769799430406596e-06, | |
| "loss": 1.5491, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.6756552037570065, | |
| "grad_norm": 1.3804906606674194, | |
| "learning_rate": 3.246682421377931e-06, | |
| "loss": 1.5687, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 0.6786850477200425, | |
| "grad_norm": 1.6013615131378174, | |
| "learning_rate": 3.2163848997152035e-06, | |
| "loss": 1.5891, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.6817148916830783, | |
| "grad_norm": 1.6114518642425537, | |
| "learning_rate": 3.1860873780524754e-06, | |
| "loss": 1.5891, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 0.6847447356461143, | |
| "grad_norm": 1.5464235544204712, | |
| "learning_rate": 3.1557898563897478e-06, | |
| "loss": 1.5576, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.6877745796091501, | |
| "grad_norm": 1.3998115062713623, | |
| "learning_rate": 3.1254923347270193e-06, | |
| "loss": 1.543, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 0.6908044235721861, | |
| "grad_norm": 1.5496994256973267, | |
| "learning_rate": 3.0951948130642917e-06, | |
| "loss": 1.5751, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.6938342675352219, | |
| "grad_norm": 1.4767708778381348, | |
| "learning_rate": 3.0648972914015636e-06, | |
| "loss": 1.5892, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 0.6968641114982579, | |
| "grad_norm": 1.5664912462234497, | |
| "learning_rate": 3.0345997697388355e-06, | |
| "loss": 1.5674, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.6998939554612937, | |
| "grad_norm": 1.5581713914871216, | |
| "learning_rate": 3.0043022480761075e-06, | |
| "loss": 1.5157, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 0.7029237994243297, | |
| "grad_norm": 1.4770848751068115, | |
| "learning_rate": 2.97400472641338e-06, | |
| "loss": 1.5241, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.7059536433873655, | |
| "grad_norm": 1.5183898210525513, | |
| "learning_rate": 2.9437072047506514e-06, | |
| "loss": 1.5752, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 0.7089834873504015, | |
| "grad_norm": 1.3331156969070435, | |
| "learning_rate": 2.9134096830879237e-06, | |
| "loss": 1.5524, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.7120133313134374, | |
| "grad_norm": 1.3551660776138306, | |
| "learning_rate": 2.8831121614251957e-06, | |
| "loss": 1.5653, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 0.7150431752764732, | |
| "grad_norm": 1.4175201654434204, | |
| "learning_rate": 2.852814639762468e-06, | |
| "loss": 1.5538, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.7180730192395092, | |
| "grad_norm": 1.482858657836914, | |
| "learning_rate": 2.8225171180997395e-06, | |
| "loss": 1.5411, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 0.721102863202545, | |
| "grad_norm": 1.3337117433547974, | |
| "learning_rate": 2.792219596437012e-06, | |
| "loss": 1.5402, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.724132707165581, | |
| "grad_norm": 1.529326319694519, | |
| "learning_rate": 2.761922074774284e-06, | |
| "loss": 1.5357, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 0.7271625511286168, | |
| "grad_norm": 1.40908944606781, | |
| "learning_rate": 2.7316245531115558e-06, | |
| "loss": 1.521, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.7301923950916528, | |
| "grad_norm": 1.434410572052002, | |
| "learning_rate": 2.7013270314488277e-06, | |
| "loss": 1.5723, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 0.7332222390546886, | |
| "grad_norm": 1.5077331066131592, | |
| "learning_rate": 2.6710295097861e-06, | |
| "loss": 1.5979, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.7362520830177246, | |
| "grad_norm": 1.4637471437454224, | |
| "learning_rate": 2.6407319881233716e-06, | |
| "loss": 1.5465, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 0.7392819269807605, | |
| "grad_norm": 1.7819764614105225, | |
| "learning_rate": 2.6104344664606435e-06, | |
| "loss": 1.5425, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.7423117709437964, | |
| "grad_norm": 1.3142591714859009, | |
| "learning_rate": 2.580136944797916e-06, | |
| "loss": 1.5659, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 0.7453416149068323, | |
| "grad_norm": 1.4326512813568115, | |
| "learning_rate": 2.5498394231351874e-06, | |
| "loss": 1.5506, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.7483714588698682, | |
| "grad_norm": 1.3926914930343628, | |
| "learning_rate": 2.5195419014724598e-06, | |
| "loss": 1.519, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 0.7514013028329041, | |
| "grad_norm": 1.5886470079421997, | |
| "learning_rate": 2.4892443798097317e-06, | |
| "loss": 1.5323, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.75443114679594, | |
| "grad_norm": 1.6596341133117676, | |
| "learning_rate": 2.4589468581470036e-06, | |
| "loss": 1.5572, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 0.7574609907589759, | |
| "grad_norm": 1.5624167919158936, | |
| "learning_rate": 2.428649336484276e-06, | |
| "loss": 1.517, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.7604908347220118, | |
| "grad_norm": 1.5052602291107178, | |
| "learning_rate": 2.398351814821548e-06, | |
| "loss": 1.5634, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 0.7635206786850477, | |
| "grad_norm": 1.4568336009979248, | |
| "learning_rate": 2.36805429315882e-06, | |
| "loss": 1.5581, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.7665505226480837, | |
| "grad_norm": 1.4686074256896973, | |
| "learning_rate": 2.337756771496092e-06, | |
| "loss": 1.5383, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 0.7695803666111195, | |
| "grad_norm": 1.53707754611969, | |
| "learning_rate": 2.3074592498333638e-06, | |
| "loss": 1.5654, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.7726102105741555, | |
| "grad_norm": 1.5452001094818115, | |
| "learning_rate": 2.277161728170636e-06, | |
| "loss": 1.5772, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 0.7756400545371913, | |
| "grad_norm": 1.553062915802002, | |
| "learning_rate": 2.2468642065079076e-06, | |
| "loss": 1.5178, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.7786698985002273, | |
| "grad_norm": 1.328934669494629, | |
| "learning_rate": 2.2165666848451796e-06, | |
| "loss": 1.5718, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 0.7816997424632631, | |
| "grad_norm": 1.7127735614776611, | |
| "learning_rate": 2.186269163182452e-06, | |
| "loss": 1.5341, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.7847295864262991, | |
| "grad_norm": 1.728936791419983, | |
| "learning_rate": 2.155971641519724e-06, | |
| "loss": 1.5546, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 0.7877594303893349, | |
| "grad_norm": 1.6442391872406006, | |
| "learning_rate": 2.125674119856996e-06, | |
| "loss": 1.56, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.7907892743523709, | |
| "grad_norm": 1.4701004028320312, | |
| "learning_rate": 2.0953765981942678e-06, | |
| "loss": 1.5182, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 0.7938191183154067, | |
| "grad_norm": 1.2920253276824951, | |
| "learning_rate": 2.0650790765315397e-06, | |
| "loss": 1.5291, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.7968489622784427, | |
| "grad_norm": 1.3979644775390625, | |
| "learning_rate": 2.034781554868812e-06, | |
| "loss": 1.5246, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 0.7998788062414786, | |
| "grad_norm": 1.7545385360717773, | |
| "learning_rate": 2.004484033206084e-06, | |
| "loss": 1.5752, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.8029086502045145, | |
| "grad_norm": 1.347290277481079, | |
| "learning_rate": 1.974186511543356e-06, | |
| "loss": 1.5264, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 0.8059384941675504, | |
| "grad_norm": 1.4548012018203735, | |
| "learning_rate": 1.943888989880628e-06, | |
| "loss": 1.5995, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.8089683381305863, | |
| "grad_norm": 1.6475197076797485, | |
| "learning_rate": 1.9135914682179e-06, | |
| "loss": 1.5506, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 0.8119981820936222, | |
| "grad_norm": 1.497528314590454, | |
| "learning_rate": 1.883293946555172e-06, | |
| "loss": 1.5702, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.815028026056658, | |
| "grad_norm": 1.791481852531433, | |
| "learning_rate": 1.852996424892444e-06, | |
| "loss": 1.5474, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 0.818057870019694, | |
| "grad_norm": 1.5969312191009521, | |
| "learning_rate": 1.822698903229716e-06, | |
| "loss": 1.5416, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.8210877139827298, | |
| "grad_norm": 1.4239813089370728, | |
| "learning_rate": 1.792401381566988e-06, | |
| "loss": 1.5252, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 0.8241175579457658, | |
| "grad_norm": 1.579437017440796, | |
| "learning_rate": 1.76210385990426e-06, | |
| "loss": 1.5764, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.8271474019088016, | |
| "grad_norm": 1.6514719724655151, | |
| "learning_rate": 1.731806338241532e-06, | |
| "loss": 1.545, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 0.8301772458718376, | |
| "grad_norm": 1.5468313694000244, | |
| "learning_rate": 1.701508816578804e-06, | |
| "loss": 1.5377, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.8332070898348735, | |
| "grad_norm": 1.8032771348953247, | |
| "learning_rate": 1.6712112949160762e-06, | |
| "loss": 1.5183, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 0.8362369337979094, | |
| "grad_norm": 2.1406657695770264, | |
| "learning_rate": 1.6409137732533481e-06, | |
| "loss": 1.5268, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.8392667777609453, | |
| "grad_norm": 1.7589893341064453, | |
| "learning_rate": 1.61061625159062e-06, | |
| "loss": 1.5054, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 0.8422966217239812, | |
| "grad_norm": 1.619626522064209, | |
| "learning_rate": 1.5803187299278922e-06, | |
| "loss": 1.5747, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.8453264656870171, | |
| "grad_norm": 1.3544018268585205, | |
| "learning_rate": 1.5500212082651641e-06, | |
| "loss": 1.5264, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 0.848356309650053, | |
| "grad_norm": 1.3174818754196167, | |
| "learning_rate": 1.5197236866024359e-06, | |
| "loss": 1.4991, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.8513861536130889, | |
| "grad_norm": 1.3408424854278564, | |
| "learning_rate": 1.489426164939708e-06, | |
| "loss": 1.5574, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 0.8544159975761249, | |
| "grad_norm": 1.5097547769546509, | |
| "learning_rate": 1.45912864327698e-06, | |
| "loss": 1.5423, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.8574458415391607, | |
| "grad_norm": 1.5766481161117554, | |
| "learning_rate": 1.4288311216142519e-06, | |
| "loss": 1.5253, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 0.8604756855021967, | |
| "grad_norm": 1.2671705484390259, | |
| "learning_rate": 1.398533599951524e-06, | |
| "loss": 1.5421, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.8635055294652325, | |
| "grad_norm": 1.622070074081421, | |
| "learning_rate": 1.368236078288796e-06, | |
| "loss": 1.5283, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 0.8665353734282685, | |
| "grad_norm": 1.5320727825164795, | |
| "learning_rate": 1.3379385566260681e-06, | |
| "loss": 1.5386, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.8695652173913043, | |
| "grad_norm": 1.5714218616485596, | |
| "learning_rate": 1.30764103496334e-06, | |
| "loss": 1.558, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 0.8725950613543403, | |
| "grad_norm": 1.5528987646102905, | |
| "learning_rate": 1.277343513300612e-06, | |
| "loss": 1.5058, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.8756249053173761, | |
| "grad_norm": 1.4519600868225098, | |
| "learning_rate": 1.2470459916378842e-06, | |
| "loss": 1.535, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 0.8786547492804121, | |
| "grad_norm": 1.9346317052841187, | |
| "learning_rate": 1.2173544204084107e-06, | |
| "loss": 1.5321, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.881684593243448, | |
| "grad_norm": 1.4452751874923706, | |
| "learning_rate": 1.1870568987456827e-06, | |
| "loss": 1.5147, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 0.8847144372064839, | |
| "grad_norm": 1.4155831336975098, | |
| "learning_rate": 1.1567593770829546e-06, | |
| "loss": 1.5439, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.8877442811695198, | |
| "grad_norm": 1.3461506366729736, | |
| "learning_rate": 1.1264618554202268e-06, | |
| "loss": 1.517, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 0.8907741251325557, | |
| "grad_norm": 1.4958879947662354, | |
| "learning_rate": 1.0961643337574987e-06, | |
| "loss": 1.5288, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.8938039690955916, | |
| "grad_norm": 1.610373616218567, | |
| "learning_rate": 1.0658668120947706e-06, | |
| "loss": 1.5272, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 0.8968338130586275, | |
| "grad_norm": 1.5633084774017334, | |
| "learning_rate": 1.0355692904320428e-06, | |
| "loss": 1.524, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.8998636570216634, | |
| "grad_norm": 1.5080559253692627, | |
| "learning_rate": 1.0052717687693147e-06, | |
| "loss": 1.5302, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 0.9028935009846993, | |
| "grad_norm": 1.5691548585891724, | |
| "learning_rate": 9.749742471065869e-07, | |
| "loss": 1.5573, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.9059233449477352, | |
| "grad_norm": 1.44107985496521, | |
| "learning_rate": 9.446767254438588e-07, | |
| "loss": 1.5266, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 0.9089531889107711, | |
| "grad_norm": 1.5320124626159668, | |
| "learning_rate": 9.143792037811309e-07, | |
| "loss": 1.5842, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.911983032873807, | |
| "grad_norm": 1.8644306659698486, | |
| "learning_rate": 8.840816821184028e-07, | |
| "loss": 1.5547, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 0.9150128768368428, | |
| "grad_norm": 1.5941940546035767, | |
| "learning_rate": 8.537841604556748e-07, | |
| "loss": 1.5481, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.9180427207998788, | |
| "grad_norm": 1.3055516481399536, | |
| "learning_rate": 8.234866387929468e-07, | |
| "loss": 1.5227, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 0.9210725647629147, | |
| "grad_norm": 1.5031893253326416, | |
| "learning_rate": 7.931891171302187e-07, | |
| "loss": 1.5489, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.9241024087259506, | |
| "grad_norm": 1.575303554534912, | |
| "learning_rate": 7.628915954674908e-07, | |
| "loss": 1.5486, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 0.9271322526889865, | |
| "grad_norm": 1.5559568405151367, | |
| "learning_rate": 7.325940738047628e-07, | |
| "loss": 1.4914, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.9301620966520224, | |
| "grad_norm": 1.3393162488937378, | |
| "learning_rate": 7.022965521420348e-07, | |
| "loss": 1.5474, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 0.9331919406150583, | |
| "grad_norm": 1.404003381729126, | |
| "learning_rate": 6.719990304793069e-07, | |
| "loss": 1.5088, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.9362217845780942, | |
| "grad_norm": 1.5367882251739502, | |
| "learning_rate": 6.417015088165788e-07, | |
| "loss": 1.5545, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 0.9392516285411301, | |
| "grad_norm": 1.4406241178512573, | |
| "learning_rate": 6.114039871538509e-07, | |
| "loss": 1.527, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.942281472504166, | |
| "grad_norm": 1.3530008792877197, | |
| "learning_rate": 5.811064654911229e-07, | |
| "loss": 1.5504, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 0.9453113164672019, | |
| "grad_norm": 1.4533566236495972, | |
| "learning_rate": 5.508089438283949e-07, | |
| "loss": 1.518, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.9483411604302379, | |
| "grad_norm": 1.5848952531814575, | |
| "learning_rate": 5.205114221656669e-07, | |
| "loss": 1.524, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 0.9513710043932737, | |
| "grad_norm": 1.5302923917770386, | |
| "learning_rate": 4.902139005029388e-07, | |
| "loss": 1.5194, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.9544008483563097, | |
| "grad_norm": 1.381064772605896, | |
| "learning_rate": 4.599163788402109e-07, | |
| "loss": 1.5345, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 0.9574306923193455, | |
| "grad_norm": 1.5697840452194214, | |
| "learning_rate": 4.2961885717748293e-07, | |
| "loss": 1.5332, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.9604605362823815, | |
| "grad_norm": 1.5636427402496338, | |
| "learning_rate": 3.99321335514755e-07, | |
| "loss": 1.5104, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 0.9634903802454173, | |
| "grad_norm": 1.45536208152771, | |
| "learning_rate": 3.690238138520269e-07, | |
| "loss": 1.5505, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.9665202242084533, | |
| "grad_norm": 1.49254310131073, | |
| "learning_rate": 3.387262921892989e-07, | |
| "loss": 1.512, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 0.9695500681714891, | |
| "grad_norm": 1.603808045387268, | |
| "learning_rate": 3.0842877052657095e-07, | |
| "loss": 1.5309, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.9725799121345251, | |
| "grad_norm": 1.5627918243408203, | |
| "learning_rate": 2.7813124886384294e-07, | |
| "loss": 1.5498, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 0.975609756097561, | |
| "grad_norm": 1.4155369997024536, | |
| "learning_rate": 2.47833727201115e-07, | |
| "loss": 1.567, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.9786396000605969, | |
| "grad_norm": 1.3787842988967896, | |
| "learning_rate": 2.1753620553838698e-07, | |
| "loss": 1.5511, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 0.9816694440236328, | |
| "grad_norm": 1.3937129974365234, | |
| "learning_rate": 1.87238683875659e-07, | |
| "loss": 1.4971, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.9846992879866687, | |
| "grad_norm": 1.588771104812622, | |
| "learning_rate": 1.5694116221293101e-07, | |
| "loss": 1.5384, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 0.9877291319497046, | |
| "grad_norm": 1.402185320854187, | |
| "learning_rate": 1.26643640550203e-07, | |
| "loss": 1.517, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.9907589759127405, | |
| "grad_norm": 1.557084321975708, | |
| "learning_rate": 9.634611888747501e-08, | |
| "loss": 1.5527, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 0.9937888198757764, | |
| "grad_norm": 1.4594271183013916, | |
| "learning_rate": 6.604859722474703e-08, | |
| "loss": 1.5216, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.9968186638388123, | |
| "grad_norm": 1.467354655265808, | |
| "learning_rate": 3.5751075562019034e-08, | |
| "loss": 1.5684, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 0.9998485078018482, | |
| "grad_norm": 1.326578140258789, | |
| "learning_rate": 5.453553899291039e-09, | |
| "loss": 1.5009, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 1.4647282361984253, | |
| "eval_runtime": 301.5458, | |
| "eval_samples_per_second": 48.646, | |
| "eval_steps_per_second": 6.082, | |
| "step": 16503 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 16503, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.4495512772608e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |